2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.7"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 4
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short, these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. Storing 32 bit lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specify the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte; most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0
153 #define REDIS_RDB_14BITLEN 1
154 #define REDIS_RDB_32BITLEN 2
155 #define REDIS_RDB_ENCVAL 3
156 #define REDIS_RDB_LENERR UINT_MAX
158 /* When the length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * according to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF (see lzf.h) */
166 /* Virtual memory object->where field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
222 /* Anti-warning macro... */
223 #define REDIS_NOTUSED(V) ((void) V)
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way:
 * when the expression _e evaluates to false, _redisAssert() is called with
 * the stringified expression, the current source file name and the current
 * line number, and then the process is terminated with _exit(1).
 * When _e evaluates to true the macro expands to a no-op. */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure */
246 struct redisObjectVM
{
247 off_t page
; /* the page at which the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
;
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
337 /* Global server state structure */
342 dict
*sharingpool
; /* Pool used for object sharing */
343 unsigned int sharingpoolsize
;
344 long long dirty
; /* changes to DB from the last save */
346 list
*slaves
, *monitors
;
347 char neterr
[ANET_ERR_LEN
];
349 int cronloops
; /* number of times the cron function run */
350 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
351 time_t lastsave
; /* Unix time of last successful save */
352 /* Fields used only for stats */
353 time_t stat_starttime
; /* server start time */
354 long long stat_numcommands
; /* number of processed commands */
355 long long stat_numconnections
; /* number of connections received */
356 long long stat_expiredkeys
; /* number of expired keys */
369 pid_t bgsavechildpid
;
370 pid_t bgrewritechildpid
;
371 sds bgrewritebuf
; /* buffer kept by parent during append only rewrite */
372 struct saveparam
*saveparams
;
377 char *appendfilename
;
381 /* Replication related */
386 redisClient
*master
; /* client that is master for this slave */
388 unsigned int maxclients
;
389 unsigned long long maxmemory
;
390 unsigned int blpop_blocked_clients
;
391 unsigned int vm_blocked_clients
;
392 /* Sort parameters - qsort_r() is only available under BSD so we
393 * have to keep this state global, in order to pass it to sortCompare() */
397 /* Virtual memory configuration */
402 unsigned long long vm_max_memory
;
404 size_t hash_max_zipmap_entries
;
405 size_t hash_max_zipmap_value
;
406 /* Virtual memory state */
409 off_t vm_next_page
; /* Next probably empty page */
410 off_t vm_near_pages
; /* Number of pages allocated sequentially */
411 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
412 time_t unixtime
; /* Unix time sampled every second. */
413 /* Virtual memory I/O threads stuff */
414 /* An I/O thread processes an element taken from the io_newjobs queue and
415 * puts the result of the operation in the io_processed list. While the
416 * job is being processed, it's put on the io_processing queue. */
417 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
418 list
*io_processing
; /* List of VM I/O jobs being processed */
419 list
*io_processed
; /* List of VM I/O jobs already processed */
420 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
421 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
422 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
423 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
424 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
425 int io_active_threads
; /* Number of running I/O threads */
426 int vm_max_threads
; /* Max number of I/O threads running at the same time */
427 /* Our main thread is blocked on the event loop, looking for sockets ready
428 * to be read or written, so when a threaded I/O operation is ready to be
429 * processed by the main thread, the I/O thread will use a unix pipe to
430 * awake the main thread. The following are the two pipe FDs. */
431 int io_ready_pipe_read
;
432 int io_ready_pipe_write
;
433 /* Virtual memory stats */
434 unsigned long long vm_stats_used_pages
;
435 unsigned long long vm_stats_swapped_objects
;
436 unsigned long long vm_stats_swapouts
;
437 unsigned long long vm_stats_swapins
;
441 typedef void redisCommandProc(redisClient
*c
);
442 struct redisCommand
{
444 redisCommandProc
*proc
;
447 /* Use a function to determine which keys need to be loaded
448 * in the background prior to executing this command. Takes precedence
449 * over vm_firstkey and others, ignored when NULL */
450 redisCommandProc
*vm_preload_proc
;
451 /* What keys should be loaded in background when calling this command? */
452 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
453 int vm_lastkey
; /* The last argument that's a key */
454 int vm_keystep
; /* The step between first and last key */
457 struct redisFunctionSym
{
459 unsigned long pointer
;
462 typedef struct _redisSortObject
{
470 typedef struct _redisSortOperation
{
473 } redisSortOperation
;
475 /* ZSETs use a specialized version of Skiplists */
477 typedef struct zskiplistNode
{
478 struct zskiplistNode
**forward
;
479 struct zskiplistNode
*backward
;
485 typedef struct zskiplist
{
486 struct zskiplistNode
*header
, *tail
;
487 unsigned long length
;
491 typedef struct zset
{
496 /* Our shared "common" objects */
498 struct sharedObjectsStruct
{
499 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
500 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
501 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
502 *outofrangeerr
, *plus
,
503 *select0
, *select1
, *select2
, *select3
, *select4
,
504 *select5
, *select6
, *select7
, *select8
, *select9
;
507 /* Global vars that are actually used as constants. The following double
508 * values are used for double on-disk serialization, and are initialized
509 * at runtime to avoid strange compiler optimizations. */
511 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
513 /* VM threaded I/O request message */
514 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
515 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
516 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
517 typedef struct iojob
{
518 int type
; /* Request type, REDIS_IOJOB_* */
519 redisDb
*db
;/* Redis database */
520 robj
*key
; /* This I/O request is about swapping this key */
521 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
522 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
523 off_t page
; /* Swap page where to read/write the object */
524 off_t pages
; /* Swap pages needed to save the object. PREPARE_SWAP return val */
525 int canceled
; /* True if this command was canceled by blocking side of VM */
526 pthread_t thread
; /* ID of the thread processing this entry */
529 /*================================ Prototypes =============================== */
531 static void freeStringObject(robj
*o
);
532 static void freeListObject(robj
*o
);
533 static void freeSetObject(robj
*o
);
534 static void decrRefCount(void *o
);
535 static robj
*createObject(int type
, void *ptr
);
536 static void freeClient(redisClient
*c
);
537 static int rdbLoad(char *filename
);
538 static void addReply(redisClient
*c
, robj
*obj
);
539 static void addReplySds(redisClient
*c
, sds s
);
540 static void incrRefCount(robj
*o
);
541 static int rdbSaveBackground(char *filename
);
542 static robj
*createStringObject(char *ptr
, size_t len
);
543 static robj
*dupStringObject(robj
*o
);
544 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
545 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
546 static int syncWithMaster(void);
547 static robj
*tryObjectSharing(robj
*o
);
548 static int tryObjectEncoding(robj
*o
);
549 static robj
*getDecodedObject(robj
*o
);
550 static int removeExpire(redisDb
*db
, robj
*key
);
551 static int expireIfNeeded(redisDb
*db
, robj
*key
);
552 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
553 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
554 static int deleteKey(redisDb
*db
, robj
*key
);
555 static time_t getExpire(redisDb
*db
, robj
*key
);
556 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
557 static void updateSlavesWaitingBgsave(int bgsaveerr
);
558 static void freeMemoryIfNeeded(void);
559 static int processCommand(redisClient
*c
);
560 static void setupSigSegvAction(void);
561 static void rdbRemoveTempFile(pid_t childpid
);
562 static void aofRemoveTempFile(pid_t childpid
);
563 static size_t stringObjectLen(robj
*o
);
564 static void processInputBuffer(redisClient
*c
);
565 static zskiplist
*zslCreate(void);
566 static void zslFree(zskiplist
*zsl
);
567 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
568 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
569 static void initClientMultiState(redisClient
*c
);
570 static void freeClientMultiState(redisClient
*c
);
571 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
572 static void unblockClientWaitingData(redisClient
*c
);
573 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
574 static void vmInit(void);
575 static void vmMarkPagesFree(off_t page
, off_t count
);
576 static robj
*vmLoadObject(robj
*key
);
577 static robj
*vmPreviewObject(robj
*key
);
578 static int vmSwapOneObjectBlocking(void);
579 static int vmSwapOneObjectThreaded(void);
580 static int vmCanSwapOut(void);
581 static int tryFreeOneObjectFromFreelist(void);
582 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
583 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
584 static void vmCancelThreadedIOJob(robj
*o
);
585 static void lockThreadedIO(void);
586 static void unlockThreadedIO(void);
587 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
588 static void freeIOJob(iojob
*j
);
589 static void queueIOJob(iojob
*j
);
590 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
591 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
592 static void waitEmptyIOJobsQueue(void);
593 static void vmReopenSwapFile(void);
594 static int vmFreePage(off_t page
);
595 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
596 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
597 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
598 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
599 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static struct redisCommand
*lookupCommand(char *name
);
601 static void call(redisClient
*c
, struct redisCommand
*cmd
);
602 static void resetClient(redisClient
*c
);
603 static void convertToRealHash(robj
*o
);
605 static void authCommand(redisClient
*c
);
606 static void pingCommand(redisClient
*c
);
607 static void echoCommand(redisClient
*c
);
608 static void setCommand(redisClient
*c
);
609 static void setnxCommand(redisClient
*c
);
610 static void getCommand(redisClient
*c
);
611 static void delCommand(redisClient
*c
);
612 static void existsCommand(redisClient
*c
);
613 static void incrCommand(redisClient
*c
);
614 static void decrCommand(redisClient
*c
);
615 static void incrbyCommand(redisClient
*c
);
616 static void decrbyCommand(redisClient
*c
);
617 static void selectCommand(redisClient
*c
);
618 static void randomkeyCommand(redisClient
*c
);
619 static void keysCommand(redisClient
*c
);
620 static void dbsizeCommand(redisClient
*c
);
621 static void lastsaveCommand(redisClient
*c
);
622 static void saveCommand(redisClient
*c
);
623 static void bgsaveCommand(redisClient
*c
);
624 static void bgrewriteaofCommand(redisClient
*c
);
625 static void shutdownCommand(redisClient
*c
);
626 static void moveCommand(redisClient
*c
);
627 static void renameCommand(redisClient
*c
);
628 static void renamenxCommand(redisClient
*c
);
629 static void lpushCommand(redisClient
*c
);
630 static void rpushCommand(redisClient
*c
);
631 static void lpopCommand(redisClient
*c
);
632 static void rpopCommand(redisClient
*c
);
633 static void llenCommand(redisClient
*c
);
634 static void lindexCommand(redisClient
*c
);
635 static void lrangeCommand(redisClient
*c
);
636 static void ltrimCommand(redisClient
*c
);
637 static void typeCommand(redisClient
*c
);
638 static void lsetCommand(redisClient
*c
);
639 static void saddCommand(redisClient
*c
);
640 static void sremCommand(redisClient
*c
);
641 static void smoveCommand(redisClient
*c
);
642 static void sismemberCommand(redisClient
*c
);
643 static void scardCommand(redisClient
*c
);
644 static void spopCommand(redisClient
*c
);
645 static void srandmemberCommand(redisClient
*c
);
646 static void sinterCommand(redisClient
*c
);
647 static void sinterstoreCommand(redisClient
*c
);
648 static void sunionCommand(redisClient
*c
);
649 static void sunionstoreCommand(redisClient
*c
);
650 static void sdiffCommand(redisClient
*c
);
651 static void sdiffstoreCommand(redisClient
*c
);
652 static void syncCommand(redisClient
*c
);
653 static void flushdbCommand(redisClient
*c
);
654 static void flushallCommand(redisClient
*c
);
655 static void sortCommand(redisClient
*c
);
656 static void lremCommand(redisClient
*c
);
657 static void rpoplpushcommand(redisClient
*c
); /* NOTE(review): spelled with a lowercase "command", unlike the sibling *Command handlers; the "rpoplpush" command table entry uses this same spelling, so both must change together if renamed. */
658 static void infoCommand(redisClient
*c
);
659 static void mgetCommand(redisClient
*c
);
660 static void monitorCommand(redisClient
*c
);
661 static void expireCommand(redisClient
*c
);
662 static void expireatCommand(redisClient
*c
);
663 static void getsetCommand(redisClient
*c
);
664 static void ttlCommand(redisClient
*c
);
665 static void slaveofCommand(redisClient
*c
);
666 static void debugCommand(redisClient
*c
);
667 static void msetCommand(redisClient
*c
);
668 static void msetnxCommand(redisClient
*c
);
669 static void zaddCommand(redisClient
*c
);
670 static void zincrbyCommand(redisClient
*c
);
671 static void zrangeCommand(redisClient
*c
);
672 static void zrangebyscoreCommand(redisClient
*c
);
673 static void zcountCommand(redisClient
*c
);
674 static void zrevrangeCommand(redisClient
*c
);
675 static void zcardCommand(redisClient
*c
);
676 static void zremCommand(redisClient
*c
);
677 static void zscoreCommand(redisClient
*c
);
678 static void zremrangebyscoreCommand(redisClient
*c
);
679 static void multiCommand(redisClient
*c
);
680 static void execCommand(redisClient
*c
);
681 static void discardCommand(redisClient
*c
);
682 static void blpopCommand(redisClient
*c
);
683 static void brpopCommand(redisClient
*c
);
684 static void appendCommand(redisClient
*c
);
685 static void substrCommand(redisClient
*c
);
686 static void zrankCommand(redisClient
*c
);
687 static void zrevrankCommand(redisClient
*c
);
688 static void hsetCommand(redisClient
*c
);
689 static void hgetCommand(redisClient
*c
);
690 static void hdelCommand(redisClient
*c
);
691 static void hlenCommand(redisClient
*c
);
692 static void zremrangebyrankCommand(redisClient
*c
);
693 static void zunionCommand(redisClient
*c
);
694 static void zinterCommand(redisClient
*c
);
695 static void hkeysCommand(redisClient
*c
);
696 static void hvalsCommand(redisClient
*c
);
697 static void hgetallCommand(redisClient
*c
);
698 static void hexistsCommand(redisClient
*c
);
699 static void configCommand(redisClient
*c
);
701 /*================================= Globals ================================= */
704 static struct redisServer server
; /* server global state */
705 static struct redisCommand cmdTable
[] = {
706 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
707 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
708 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
709 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
710 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
711 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
712 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
713 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
714 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
715 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
716 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
717 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
718 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
719 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
720 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
721 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
723 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
725 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
726 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
727 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
728 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
729 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
730 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
731 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
732 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
733 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
737 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
738 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
739 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
740 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
741 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
742 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
745 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
746 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
749 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
750 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
758 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
759 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
760 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
761 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
766 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
770 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
771 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
772 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
773 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
777 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
778 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
779 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
780 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
781 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
782 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
783 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
785 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
786 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
787 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
788 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
791 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
795 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
802 {NULL
,NULL
,0,0,NULL
,0,0,0}
807 /*============================ Utility functions ============================ */
809 /* Glob-style pattern matching. */
810 static int stringmatchlen(const char *pattern
, int patternLen
,
811 const char *string
, int stringLen
, int nocase
)
816 while (pattern
[1] == '*') {
821 return 1; /* match */
823 if (stringmatchlen(pattern
+1, patternLen
-1,
824 string
, stringLen
, nocase
))
825 return 1; /* match */
829 return 0; /* no match */
833 return 0; /* no match */
843 not = pattern
[0] == '^';
850 if (pattern
[0] == '\\') {
853 if (pattern
[0] == string
[0])
855 } else if (pattern
[0] == ']') {
857 } else if (patternLen
== 0) {
861 } else if (pattern
[1] == '-' && patternLen
>= 3) {
862 int start
= pattern
[0];
863 int end
= pattern
[2];
871 start
= tolower(start
);
877 if (c
>= start
&& c
<= end
)
881 if (pattern
[0] == string
[0])
884 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
894 return 0; /* no match */
900 if (patternLen
>= 2) {
907 if (pattern
[0] != string
[0])
908 return 0; /* no match */
910 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
911 return 0; /* no match */
919 if (stringLen
== 0) {
920 while(*pattern
== '*') {
927 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs.
 * The length of both 'pattern' and 'string' is computed with strlen() and
 * 'nocase' is passed through unchanged. The return value is whatever
 * stringmatchlen() returns for those arguments. */
932 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
933 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
936 static void redisLog(int level
, const char *fmt
, ...) {
940 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
944 if (level
>= server
.verbosity
) {
950 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
951 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
952 vfprintf(fp
, fmt
, ap
);
958 if (server
.logfile
) fclose(fp
);
961 /*====================== Hash table type implementation ==================== */
963 /* This is a hash table type that uses the SDS dynamic strings library as
964 * keys and redis objects as values (objects can hold SDS strings,
967 static void dictVanillaFree(void *privdata
, void *val
)
969 DICT_NOTUSED(privdata
);
973 static void dictListDestructor(void *privdata
, void *val
)
975 DICT_NOTUSED(privdata
);
976 listRelease((list
*)val
);
/* Key comparison callback for keys that are SDS strings.
 * 'privdata' is unused. The two keys are compared as binary buffers:
 * returns 1 when they have the same sdslen() and identical bytes,
 * 0 otherwise. */
979 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
983 DICT_NOTUSED(privdata
);
985 l1
= sdslen((sds
)key1
);
986 l2
= sdslen((sds
)key2
);
/* Keys of different length can never match: skip the memcmp(). */
987 if (l1
!= l2
) return 0;
988 return memcmp(key1
, key2
, l1
) == 0;
/* Destructor callback for dict entries holding redis objects.
 * 'privdata' is unused. A NULL 'val' is accepted and ignored.
 * NOTE(review): the handling of non-NULL values is not visible in this
 * excerpt -- confirm against the full function body. */
991 static void dictRedisObjectDestructor(void *privdata
, void *val
)
993 DICT_NOTUSED(privdata
);
995 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
999 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1002 const robj
*o1
= key1
, *o2
= key2
;
1003 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for keys that are redis objects.
 * 'key' is treated as a robj whose ptr field is an SDS string: the hash
 * is dictGenHashFunction() computed over that buffer for sdslen() bytes. */
1006 static unsigned int dictObjHash(const void *key
) {
1007 const robj
*o
= key
;
1008 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1011 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1014 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1017 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1018 o2
->encoding
== REDIS_ENCODING_INT
&&
1019 o1
->ptr
== o2
->ptr
) return 1;
1021 o1
= getDecodedObject(o1
);
1022 o2
= getDecodedObject(o2
);
1023 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1029 static unsigned int dictEncObjHash(const void *key
) {
1030 robj
*o
= (robj
*) key
;
1032 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1033 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1035 if (o
->encoding
== REDIS_ENCODING_INT
) {
1039 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1040 return dictGenHashFunction((unsigned char*)buf
, len
);
1044 o
= getDecodedObject(o
);
1045 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1052 /* Sets type and expires */
1053 static dictType setDictType
= {
1054 dictEncObjHash
, /* hash function */
1057 dictEncObjKeyCompare
, /* key compare */
1058 dictRedisObjectDestructor
, /* key destructor */
1059 NULL
/* val destructor */
1062 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1063 static dictType zsetDictType
= {
1064 dictEncObjHash
, /* hash function */
1067 dictEncObjKeyCompare
, /* key compare */
1068 dictRedisObjectDestructor
, /* key destructor */
1069 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1073 static dictType dbDictType
= {
1074 dictObjHash
, /* hash function */
1077 dictObjKeyCompare
, /* key compare */
1078 dictRedisObjectDestructor
, /* key destructor */
1079 dictRedisObjectDestructor
/* val destructor */
1083 static dictType keyptrDictType
= {
1084 dictObjHash
, /* hash function */
1087 dictObjKeyCompare
, /* key compare */
1088 dictRedisObjectDestructor
, /* key destructor */
1089 NULL
/* val destructor */
1092 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1093 static dictType hashDictType
= {
1094 dictEncObjHash
, /* hash function */
1097 dictEncObjKeyCompare
, /* key compare */
1098 dictRedisObjectDestructor
, /* key destructor */
1099 dictRedisObjectDestructor
/* val destructor */
1102 /* Keylist hash table type has unencoded redis objects as keys and
1103 * lists as values. It's used for blocking operations (BLPOP) and to
1104 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1105 static dictType keylistDictType
= {
1106 dictObjHash
, /* hash function */
1109 dictObjKeyCompare
, /* key compare */
1110 dictRedisObjectDestructor
, /* key destructor */
1111 dictListDestructor
/* val destructor */
1114 static void version();
1116 /* ========================= Random utility functions ======================= */
1118 /* Redis generally does not try to recover from out of memory conditions
1119 * when allocating objects or strings, it is not clear if it will be possible
1120 * to report this condition to the client since the networking layer itself
1121 * is based on heap allocation for send buffers, so we simply abort.
1122 * At least the code will be simpler to read... */
1123 static void oom(const char *msg
) {
1124 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1129 /* ====================== Redis server networking stuff ===================== */
1130 static void closeTimedoutClients(void) {
1133 time_t now
= time(NULL
);
1136 listRewind(server
.clients
,&li
);
1137 while ((ln
= listNext(&li
)) != NULL
) {
1138 c
= listNodeValue(ln
);
1139 if (server
.maxidletime
&&
1140 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1141 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1142 (now
- c
->lastinteraction
> server
.maxidletime
))
1144 redisLog(REDIS_VERBOSE
,"Closing idle client");
1146 } else if (c
->flags
& REDIS_BLOCKED
) {
1147 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1148 addReply(c
,shared
.nullmultibulk
);
1149 unblockClientWaitingData(c
);
/* Tell whether the hash table 'dict' is sparse enough to be worth shrinking.
 * Returns non-zero only when all of the following hold:
 *  - the table has slots and holds at least one element,
 *  - it has more than DICT_HT_INITIAL_SIZE slots,
 *  - the fill percentage (used*100/size) is below REDIS_HT_MINFILL.
 * Returns 0 otherwise. */
1155 static int htNeedsResize(dict
*dict
) {
1156 long long size
, used
;
1158 size
= dictSlots(dict
);
1159 used
= dictSize(dict
);
/* 'size' is checked for non-zero first, so the division below is safe. */
1160 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1161 (used
*100/size
< REDIS_HT_MINFILL
));
1164 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1165 * we resize the hash table to save memory */
1166 static void tryResizeHashTables(void) {
1169 for (j
= 0; j
< server
.dbnum
; j
++) {
1170 if (htNeedsResize(server
.db
[j
].dict
)) {
1171 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1172 dictResize(server
.db
[j
].dict
);
1173 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1175 if (htNeedsResize(server
.db
[j
].expires
))
1176 dictResize(server
.db
[j
].expires
);
1180 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1181 void backgroundSaveDoneHandler(int statloc
) {
1182 int exitcode
= WEXITSTATUS(statloc
);
1183 int bysignal
= WIFSIGNALED(statloc
);
1185 if (!bysignal
&& exitcode
== 0) {
1186 redisLog(REDIS_NOTICE
,
1187 "Background saving terminated with success");
1189 server
.lastsave
= time(NULL
);
1190 } else if (!bysignal
&& exitcode
!= 0) {
1191 redisLog(REDIS_WARNING
, "Background saving error");
1193 redisLog(REDIS_WARNING
,
1194 "Background saving terminated by signal");
1195 rdbRemoveTempFile(server
.bgsavechildpid
);
1197 server
.bgsavechildpid
= -1;
1198 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1199 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1200 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1203 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1205 void backgroundRewriteDoneHandler(int statloc
) {
1206 int exitcode
= WEXITSTATUS(statloc
);
1207 int bysignal
= WIFSIGNALED(statloc
);
1209 if (!bysignal
&& exitcode
== 0) {
1213 redisLog(REDIS_NOTICE
,
1214 "Background append only file rewriting terminated with success");
1215 /* Now it's time to flush the differences accumulated by the parent */
1216 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1217 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1219 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1222 /* Flush our data... */
1223 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1224 (signed) sdslen(server
.bgrewritebuf
)) {
1225 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1229 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1230 /* Now our work is to rename the temp file into the stable file. And
1231 * switch the file descriptor used by the server for append only. */
1232 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1233 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1237 /* Mission completed... almost */
1238 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1239 if (server
.appendfd
!= -1) {
1240 /* If append only is actually enabled... */
1241 close(server
.appendfd
);
1242 server
.appendfd
= fd
;
1244 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1245 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1247 /* If append only is disabled we just generate a dump in this
1248 * format. Why not? */
1251 } else if (!bysignal
&& exitcode
!= 0) {
1252 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1254 redisLog(REDIS_WARNING
,
1255 "Background append only file rewriting terminated by signal");
1258 sdsfree(server
.bgrewritebuf
);
1259 server
.bgrewritebuf
= sdsempty();
1260 aofRemoveTempFile(server
.bgrewritechildpid
);
1261 server
.bgrewritechildpid
= -1;
1264 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1265 int j
, loops
= server
.cronloops
++;
1266 REDIS_NOTUSED(eventLoop
);
1268 REDIS_NOTUSED(clientData
);
1270 /* We take a cached value of the unix time in the global state because
1271 * with virtual memory and aging we need to store the current time
1272 * in objects at every object access, and accuracy is not needed.
1273 * Accessing a global var is faster than calling time(NULL) */
1274 server
.unixtime
= time(NULL
);
1276 /* Show some info about non-empty databases */
1277 for (j
= 0; j
< server
.dbnum
; j
++) {
1278 long long size
, used
, vkeys
;
1280 size
= dictSlots(server
.db
[j
].dict
);
1281 used
= dictSize(server
.db
[j
].dict
);
1282 vkeys
= dictSize(server
.db
[j
].expires
);
1283 if (!(loops
% 50) && (used
|| vkeys
)) {
1284 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1285 /* dictPrintStats(server.dict); */
1289 /* We don't want to resize the hash tables while a background saving
1290 * is in progress: the saving child is created using fork() that is
1291 * implemented with a copy-on-write semantic in most modern systems, so
1292 * if we resize the HT while there is the saving child at work actually
1293 * a lot of memory movements in the parent will cause a lot of pages
1295 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1297 /* Show information about connected clients */
1298 if (!(loops
% 50)) {
1299 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1300 listLength(server
.clients
)-listLength(server
.slaves
),
1301 listLength(server
.slaves
),
1302 zmalloc_used_memory(),
1303 dictSize(server
.sharingpool
));
1306 /* Close connections of timedout clients */
1307 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1308 closeTimedoutClients();
1310 /* Check if a background saving or AOF rewrite in progress terminated */
1311 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1315 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1316 if (pid
== server
.bgsavechildpid
) {
1317 backgroundSaveDoneHandler(statloc
);
1319 backgroundRewriteDoneHandler(statloc
);
1323 /* If there is not a background saving in progress check if
1324 * we have to save now */
1325 time_t now
= time(NULL
);
1326 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1327 struct saveparam
*sp
= server
.saveparams
+j
;
1329 if (server
.dirty
>= sp
->changes
&&
1330 now
-server
.lastsave
> sp
->seconds
) {
1331 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1332 sp
->changes
, sp
->seconds
);
1333 rdbSaveBackground(server
.dbfilename
);
1339 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1340 * will use few CPU cycles if there are few expiring keys, otherwise
1341 * it will get more aggressive to avoid that too much memory is used by
1342 * keys that can be removed from the keyspace. */
1343 for (j
= 0; j
< server
.dbnum
; j
++) {
1345 redisDb
*db
= server
.db
+j
;
1347 /* Continue to expire if at the end of the cycle more than 25%
1348 * of the keys were expired. */
1350 long num
= dictSize(db
->expires
);
1351 time_t now
= time(NULL
);
1354 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1355 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1360 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1361 t
= (time_t) dictGetEntryVal(de
);
1363 deleteKey(db
,dictGetEntryKey(de
));
1365 server
.stat_expiredkeys
++;
1368 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1371 /* Swap a few keys on disk if we are over the memory limit and VM
1372 * is enabled. Try to free objects from the free list first. */
1373 if (vmCanSwapOut()) {
1374 while (server
.vm_enabled
&& zmalloc_used_memory() >
1375 server
.vm_max_memory
)
1379 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1380 retval
= (server
.vm_max_threads
== 0) ?
1381 vmSwapOneObjectBlocking() :
1382 vmSwapOneObjectThreaded();
1383 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1384 zmalloc_used_memory() >
1385 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1387 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1389 /* Note that when using threaded I/O we free just one object,
1390 * because when the I/O thread in charge of swapping this
1391 * object out finishes, the handler of completed jobs
1392 * will try to swap more objects if we are still out of memory. */
1393 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1397 /* Check if we should connect to a MASTER */
1398 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1399 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1400 if (syncWithMaster() == REDIS_OK
) {
1401 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1407 /* This function gets called every time Redis is entering the
1408 * main loop of the event driven library, that is, before sleeping
1409 * for ready file descriptors. */
1410 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1411 REDIS_NOTUSED(eventLoop
);
1413 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1417 listRewind(server
.io_ready_clients
,&li
);
1418 while((ln
= listNext(&li
))) {
1419 redisClient
*c
= ln
->value
;
1420 struct redisCommand
*cmd
;
1422 /* Resume the client. */
1423 listDelNode(server
.io_ready_clients
,ln
);
1424 c
->flags
&= (~REDIS_IO_WAIT
);
1425 server
.vm_blocked_clients
--;
1426 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1427 readQueryFromClient
, c
);
1428 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1429 assert(cmd
!= NULL
);
1432 /* There may be more data to process in the input buffer. */
1433 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1434 processInputBuffer(c
);
1439 static void createSharedObjects(void) {
1440 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1441 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1442 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1443 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1444 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1445 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1446 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1447 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1448 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1449 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1450 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1451 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1452 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1453 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1454 "-ERR no such key\r\n"));
1455 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1456 "-ERR syntax error\r\n"));
1457 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1458 "-ERR source and destination objects are the same\r\n"));
1459 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1460 "-ERR index out of range\r\n"));
1461 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1462 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1463 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1464 shared
.select0
= createStringObject("select 0\r\n",10);
1465 shared
.select1
= createStringObject("select 1\r\n",10);
1466 shared
.select2
= createStringObject("select 2\r\n",10);
1467 shared
.select3
= createStringObject("select 3\r\n",10);
1468 shared
.select4
= createStringObject("select 4\r\n",10);
1469 shared
.select5
= createStringObject("select 5\r\n",10);
1470 shared
.select6
= createStringObject("select 6\r\n",10);
1471 shared
.select7
= createStringObject("select 7\r\n",10);
1472 shared
.select8
= createStringObject("select 8\r\n",10);
1473 shared
.select9
= createStringObject("select 9\r\n",10);
/* Append one automatic-save rule to the server configuration.
 * The server.saveparams array is grown by one element with zrealloc(),
 * the new last slot is filled with 'seconds' and 'changes', and
 * server.saveparamslen is incremented. */
1476 static void appendServerSaveParams(time_t seconds
, int changes
) {
1477 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
/* saveparamslen still holds the old count, i.e. the index of the new slot. */
1478 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1479 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1480 server
.saveparamslen
++;
/* Drop every configured automatic-save rule: the server.saveparams array
 * is released with zfree(), the pointer is reset to NULL and
 * server.saveparamslen is set back to 0. */
1483 static void resetServerSaveParams() {
1484 zfree(server
.saveparams
);
1485 server
.saveparams
= NULL
;
1486 server
.saveparamslen
= 0;
1489 static void initServerConfig() {
1490 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1491 server
.port
= REDIS_SERVERPORT
;
1492 server
.verbosity
= REDIS_VERBOSE
;
1493 server
.maxidletime
= REDIS_MAXIDLETIME
;
1494 server
.saveparams
= NULL
;
1495 server
.logfile
= NULL
; /* NULL = log on standard output */
1496 server
.bindaddr
= NULL
;
1497 server
.glueoutputbuf
= 1;
1498 server
.daemonize
= 0;
1499 server
.appendonly
= 0;
1500 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1501 server
.lastfsync
= time(NULL
);
1502 server
.appendfd
= -1;
1503 server
.appendseldb
= -1; /* Make sure the first time will not match */
1504 server
.pidfile
= zstrdup("/var/run/redis.pid");
1505 server
.dbfilename
= zstrdup("dump.rdb");
1506 server
.appendfilename
= zstrdup("appendonly.aof");
1507 server
.requirepass
= NULL
;
1508 server
.shareobjects
= 0;
1509 server
.rdbcompression
= 1;
1510 server
.sharingpoolsize
= 1024;
1511 server
.maxclients
= 0;
1512 server
.blpop_blocked_clients
= 0;
1513 server
.maxmemory
= 0;
1514 server
.vm_enabled
= 0;
1515 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1516 server
.vm_page_size
= 256; /* 256 bytes per page */
1517 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1518 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1519 server
.vm_max_threads
= 4;
1520 server
.vm_blocked_clients
= 0;
1521 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1522 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1524 resetServerSaveParams();
1526 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1527 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1528 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1529 /* Replication related */
1531 server
.masterauth
= NULL
;
1532 server
.masterhost
= NULL
;
1533 server
.masterport
= 6379;
1534 server
.master
= NULL
;
1535 server
.replstate
= REDIS_REPL_NONE
;
1537 /* Double constants initialization */
1539 R_PosInf
= 1.0/R_Zero
;
1540 R_NegInf
= -1.0/R_Zero
;
1541 R_Nan
= R_Zero
/R_Zero
;
1544 static void initServer() {
1547 signal(SIGHUP
, SIG_IGN
);
1548 signal(SIGPIPE
, SIG_IGN
);
1549 setupSigSegvAction();
1551 server
.devnull
= fopen("/dev/null","w");
1552 if (server
.devnull
== NULL
) {
1553 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1556 server
.clients
= listCreate();
1557 server
.slaves
= listCreate();
1558 server
.monitors
= listCreate();
1559 server
.objfreelist
= listCreate();
1560 createSharedObjects();
1561 server
.el
= aeCreateEventLoop();
1562 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1563 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1564 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1565 if (server
.fd
== -1) {
1566 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1569 for (j
= 0; j
< server
.dbnum
; j
++) {
1570 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1571 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1572 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1573 if (server
.vm_enabled
)
1574 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1575 server
.db
[j
].id
= j
;
1577 server
.cronloops
= 0;
1578 server
.bgsavechildpid
= -1;
1579 server
.bgrewritechildpid
= -1;
1580 server
.bgrewritebuf
= sdsempty();
1581 server
.lastsave
= time(NULL
);
1583 server
.stat_numcommands
= 0;
1584 server
.stat_numconnections
= 0;
1585 server
.stat_expiredkeys
= 0;
1586 server
.stat_starttime
= time(NULL
);
1587 server
.unixtime
= time(NULL
);
1588 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1589 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1590 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1592 if (server
.appendonly
) {
1593 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1594 if (server
.appendfd
== -1) {
1595 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1601 if (server
.vm_enabled
) vmInit();
1604 /* Empty the whole database */
1605 static long long emptyDb() {
1607 long long removed
= 0;
1609 for (j
= 0; j
< server
.dbnum
; j
++) {
1610 removed
+= dictSize(server
.db
[j
].dict
);
1611 dictEmpty(server
.db
[j
].dict
);
1612 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes"/"no" configuration argument into an integer.
 * The comparison is case-insensitive (strcasecmp): returns 1 for "yes"
 * and 0 for "no".
 * NOTE(review): the return value for any other input is not visible in
 * this excerpt; callers in loadServerConfig() test the result against -1. */
1617 static int yesnotoi(char *s
) {
1618 if (!strcasecmp(s
,"yes")) return 1;
1619 else if (!strcasecmp(s
,"no")) return 0;
1623 /* I agree, this is a very rudimentary way to load a configuration...
1624 will improve later if the config gets more complex */
1625 static void loadServerConfig(char *filename
) {
1627 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1630 char *errormsg
= "Fatal error, can't open config file '%s'";
1631 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1632 sprintf(errorbuf
, errormsg
, filename
);
1634 if (filename
[0] == '-' && filename
[1] == '\0')
1637 if ((fp
= fopen(filename
,"r")) == NULL
) {
1638 redisLog(REDIS_WARNING
, errorbuf
);
1643 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1649 line
= sdstrim(line
," \t\r\n");
1651 /* Skip comments and blank lines*/
1652 if (line
[0] == '#' || line
[0] == '\0') {
1657 /* Split into arguments */
1658 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1659 sdstolower(argv
[0]);
1661 /* Execute config directives */
1662 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1663 server
.maxidletime
= atoi(argv
[1]);
1664 if (server
.maxidletime
< 0) {
1665 err
= "Invalid timeout value"; goto loaderr
;
1667 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1668 server
.port
= atoi(argv
[1]);
1669 if (server
.port
< 1 || server
.port
> 65535) {
1670 err
= "Invalid port"; goto loaderr
;
1672 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1673 server
.bindaddr
= zstrdup(argv
[1]);
1674 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1675 int seconds
= atoi(argv
[1]);
1676 int changes
= atoi(argv
[2]);
1677 if (seconds
< 1 || changes
< 0) {
1678 err
= "Invalid save parameters"; goto loaderr
;
1680 appendServerSaveParams(seconds
,changes
);
1681 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1682 if (chdir(argv
[1]) == -1) {
1683 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1684 argv
[1], strerror(errno
));
1687 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1688 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1689 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1690 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1691 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1693 err
= "Invalid log level. Must be one of debug, notice, warning";
1696 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1699 server
.logfile
= zstrdup(argv
[1]);
1700 if (!strcasecmp(server
.logfile
,"stdout")) {
1701 zfree(server
.logfile
);
1702 server
.logfile
= NULL
;
1704 if (server
.logfile
) {
1705 /* Test if we are able to open the file. The server will not
1706 * be able to abort just for this problem later... */
1707 logfp
= fopen(server
.logfile
,"a");
1708 if (logfp
== NULL
) {
1709 err
= sdscatprintf(sdsempty(),
1710 "Can't open the log file: %s", strerror(errno
));
1715 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1716 server
.dbnum
= atoi(argv
[1]);
1717 if (server
.dbnum
< 1) {
1718 err
= "Invalid number of databases"; goto loaderr
;
1720 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1721 loadServerConfig(argv
[1]);
1722 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1723 server
.maxclients
= atoi(argv
[1]);
1724 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1725 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1726 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1727 server
.masterhost
= sdsnew(argv
[1]);
1728 server
.masterport
= atoi(argv
[2]);
1729 server
.replstate
= REDIS_REPL_CONNECT
;
1730 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1731 server
.masterauth
= zstrdup(argv
[1]);
1732 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1733 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1734 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1736 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1737 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1738 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1740 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1741 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1742 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1744 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1745 server
.sharingpoolsize
= atoi(argv
[1]);
1746 if (server
.sharingpoolsize
< 1) {
1747 err
= "invalid object sharing pool size"; goto loaderr
;
1749 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1750 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1751 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1753 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1754 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1755 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1757 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1758 if (!strcasecmp(argv
[1],"no")) {
1759 server
.appendfsync
= APPENDFSYNC_NO
;
1760 } else if (!strcasecmp(argv
[1],"always")) {
1761 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1762 } else if (!strcasecmp(argv
[1],"everysec")) {
1763 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1765 err
= "argument must be 'no', 'always' or 'everysec'";
1768 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1769 server
.requirepass
= zstrdup(argv
[1]);
1770 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1771 zfree(server
.pidfile
);
1772 server
.pidfile
= zstrdup(argv
[1]);
1773 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1774 zfree(server
.dbfilename
);
1775 server
.dbfilename
= zstrdup(argv
[1]);
1776 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1777 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1778 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1780 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1781 zfree(server
.vm_swap_file
);
1782 server
.vm_swap_file
= zstrdup(argv
[1]);
1783 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1784 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1785 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1786 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1787 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1788 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1789 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1790 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1791 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1792 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1793 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1794 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1795 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1796 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1798 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1800 for (j
= 0; j
< argc
; j
++)
1805 if (fp
!= stdin
) fclose(fp
);
1809 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1810 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1811 fprintf(stderr
, ">>> '%s'\n", line
);
1812 fprintf(stderr
, "%s\n", err
);
/* Drop one reference on every argument object held by the client:
 * the c->argc entries of c->argv and the c->mbargc entries of
 * c->mbargv (the multi bulk argument vector). */
1816 static void freeClientArgv(redisClient
*c
) {
1819 for (j
= 0; j
< c
->argc
; j
++)
1820 decrRefCount(c
->argv
[j
]);
1821 for (j
= 0; j
< c
->mbargc
; j
++)
1822 decrRefCount(c
->mbargv
[j
]);
/* Tear down a client and detach it from every server-side structure:
 *  - frees the query buffer and, if the client is REDIS_BLOCKED, unblocks it;
 *  - removes the readable/writable file events and releases the reply list;
 *  - unlinks the client from server.clients (its presence is asserted);
 *  - undoes the VM bookkeeping (io_ready_clients, vm_blocked_clients,
 *    pending io_keys);
 *  - for a slave/monitor, looks the client up in server.monitors or
 *    server.slaves (presence asserted);
 *  - for the master link, clears server.master and sets server.replstate
 *    back to REDIS_REPL_CONNECT;
 *  - releases the MULTI state. */
1827 static void freeClient(redisClient
*c
) {
1830 /* Note that if the client we are freeing is blocked into a blocking
1831 * call, we have to set querybuf to NULL *before* calling
1832 * unblockClientWaitingData() so that processInputBuffer() does not get
1833 * called. Also it is important to remove the file events after
1834 * this, because this call adds the READABLE event. */
1835 sdsfree(c
->querybuf
);
1837 if (c
->flags
& REDIS_BLOCKED
)
1838 unblockClientWaitingData(c
);
1840 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1841 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1842 listRelease(c
->reply
);
1845 /* Remove from the list of clients */
1846 ln
= listSearchKey(server
.clients
,c
);
1847 redisAssert(ln
!= NULL
);
1848 listDelNode(server
.clients
,ln
);
1849 /* Remove from the list of clients waiting for swapped keys */
1850 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1851 ln
= listSearchKey(server
.io_ready_clients
,c
);
1853 listDelNode(server
.io_ready_clients
,ln
);
1854 server
.vm_blocked_clients
--;
1857 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1858 ln
= listFirst(c
->io_keys
);
1859 dontWaitForSwappedKey(c
,ln
->value
);
1861 listRelease(c
->io_keys
);
1863 if (c
->flags
& REDIS_SLAVE
) {
1864 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1866 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1867 ln
= listSearchKey(l
,c
);
1868 redisAssert(ln
!= NULL
);
1871 if (c
->flags
& REDIS_MASTER
) {
1872 server
.master
= NULL
;
1873 server
.replstate
= REDIS_REPL_CONNECT
;
1877 freeClientMultiState(c
);
/* Coalesce the objects at the head of c->reply into one string object.
 * Objects are copied into a stack buffer of GLUEREPLY_UP_TO bytes while
 * copylen + objlen still fits, and each copied node is removed from the
 * list. If nothing was copied the function returns; otherwise a single
 * new string object holding the concatenation is added at the list head. */
1881 #define GLUEREPLY_UP_TO (1024)
1882 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1884 char buf
[GLUEREPLY_UP_TO
];
1889 listRewind(c
->reply
,&li
);
1890 while((ln
= listNext(&li
))) {
1894 objlen
= sdslen(o
->ptr
);
1895 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1896 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1898 listDelNode(c
->reply
,ln
);
1900 if (copylen
== 0) return;
1904 /* Now the output buffer is empty, add the new single element */
1905 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1906 listAddNodeHead(c
->reply
,o
);
/* Writable-event handler: sends the pending reply objects of the client
 * passed as privdata to fd.
 *
 * When glueoutputbuf is off, the reply list is longer than
 * REDIS_WRITEV_THRESHOLD and the client is not a master, the work is
 * handed to sendReplyToClientWritev(). Otherwise the head object is
 * written starting at offset c->sentlen; once it is fully sent the node
 * is removed and the next one is processed. For a REDIS_MASTER client
 * nothing is written: the remaining bytes are simply counted as sent.
 *
 * The loop stops once more than REDIS_MAX_WRITE_PER_EVENT bytes were
 * written or write() returns <= 0. lastinteraction is refreshed when
 * something was written, and the writable event is removed when the
 * reply list becomes empty. */
1909 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1910 redisClient
*c
= privdata
;
1911 int nwritten
= 0, totwritten
= 0, objlen
;
1914 REDIS_NOTUSED(mask
);
1916 /* Use writev() if we have enough buffers to send */
1917 if (!server
.glueoutputbuf
&&
1918 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1919 !(c
->flags
& REDIS_MASTER
))
1921 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1925 while(listLength(c
->reply
)) {
1926 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1927 glueReplyBuffersIfNeeded(c
);
1929 o
= listNodeValue(listFirst(c
->reply
));
1930 objlen
= sdslen(o
->ptr
);
1933 listDelNode(c
->reply
,listFirst(c
->reply
));
1937 if (c
->flags
& REDIS_MASTER
) {
1938 /* Don't reply to a master */
1939 nwritten
= objlen
- c
->sentlen
;
1941 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1942 if (nwritten
<= 0) break;
1944 c
->sentlen
+= nwritten
;
1945 totwritten
+= nwritten
;
1946 /* If we fully sent the object on head go to the next one */
1947 if (c
->sentlen
== objlen
) {
1948 listDelNode(c
->reply
,listFirst(c
->reply
));
1951 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
1952 * bytes, in a single threaded server it's a good idea to serve
1953 * other clients as well, even if a very large request comes from
1954 * super fast link that is always able to accept data (in real world
1955 * scenario think about 'KEYS *' against the loopback interface) */
1956 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1958 if (nwritten
== -1) {
1959 if (errno
== EAGAIN
) {
1962 redisLog(REDIS_VERBOSE
,
1963 "Error writing to client: %s", strerror(errno
));
1968 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1969 if (listLength(c
->reply
) == 0) {
1971 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* writev() based variant of the reply sender.
 *
 * For every round it fills iov[] with the pending reply objects, up to
 * REDIS_WRITEV_IOVEC_COUNT entries and bounded by
 * REDIS_MAX_WRITE_PER_EVENT; the first entry starts at offset
 * c->sentlen because the head object may have been partially sent.
 * All collected buffers are written with a single writev() call. Errors
 * other than EAGAIN are logged. Afterwards the objects that were
 * completely written are removed from c->reply and the leftover byte
 * count is added to c->sentlen. lastinteraction is refreshed and the
 * writable event is removed once the reply list is empty. */
1975 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
1977 redisClient
*c
= privdata
;
1978 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
1980 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
1981 int offset
, ion
= 0;
1983 REDIS_NOTUSED(mask
);
1986 while (listLength(c
->reply
)) {
1987 offset
= c
->sentlen
;
1991 /* fill-in the iov[] array */
1992 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
1993 o
= listNodeValue(node
);
1994 objlen
= sdslen(o
->ptr
);
1996 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
1999 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2000 break; /* no more iovecs */
2002 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2003 iov
[ion
].iov_len
= objlen
- offset
;
2004 willwrite
+= objlen
- offset
;
2005 offset
= 0; /* just for the first item */
2012 /* write all collected blocks at once */
2013 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2014 if (errno
!= EAGAIN
) {
2015 redisLog(REDIS_VERBOSE
,
2016 "Error writing to client: %s", strerror(errno
));
2023 totwritten
+= nwritten
;
2024 offset
= c
->sentlen
;
2026 /* remove written robjs from c->reply */
2027 while (nwritten
&& listLength(c
->reply
)) {
2028 o
= listNodeValue(listFirst(c
->reply
));
2029 objlen
= sdslen(o
->ptr
);
2031 if(nwritten
>= objlen
- offset
) {
2032 listDelNode(c
->reply
, listFirst(c
->reply
));
2033 nwritten
-= objlen
- offset
;
2037 c
->sentlen
+= nwritten
;
2045 c
->lastinteraction
= time(NULL
);
2047 if (listLength(c
->reply
) == 0) {
2049 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* Look up a command by name. Walks cmdTable up to the sentinel entry
 * whose name is NULL and returns a pointer to the first entry whose
 * name matches 'name' ignoring case (strcasecmp). */
2053 static struct redisCommand
*lookupCommand(char *name
) {
2055 while(cmdTable
[j
].name
!= NULL
) {
2056 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2062 /* resetClient prepares the client to process the next command */
2063 static void resetClient(redisClient
*c
) {
2069 /* Call() is the core of Redis execution of a command.
 *
 * server.dirty is sampled into 'dirty' so that, afterwards, a change in
 * the counter (server.dirty-dirty != 0) tells whether data was modified:
 *  - if so and appendonly is on, the command is fed to the append only file;
 *  - if so and there are slaves, the command is fed to server.slaves.
 * Independently of the dirty counter, the command is always fed to
 * server.monitors when that list is not empty. Finally the
 * stat_numcommands counter is incremented. */
2070 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2073 dirty
= server
.dirty
;
2075 if (server
.appendonly
&& server
.dirty
-dirty
)
2076 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2077 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2078 replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2079 if (listLength(server
.monitors
))
2080 replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2081 server
.stat_numcommands
++;
2084 /* If this function gets called we already read a whole
2085 * command, arguments are in the client argv/argc fields.
2086 * processCommand() executes the command or prepares the
2087 * server for a bulk read from the client.
2089 * If 1 is returned the client is still alive and valid
2090 * and other operations can be performed by the caller. Otherwise
2091 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2092 static int processCommand(redisClient
*c
) {
2093 struct redisCommand
*cmd
;
2095 /* Free some memory if needed (maxmemory setting) */
2096 if (server
.maxmemory
) freeMemoryIfNeeded();
2098 /* Handle the multi bulk command type. This is an alternative protocol
2099 * supported by Redis in order to receive commands that are composed of
2100 * multiple binary-safe "bulk" arguments. The latency of processing is
2101 * a bit higher but this allows things like multi-sets, so if this
2102 * protocol is used only for MSET and similar commands this is a big win. */
2103 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2104 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2105 if (c
->multibulk
<= 0) {
2109 decrRefCount(c
->argv
[c
->argc
-1]);
2113 } else if (c
->multibulk
) {
2114 if (c
->bulklen
== -1) {
2115 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2116 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2120 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2121 decrRefCount(c
->argv
[0]);
2122 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2124 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2129 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2133 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2134 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2138 if (c
->multibulk
== 0) {
2142 /* Here we need to swap the multi-bulk argc/argv with the
2143 * normal argc/argv of the client structure. */
2145 c
->argv
= c
->mbargv
;
2146 c
->mbargv
= auxargv
;
2149 c
->argc
= c
->mbargc
;
2150 c
->mbargc
= auxargc
;
2152 /* We need to set bulklen to something different than -1
2153 * in order for the code below to process the command without
2154 * to try to read the last argument of a bulk command as
2155 * a special argument. */
2157 /* continue below and process the command */
2164 /* -- end of multi bulk commands processing -- */
2166 /* The QUIT command is handled as a special case. Normal command
2167 * procs are unable to close the client connection safely */
2168 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2173 /* Now lookup the command and check ASAP about trivial error conditions
2174 * such wrong arity, bad command name and so forth. */
2175 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2178 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2179 (char*)c
->argv
[0]->ptr
));
2182 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2183 (c
->argc
< -cmd
->arity
)) {
2185 sdscatprintf(sdsempty(),
2186 "-ERR wrong number of arguments for '%s' command\r\n",
2190 } else if (server
.maxmemory
&& cmd
->flags
& REDIS_CMD_DENYOOM
&& zmalloc_used_memory() > server
.maxmemory
) {
2191 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2194 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2195 /* This is a bulk command, we have to read the last argument yet. */
2196 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2198 decrRefCount(c
->argv
[c
->argc
-1]);
2199 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2201 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2206 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2207 /* It is possible that the bulk read is already in the
2208 * buffer. Check this condition and handle it accordingly.
2209 * This is just a fast path, alternative to call processInputBuffer().
2210 * It's a good idea since the code is small and this condition
2211 * happens most of the times. */
2212 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2213 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2215 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2217 /* Otherwise return... there is to read the last argument
2218 * from the socket. */
2222 /* Let's try to share objects on the command arguments vector */
2223 if (server
.shareobjects
) {
2225 for(j
= 1; j
< c
->argc
; j
++)
2226 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2228 /* Let's try to encode the bulk object to save space. */
2229 if (cmd
->flags
& REDIS_CMD_BULK
)
2230 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2232 /* Check if the user is authenticated */
2233 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2234 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2239 /* Exec the command */
2240 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2241 queueMultiCommand(c
,cmd
);
2242 addReply(c
,shared
.queued
);
2244 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2245 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2249 /* Prepare the client for the next command */
/* Send the command cmd/argv/argc, executed against database 'dictid',
 * to every client in the 'slaves' list (also used for the monitors list).
 *
 * The command is first flattened into an object vector 'outv': the
 * arguments separated by shared.space and terminated by shared.crlf. For
 * a REDIS_CMD_BULK command the last argument is preceded by an object
 * holding its length as "<len>\r\n". The vector lives on the stack when
 * argc <= REDIS_STATIC_ARGS and is heap allocated (and freed at the end)
 * otherwise.
 *
 * Clients still in REDIS_REPL_WAIT_BGSAVE_START are skipped. A client
 * whose slaveseldb differs from dictid first receives a SELECT command
 * (shared objects for DBs 0-9, a freshly created one otherwise). */
2254 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
2259 /* (args*2)+1 is enough room for args, spaces, newlines */
2260 robj
*static_outv
[REDIS_STATIC_ARGS
*2+1];
2262 if (argc
<= REDIS_STATIC_ARGS
) {
2265 outv
= zmalloc(sizeof(robj
*)*(argc
*2+1));
2268 for (j
= 0; j
< argc
; j
++) {
2269 if (j
!= 0) outv
[outc
++] = shared
.space
;
2270 if ((cmd
->flags
& REDIS_CMD_BULK
) && j
== argc
-1) {
2273 lenobj
= createObject(REDIS_STRING
,
2274 sdscatprintf(sdsempty(),"%lu\r\n",
2275 (unsigned long) stringObjectLen(argv
[j
])));
2276 lenobj
->refcount
= 0;
2277 outv
[outc
++] = lenobj
;
2279 outv
[outc
++] = argv
[j
];
2281 outv
[outc
++] = shared
.crlf
;
2283 /* Increment all the refcounts at start and decrement at end in order to
2284 * be sure to free objects if there is no slave in a replication state
2285 * able to be fed with commands */
2286 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2287 listRewind(slaves
,&li
);
2288 while((ln
= listNext(&li
))) {
2289 redisClient
*slave
= ln
->value
;
2291 /* Don't feed slaves that are still waiting for BGSAVE to start */
2292 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2294 /* Feed all the other slaves, MONITORs and so on */
2295 if (slave
->slaveseldb
!= dictid
) {
2299 case 0: selectcmd
= shared
.select0
; break;
2300 case 1: selectcmd
= shared
.select1
; break;
2301 case 2: selectcmd
= shared
.select2
; break;
2302 case 3: selectcmd
= shared
.select3
; break;
2303 case 4: selectcmd
= shared
.select4
; break;
2304 case 5: selectcmd
= shared
.select5
; break;
2305 case 6: selectcmd
= shared
.select6
; break;
2306 case 7: selectcmd
= shared
.select7
; break;
2307 case 8: selectcmd
= shared
.select8
; break;
2308 case 9: selectcmd
= shared
.select9
; break;
2310 selectcmd
= createObject(REDIS_STRING
,
2311 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2312 selectcmd
->refcount
= 0;
2315 addReply(slave
,selectcmd
);
2316 slave
->slaveseldb
= dictid
;
2318 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2320 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2321 if (outv
!= static_outv
) zfree(outv
);
/* Parse the client query buffer and run the commands found in it.
 *
 * When c->bulklen is -1 a new command line is expected: the text up to
 * the first '\n' is cut out of the buffer (a trailing '\r' is removed
 * too), split on single spaces, and every non-empty token becomes a
 * string object in c->argv before processCommand() is invoked. A buffer
 * of REDIS_REQUEST_MAX_SIZE bytes or more that is not handled by the
 * preceding branch is logged as a protocol error.
 *
 * When c->bulklen is not -1 the buffer holds the bulk payload of the
 * pending command: once at least c->bulklen bytes are available the
 * payload (minus the final CRLF) becomes the last argument and
 * processCommand() is invoked.
 *
 * In both cases parsing restarts (goto again) while the client is still
 * valid and data is left in the buffer. */
2324 static void processInputBuffer(redisClient
*c
) {
2326 /* Before to process the input buffer, make sure the client is not
2327 * waiting for a blocking operation such as BLPOP. Note that the first
2328 * iteration the client is never blocked, otherwise the processInputBuffer
2329 * would not be called at all, but after the execution of the first commands
2330 * in the input buffer the client may be blocked, and the "goto again"
2331 * will try to reiterate. The following line will make it return asap. */
2332 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2333 if (c
->bulklen
== -1) {
2334 /* Read the first line of the query */
2335 char *p
= strchr(c
->querybuf
,'\n');
2342 query
= c
->querybuf
;
2343 c
->querybuf
= sdsempty();
2344 querylen
= 1+(p
-(query
));
2345 if (sdslen(query
) > querylen
) {
2346 /* leave data after the first line of the query in the buffer */
2347 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2349 *p
= '\0'; /* remove "\n" */
2350 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2351 sdsupdatelen(query
);
2353 /* Now we can split the query in arguments */
2354 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2357 if (c
->argv
) zfree(c
->argv
);
2358 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2360 for (j
= 0; j
< argc
; j
++) {
2361 if (sdslen(argv
[j
])) {
2362 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2370 /* Execute the command. If the client is still valid
2371 * after processCommand() return and there is something
2372 * on the query buffer try to process the next command. */
2373 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2375 /* Nothing to process, argc == 0. Just process the query
2376 * buffer if it's not empty or return to the caller */
2377 if (sdslen(c
->querybuf
)) goto again
;
2380 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2381 redisLog(REDIS_VERBOSE
, "Client protocol error");
2386 /* Bulk read handling. Note that if we are at this point
2387 the client already sent a command terminated with a newline,
2388 we are reading the bulk data that is actually the last
2389 argument of the command. */
2390 int qbl
= sdslen(c
->querybuf
);
2392 if (c
->bulklen
<= qbl
) {
2393 /* Copy everything but the final CRLF as final argument */
2394 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2396 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2397 /* Process the command. If the client is still valid after
2398 * the processing and there is more data in the buffer
2399 * try to parse it. */
2400 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
/* Readable-event handler for a client socket. Reads up to
 * REDIS_IOBUF_LEN bytes from fd into a stack buffer; a read error other
 * than EAGAIN and a zero-length read (peer closed the connection) are
 * both logged at verbose level. Data that was read is appended to
 * c->querybuf, lastinteraction is refreshed, and processInputBuffer() is
 * called unless the client is currently REDIS_BLOCKED. */
2406 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2407 redisClient
*c
= (redisClient
*) privdata
;
2408 char buf
[REDIS_IOBUF_LEN
];
2411 REDIS_NOTUSED(mask
);
2413 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2415 if (errno
== EAGAIN
) {
2418 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2422 } else if (nread
== 0) {
2423 redisLog(REDIS_VERBOSE
, "Client closed connection");
2428 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2429 c
->lastinteraction
= time(NULL
);
2433 if (!(c
->flags
& REDIS_BLOCKED
))
2434 processInputBuffer(c
);
/* Make the client operate on database number 'id'. The id is first
 * checked against the valid range [0, server.dbnum); on success c->db
 * points at the corresponding entry of server.db. */
2437 static int selectDb(redisClient
*c
, int id
) {
2438 if (id
< 0 || id
>= server
.dbnum
)
2440 c
->db
= &server
.db
[id
];
/* Dup method installed on the client reply list (see createClient()):
 * instead of copying the object it takes a new reference on it. */
2444 static void *dupClientReplyValue(void *o
) {
2445 incrRefCount((robj
*)o
);
/* Allocate and initialise the client state for the connected socket fd.
 *
 * anetNonBlock() and anetTcpNoDelay() are applied to the socket; NULL is
 * returned if the allocation failed. The client starts with an empty
 * query buffer, not authenticated, with replstate REDIS_REPL_NONE, an
 * empty reply list (elements freed with decrRefCount and duplicated with
 * dupClientReplyValue) and an empty io_keys list. readQueryFromClient is
 * registered as the AE_READABLE handler, the client is appended to
 * server.clients and its MULTI state is initialised. */
2449 static redisClient
*createClient(int fd
) {
2450 redisClient
*c
= zmalloc(sizeof(*c
));
2452 anetNonBlock(NULL
,fd
);
2453 anetTcpNoDelay(NULL
,fd
);
2454 if (!c
) return NULL
;
2457 c
->querybuf
= sdsempty();
2466 c
->lastinteraction
= time(NULL
);
2467 c
->authenticated
= 0;
2468 c
->replstate
= REDIS_REPL_NONE
;
2469 c
->reply
= listCreate();
2470 listSetFreeMethod(c
->reply
,decrRefCount
);
2471 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2472 c
->blockingkeys
= NULL
;
2473 c
->blockingkeysnum
= 0;
2474 c
->io_keys
= listCreate();
2475 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2476 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2477 readQueryFromClient
, c
) == AE_ERR
) {
2481 listAddNodeTail(server
.clients
,c
);
2482 initClientMultiState(c
);
/* Queue 'obj' on the client reply list.
 *
 * If the list is currently empty and the client replstate is
 * REDIS_REPL_NONE or REDIS_REPL_ONLINE, the writable handler
 * sendReplyToClient is installed first; when that fails the function
 * returns without queueing anything.
 *
 * With VM enabled, an object whose storage is not REDIS_VM_MEMORY is
 * duplicated before being queued. The object is appended through
 * getDecodedObject(), so the list receives a raw-encoded object. */
2486 static void addReply(redisClient
*c
, robj
*obj
) {
2487 if (listLength(c
->reply
) == 0 &&
2488 (c
->replstate
== REDIS_REPL_NONE
||
2489 c
->replstate
== REDIS_REPL_ONLINE
) &&
2490 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2491 sendReplyToClient
, c
) == AE_ERR
) return;
2493 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2494 obj
= dupStringObject(obj
);
2495 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2497 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
/* Reply with the sds string 's': the string is wrapped, as is, into a
 * new REDIS_STRING object. */
2500 static void addReplySds(redisClient
*c
, sds s
) {
2501 robj
*o
= createObject(REDIS_STRING
,s
);
/* Reply with the double 'd' as a bulk string: the value is formatted
 * with "%.17g" and sent as "$<len>\r\n<text>\r\n". */
2506 static void addReplyDouble(redisClient
*c
, double d
) {
2509 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2510 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2511 (unsigned long) strlen(buf
),buf
));
/* Reply with the long 'l' as an integer reply (":<l>\r\n").
 * The shared objects shared.czero and shared.cone are used instead of
 * formatting a new string for the common small values (l == 1 selects
 * shared.cone; NOTE(review): the shared.czero branch is presumably
 * l == 0 -- confirm). */
2514 static void addReplyLong(redisClient
*c
, long l
) {
2519 addReply(c
,shared
.czero
);
2521 } else if (l
== 1) {
2522 addReply(c
,shared
.cone
);
2525 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2526 addReplySds(c
,sdsnewlen(buf
,len
));
/* Unsigned counterpart of addReplyLong(): replies with 'ul' as an
 * integer reply (":<ul>\r\n"), using shared.czero / shared.cone for the
 * common small values (ul == 1 selects shared.cone; NOTE(review): the
 * shared.czero branch is presumably ul == 0 -- confirm). */
2529 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2534 addReply(c
,shared
.czero
);
2536 } else if (ul
== 1) {
2537 addReply(c
,shared
.cone
);
2540 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2541 addReplySds(c
,sdsnewlen(buf
,len
));
/* Send the "$<len>\r\n" header of a bulk reply for 'obj'.
 * For a raw-encoded object the length is sdslen(obj->ptr). Otherwise
 * obj->ptr is read as a long and the length is the number of characters
 * of its decimal representation, computed by repeatedly dividing by 10. */
2544 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2547 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2548 len
= sdslen(obj
->ptr
);
2550 long n
= (long)obj
->ptr
;
2552 /* Compute how many bytes will take this integer as a radix 10 string */
2558 while((n
= n
/10) != 0) {
2562 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
/* Send 'obj' as a bulk reply: the length header is emitted with
 * addReplyBulkLen() and the reply is terminated by shared.crlf. */
2565 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2566 addReplyBulkLen(c
,obj
);
2568 addReply(c
,shared
.crlf
);
2571 /* In the CONFIG command we need to add vanilla C strings as bulk replies.
 * One branch replies with shared.nullbulk (NOTE(review): presumably when
 * s is NULL -- confirm); otherwise a temporary string object is built
 * from the nul-terminated string 's'. */
2572 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2574 addReply(c
,shared
.nullbulk
);
2576 robj
*o
= createStringObject(s
,strlen(s
));
/* Readable-event handler of the listening socket.
 * Accepts a new connection with anetAccept() (failures are logged at
 * verbose level), logs the peer address and creates the client state;
 * if createClient() fails the accepted socket is closed. When
 * server.maxclients is set and the number of clients now exceeds it, a
 * best-effort error message is written directly to the new socket.
 * stat_numconnections counts the accepted connections. */
2582 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2587 REDIS_NOTUSED(mask
);
2588 REDIS_NOTUSED(privdata
);
2590 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2591 if (cfd
== AE_ERR
) {
2592 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2595 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2596 if ((c
= createClient(cfd
)) == NULL
) {
2597 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2598 close(cfd
); /* May be already closed, just ignore errors */
2601 /* If maxclient directive is set and this is one client more... close the
2602 * connection. Note that we create the client instead to check before
2603 * for this condition, since now the socket is already set in nonblocking
2604 * mode and we can send an error for free using the Kernel I/O */
2605 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2606 char *err
= "-ERR max number of clients reached\r\n";
2608 /* That's a best effort error message, don't check write errors */
2609 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2610 /* Nothing to do, Just to avoid the warning... */
2615 server
.stat_numconnections
++;
2618 /* ======================= Redis objects implementation ===================== */
/* Create a new object of the given type whose payload is 'ptr'.
 *
 * An object is recycled from server.objfreelist when the list is not
 * empty, otherwise a new one is allocated; when VM is disabled the
 * allocation leaves out the trailing struct redisObjectVM. The free
 * list is protected by obj_freelist_mutex only when VM is enabled.
 * The new object is raw encoded; with VM enabled its access time and
 * storage (REDIS_VM_MEMORY) are initialised as well. */
2620 static robj
*createObject(int type
, void *ptr
) {
2623 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2624 if (listLength(server
.objfreelist
)) {
2625 listNode
*head
= listFirst(server
.objfreelist
);
2626 o
= listNodeValue(head
);
2627 listDelNode(server
.objfreelist
,head
);
2628 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2630 if (server
.vm_enabled
) {
2631 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2632 o
= zmalloc(sizeof(*o
));
2634 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2638 o
->encoding
= REDIS_ENCODING_RAW
;
2641 if (server
.vm_enabled
) {
2642 /* Note that this code may run in the context of an I/O thread
2643 * and accessing to server.unixtime in theory is an error
2644 * (no locks). But in practice this is safe, and even if we read
2645 * garbage Redis will not fail, as it's just a statistical info */
2646 o
->vm
.atime
= server
.unixtime
;
2647 o
->storage
= REDIS_VM_MEMORY
;
/* Create a REDIS_STRING object whose payload is a new sds string built
 * from the 'len' bytes starting at 'ptr'. */
2652 static robj
*createStringObject(char *ptr
, size_t len
) {
2653 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
/* Return a new string object with the same content as 'o'.
 * 'o' must be raw encoded (checked with assert). */
2656 static robj
*dupStringObject(robj
*o
) {
2657 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2658 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
/* Create an empty REDIS_LIST object. The underlying list releases its
 * elements with decrRefCount. */
2661 static robj
*createListObject(void) {
2662 list
*l
= listCreate();
2664 listSetFreeMethod(l
,decrRefCount
);
2665 return createObject(REDIS_LIST
,l
);
/* Create an empty REDIS_SET object backed by a dict of type setDictType. */
2668 static robj
*createSetObject(void) {
2669 dict
*d
= dictCreate(&setDictType
,NULL
);
2670 return createObject(REDIS_SET
,d
);
/* Create an empty REDIS_HASH object. The payload is a new zipmap and the
 * object encoding is set to REDIS_ENCODING_ZIPMAP. */
2673 static robj
*createHashObject(void) {
2674 /* All the Hashes start as zipmaps. Will be automatically converted
2675 * into hash tables if there are enough elements or big elements
2677 unsigned char *zm
= zipmapNew();
2678 robj
*o
= createObject(REDIS_HASH
,zm
);
2679 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
/* Create an empty REDIS_ZSET object: a zset structure holding a dict of
 * type zsetDictType and the structure returned by zslCreate(). */
2683 static robj
*createZsetObject(void) {
2684 zset
*zs
= zmalloc(sizeof(*zs
));
2686 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2687 zs
->zsl
= zslCreate();
2688 return createObject(REDIS_ZSET
,zs
);
/* Release the payload of a string object. The visible code only acts
 * when the object is raw encoded. */
2691 static void freeStringObject(robj
*o
) {
2692 if (o
->encoding
== REDIS_ENCODING_RAW
) {
/* Release the list stored in o->ptr. */
2697 static void freeListObject(robj
*o
) {
2698 listRelease((list
*) o
->ptr
);
/* Release the dict stored in o->ptr. */
2701 static void freeSetObject(robj
*o
) {
2702 dictRelease((dict
*) o
->ptr
);
/* Release the payload of a sorted set object; the dict member of the
 * zset is released with dictRelease(). */
2705 static void freeZsetObject(robj
*o
) {
2708 dictRelease(zs
->dict
);
/* Release the payload of a hash object according to its encoding:
 * a REDIS_ENCODING_HT payload is a dict and is released with
 * dictRelease(); REDIS_ENCODING_ZIPMAP is handled by its own case. */
2713 static void freeHashObject(robj
*o
) {
2714 switch (o
->encoding
) {
2715 case REDIS_ENCODING_HT
:
2716 dictRelease((dict
*) o
->ptr
);
2718 case REDIS_ENCODING_ZIPMAP
:
/* Take a reference on 'o'. With VM enabled the object must be in memory
 * (storage == REDIS_VM_MEMORY); this is asserted. */
2727 static void incrRefCount(robj
*o
) {
2728 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
/* Drop a reference on 'obj'. The parameter is a void pointer so the
 * function can be installed as a list free method.
 *
 * With VM enabled, an object whose storage is REDIS_VM_SWAPPED or
 * REDIS_VM_LOADING is handled first: its refcount must be 1 and its type
 * REDIS_STRING (both asserted), a pending load job is cancelled, the
 * string payload is freed, its swap pages are marked free and
 * vm_stats_swapped_objects is decremented.
 *
 * Otherwise the refcount is decremented and, when it reaches zero, a
 * pending swap-out job is cancelled and the payload is freed according
 * to the object type (an unknown type trips an assertion).
 *
 * In both paths the robj itself is offered to server.objfreelist, under
 * obj_freelist_mutex, provided the list holds no more than
 * REDIS_OBJFREELIST_MAX entries. */
2732 static void decrRefCount(void *obj
) {
2735 /* Object is a key of a swapped out value, or in the process of being
2737 if (server
.vm_enabled
&&
2738 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2740 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2741 redisAssert(o
->refcount
== 1);
2743 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2744 redisAssert(o
->type
== REDIS_STRING
);
2745 freeStringObject(o
);
2746 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2747 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2748 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2749 !listAddNodeHead(server
.objfreelist
,o
))
2751 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2752 server
.vm_stats_swapped_objects
--;
2755 /* Object is in memory, or in the process of being swapped out. */
2756 if (--(o
->refcount
) == 0) {
2757 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2758 vmCancelThreadedIOJob(obj
);
2760 case REDIS_STRING
: freeStringObject(o
); break;
2761 case REDIS_LIST
: freeListObject(o
); break;
2762 case REDIS_SET
: freeSetObject(o
); break;
2763 case REDIS_ZSET
: freeZsetObject(o
); break;
2764 case REDIS_HASH
: freeHashObject(o
); break;
2765 default: redisAssert(0); break;
2767 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2768 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2769 !listAddNodeHead(server
.objfreelist
,o
))
2771 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
/* Look 'key' up in db->dict and return the associated value.
 *
 * With VM enabled the stored key object carries the swap state:
 *  - REDIS_VM_MEMORY or REDIS_VM_SWAPPING: a pending swap-out job is
 *    cancelled and the access time of the key is refreshed;
 *  - otherwise the value is not in memory (asserted NULL): it is loaded
 *    with vmLoadObject() and stored back in the dict entry, and if the
 *    key was in REDIS_VM_LOADING state the clients blocked on it are
 *    notified. */
2775 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2776 dictEntry
*de
= dictFind(db
->dict
,key
);
2778 robj
*key
= dictGetEntryKey(de
);
2779 robj
*val
= dictGetEntryVal(de
);
2781 if (server
.vm_enabled
) {
2782 if (key
->storage
== REDIS_VM_MEMORY
||
2783 key
->storage
== REDIS_VM_SWAPPING
)
2785 /* If we were swapping the object out, stop it, this key
2787 if (key
->storage
== REDIS_VM_SWAPPING
)
2788 vmCancelThreadedIOJob(key
);
2789 /* Update the access time of the key for the aging algorithm. */
2790 key
->vm
.atime
= server
.unixtime
;
2792 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2794 /* Our value was swapped on disk. Bring it at home. */
2795 redisAssert(val
== NULL
);
2796 val
= vmLoadObject(key
);
2797 dictGetEntryVal(de
) = val
;
2799 /* Clients blocked by the VM subsystem may be waiting for
2801 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
/* Look up a key for a read operation: expireIfNeeded() is run on the
 * key before the actual lookup. */
2810 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2811 expireIfNeeded(db
,key
);
2812 return lookupKey(db
,key
);
/* Look up a key for a write operation: deleteIfVolatile() is run on the
 * key before the actual lookup. */
2815 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2816 deleteIfVolatile(db
,key
);
2817 return lookupKey(db
,key
);
/* lookupKeyRead() in the client's current DB; when the key is not found
 * 'reply' is sent to the client. */
2820 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2821 robj
*o
= lookupKeyRead(c
->db
, key
);
2822 if (!o
) addReply(c
,reply
);
/* lookupKeyWrite() in the client's current DB; when the key is not found
 * 'reply' is sent to the client. */
2826 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2827 robj
*o
= lookupKeyWrite(c
->db
, key
);
2828 if (!o
) addReply(c
,reply
);
/* Check that object 'o' has the expected 'type'; on mismatch the
 * shared.wrongtypeerr reply is sent to the client. */
2832 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2833 if (o
->type
!= type
) {
2834 addReply(c
,shared
.wrongtypeerr
);
/* Delete 'key' from 'db': the key is removed from db->expires (when that
 * dict is not empty) and then from db->dict. Returns non-zero when the
 * key was removed from db->dict, zero otherwise. */
2840 static int deleteKey(redisDb
*db
, robj
*key
) {
2843 /* We need to protect key from destruction: after the first dictDelete()
2844 * it may happen that 'key' is no longer valid if we don't increment
2845 * its count. This may happen when we get the object reference directly
2846 * from the hash table with dictRandomKey() or dict iterators */
2848 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2849 retval
= dictDelete(db
->dict
,key
);
2852 return retval
== DICT_OK
;
2855 /* Try to share an object against the shared objects pool.
 *
 * 'o' is returned unchanged when it is NULL or sharing is disabled.
 * On a pool hit the hit counter stored as the dict value is incremented
 * and a reference is taken on the pooled object. On a miss, if the pool
 * already holds server.sharingpoolsize entries the counter of a random
 * entry is decremented instead; the new object is added to the pool with
 * an initial counter of 1. */
2856 static robj
*tryObjectSharing(robj
*o
) {
2857 struct dictEntry
*de
;
2860 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2862 redisAssert(o
->type
== REDIS_STRING
);
2863 de
= dictFind(server
.sharingpool
,o
);
2865 robj
*shared
= dictGetEntryKey(de
);
2867 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2868 dictGetEntryVal(de
) = (void*) c
;
2869 incrRefCount(shared
);
2873 /* Here we are using a stream algorithm: Every time an object is
2874 * shared we increment its count, every time there is a miss we
2875 * decrement the counter of a random object. If this object reaches
2876 * zero we remove the object and put the current object instead. */
2877 if (dictSize(server
.sharingpool
) >=
2878 server
.sharingpoolsize
) {
2879 de
= dictGetRandomKey(server
.sharingpool
);
2880 redisAssert(de
!= NULL
);
2881 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2882 dictGetEntryVal(de
) = (void*) c
;
2884 dictDelete(server
.sharingpool
,de
->key
);
2887 c
= 0; /* If the pool is empty we want to add this object */
2892 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2893 redisAssert(retval
== DICT_OK
);
2900 /* Check if the nul-terminated string 's' can be represented by a long
2901 * (that is, is a number that fits into long without any other space or
2902 * character before or after the digits).
 *
 * The check is a round trip: 's' is parsed with strtol(), which must
 * consume the whole string, and the parsed value is printed back with
 * "%ld"; the result must be byte-for-byte identical to 's'. This rejects
 * forms such as leading spaces, a leading '+' or leading zeros.
 * 'longval' may be NULL when the caller only needs the yes/no answer.
 *
2904 * If so, the function returns REDIS_OK and *longval is set to the value
2905 * of the number. Otherwise REDIS_ERR is returned */
2906 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2907 char buf
[32], *endptr
;
2911 value
= strtol(s
, &endptr
, 10);
2912 if (endptr
[0] != '\0') return REDIS_ERR
;
2913 slen
= snprintf(buf
,32,"%ld",value
);
2915 /* If the number converted back into a string is not identical
2916 * then it's not possible to encode the string as integer */
2917 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2918 if (longval
) *longval
= value
;
2922 /* Try to encode a string object in order to save space.
 *
 * REDIS_ERR is returned, and the object left untouched, when it is
 * already encoded, when it is referenced more than once, or when its
 * content cannot be represented as a long. On success the encoding
 * becomes REDIS_ENCODING_INT and o->ptr holds the integer value itself
 * instead of pointing to a string. */
2923 static int tryObjectEncoding(robj
*o
) {
2927 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2928 return REDIS_ERR
; /* Already encoded */
2930 /* It's not safe to encode shared objects: shared objects can be shared
2931 * everywhere in the "object space" of Redis. Encoded objects can only
2932 * appear as "values" (and not, for instance, as keys) */
2933 if (o
->refcount
> 1) return REDIS_ERR
;
2935 /* Currently we try to encode only strings */
2936 redisAssert(o
->type
== REDIS_STRING
);
2938 /* Check if we can represent this string as a long integer */
2939 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2941 /* Ok, this object can be encoded */
2942 o
->encoding
= REDIS_ENCODING_INT
;
2944 o
->ptr
= (void*) value
;
2948 /* Get a decoded version of an encoded object (returned as a new object).
2949 * If the object is already raw-encoded just increment the ref count.
 *
 * An integer-encoded string is printed with "%ld" into a new string
 * object. Any other type/encoding combination is a programming error
 * and trips an assertion. */
2950 static robj
*getDecodedObject(robj
*o
) {
2953 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2957 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2960 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2961 dec
= createStringObject(buf
,strlen(buf
));
2964 redisAssert(1 != 1);
2968 /* Compare two string objects via strcmp() or alike.
2969 * Note that the objects may be integer-encoded. In such a case we
2970 * use snprintf() to get a string representation of the numbers on the stack
2971 * and compare the strings, it's much faster than calling getDecodedObject().
2973 * Important note: if objects are not integer encoded, but binary-safe strings,
2974 * sdscmp() from sds.c will apply memcmp() so this function can be considered
2976 static int compareStringObjects(robj
*a
, robj
*b
) {
2977 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
2978 char bufa
[128], bufb
[128], *astr
, *bstr
;
2981 if (a
== b
) return 0;
2982 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
2983 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
2989 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
2990 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
2996 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
2999 static size_t stringObjectLen(robj
*o
) {
3000 redisAssert(o
->type
== REDIS_STRING
);
3001 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3002 return sdslen(o
->ptr
);
3006 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3010 /*============================ RDB saving/loading =========================== */
3012 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3013 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3017 static int rdbSaveTime(FILE *fp
, time_t t
) {
3018 int32_t t32
= (int32_t) t
;
3019 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3023 /* check rdbLoadLen() comments for more info */
3024 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3025 unsigned char buf
[2];
3028 /* Save a 6 bit len */
3029 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3030 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3031 } else if (len
< (1<<14)) {
3032 /* Save a 14 bit len */
3033 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3035 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3037 /* Save a 32 bit len */
3038 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3039 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3041 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3046 /* String objects in the form "2391" "-100" without any space and with a
3047 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3048 * encoded as integers to save space */
3049 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3051 char *endptr
, buf
[32];
3053 /* Check if it's possible to encode this value as a number */
3054 value
= strtoll(s
, &endptr
, 10);
3055 if (endptr
[0] != '\0') return 0;
3056 snprintf(buf
,32,"%lld",value
);
3058 /* If the number converted back into a string is not identical
3059 * then it's not possible to encode the string as integer */
3060 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3062 /* Finally check if it fits in our ranges */
3063 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3064 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3065 enc
[1] = value
&0xFF;
3067 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3068 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3069 enc
[1] = value
&0xFF;
3070 enc
[2] = (value
>>8)&0xFF;
3072 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3073 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3074 enc
[1] = value
&0xFF;
3075 enc
[2] = (value
>>8)&0xFF;
3076 enc
[3] = (value
>>16)&0xFF;
3077 enc
[4] = (value
>>24)&0xFF;
3084 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3085 size_t comprlen
, outlen
;
3089 /* We require at least four bytes compression for this to be worth it */
3090 if (len
<= 4) return 0;
3092 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3093 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3094 if (comprlen
== 0) {
3098 /* Data compressed! Let's save it on disk */
3099 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3100 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3101 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3102 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3103 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3112 /* Save a string object as [len][data] on disk. If the object is a string
3113 * representation of an integer value we try to save it in a special form */
3114 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3117 /* Try integer encoding */
3119 unsigned char buf
[5];
3120 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3121 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3126 /* Try LZF compression - under 20 bytes it's unable to compress even
3127 * aaaaaaaaaaaaaaaaaa so skip it */
3128 if (server
.rdbcompression
&& len
> 20) {
3131 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3132 if (retval
== -1) return -1;
3133 if (retval
> 0) return 0;
3134 /* retval == 0 means data can't be compressed, save the old way */
3137 /* Store verbatim */
3138 if (rdbSaveLen(fp
,len
) == -1) return -1;
3139 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3143 /* Like rdbSaveRawString() but handles encoded objects */
3144 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3147 /* Avoid incr/decr ref count business when possible.
3148 * This plays well with copy-on-write given that we are probably
3149 * in a child process (BGSAVE). Also this makes sure key objects
3150 * of swapped objects are not incRefCount-ed (an assert does not allow
3151 * this in order to avoid bugs) */
3152 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3153 obj
= getDecodedObject(obj
);
3154 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3157 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3162 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3163 * 8 bit integer specifying the length of the representation.
3164 * This 8 bit integer has special values in order to specify the following
3170 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3171 unsigned char buf
[128];
3177 } else if (!isfinite(val
)) {
3179 buf
[0] = (val
< 0) ? 255 : 254;
3181 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3182 buf
[0] = strlen((char*)buf
+1);
3185 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3189 /* Save a Redis object. */
3190 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3191 if (o
->type
== REDIS_STRING
) {
3192 /* Save a string value */
3193 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3194 } else if (o
->type
== REDIS_LIST
) {
3195 /* Save a list value */
3196 list
*list
= o
->ptr
;
3200 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3201 listRewind(list
,&li
);
3202 while((ln
= listNext(&li
))) {
3203 robj
*eleobj
= listNodeValue(ln
);
3205 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3207 } else if (o
->type
== REDIS_SET
) {
3208 /* Save a set value */
3210 dictIterator
*di
= dictGetIterator(set
);
3213 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3214 while((de
= dictNext(di
)) != NULL
) {
3215 robj
*eleobj
= dictGetEntryKey(de
);
3217 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3219 dictReleaseIterator(di
);
3220 } else if (o
->type
== REDIS_ZSET
) {
3221 /* Save a set value */
3223 dictIterator
*di
= dictGetIterator(zs
->dict
);
3226 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3227 while((de
= dictNext(di
)) != NULL
) {
3228 robj
*eleobj
= dictGetEntryKey(de
);
3229 double *score
= dictGetEntryVal(de
);
3231 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3232 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3234 dictReleaseIterator(di
);
3235 } else if (o
->type
== REDIS_HASH
) {
3236 /* Save a hash value */
3237 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3238 unsigned char *p
= zipmapRewind(o
->ptr
);
3239 unsigned int count
= zipmapLen(o
->ptr
);
3240 unsigned char *key
, *val
;
3241 unsigned int klen
, vlen
;
3243 if (rdbSaveLen(fp
,count
) == -1) return -1;
3244 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3245 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3246 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3249 dictIterator
*di
= dictGetIterator(o
->ptr
);
3252 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3253 while((de
= dictNext(di
)) != NULL
) {
3254 robj
*key
= dictGetEntryKey(de
);
3255 robj
*val
= dictGetEntryVal(de
);
3257 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3258 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3260 dictReleaseIterator(di
);
3268 /* Return the length the object will have on disk if saved with
3269 * the rdbSaveObject() function. Currently we use a trick to get
3270 * this length with very little changes to the code. In the future
3271 * we could switch to a faster solution. */
3272 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3273 if (fp
== NULL
) fp
= server
.devnull
;
3275 assert(rdbSaveObject(fp
,o
) != 1);
3279 /* Return the number of pages required to save this object in the swap file */
3280 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3281 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3283 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3286 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3287 static int rdbSave(char *filename
) {
3288 dictIterator
*di
= NULL
;
3293 time_t now
= time(NULL
);
3295 /* Wait for I/O threads to terminate, just in case this is a
3296 * foreground-saving, to avoid seeking the swap file descriptor at the
3298 if (server
.vm_enabled
)
3299 waitEmptyIOJobsQueue();
3301 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3302 fp
= fopen(tmpfile
,"w");
3304 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3307 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3308 for (j
= 0; j
< server
.dbnum
; j
++) {
3309 redisDb
*db
= server
.db
+j
;
3311 if (dictSize(d
) == 0) continue;
3312 di
= dictGetIterator(d
);
3318 /* Write the SELECT DB opcode */
3319 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3320 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3322 /* Iterate this DB writing every entry */
3323 while((de
= dictNext(di
)) != NULL
) {
3324 robj
*key
= dictGetEntryKey(de
);
3325 robj
*o
= dictGetEntryVal(de
);
3326 time_t expiretime
= getExpire(db
,key
);
3328 /* Save the expire time */
3329 if (expiretime
!= -1) {
3330 /* If this key is already expired skip it */
3331 if (expiretime
< now
) continue;
3332 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3333 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3335 /* Save the key and associated value. This requires special
3336 * handling if the value is swapped out. */
3337 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3338 key
->storage
== REDIS_VM_SWAPPING
) {
3339 /* Save type, key, value */
3340 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3341 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3342 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3344 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3346 /* Get a preview of the object in memory */
3347 po
= vmPreviewObject(key
);
3348 /* Save type, key, value */
3349 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3350 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3351 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3352 /* Remove the loaded object from memory */
3356 dictReleaseIterator(di
);
3359 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3361 /* Make sure data will not remain on the OS's output buffers */
3366 /* Use RENAME to make sure the DB file is changed atomically only
3367 * if the generated DB file is ok. */
3368 if (rename(tmpfile
,filename
) == -1) {
3369 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3373 redisLog(REDIS_NOTICE
,"DB saved on disk");
3375 server
.lastsave
= time(NULL
);
3381 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3382 if (di
) dictReleaseIterator(di
);
3386 static int rdbSaveBackground(char *filename
) {
3389 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3390 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3391 if ((childpid
= fork()) == 0) {
3393 if (server
.vm_enabled
) vmReopenSwapFile();
3395 if (rdbSave(filename
) == REDIS_OK
) {
3402 if (childpid
== -1) {
3403 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3407 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3408 server
.bgsavechildpid
= childpid
;
3411 return REDIS_OK
; /* unreached */
3414 static void rdbRemoveTempFile(pid_t childpid
) {
3417 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3421 static int rdbLoadType(FILE *fp
) {
3423 if (fread(&type
,1,1,fp
) == 0) return -1;
3427 static time_t rdbLoadTime(FILE *fp
) {
3429 if (fread(&t32
,4,1,fp
) == 0) return -1;
3430 return (time_t) t32
;
3433 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3434 * of this file for a description of how these are stored on disk.
3436 * isencoded is set to 1 if the value read is not actually a length but
3437 * an "encoding type", check the above comments for more info */
3438 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3439 unsigned char buf
[2];
3443 if (isencoded
) *isencoded
= 0;
3444 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3445 type
= (buf
[0]&0xC0)>>6;
3446 if (type
== REDIS_RDB_6BITLEN
) {
3447 /* Read a 6 bit len */
3449 } else if (type
== REDIS_RDB_ENCVAL
) {
3450 /* Read a 6 bit len encoding type */
3451 if (isencoded
) *isencoded
= 1;
3453 } else if (type
== REDIS_RDB_14BITLEN
) {
3454 /* Read a 14 bit len */
3455 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3456 return ((buf
[0]&0x3F)<<8)|buf
[1];
3458 /* Read a 32 bit len */
3459 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3464 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3465 unsigned char enc
[4];
3468 if (enctype
== REDIS_RDB_ENC_INT8
) {
3469 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3470 val
= (signed char)enc
[0];
3471 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3473 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3474 v
= enc
[0]|(enc
[1]<<8);
3476 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3478 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3479 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3482 val
= 0; /* anti-warning */
3485 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3488 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3489 unsigned int len
, clen
;
3490 unsigned char *c
= NULL
;
3493 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3494 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3495 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3496 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3497 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3498 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3500 return createObject(REDIS_STRING
,val
);
3507 static robj
*rdbLoadStringObject(FILE*fp
) {
3512 len
= rdbLoadLen(fp
,&isencoded
);
3515 case REDIS_RDB_ENC_INT8
:
3516 case REDIS_RDB_ENC_INT16
:
3517 case REDIS_RDB_ENC_INT32
:
3518 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3519 case REDIS_RDB_ENC_LZF
:
3520 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3526 if (len
== REDIS_RDB_LENERR
) return NULL
;
3527 val
= sdsnewlen(NULL
,len
);
3528 if (len
&& fread(val
,len
,1,fp
) == 0) {
3532 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3535 /* For information about double serialization check rdbSaveDoubleValue() */
3536 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3540 if (fread(&len
,1,1,fp
) == 0) return -1;
3542 case 255: *val
= R_NegInf
; return 0;
3543 case 254: *val
= R_PosInf
; return 0;
3544 case 253: *val
= R_Nan
; return 0;
3546 if (fread(buf
,len
,1,fp
) == 0) return -1;
3548 sscanf(buf
, "%lg", val
);
3553 /* Load a Redis object of the specified type from the specified file.
3554 * On success a newly allocated object is returned, otherwise NULL. */
3555 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3558 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3559 if (type
== REDIS_STRING
) {
3560 /* Read string value */
3561 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3562 tryObjectEncoding(o
);
3563 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3564 /* Read list/set value */
3567 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3568 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3569 /* It's faster to expand the dict to the right size asap in order
3570 * to avoid rehashing */
3571 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3572 dictExpand(o
->ptr
,listlen
);
3573 /* Load every single element of the list/set */
3577 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3578 tryObjectEncoding(ele
);
3579 if (type
== REDIS_LIST
) {
3580 listAddNodeTail((list
*)o
->ptr
,ele
);
3582 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3585 } else if (type
== REDIS_ZSET
) {
3586 /* Read sorted set value */
3590 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3591 o
= createZsetObject();
3593 /* Load every single element of the sorted set */
3596 double *score
= zmalloc(sizeof(double));
3598 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3599 tryObjectEncoding(ele
);
3600 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3601 dictAdd(zs
->dict
,ele
,score
);
3602 zslInsert(zs
->zsl
,*score
,ele
);
3603 incrRefCount(ele
); /* added to skiplist */
3605 } else if (type
== REDIS_HASH
) {
3608 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3609 o
= createHashObject();
3610 /* Too many entries? Use a hash table. */
3611 if (hashlen
> server
.hash_max_zipmap_entries
)
3612 convertToRealHash(o
);
3613 /* Load every key/value, then set it into the zipmap or hash
3614 * table, as needed. */
3618 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3619 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3620 /* If we are using a zipmap and there are too big values
3621 * the object is converted to real hash table encoding. */
3622 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3623 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3624 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3626 convertToRealHash(o
);
3629 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3630 unsigned char *zm
= o
->ptr
;
3632 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3633 val
->ptr
,sdslen(val
->ptr
),NULL
);
3638 tryObjectEncoding(key
);
3639 tryObjectEncoding(val
);
3640 dictAdd((dict
*)o
->ptr
,key
,val
);
3649 static int rdbLoad(char *filename
) {
3651 robj
*keyobj
= NULL
;
3653 int type
, retval
, rdbver
;
3654 dict
*d
= server
.db
[0].dict
;
3655 redisDb
*db
= server
.db
+0;
3657 time_t expiretime
= -1, now
= time(NULL
);
3658 long long loadedkeys
= 0;
3660 fp
= fopen(filename
,"r");
3661 if (!fp
) return REDIS_ERR
;
3662 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3664 if (memcmp(buf
,"REDIS",5) != 0) {
3666 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3669 rdbver
= atoi(buf
+5);
3672 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3679 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3680 if (type
== REDIS_EXPIRETIME
) {
3681 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3682 /* We read the time so we need to read the object type again */
3683 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3685 if (type
== REDIS_EOF
) break;
3686 /* Handle SELECT DB opcode as a special case */
3687 if (type
== REDIS_SELECTDB
) {
3688 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3690 if (dbid
>= (unsigned)server
.dbnum
) {
3691 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3694 db
= server
.db
+dbid
;
3699 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3701 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3702 /* Add the new object in the hash table */
3703 retval
= dictAdd(d
,keyobj
,o
);
3704 if (retval
== DICT_ERR
) {
3705 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3708 /* Set the expire time if needed */
3709 if (expiretime
!= -1) {
3710 setExpire(db
,keyobj
,expiretime
);
3711 /* Delete this key if already expired */
3712 if (expiretime
< now
) deleteKey(db
,keyobj
);
3716 /* Handle swapping while loading big datasets when VM is on */
3718 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3719 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3720 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3727 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3728 if (keyobj
) decrRefCount(keyobj
);
3729 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3731 return REDIS_ERR
; /* Just to avoid warning */
3734 /*================================== Commands =============================== */
3736 static void authCommand(redisClient
*c
) {
3737 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3738 c
->authenticated
= 1;
3739 addReply(c
,shared
.ok
);
3741 c
->authenticated
= 0;
3742 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3746 static void pingCommand(redisClient
*c
) {
3747 addReply(c
,shared
.pong
);
3750 static void echoCommand(redisClient
*c
) {
3751 addReplyBulk(c
,c
->argv
[1]);
3754 /*=================================== Strings =============================== */
3756 static void setGenericCommand(redisClient
*c
, int nx
) {
3759 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3760 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3761 if (retval
== DICT_ERR
) {
3763 /* If the key is about a swapped value, we want a new key object
3764 * to overwrite the old. So we delete the old key in the database.
3765 * This will also make sure that swap pages about the old object
3766 * will be marked as free. */
3767 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3768 incrRefCount(c
->argv
[1]);
3769 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3770 incrRefCount(c
->argv
[2]);
3772 addReply(c
,shared
.czero
);
3776 incrRefCount(c
->argv
[1]);
3777 incrRefCount(c
->argv
[2]);
3780 removeExpire(c
->db
,c
->argv
[1]);
3781 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3784 static void setCommand(redisClient
*c
) {
3785 setGenericCommand(c
,0);
3788 static void setnxCommand(redisClient
*c
) {
3789 setGenericCommand(c
,1);
3792 static int getGenericCommand(redisClient
*c
) {
3795 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3798 if (o
->type
!= REDIS_STRING
) {
3799 addReply(c
,shared
.wrongtypeerr
);
3807 static void getCommand(redisClient
*c
) {
3808 getGenericCommand(c
);
3811 static void getsetCommand(redisClient
*c
) {
3812 if (getGenericCommand(c
) == REDIS_ERR
) return;
3813 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3814 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3816 incrRefCount(c
->argv
[1]);
3818 incrRefCount(c
->argv
[2]);
3820 removeExpire(c
->db
,c
->argv
[1]);
3823 static void mgetCommand(redisClient
*c
) {
3826 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3827 for (j
= 1; j
< c
->argc
; j
++) {
3828 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3830 addReply(c
,shared
.nullbulk
);
3832 if (o
->type
!= REDIS_STRING
) {
3833 addReply(c
,shared
.nullbulk
);
3841 static void msetGenericCommand(redisClient
*c
, int nx
) {
3842 int j
, busykeys
= 0;
3844 if ((c
->argc
% 2) == 0) {
3845 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3848 /* Handle the NX flag. The MSETNX semantic is to return zero and
3849 * set nothing at all if at least one key already exists. */
3851 for (j
= 1; j
< c
->argc
; j
+= 2) {
3852 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3858 addReply(c
, shared
.czero
);
3862 for (j
= 1; j
< c
->argc
; j
+= 2) {
3865 tryObjectEncoding(c
->argv
[j
+1]);
3866 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3867 if (retval
== DICT_ERR
) {
3868 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3869 incrRefCount(c
->argv
[j
+1]);
3871 incrRefCount(c
->argv
[j
]);
3872 incrRefCount(c
->argv
[j
+1]);
3874 removeExpire(c
->db
,c
->argv
[j
]);
3876 server
.dirty
+= (c
->argc
-1)/2;
3877 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3880 static void msetCommand(redisClient
*c
) {
3881 msetGenericCommand(c
,0);
3884 static void msetnxCommand(redisClient
*c
) {
3885 msetGenericCommand(c
,1);
3888 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3893 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3897 if (o
->type
!= REDIS_STRING
) {
3902 if (o
->encoding
== REDIS_ENCODING_RAW
)
3903 value
= strtoll(o
->ptr
, &eptr
, 10);
3904 else if (o
->encoding
== REDIS_ENCODING_INT
)
3905 value
= (long)o
->ptr
;
3907 redisAssert(1 != 1);
3912 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3913 tryObjectEncoding(o
);
3914 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3915 if (retval
== DICT_ERR
) {
3916 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3917 removeExpire(c
->db
,c
->argv
[1]);
3919 incrRefCount(c
->argv
[1]);
3922 addReply(c
,shared
.colon
);
3924 addReply(c
,shared
.crlf
);
3927 static void incrCommand(redisClient
*c
) {
3928 incrDecrCommand(c
,1);
3931 static void decrCommand(redisClient
*c
) {
3932 incrDecrCommand(c
,-1);
3935 static void incrbyCommand(redisClient
*c
) {
3936 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3937 incrDecrCommand(c
,incr
);
3940 static void decrbyCommand(redisClient
*c
) {
3941 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3942 incrDecrCommand(c
,-incr
);
3945 static void appendCommand(redisClient
*c
) {
3950 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3952 /* Create the key */
3953 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3954 incrRefCount(c
->argv
[1]);
3955 incrRefCount(c
->argv
[2]);
3956 totlen
= stringObjectLen(c
->argv
[2]);
3960 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3963 o
= dictGetEntryVal(de
);
3964 if (o
->type
!= REDIS_STRING
) {
3965 addReply(c
,shared
.wrongtypeerr
);
3968 /* If the object is specially encoded or shared we have to make
3970 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
3971 robj
*decoded
= getDecodedObject(o
);
3973 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
3974 decrRefCount(decoded
);
3975 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3978 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
3979 o
->ptr
= sdscatlen(o
->ptr
,
3980 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
3982 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
3983 (unsigned long) c
->argv
[2]->ptr
);
3985 totlen
= sdslen(o
->ptr
);
3988 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
3991 static void substrCommand(redisClient
*c
) {
3993 long start
= atoi(c
->argv
[2]->ptr
);
3994 long end
= atoi(c
->argv
[3]->ptr
);
3995 size_t rangelen
, strlen
;
3998 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
3999 checkType(c
,o
,REDIS_STRING
)) return;
4001 o
= getDecodedObject(o
);
4002 strlen
= sdslen(o
->ptr
);
4004 /* convert negative indexes */
4005 if (start
< 0) start
= strlen
+start
;
4006 if (end
< 0) end
= strlen
+end
;
4007 if (start
< 0) start
= 0;
4008 if (end
< 0) end
= 0;
4010 /* indexes sanity checks */
4011 if (start
> end
|| (size_t)start
>= strlen
) {
4012 /* Out of range start or start > end result in null reply */
4013 addReply(c
,shared
.nullbulk
);
4017 if ((size_t)end
>= strlen
) end
= strlen
-1;
4018 rangelen
= (end
-start
)+1;
4020 /* Return the result */
4021 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4022 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4023 addReplySds(c
,range
);
4024 addReply(c
,shared
.crlf
);
4028 /* ========================= Type agnostic commands ========================= */
4030 static void delCommand(redisClient
*c
) {
4033 for (j
= 1; j
< c
->argc
; j
++) {
4034 if (deleteKey(c
->db
,c
->argv
[j
])) {
4039 addReplyLong(c
,deleted
);
4042 static void existsCommand(redisClient
*c
) {
4043 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4046 static void selectCommand(redisClient
*c
) {
4047 int id
= atoi(c
->argv
[1]->ptr
);
4049 if (selectDb(c
,id
) == REDIS_ERR
) {
4050 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4052 addReply(c
,shared
.ok
);
4056 static void randomkeyCommand(redisClient
*c
) {
4060 de
= dictGetRandomKey(c
->db
->dict
);
4061 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4064 addReply(c
,shared
.plus
);
4065 addReply(c
,shared
.crlf
);
4067 addReply(c
,shared
.plus
);
4068 addReply(c
,dictGetEntryKey(de
));
4069 addReply(c
,shared
.crlf
);
4073 static void keysCommand(redisClient
*c
) {
4076 sds pattern
= c
->argv
[1]->ptr
;
4077 int plen
= sdslen(pattern
);
4078 unsigned long numkeys
= 0;
4079 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4081 di
= dictGetIterator(c
->db
->dict
);
4083 decrRefCount(lenobj
);
4084 while((de
= dictNext(di
)) != NULL
) {
4085 robj
*keyobj
= dictGetEntryKey(de
);
4087 sds key
= keyobj
->ptr
;
4088 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4089 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4090 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4091 addReplyBulk(c
,keyobj
);
4096 dictReleaseIterator(di
);
4097 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4100 static void dbsizeCommand(redisClient
*c
) {
4102 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4105 static void lastsaveCommand(redisClient
*c
) {
4107 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4110 static void typeCommand(redisClient
*c
) {
4114 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4119 case REDIS_STRING
: type
= "+string"; break;
4120 case REDIS_LIST
: type
= "+list"; break;
4121 case REDIS_SET
: type
= "+set"; break;
4122 case REDIS_ZSET
: type
= "+zset"; break;
4123 case REDIS_HASH
: type
= "+hash"; break;
4124 default: type
= "+unknown"; break;
4127 addReplySds(c
,sdsnew(type
));
4128 addReply(c
,shared
.crlf
);
4131 static void saveCommand(redisClient
*c
) {
4132 if (server
.bgsavechildpid
!= -1) {
4133 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4136 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4137 addReply(c
,shared
.ok
);
4139 addReply(c
,shared
.err
);
4143 static void bgsaveCommand(redisClient
*c
) {
4144 if (server
.bgsavechildpid
!= -1) {
4145 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4148 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4149 char *status
= "+Background saving started\r\n";
4150 addReplySds(c
,sdsnew(status
));
4152 addReply(c
,shared
.err
);
4156 static void shutdownCommand(redisClient
*c
) {
4157 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4158 /* Kill the saving child if there is a background saving in progress.
4159 We want to avoid race conditions, for instance our saving child may
4160 overwrite the synchronous saving done by SHUTDOWN. */
4161 if (server
.bgsavechildpid
!= -1) {
4162 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4163 kill(server
.bgsavechildpid
,SIGKILL
);
4164 rdbRemoveTempFile(server
.bgsavechildpid
);
4166 if (server
.appendonly
) {
4167 /* Append only file: fsync() the AOF and exit */
4168 fsync(server
.appendfd
);
4169 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4172 /* Snapshotting. Perform a SYNC SAVE and exit */
4173 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4174 if (server
.daemonize
)
4175 unlink(server
.pidfile
);
4176 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4177 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4178 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4181 /* Ooops.. error saving! The best we can do is to continue
4182 * operating. Note that if there was a background saving process,
4183 * in the next cron() Redis will be notified that the background
4184 * saving aborted, handling special stuff like slaves pending for
4185 * synchronization... */
4186 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4188 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4193 static void renameGenericCommand(redisClient
*c
, int nx
) {
4196 /* To use the same key as src and dst is probably an error */
4197 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4198 addReply(c
,shared
.sameobjecterr
);
4202 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4206 deleteIfVolatile(c
->db
,c
->argv
[2]);
4207 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4210 addReply(c
,shared
.czero
);
4213 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4215 incrRefCount(c
->argv
[2]);
4217 deleteKey(c
->db
,c
->argv
[1]);
4219 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4222 static void renameCommand(redisClient
*c
) {
4223 renameGenericCommand(c
,0);
4226 static void renamenxCommand(redisClient
*c
) {
4227 renameGenericCommand(c
,1);
4230 static void moveCommand(redisClient
*c
) {
4235 /* Obtain source and target DB pointers */
4238 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4239 addReply(c
,shared
.outofrangeerr
);
4243 selectDb(c
,srcid
); /* Back to the source DB */
4245 /* If the user specified the source DB itself as the target
4246 * DB, it is probably an error. */
4248 addReply(c
,shared
.sameobjecterr
);
4252 /* Check if the element exists and get a reference */
4253 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4255 addReply(c
,shared
.czero
);
4259 /* Try to add the element to the target DB */
4260 deleteIfVolatile(dst
,c
->argv
[1]);
4261 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4262 addReply(c
,shared
.czero
);
4265 incrRefCount(c
->argv
[1]);
4268 /* OK! key moved, free the entry in the source DB */
4269 deleteKey(src
,c
->argv
[1]);
4271 addReply(c
,shared
.cone
);
4274 /* =================================== Lists ================================ */
4275 static void pushGenericCommand(redisClient
*c
, int where
) {
4279 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4281 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4282 addReply(c
,shared
.cone
);
4285 lobj
= createListObject();
4287 if (where
== REDIS_HEAD
) {
4288 listAddNodeHead(list
,c
->argv
[2]);
4290 listAddNodeTail(list
,c
->argv
[2]);
4292 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4293 incrRefCount(c
->argv
[1]);
4294 incrRefCount(c
->argv
[2]);
4296 if (lobj
->type
!= REDIS_LIST
) {
4297 addReply(c
,shared
.wrongtypeerr
);
4300 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4301 addReply(c
,shared
.cone
);
4305 if (where
== REDIS_HEAD
) {
4306 listAddNodeHead(list
,c
->argv
[2]);
4308 listAddNodeTail(list
,c
->argv
[2]);
4310 incrRefCount(c
->argv
[2]);
4313 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4316 static void lpushCommand(redisClient
*c
) {
4317 pushGenericCommand(c
,REDIS_HEAD
);
4320 static void rpushCommand(redisClient
*c
) {
4321 pushGenericCommand(c
,REDIS_TAIL
);
4324 static void llenCommand(redisClient
*c
) {
4328 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4329 checkType(c
,o
,REDIS_LIST
)) return;
4332 addReplyUlong(c
,listLength(l
));
4335 static void lindexCommand(redisClient
*c
) {
4337 int index
= atoi(c
->argv
[2]->ptr
);
4341 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4342 checkType(c
,o
,REDIS_LIST
)) return;
4345 ln
= listIndex(list
, index
);
4347 addReply(c
,shared
.nullbulk
);
4349 robj
*ele
= listNodeValue(ln
);
4350 addReplyBulk(c
,ele
);
4354 static void lsetCommand(redisClient
*c
) {
4356 int index
= atoi(c
->argv
[2]->ptr
);
4360 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4361 checkType(c
,o
,REDIS_LIST
)) return;
4364 ln
= listIndex(list
, index
);
4366 addReply(c
,shared
.outofrangeerr
);
4368 robj
*ele
= listNodeValue(ln
);
4371 listNodeValue(ln
) = c
->argv
[3];
4372 incrRefCount(c
->argv
[3]);
4373 addReply(c
,shared
.ok
);
4378 static void popGenericCommand(redisClient
*c
, int where
) {
4383 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4384 checkType(c
,o
,REDIS_LIST
)) return;
4387 if (where
== REDIS_HEAD
)
4388 ln
= listFirst(list
);
4390 ln
= listLast(list
);
4393 addReply(c
,shared
.nullbulk
);
4395 robj
*ele
= listNodeValue(ln
);
4396 addReplyBulk(c
,ele
);
4397 listDelNode(list
,ln
);
4398 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4403 static void lpopCommand(redisClient
*c
) {
4404 popGenericCommand(c
,REDIS_HEAD
);
4407 static void rpopCommand(redisClient
*c
) {
4408 popGenericCommand(c
,REDIS_TAIL
);
4411 static void lrangeCommand(redisClient
*c
) {
4413 int start
= atoi(c
->argv
[2]->ptr
);
4414 int end
= atoi(c
->argv
[3]->ptr
);
4421 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4422 checkType(c
,o
,REDIS_LIST
)) return;
4424 llen
= listLength(list
);
4426 /* convert negative indexes */
4427 if (start
< 0) start
= llen
+start
;
4428 if (end
< 0) end
= llen
+end
;
4429 if (start
< 0) start
= 0;
4430 if (end
< 0) end
= 0;
4432 /* indexes sanity checks */
4433 if (start
> end
|| start
>= llen
) {
4434 /* Out of range start or start > end result in empty list */
4435 addReply(c
,shared
.emptymultibulk
);
4438 if (end
>= llen
) end
= llen
-1;
4439 rangelen
= (end
-start
)+1;
4441 /* Return the result in form of a multi-bulk reply */
4442 ln
= listIndex(list
, start
);
4443 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4444 for (j
= 0; j
< rangelen
; j
++) {
4445 ele
= listNodeValue(ln
);
4446 addReplyBulk(c
,ele
);
4451 static void ltrimCommand(redisClient
*c
) {
4453 int start
= atoi(c
->argv
[2]->ptr
);
4454 int end
= atoi(c
->argv
[3]->ptr
);
4456 int j
, ltrim
, rtrim
;
4460 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4461 checkType(c
,o
,REDIS_LIST
)) return;
4463 llen
= listLength(list
);
4465 /* convert negative indexes */
4466 if (start
< 0) start
= llen
+start
;
4467 if (end
< 0) end
= llen
+end
;
4468 if (start
< 0) start
= 0;
4469 if (end
< 0) end
= 0;
4471 /* indexes sanity checks */
4472 if (start
> end
|| start
>= llen
) {
4473 /* Out of range start or start > end result in empty list */
4477 if (end
>= llen
) end
= llen
-1;
4482 /* Remove list elements to perform the trim */
4483 for (j
= 0; j
< ltrim
; j
++) {
4484 ln
= listFirst(list
);
4485 listDelNode(list
,ln
);
4487 for (j
= 0; j
< rtrim
; j
++) {
4488 ln
= listLast(list
);
4489 listDelNode(list
,ln
);
4491 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4493 addReply(c
,shared
.ok
);
4496 static void lremCommand(redisClient
*c
) {
4499 listNode
*ln
, *next
;
4500 int toremove
= atoi(c
->argv
[2]->ptr
);
4504 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4505 checkType(c
,o
,REDIS_LIST
)) return;
4509 toremove
= -toremove
;
4512 ln
= fromtail
? list
->tail
: list
->head
;
4514 robj
*ele
= listNodeValue(ln
);
4516 next
= fromtail
? ln
->prev
: ln
->next
;
4517 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4518 listDelNode(list
,ln
);
4521 if (toremove
&& removed
== toremove
) break;
4525 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4526 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4529 /* These are the semantics of this command:
4530 * RPOPLPUSH srclist dstlist:
4531 * IF LLEN(srclist) > 0
4532 * element = RPOP srclist
4533 * LPUSH dstlist element
4540 * The idea is to be able to get an element from a list in a reliable way
4541 * since the element is not just returned but pushed onto another list
4542 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4544 static void rpoplpushcommand(redisClient
*c
) {
4549 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4550 checkType(c
,sobj
,REDIS_LIST
)) return;
4551 srclist
= sobj
->ptr
;
4552 ln
= listLast(srclist
);
4555 addReply(c
,shared
.nullbulk
);
4557 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4558 robj
*ele
= listNodeValue(ln
);
4561 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4562 addReply(c
,shared
.wrongtypeerr
);
4566 /* Add the element to the target list (unless it's directly
4567 * passed to some BLPOP-ing client) */
4568 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4570 /* Create the list if the key does not exist */
4571 dobj
= createListObject();
4572 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4573 incrRefCount(c
->argv
[2]);
4575 dstlist
= dobj
->ptr
;
4576 listAddNodeHead(dstlist
,ele
);
4580 /* Send the element to the client as reply as well */
4581 addReplyBulk(c
,ele
);
4583 /* Finally remove the element from the source list */
4584 listDelNode(srclist
,ln
);
4585 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4590 /* ==================================== Sets ================================ */
4592 static void saddCommand(redisClient
*c
) {
4595 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4597 set
= createSetObject();
4598 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4599 incrRefCount(c
->argv
[1]);
4601 if (set
->type
!= REDIS_SET
) {
4602 addReply(c
,shared
.wrongtypeerr
);
4606 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4607 incrRefCount(c
->argv
[2]);
4609 addReply(c
,shared
.cone
);
4611 addReply(c
,shared
.czero
);
4615 static void sremCommand(redisClient
*c
) {
4618 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4619 checkType(c
,set
,REDIS_SET
)) return;
4621 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4623 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4624 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4625 addReply(c
,shared
.cone
);
4627 addReply(c
,shared
.czero
);
4631 static void smoveCommand(redisClient
*c
) {
4632 robj
*srcset
, *dstset
;
4634 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4635 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4637 /* If the source key does not exist return 0, if it's of the wrong type
4639 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4640 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4643 /* Error if the destination key is not a set as well */
4644 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4645 addReply(c
,shared
.wrongtypeerr
);
4648 /* Remove the element from the source set */
4649 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4650 /* Key not found in the src set! return zero */
4651 addReply(c
,shared
.czero
);
4654 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4655 deleteKey(c
->db
,c
->argv
[1]);
4657 /* Add the element to the destination set */
4659 dstset
= createSetObject();
4660 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4661 incrRefCount(c
->argv
[2]);
4663 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4664 incrRefCount(c
->argv
[3]);
4665 addReply(c
,shared
.cone
);
4668 static void sismemberCommand(redisClient
*c
) {
4671 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4672 checkType(c
,set
,REDIS_SET
)) return;
4674 if (dictFind(set
->ptr
,c
->argv
[2]))
4675 addReply(c
,shared
.cone
);
4677 addReply(c
,shared
.czero
);
4680 static void scardCommand(redisClient
*c
) {
4684 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4685 checkType(c
,o
,REDIS_SET
)) return;
4688 addReplyUlong(c
,dictSize(s
));
4691 static void spopCommand(redisClient
*c
) {
4695 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4696 checkType(c
,set
,REDIS_SET
)) return;
4698 de
= dictGetRandomKey(set
->ptr
);
4700 addReply(c
,shared
.nullbulk
);
4702 robj
*ele
= dictGetEntryKey(de
);
4704 addReplyBulk(c
,ele
);
4705 dictDelete(set
->ptr
,ele
);
4706 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4707 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4712 static void srandmemberCommand(redisClient
*c
) {
4716 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4717 checkType(c
,set
,REDIS_SET
)) return;
4719 de
= dictGetRandomKey(set
->ptr
);
4721 addReply(c
,shared
.nullbulk
);
4723 robj
*ele
= dictGetEntryKey(de
);
4725 addReplyBulk(c
,ele
);
4729 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4730 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4732 return dictSize(*d1
)-dictSize(*d2
);
4735 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4736 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4739 robj
*lenobj
= NULL
, *dstset
= NULL
;
4740 unsigned long j
, cardinality
= 0;
4742 for (j
= 0; j
< setsnum
; j
++) {
4746 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4747 lookupKeyRead(c
->db
,setskeys
[j
]);
4751 if (deleteKey(c
->db
,dstkey
))
4753 addReply(c
,shared
.czero
);
4755 addReply(c
,shared
.nullmultibulk
);
4759 if (setobj
->type
!= REDIS_SET
) {
4761 addReply(c
,shared
.wrongtypeerr
);
4764 dv
[j
] = setobj
->ptr
;
4766 /* Sort sets from the smallest to largest, this will improve our
4767 * algorithm's performance */
4768 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4770 /* The first thing we should output is the total number of elements...
4771 * since this is a multi-bulk write, but at this stage we don't know
4772 * the intersection set size, so we use a trick, append an empty object
4773 * to the output list and save the pointer to later modify it with the
4776 lenobj
= createObject(REDIS_STRING
,NULL
);
4778 decrRefCount(lenobj
);
4780 /* If we have a target key where to store the resulting set
4781 * create this key with an empty set inside */
4782 dstset
= createSetObject();
4785 /* Iterate all the elements of the first (smallest) set, and test
4786 * the element against all the other sets, if at least one set does
4787 * not include the element it is discarded */
4788 di
= dictGetIterator(dv
[0]);
4790 while((de
= dictNext(di
)) != NULL
) {
4793 for (j
= 1; j
< setsnum
; j
++)
4794 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4796 continue; /* at least one set does not contain the member */
4797 ele
= dictGetEntryKey(de
);
4799 addReplyBulk(c
,ele
);
4802 dictAdd(dstset
->ptr
,ele
,NULL
);
4806 dictReleaseIterator(di
);
4809 /* Store the resulting set into the target, if the intersection
4810 * is not an empty set. */
4811 deleteKey(c
->db
,dstkey
);
4812 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4813 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4814 incrRefCount(dstkey
);
4815 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4817 decrRefCount(dstset
);
4818 addReply(c
,shared
.czero
);
4822 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4827 static void sinterCommand(redisClient
*c
) {
4828 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4831 static void sinterstoreCommand(redisClient
*c
) {
4832 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4835 #define REDIS_OP_UNION 0
4836 #define REDIS_OP_DIFF 1
4837 #define REDIS_OP_INTER 2
4839 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4840 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4843 robj
*dstset
= NULL
;
4844 int j
, cardinality
= 0;
4846 for (j
= 0; j
< setsnum
; j
++) {
4850 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4851 lookupKeyRead(c
->db
,setskeys
[j
]);
4856 if (setobj
->type
!= REDIS_SET
) {
4858 addReply(c
,shared
.wrongtypeerr
);
4861 dv
[j
] = setobj
->ptr
;
4864 /* We need a temp set object to store our union. If the dstkey
4865 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4866 * this set object will be the resulting object to set into the target key*/
4867 dstset
= createSetObject();
4869 /* Iterate all the elements of all the sets, add every element a single
4870 * time to the result set */
4871 for (j
= 0; j
< setsnum
; j
++) {
4872 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4873 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4875 di
= dictGetIterator(dv
[j
]);
4877 while((de
= dictNext(di
)) != NULL
) {
4880 /* dictAdd will not add the same element multiple times */
4881 ele
= dictGetEntryKey(de
);
4882 if (op
== REDIS_OP_UNION
|| j
== 0) {
4883 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4887 } else if (op
== REDIS_OP_DIFF
) {
4888 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4893 dictReleaseIterator(di
);
4895 /* result set is empty? Exit asap. */
4896 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4899 /* Output the content of the resulting set, if not in STORE mode */
4901 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4902 di
= dictGetIterator(dstset
->ptr
);
4903 while((de
= dictNext(di
)) != NULL
) {
4906 ele
= dictGetEntryKey(de
);
4907 addReplyBulk(c
,ele
);
4909 dictReleaseIterator(di
);
4910 decrRefCount(dstset
);
4912 /* If we have a target key where to store the resulting set
4913 * create this key with the result set inside */
4914 deleteKey(c
->db
,dstkey
);
4915 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4916 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4917 incrRefCount(dstkey
);
4918 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4920 decrRefCount(dstset
);
4921 addReply(c
,shared
.czero
);
4928 static void sunionCommand(redisClient
*c
) {
4929 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4932 static void sunionstoreCommand(redisClient
*c
) {
4933 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4936 static void sdiffCommand(redisClient
*c
) {
4937 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4940 static void sdiffstoreCommand(redisClient
*c
) {
4941 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4944 /* ==================================== ZSets =============================== */
4946 /* ZSETs are ordered sets using two data structures to hold the same elements
4947 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4950 * The elements are added to a hash table mapping Redis objects to scores.
4951 * At the same time the elements are added to a skip list mapping scores
4952 * to Redis objects (so objects are sorted by scores in this "view"). */
4954 /* This skiplist implementation is almost a C translation of the original
4955 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4956 * Alternative to Balanced Trees", modified in three ways:
4957 * a) this implementation allows for repeated values.
4958 * b) the comparison is not just by key (our 'score') but by satellite data.
4959 * c) there is a back pointer, so it's a doubly linked list with the back
4960 * pointers being only at "level 1". This allows to traverse the list
4961 * from tail to head, useful for ZREVRANGE. */
4963 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4964 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4966 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4968 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
4974 static zskiplist
*zslCreate(void) {
4978 zsl
= zmalloc(sizeof(*zsl
));
4981 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
4982 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
4983 zsl
->header
->forward
[j
] = NULL
;
4985 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
4986 if (j
< ZSKIPLIST_MAXLEVEL
-1)
4987 zsl
->header
->span
[j
] = 0;
4989 zsl
->header
->backward
= NULL
;
4994 static void zslFreeNode(zskiplistNode
*node
) {
4995 decrRefCount(node
->obj
);
4996 zfree(node
->forward
);
5001 static void zslFree(zskiplist
*zsl
) {
5002 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5004 zfree(zsl
->header
->forward
);
5005 zfree(zsl
->header
->span
);
5008 next
= node
->forward
[0];
5015 static int zslRandomLevel(void) {
5017 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5022 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5023 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5024 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5028 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5029 /* store rank that is crossed to reach the insert position */
5030 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5032 while (x
->forward
[i
] &&
5033 (x
->forward
[i
]->score
< score
||
5034 (x
->forward
[i
]->score
== score
&&
5035 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5036 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5041 /* we assume the key is not already inside, since we allow duplicated
5042 * scores, and the re-insertion of score and redis object should never
5043 * happen since the caller of zslInsert() should test in the hash table
5044 * if the element is already inside or not. */
5045 level
= zslRandomLevel();
5046 if (level
> zsl
->level
) {
5047 for (i
= zsl
->level
; i
< level
; i
++) {
5049 update
[i
] = zsl
->header
;
5050 update
[i
]->span
[i
-1] = zsl
->length
;
5054 x
= zslCreateNode(level
,score
,obj
);
5055 for (i
= 0; i
< level
; i
++) {
5056 x
->forward
[i
] = update
[i
]->forward
[i
];
5057 update
[i
]->forward
[i
] = x
;
5059 /* update span covered by update[i] as x is inserted here */
5061 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5062 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5066 /* increment span for untouched levels */
5067 for (i
= level
; i
< zsl
->level
; i
++) {
5068 update
[i
]->span
[i
-1]++;
5071 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5073 x
->forward
[0]->backward
= x
;
5079 /* Internal function used by zslDelete, zslDeleteRangeByScore and zslDeleteRangeByRank */
5080 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5082 for (i
= 0; i
< zsl
->level
; i
++) {
5083 if (update
[i
]->forward
[i
] == x
) {
5085 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5087 update
[i
]->forward
[i
] = x
->forward
[i
];
5089 /* invariant: i > 0, because update[0]->forward[0]
5090 * is always equal to x */
5091 update
[i
]->span
[i
-1] -= 1;
5094 if (x
->forward
[0]) {
5095 x
->forward
[0]->backward
= x
->backward
;
5097 zsl
->tail
= x
->backward
;
5099 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5104 /* Delete an element with matching score/object from the skiplist. */
5105 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5106 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5110 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5111 while (x
->forward
[i
] &&
5112 (x
->forward
[i
]->score
< score
||
5113 (x
->forward
[i
]->score
== score
&&
5114 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5118 /* We may have multiple elements with the same score, what we need
5119 * is to find the element with both the right score and object. */
5121 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5122 zslDeleteNode(zsl
, x
, update
);
5126 return 0; /* not found */
5128 return 0; /* not found */
5131 /* Delete all the elements with score between min and max from the skiplist.
5132 * Min and max are inclusive, so an element with score >= min && score <= max is deleted.
5133 * Note that this function takes the reference to the hash table view of the
5134 * sorted set, in order to remove the elements from the hash table too. */
5135 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5136 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5137 unsigned long removed
= 0;
5141 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5142 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5146 /* We may have multiple elements with the same score, what we need
5147 * is to find the element with both the right score and object. */
5149 while (x
&& x
->score
<= max
) {
5150 zskiplistNode
*next
= x
->forward
[0];
5151 zslDeleteNode(zsl
, x
, update
);
5152 dictDelete(dict
,x
->obj
);
5157 return removed
; /* not found */
5160 /* Delete all the elements with rank between start and end from the skiplist.
5161 * Start and end are inclusive. Note that start and end need to be 1-based */
5162 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5163 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5164 unsigned long traversed
= 0, removed
= 0;
5168 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5169 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5170 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5178 while (x
&& traversed
<= end
) {
5179 zskiplistNode
*next
= x
->forward
[0];
5180 zslDeleteNode(zsl
, x
, update
);
5181 dictDelete(dict
,x
->obj
);
5190 /* Find the first node having a score equal to or greater than the specified one.
5191 * Returns NULL if there is no match. */
5192 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5197 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5198 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5201 /* We may have multiple elements with the same score, what we need
5202 * is to find the element with both the right score and object. */
5203 return x
->forward
[0];
5206 /* Find the rank for an element by both score and key.
5207 * Returns 0 when the element cannot be found, rank otherwise.
5208 * Note that the rank is 1-based due to the span of zsl->header to the
5210 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5212 unsigned long rank
= 0;
5216 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5217 while (x
->forward
[i
] &&
5218 (x
->forward
[i
]->score
< score
||
5219 (x
->forward
[i
]->score
== score
&&
5220 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5221 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5225 /* x might be equal to zsl->header, so test if obj is non-NULL */
5226 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5233 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5234 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5236 unsigned long traversed
= 0;
5240 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5241 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5243 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5246 if (traversed
== rank
) {
5253 /* The actual Z-commands implementations */
5255 /* This generic command implements both ZADD and ZINCRBY.
5256 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5257 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5258 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5263 zsetobj
= lookupKeyWrite(c
->db
,key
);
5264 if (zsetobj
== NULL
) {
5265 zsetobj
= createZsetObject();
5266 dictAdd(c
->db
->dict
,key
,zsetobj
);
5269 if (zsetobj
->type
!= REDIS_ZSET
) {
5270 addReply(c
,shared
.wrongtypeerr
);
5276 /* Ok now since we implement both ZADD and ZINCRBY here the code
5277 * needs to handle the two different conditions. It's all about setting
5278 * '*score', that is, the new score to set, to the right value. */
5279 score
= zmalloc(sizeof(double));
5283 /* Read the old score. If the element was not present, start from 0 */
5284 de
= dictFind(zs
->dict
,ele
);
5286 double *oldscore
= dictGetEntryVal(de
);
5287 *score
= *oldscore
+ scoreval
;
5295 /* What follows is a simple remove and re-insert operation that is common
5296 * to both ZADD and ZINCRBY... */
5297 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5298 /* case 1: New element */
5299 incrRefCount(ele
); /* added to hash */
5300 zslInsert(zs
->zsl
,*score
,ele
);
5301 incrRefCount(ele
); /* added to skiplist */
5304 addReplyDouble(c
,*score
);
5306 addReply(c
,shared
.cone
);
5311 /* case 2: Score update operation */
5312 de
= dictFind(zs
->dict
,ele
);
5313 redisAssert(de
!= NULL
);
5314 oldscore
= dictGetEntryVal(de
);
5315 if (*score
!= *oldscore
) {
5318 /* Remove and insert the element in the skip list with new score */
5319 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5320 redisAssert(deleted
!= 0);
5321 zslInsert(zs
->zsl
,*score
,ele
);
5323 /* Update the score in the hash table */
5324 dictReplace(zs
->dict
,ele
,score
);
5330 addReplyDouble(c
,*score
);
5332 addReply(c
,shared
.czero
);
5336 static void zaddCommand(redisClient
*c
) {
5339 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5340 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5343 static void zincrbyCommand(redisClient
*c
) {
5346 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5347 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5350 static void zremCommand(redisClient
*c
) {
5357 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5358 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5361 de
= dictFind(zs
->dict
,c
->argv
[2]);
5363 addReply(c
,shared
.czero
);
5366 /* Delete from the skiplist */
5367 oldscore
= dictGetEntryVal(de
);
5368 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5369 redisAssert(deleted
!= 0);
5371 /* Delete from the hash table */
5372 dictDelete(zs
->dict
,c
->argv
[2]);
5373 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5374 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5376 addReply(c
,shared
.cone
);
5379 static void zremrangebyscoreCommand(redisClient
*c
) {
5380 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5381 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5386 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5387 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5390 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5391 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5392 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5393 server
.dirty
+= deleted
;
5394 addReplyLong(c
,deleted
);
5397 static void zremrangebyrankCommand(redisClient
*c
) {
5398 int start
= atoi(c
->argv
[2]->ptr
);
5399 int end
= atoi(c
->argv
[3]->ptr
);
5405 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5406 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5408 llen
= zs
->zsl
->length
;
5410 /* convert negative indexes */
5411 if (start
< 0) start
= llen
+start
;
5412 if (end
< 0) end
= llen
+end
;
5413 if (start
< 0) start
= 0;
5414 if (end
< 0) end
= 0;
5416 /* indexes sanity checks */
5417 if (start
> end
|| start
>= llen
) {
5418 addReply(c
,shared
.czero
);
5421 if (end
>= llen
) end
= llen
-1;
5423 /* increment start and end because zsl*Rank functions
5424 * use 1-based rank */
5425 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5426 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5427 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5428 server
.dirty
+= deleted
;
5429 addReplyLong(c
, deleted
);
5437 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5438 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5439 unsigned long size1
, size2
;
5440 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5441 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5442 return size1
- size2
;
/* Aggregation modes understood by zunionInterAggregate(). */
5445 #define REDIS_AGGR_SUM 1
5446 #define REDIS_AGGR_MIN 2
5447 #define REDIS_AGGR_MAX 3
/* Fold 'val' into '*target' according to 'aggregate':
 *   REDIS_AGGR_SUM -> *target + val
 *   REDIS_AGGR_MIN -> the smaller of val and *target
 *   REDIS_AGGR_MAX -> the larger of val and *target
 * NOTE(review): the branch leading to the always-failing redisAssert() is
 * not visible in this listing; presumably it handles unknown modes. */
5449 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5450 if (aggregate
== REDIS_AGGR_SUM
) {
5451 *target
= *target
+ val
;
5452 } else if (aggregate
== REDIS_AGGR_MIN
) {
5453 *target
= val
< *target
? val
: *target
;
5454 } else if (aggregate
== REDIS_AGGR_MAX
) {
5455 *target
= val
> *target
? val
: *target
;
5458 redisAssert(0 != 0);
/* Shared implementation of ZUNION and ZINTER.
 *   c      - client issuing the command; argv[2] is the number of input
 *            keys (parsed with atoi()) and the keys start at argv[3].
 *   dstkey - key under which the resulting sorted set is stored.
 *   op     - REDIS_OP_UNION or REDIS_OP_INTER.
 * After the keys, optional "weights" and "aggregate" (sum/min/max) arguments
 * are parsed. Each source gets a weight (default 1.0); scores are multiplied
 * by the weight and combined with zunionInterAggregate(). The sources are
 * sorted by cardinality before the result is built. The existing dstkey is
 * deleted; a non-empty result is stored and its length is returned, otherwise
 * shared.czero is returned.
 * NOTE(review): many lines of this function (declarations, error-path
 * cleanup, else branches, closing braces) are not visible in this listing. */
5462 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5464 int aggregate
= REDIS_AGGR_SUM
;
5471 /* expect zsetnum input keys to be given */
5472 zsetnum
= atoi(c
->argv
[2]->ptr
);
5474 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5478 /* test if the expected number of keys would overflow */
5479 if (3+zsetnum
> c
->argc
) {
5480 addReply(c
,shared
.syntaxerr
);
5484 /* read keys to be used for input */
5485 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5486 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5487 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
/* NOTE(review): zsetobj is dereferenced below; the guard for a missing key
 * is presumably on a line not visible here - confirm. */
5491 if (zsetobj
->type
!= REDIS_ZSET
) {
5493 addReply(c
,shared
.wrongtypeerr
);
5496 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5499 /* default all weights to 1 */
5500 src
[i
].weight
= 1.0;
5503 /* parse optional extra arguments */
5505 int remaining
= c
->argc
- j
;
5508 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5510 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5511 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5513 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5515 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5516 aggregate
= REDIS_AGGR_SUM
;
5517 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5518 aggregate
= REDIS_AGGR_MIN
;
5519 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5520 aggregate
= REDIS_AGGR_MAX
;
5523 addReply(c
,shared
.syntaxerr
);
5529 addReply(c
,shared
.syntaxerr
);
5535 /* sort sets from the smallest to largest, this will improve our
5536 * algorithm's performance */
5537 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5539 dstobj
= createZsetObject();
5540 dstzset
= dstobj
->ptr
;
5542 if (op
== REDIS_OP_INTER
) {
5543 /* skip going over all entries if the smallest zset is NULL or empty */
5544 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5545 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5546 * from small to large, all src[i > 0].dict are non-empty too */
5547 di
= dictGetIterator(src
[0].dict
);
5548 while((de
= dictNext(di
)) != NULL
) {
5549 double *score
= zmalloc(sizeof(double)), value
;
5550 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5552 for (j
= 1; j
< zsetnum
; j
++) {
5553 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5555 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5556 zunionInterAggregate(score
, value
, aggregate
);
5562 /* skip entry when not present in every source dict */
5566 robj
*o
= dictGetEntryKey(de
);
5567 dictAdd(dstzset
->dict
,o
,score
);
5568 incrRefCount(o
); /* added to dictionary */
5569 zslInsert(dstzset
->zsl
,*score
,o
);
5570 incrRefCount(o
); /* added to skiplist */
5573 dictReleaseIterator(di
);
5575 } else if (op
== REDIS_OP_UNION
) {
5576 for (i
= 0; i
< zsetnum
; i
++) {
5577 if (!src
[i
].dict
) continue;
5579 di
= dictGetIterator(src
[i
].dict
);
5580 while((de
= dictNext(di
)) != NULL
) {
5581 /* skip key when already processed */
5582 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5584 double *score
= zmalloc(sizeof(double)), value
;
5585 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5587 /* because the zsets are sorted by size, it's only possible
5588 * for sets at larger indices to hold this entry */
/* NOTE(review): src[j].dict is looked up without a visible NULL check in
 * this loop, while src[i].dict is checked above - confirm. */
5589 for (j
= (i
+1); j
< zsetnum
; j
++) {
5590 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5592 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5593 zunionInterAggregate(score
, value
, aggregate
);
5597 robj
*o
= dictGetEntryKey(de
);
5598 dictAdd(dstzset
->dict
,o
,score
);
5599 incrRefCount(o
); /* added to dictionary */
5600 zslInsert(dstzset
->zsl
,*score
,o
);
5601 incrRefCount(o
); /* added to skiplist */
5603 dictReleaseIterator(di
);
5606 /* unknown operator */
5607 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
/* Replace any previous value of dstkey with the freshly built set. */
5610 deleteKey(c
->db
,dstkey
);
5611 if (dstzset
->zsl
->length
) {
5612 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5613 incrRefCount(dstkey
);
5614 addReplyLong(c
, dstzset
->zsl
->length
);
/* NOTE(review): dstzset is dstobj->ptr (assigned above), while the object
 * created by createZsetObject() is dstobj; verify that decrRefCount() is
 * meant to receive dstzset rather than dstobj. */
5617 decrRefCount(dstzset
);
5618 addReply(c
, shared
.czero
);
/* ZUNION handler: delegates to zunionInterGenericCommand() with argv[1] as
 * the destination key and REDIS_OP_UNION as the operation. */
5623 static void zunionCommand(redisClient
*c
) {
5624 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
/* ZINTER handler: delegates to zunionInterGenericCommand() with argv[1] as
 * the destination key and REDIS_OP_INTER as the operation. */
5627 static void zinterCommand(redisClient
*c
) {
5628 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
/* Shared implementation of ZRANGE and ZREVRANGE.
 *   c       - client; argv[1] is the key, argv[2]/argv[3] are the 0-based
 *             start/end indexes parsed with atoi(). An optional fifth
 *             argument must be "withscores", anything else is a syntax error.
 *   reverse - non-zero walks the skiplist through ->backward, zero walks it
 *             through ->forward[0].
 * Negative indexes are offset by the list length and clamped to 0; an empty
 * range replies shared.emptymultibulk. The reply is a multi-bulk whose length
 * is doubled when scores are requested.
 * NOTE(review): several lines (declarations, the reverse/forward selection
 * of the starting node, closing braces) are not visible in this listing. */
5631 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5633 int start
= atoi(c
->argv
[2]->ptr
);
5634 int end
= atoi(c
->argv
[3]->ptr
);
5643 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5645 } else if (c
->argc
>= 5) {
5646 addReply(c
,shared
.syntaxerr
);
5650 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5651 checkType(c
,o
,REDIS_ZSET
)) return;
5656 /* convert negative indexes: count from the end, then clamp to 0 */
5657 if (start
< 0) start
= llen
+start
;
5658 if (end
< 0) end
= llen
+end
;
5659 if (start
< 0) start
= 0;
5660 if (end
< 0) end
= 0;
5662 /* indexes sanity checks */
5663 if (start
> end
|| start
>= llen
) {
5664 /* Out of range start or start > end result in empty list */
5665 addReply(c
,shared
.emptymultibulk
);
5668 if (end
>= llen
) end
= llen
-1;
5669 rangelen
= (end
-start
)+1;
5671 /* check if starting point is trivial, before searching
5672 * the element in log(N) time */
5674 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5677 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5680 /* Return the result in form of a multi-bulk reply */
5681 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5682 withscores
? (rangelen
*2) : rangelen
));
5683 for (j
= 0; j
< rangelen
; j
++) {
5685 addReplyBulk(c
,ele
);
5687 addReplyDouble(c
,ln
->score
);
5688 ln
= reverse
? ln
->backward
: ln
->forward
[0];
/* ZRANGE handler: zrangeGenericCommand() with reverse == 0. */
5692 static void zrangeCommand(redisClient
*c
) {
5693 zrangeGenericCommand(c
,0);
/* ZREVRANGE handler: zrangeGenericCommand() with reverse == 1. */
5696 static void zrevrangeCommand(redisClient
*c
) {
5697 zrangeGenericCommand(c
,1);
5700 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5701 * If justcount is non-zero, just the count is returned.
 *
 * argv[1] is the key, argv[2] and argv[3] are the min and max scores parsed
 * with strtod(). Optional trailing arguments are "limit <offset> <count>"
 * and "withscores"; any other argument count is rejected with an error.
 * A negative offset is treated as 0 and limit defaults to -1.
 * NOTE(review): several lines of this function (declarations, else
 * branches, return statements, closing braces) are not visible in this
 * listing. */
5702 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5705 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5706 int offset
= 0, limit
= -1;
5710 /* Parse the min-max interval. If one of the values is prefixed
5711 * by the "(" character, it's considered "open". For instance
5712 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5713 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5714 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5715 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5718 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5720 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5721 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5724 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5727 /* Parse "WITHSCORES": note that if the command was called with
5728 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5729 * enter the following paths to parse WITHSCORES and LIMIT. */
5730 if (c
->argc
== 5 || c
->argc
== 8) {
5731 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5736 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5740 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5745 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5746 addReply(c
,shared
.syntaxerr
);
5748 } else if (c
->argc
== (7 + withscores
)) {
5749 offset
= atoi(c
->argv
[5]->ptr
);
5750 limit
= atoi(c
->argv
[6]->ptr
);
5751 if (offset
< 0) offset
= 0;
5754 /* Ok, lookup the key and get the range */
5755 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5757 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5759 if (o
->type
!= REDIS_ZSET
) {
5760 addReply(c
,shared
.wrongtypeerr
);
5762 zset
*zsetobj
= o
->ptr
;
5763 zskiplist
*zsl
= zsetobj
->zsl
;
5765 robj
*ele
, *lenobj
= NULL
;
5766 unsigned long rangelen
= 0;
5768 /* Get the first node with the score >= min, or with
5769 * score > min if 'minex' is true. */
5770 ln
= zslFirstWithScore(zsl
,min
);
5771 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5774 /* No element matching the specified interval */
5775 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5779 /* We don't know in advance how many matching elements there
5780 * are in the list, so we push this object that will represent
5781 * the multi-bulk length in the output buffer, and will "fix"
5784 lenobj
= createObject(REDIS_STRING
,NULL
);
5786 decrRefCount(lenobj
);
5789 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5792 ln
= ln
->forward
[0];
5795 if (limit
== 0) break;
5798 addReplyBulk(c
,ele
);
5800 addReplyDouble(c
,ln
->score
);
5802 ln
= ln
->forward
[0];
5804 if (limit
> 0) limit
--;
5807 addReplyLong(c
,(long)rangelen
);
5809 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5810 withscores
? (rangelen
*2) : rangelen
);
/* ZRANGEBYSCORE handler: genericZrangebyscoreCommand() with justcount == 0. */
5816 static void zrangebyscoreCommand(redisClient
*c
) {
5817 genericZrangebyscoreCommand(c
,0);
/* ZCOUNT handler: genericZrangebyscoreCommand() with justcount == 1. */
5820 static void zcountCommand(redisClient
*c
) {
5821 genericZrangebyscoreCommand(c
,1);
/* ZCARD handler: replies with the skiplist length of the sorted set stored
 * at argv[1]. shared.czero is passed to lookupKeyReadOrReply() as the reply
 * to use (presumably when the key is missing).
 * NOTE(review): the declarations and the assignment of 'zs' are not visible
 * in this listing. */
5824 static void zcardCommand(redisClient
*c
) {
5828 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5829 checkType(c
,o
,REDIS_ZSET
)) return;
5832 addReplyUlong(c
,zs
->zsl
->length
);
/* ZSCORE handler: looks up member argv[2] in the dict of the sorted set at
 * argv[1] and replies with its score via addReplyDouble(); a shared.nullbulk
 * reply is also emitted (presumably when the key or the member is missing -
 * the condition guarding it is not visible in this listing). */
5835 static void zscoreCommand(redisClient
*c
) {
5840 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5841 checkType(c
,o
,REDIS_ZSET
)) return;
5844 de
= dictFind(zs
->dict
,c
->argv
[2]);
5846 addReply(c
,shared
.nullbulk
);
5848 double *score
= dictGetEntryVal(de
);
5850 addReplyDouble(c
,*score
);
/* Shared implementation of ZRANK and ZREVRANK.
 * The score of member argv[2] is fetched from the set's dict and passed to
 * zslGetRank(). Two replies are visible: zsl->length - rank and rank-1,
 * presumably selected by 'reverse' (the condition is not visible in this
 * listing). shared.nullbulk is replied on the lookup-failure paths.
 * NOTE(review): declarations, conditions and closing braces are missing
 * from this listing. */
5854 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5862 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5863 checkType(c
,o
,REDIS_ZSET
)) return;
5867 de
= dictFind(zs
->dict
,c
->argv
[2]);
5869 addReply(c
,shared
.nullbulk
);
5873 score
= dictGetEntryVal(de
);
5874 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5877 addReplyLong(c
, zsl
->length
- rank
);
5879 addReplyLong(c
, rank
-1);
5882 addReply(c
,shared
.nullbulk
);
/* ZRANK handler: zrankGenericCommand() with reverse == 0. */
5886 static void zrankCommand(redisClient
*c
) {
5887 zrankGenericCommand(c
, 0);
/* ZREVRANK handler: zrankGenericCommand() with reverse == 1. */
5890 static void zrevrankCommand(redisClient
*c
) {
5891 zrankGenericCommand(c
, 1);
5894 /* =================================== Hashes =============================== */
/* HSET handler: argv[1] is the key, argv[2] the field, argv[3] the value.
 * A hash object is created with createHashObject() and added to the db
 * (presumably when the key does not exist; the condition is not visible
 * here); a non-hash value yields shared.wrongtypeerr. Zipmap-encoded hashes
 * are converted with convertToRealHash() when the field or value is longer
 * than server.hash_max_zipmap_value, or when a newly added entry brings the
 * count above server.hash_max_zipmap_entries. The reply is the integer
 * (update == 0).
 * NOTE(review): several lines (declarations, else branches, closing braces)
 * are not visible in this listing. */
5895 static void hsetCommand(redisClient
*c
) {
5897 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5900 o
= createHashObject();
5901 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5902 incrRefCount(c
->argv
[1]);
5904 if (o
->type
!= REDIS_HASH
) {
5905 addReply(c
,shared
.wrongtypeerr
);
5909 /* We want to convert the zipmap into an hash table right now if the
5910 * entry to be added is too big. Note that we check if the object
5911 * is integer encoded before to try fetching the length in the test below.
5912 * This is because integers are small, but currently stringObjectLen()
5913 * performs a slow conversion: not worth it. */
5914 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5915 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5916 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5917 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5918 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5920 convertToRealHash(o
);
5923 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5924 unsigned char *zm
= o
->ptr
;
5925 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5927 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5928 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5929 decrRefCount(valobj
);
5932 /* And here there is the second check for hash conversion...
5933 * we want to do it only if the operation was not just an update as
5934 * zipmapLen() is O(N). */
5935 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5936 convertToRealHash(o
);
5938 tryObjectEncoding(c
->argv
[2]);
5939 /* note that c->argv[3] is already encoded, as the latest arg
5940 * of a bulk command is always integer encoded if possible. */
5941 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5942 incrRefCount(c
->argv
[2]);
5946 incrRefCount(c
->argv
[3]);
5949 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
/* HGET handler: replies with the value of field argv[2] of the hash at
 * argv[1]. For zipmap-encoded hashes the field is decoded and looked up with
 * zipmapGet(), and the value is emitted as a "$<len>" header, the raw bytes
 * and a CRLF; otherwise the field is looked up with dictFind(). shared.nullbulk
 * is replied on the not-found paths.
 * NOTE(review): declarations, else branches and the final reply of the
 * hash-table path are not visible in this listing. */
5952 static void hgetCommand(redisClient
*c
) {
5955 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5956 checkType(c
,o
,REDIS_HASH
)) return;
5958 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5959 unsigned char *zm
= o
->ptr
;
5964 field
= getDecodedObject(c
->argv
[2]);
5965 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
5966 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
5967 addReplySds(c
,sdsnewlen(val
,vlen
));
5968 addReply(c
,shared
.crlf
);
5969 decrRefCount(field
);
5972 addReply(c
,shared
.nullbulk
);
5973 decrRefCount(field
);
5977 struct dictEntry
*de
;
5979 de
= dictFind(o
->ptr
,c
->argv
[2]);
5981 addReply(c
,shared
.nullbulk
);
5983 robj
*e
= dictGetEntryVal(de
);
/* HDEL handler: removes field argv[2] from the hash at argv[1].
 * Zipmap-encoded hashes go through zipmapDel(); hash-table ones through
 * dictDelete() followed by an optional dictResize(). In both cases the key
 * is deleted when the hash becomes empty. server.dirty is incremented when
 * something was removed and the reply is shared.cone or shared.czero.
 * NOTE(review): declarations, the else line and closing braces are not
 * visible in this listing. */
5990 static void hdelCommand(redisClient
*c
) {
5994 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5995 checkType(c
,o
,REDIS_HASH
)) return;
5997 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5998 robj
*field
= getDecodedObject(c
->argv
[2]);
6000 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6001 (unsigned char*) field
->ptr
,
6002 sdslen(field
->ptr
), &deleted
);
6003 decrRefCount(field
);
6004 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6005 deleteKey(c
->db
,c
->argv
[1]);
6007 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6008 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6009 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6011 if (deleted
) server
.dirty
++;
6012 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
/* HLEN handler: replies with the number of entries of the hash at argv[1],
 * taken from zipmapLen() for zipmap-encoded hashes and from dictSize()
 * otherwise.
 * NOTE(review): the local declarations are not visible in this listing. */
6015 static void hlenCommand(redisClient
*c
) {
6019 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6020 checkType(c
,o
,REDIS_HASH
)) return;
6022 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6023 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6024 addReplyUlong(c
,len
);
/* Bit flags selecting what genericHgetallCommand() emits. */
6027 #define REDIS_GETALL_KEYS 1
6028 #define REDIS_GETALL_VALS 2
/* Shared implementation of HKEYS, HVALS and HGETALL.
 * Walks the hash at argv[1] (zipmap or hash-table encoded) and emits, for
 * each entry, the field when REDIS_GETALL_KEYS is set in 'flags' and the
 * value when REDIS_GETALL_VALS is set. The multi-bulk length header is a
 * placeholder object ('lenobj') whose content is filled in at the end from
 * 'count'.
 * NOTE(review): the lines that add lenobj to the reply, update 'count' and
 * release the temporary 'aux' objects are not visible in this listing. */
6029 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6031 unsigned long count
= 0;
6033 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6034 || checkType(c
,o
,REDIS_HASH
)) return;
6036 lenobj
= createObject(REDIS_STRING
,NULL
);
6038 decrRefCount(lenobj
);
6040 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6041 unsigned char *p
= zipmapRewind(o
->ptr
);
6042 unsigned char *field
, *val
;
6043 unsigned int flen
, vlen
;
6045 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6048 if (flags
& REDIS_GETALL_KEYS
) {
6049 aux
= createStringObject((char*)field
,flen
);
6050 addReplyBulk(c
,aux
);
6054 if (flags
& REDIS_GETALL_VALS
) {
6055 aux
= createStringObject((char*)val
,vlen
);
6056 addReplyBulk(c
,aux
);
6062 dictIterator
*di
= dictGetIterator(o
->ptr
);
6065 while((de
= dictNext(di
)) != NULL
) {
6066 robj
*fieldobj
= dictGetEntryKey(de
);
6067 robj
*valobj
= dictGetEntryVal(de
);
6069 if (flags
& REDIS_GETALL_KEYS
) {
6070 addReplyBulk(c
,fieldobj
);
6073 if (flags
& REDIS_GETALL_VALS
) {
6074 addReplyBulk(c
,valobj
);
6078 dictReleaseIterator(di
);
/* Fill in the multi-bulk header now that the element count is known. */
6080 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
/* HKEYS handler: genericHgetallCommand() emitting fields only. */
6083 static void hkeysCommand(redisClient
*c
) {
6084 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
/* HVALS handler: genericHgetallCommand() emitting values only. */
6087 static void hvalsCommand(redisClient
*c
) {
6088 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
/* HGETALL handler: genericHgetallCommand() emitting both fields and values. */
6091 static void hgetallCommand(redisClient
*c
) {
6092 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
/* HEXISTS handler: replies shared.cone when field argv[2] exists in the hash
 * at argv[1], shared.czero otherwise. Zipmap-encoded hashes are probed with
 * zipmapExists() on the decoded field, hash-table ones with dictFind().
 * NOTE(review): declarations and the else line are not visible here. */
6095 static void hexistsCommand(redisClient
*c
) {
6099 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6100 checkType(c
,o
,REDIS_HASH
)) return;
6102 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6104 unsigned char *zm
= o
->ptr
;
6106 field
= getDecodedObject(c
->argv
[2]);
6107 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6108 decrRefCount(field
);
6110 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6112 addReply(c
,exists
? shared
.cone
: shared
.czero
);
/* Convert a zipmap-encoded hash object into a hash-table encoded one.
 * Every zipmap entry is copied into a new dict (created with hashDictType)
 * as a pair of string objects passed through tryObjectEncoding(), and the
 * object's encoding is then set to REDIS_ENCODING_HT. The function asserts
 * that 'o' is a hash not already using the hash-table encoding.
 * NOTE(review): the lines that install the new dict in o->ptr and release
 * the old zipmap are not visible in this listing - confirm. */
6115 static void convertToRealHash(robj
*o
) {
6116 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6117 unsigned int klen
, vlen
;
6118 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6120 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6121 p
= zipmapRewind(zm
);
6122 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6123 robj
*keyobj
, *valobj
;
6125 keyobj
= createStringObject((char*)key
,klen
);
6126 valobj
= createStringObject((char*)val
,vlen
);
6127 tryObjectEncoding(keyobj
);
6128 tryObjectEncoding(valobj
);
6129 dictAdd(dict
,keyobj
,valobj
);
6131 o
->encoding
= REDIS_ENCODING_HT
;
6136 /* ========================= Non type-specific commands ==================== */
/* FLUSHDB handler: adds the number of keys in the client's current db to
 * server.dirty, empties both the key dict and the expires dict of that db,
 * and replies shared.ok. */
6138 static void flushdbCommand(redisClient
*c
) {
6139 server
.dirty
+= dictSize(c
->db
->dict
);
6140 dictEmpty(c
->db
->dict
);
6141 dictEmpty(c
->db
->expires
);
6142 addReply(c
,shared
.ok
);
/* FLUSHALL handler: empties the dataset with emptyDb(), adding its return
 * value to server.dirty, and replies shared.ok. If a background save child
 * is running (server.bgsavechildpid != -1) it is killed with SIGKILL and its
 * temp file is removed. rdbSave() is then called on server.dbfilename.
 * NOTE(review): the closing braces are not visible in this listing, so
 * whether rdbSave() is inside the 'if' cannot be told from here. */
6145 static void flushallCommand(redisClient
*c
) {
6146 server
.dirty
+= emptyDb();
6147 addReply(c
,shared
.ok
);
6148 if (server
.bgsavechildpid
!= -1) {
6149 kill(server
.bgsavechildpid
,SIGKILL
);
6150 rdbRemoveTempFile(server
.bgsavechildpid
);
6152 rdbSave(server
.dbfilename
);
/* Allocate a redisSortOperation with zmalloc() and store 'pattern' in it.
 * NOTE(review): the lines storing 'type' and returning the new object are
 * not visible in this listing. */
6156 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6157 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6159 so
->pattern
= pattern
;
6163 /* Return the value associated to the key with a name obtained
6164 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
 *
 * The key name is assembled in a stack buffer of REDIS_SORTKEY_MAX+1 bytes;
 * NULL is returned when the combined length would exceed REDIS_SORTKEY_MAX.
 * The final lookup is done with lookupKeyRead().
 * NOTE(review): several lines (declarations, the "#" return, the handling
 * of a pattern without '*') are not visible in this listing. */
6165 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6169 int prefixlen
, sublen
, postfixlen
;
6170 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6174 char buf
[REDIS_SORTKEY_MAX
+1];
6177 /* If the pattern is "#" return the substitution object itself in order
6178 * to implement the "SORT ... GET #" feature. */
6179 spat
= pattern
->ptr
;
6180 if (spat
[0] == '#' && spat
[1] == '\0') {
6184 /* The substitution object may be specially encoded. If so we create
6185 * a decoded object on the fly. Otherwise getDecodedObject will just
6186 * increment the ref count, that we'll decrement later. */
6187 subst
= getDecodedObject(subst
);
/* NOTE(review): this early return happens after getDecodedObject(subst)
 * with no decrRefCount(subst) visible on this path - verify. */
6190 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6191 p
= strchr(spat
,'*');
6193 decrRefCount(subst
);
6198 sublen
= sdslen(ssub
);
6199 postfixlen
= sdslen(spat
)-(prefixlen
+1);
/* Assemble prefix + substitution + postfix and NUL-terminate the result. */
6200 memcpy(keyname
.buf
,spat
,prefixlen
);
6201 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6202 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6203 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6204 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6206 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6207 decrRefCount(subst
);
6209 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6210 return lookupKeyRead(db
,&keyobj
);
6213 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6214 * the additional parameter is not standard but a BSD-specific we have to
6215 * pass sorting parameters via the global 'server' structure.
 *
 * The parameters read are server.sort_alpha, server.sort_bypattern and
 * server.sort_desc. Numeric mode compares the u.score fields; alpha mode
 * compares strings with strcoll(), using u.cmpobj when sorting by pattern
 * and the decoded elements otherwise. The result is negated when
 * server.sort_desc is set.
 * NOTE(review): the lines assigning 'cmp' in several branches and releasing
 * dec1/dec2 are not visible in this listing. */
6216 static int sortCompare(const void *s1
, const void *s2
) {
6217 const redisSortObject
*so1
= s1
, *so2
= s2
;
6220 if (!server
.sort_alpha
) {
6221 /* Numeric sorting. Here it's trivial as we precomputed scores */
6222 if (so1
->u
.score
> so2
->u
.score
) {
6224 } else if (so1
->u
.score
< so2
->u
.score
) {
6230 /* Alphanumeric sorting */
6231 if (server
.sort_bypattern
) {
6232 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6233 /* At least one compare object is NULL */
6234 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6236 else if (so1
->u
.cmpobj
== NULL
)
6241 /* We have both the objects, use strcoll */
6242 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6245 /* Compare elements directly */
6248 dec1
= getDecodedObject(so1
->obj
);
6249 dec2
= getDecodedObject(so2
->obj
);
6250 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6255 return server
.sort_desc
? -cmp
: cmp
;
6258 /* The SORT command is the most complex command in Redis. Warning: this code
6259 * is optimized for speed and a bit less for readability.
 *
 * Outline of the visible code: look up argv[1] (list, set or sorted set),
 * parse the ASC/DESC/ALPHA/LIMIT/STORE/BY/GET options, load the elements
 * into a vector of redisSortObject, compute per-element scores or compare
 * objects, sort with qsort()/pqsort() using sortCompare(), then either send
 * the result to the client or store it as a list under the STORE key.
 * NOTE(review): many lines of this function are not visible in this
 * listing (declarations, option side effects, else branches, braces). */
6260 static void sortCommand(redisClient
*c
) {
6263 int desc
= 0, alpha
= 0;
6264 int limit_start
= 0, limit_count
= -1, start
, end
;
6265 int j
, dontsort
= 0, vectorlen
;
6266 int getop
= 0; /* GET operation counter */
6267 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6268 redisSortObject
*vector
; /* Resulting vector to sort */
6270 /* Lookup the key to sort. It must be of the right types */
6271 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6272 if (sortval
== NULL
) {
6273 addReply(c
,shared
.nullmultibulk
);
6276 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6277 sortval
->type
!= REDIS_ZSET
)
6279 addReply(c
,shared
.wrongtypeerr
);
6283 /* Create a list of operations to perform for every sorted element.
6284 * Operations can be GET/DEL/INCR/DECR */
6285 operations
= listCreate();
6286 listSetFreeMethod(operations
,zfree
);
6289 /* Now we need to protect sortval incrementing its count, in the future
6290 * SORT may have options able to overwrite/delete keys during the sorting
6291 * and the sorted key itself may get destroyed */
6292 incrRefCount(sortval
);
6294 /* The SORT command has an SQL-alike syntax, parse it */
6295 while(j
< c
->argc
) {
6296 int leftargs
= c
->argc
-j
-1;
6297 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6299 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6301 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6303 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6304 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6305 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6307 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6308 storekey
= c
->argv
[j
+1];
6310 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6311 sortby
= c
->argv
[j
+1];
6312 /* If the BY pattern does not contain '*', i.e. it is constant,
6313 * we don't need to sort nor to lookup the weight keys. */
6314 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6316 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6317 listAddNodeTail(operations
,createSortOperation(
6318 REDIS_SORT_GET
,c
->argv
[j
+1]));
6322 decrRefCount(sortval
);
6323 listRelease(operations
);
6324 addReply(c
,shared
.syntaxerr
);
6330 /* Load the sorting vector with all the objects to sort */
6331 switch(sortval
->type
) {
6332 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6333 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6334 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6335 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6337 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6340 if (sortval
->type
== REDIS_LIST
) {
6341 list
*list
= sortval
->ptr
;
6345 listRewind(list
,&li
);
6346 while((ln
= listNext(&li
))) {
6347 robj
*ele
= ln
->value
;
6348 vector
[j
].obj
= ele
;
6349 vector
[j
].u
.score
= 0;
6350 vector
[j
].u
.cmpobj
= NULL
;
6358 if (sortval
->type
== REDIS_SET
) {
6361 zset
*zs
= sortval
->ptr
;
6365 di
= dictGetIterator(set
);
6366 while((setele
= dictNext(di
)) != NULL
) {
6367 vector
[j
].obj
= dictGetEntryKey(setele
);
6368 vector
[j
].u
.score
= 0;
6369 vector
[j
].u
.cmpobj
= NULL
;
6372 dictReleaseIterator(di
);
6374 redisAssert(j
== vectorlen
);
6376 /* Now it's time to load the right scores in the sorting vector */
6377 if (dontsort
== 0) {
6378 for (j
= 0; j
< vectorlen
; j
++) {
6382 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6383 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6385 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6387 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6388 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6390 /* Don't need to decode the object if it's
6391 * integer-encoded (the only encoding supported) so
6392 * far. We can just cast it */
6393 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6394 vector
[j
].u
.score
= (long)byval
->ptr
;
6396 redisAssert(1 != 1);
6401 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6402 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6404 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6405 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6407 redisAssert(1 != 1);
6414 /* We are ready to sort the vector... perform a bit of sanity check
6415 * on the LIMIT option too. We'll use a partial version of quicksort. */
6416 start
= (limit_start
< 0) ? 0 : limit_start
;
6417 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6418 if (start
>= vectorlen
) {
6419 start
= vectorlen
-1;
6422 if (end
>= vectorlen
) end
= vectorlen
-1;
6424 if (dontsort
== 0) {
6425 server
.sort_desc
= desc
;
6426 server
.sort_alpha
= alpha
;
6427 server
.sort_bypattern
= sortby
? 1 : 0;
6428 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6429 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6431 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6434 /* Send command output to the output buffer, performing the specified
6435 * GET/DEL/INCR/DECR operations if any. */
6436 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6437 if (storekey
== NULL
) {
6438 /* STORE option not specified, send the sorting result to client */
6439 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6440 for (j
= start
; j
<= end
; j
++) {
6444 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6445 listRewind(operations
,&li
);
6446 while((ln
= listNext(&li
))) {
6447 redisSortOperation
*sop
= ln
->value
;
6448 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6451 if (sop
->type
== REDIS_SORT_GET
) {
6452 if (!val
|| val
->type
!= REDIS_STRING
) {
6453 addReply(c
,shared
.nullbulk
);
6455 addReplyBulk(c
,val
);
6458 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6463 robj
*listObject
= createListObject();
6464 list
*listPtr
= (list
*) listObject
->ptr
;
6466 /* STORE option specified, set the sorting result as a List object */
6467 for (j
= start
; j
<= end
; j
++) {
6472 listAddNodeTail(listPtr
,vector
[j
].obj
);
6473 incrRefCount(vector
[j
].obj
);
6475 listRewind(operations
,&li
);
6476 while((ln
= listNext(&li
))) {
6477 redisSortOperation
*sop
= ln
->value
;
6478 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6481 if (sop
->type
== REDIS_SORT_GET
) {
6482 if (!val
|| val
->type
!= REDIS_STRING
) {
6483 listAddNodeTail(listPtr
,createStringObject("",0));
6485 listAddNodeTail(listPtr
,val
);
6489 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6493 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6494 incrRefCount(storekey
);
6496 /* Note: we add 1 because the DB is dirty anyway since even if the
6497 * SORT result is empty a new key is set and maybe the old content
6499 server
.dirty
+= 1+outputlen
;
6500 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6504 decrRefCount(sortval
);
6505 listRelease(operations
);
6506 for (j
= 0; j
< vectorlen
; j
++) {
6507 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6508 decrRefCount(vector
[j
].u
.cmpobj
);
6513 /* Convert an amount of bytes into a human readable string in the form
6514 * of 100B, 2G, 100M, 4K, and so forth.
 *
 *   s - destination buffer, written with sprintf()
 *   n - number of bytes to format
 * Values are printed as "<n>B", or with two decimals followed by K, M or G
 * depending on the 1024-based range they fall in.
 * NOTE(review): sprintf() is unbounded, so the caller must provide a buffer
 * large enough for the result. No branch for values of 1024^4 or more is
 * visible in this listing. */
6515 static void bytesToHuman(char *s
, unsigned long long n
) {
6520 sprintf(s
,"%lluB",n
);
6522 } else if (n
< (1024*1024)) {
6523 d
= (double)n
/(1024);
6524 sprintf(s
,"%.2fK",d
);
6525 } else if (n
< (1024LL*1024*1024)) {
6526 d
= (double)n
/(1024*1024);
6527 sprintf(s
,"%.2fM",d
);
6528 } else if (n
< (1024LL*1024*1024*1024)) {
6529 d
= (double)n
/(1024LL*1024*1024);
6530 sprintf(s
,"%.2fG",d
);
6534 /* Create the string returned by the INFO command. This is decoupled
6535 * by the INFO command itself as we need to report the same information
6536 * on memory corruption problems.
 *
 * The string is built with sdscatprintf() in up to four parts: general
 * server statistics, replication fields when server.masterhost is set,
 * virtual memory fields when server.vm_enabled is set, and one
 * "db<N>:keys=...,expires=..." line per database that has keys or expires.
 * NOTE(review): several format lines and arguments are not visible in this
 * listing, so the format/argument pairing cannot be checked from here. */
6537 static sds
genRedisInfoString(void) {
6539 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6543 bytesToHuman(hmem
,zmalloc_used_memory());
6544 info
= sdscatprintf(sdsempty(),
6545 "redis_version:%s\r\n"
6547 "multiplexing_api:%s\r\n"
6548 "process_id:%ld\r\n"
6549 "uptime_in_seconds:%ld\r\n"
6550 "uptime_in_days:%ld\r\n"
6551 "connected_clients:%d\r\n"
6552 "connected_slaves:%d\r\n"
6553 "blocked_clients:%d\r\n"
6554 "used_memory:%zu\r\n"
6555 "used_memory_human:%s\r\n"
6556 "changes_since_last_save:%lld\r\n"
6557 "bgsave_in_progress:%d\r\n"
6558 "last_save_time:%ld\r\n"
6559 "bgrewriteaof_in_progress:%d\r\n"
6560 "total_connections_received:%lld\r\n"
6561 "total_commands_processed:%lld\r\n"
6562 "expired_keys:%lld\r\n"
6563 "hash_max_zipmap_entries:%ld\r\n"
6564 "hash_max_zipmap_value:%ld\r\n"
6568 (sizeof(long) == 8) ? "64" : "32",
6573 listLength(server
.clients
)-listLength(server
.slaves
),
6574 listLength(server
.slaves
),
6575 server
.blpop_blocked_clients
,
6576 zmalloc_used_memory(),
6579 server
.bgsavechildpid
!= -1,
6581 server
.bgrewritechildpid
!= -1,
6582 server
.stat_numconnections
,
6583 server
.stat_numcommands
,
6584 server
.stat_expiredkeys
,
6585 server
.hash_max_zipmap_entries
,
6586 server
.hash_max_zipmap_value
,
6587 server
.vm_enabled
!= 0,
6588 server
.masterhost
== NULL
? "master" : "slave"
6590 if (server
.masterhost
) {
6591 info
= sdscatprintf(info
,
6592 "master_host:%s\r\n"
6593 "master_port:%d\r\n"
6594 "master_link_status:%s\r\n"
6595 "master_last_io_seconds_ago:%d\r\n"
6598 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6600 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6603 if (server
.vm_enabled
) {
6605 info
= sdscatprintf(info
,
6606 "vm_conf_max_memory:%llu\r\n"
6607 "vm_conf_page_size:%llu\r\n"
6608 "vm_conf_pages:%llu\r\n"
6609 "vm_stats_used_pages:%llu\r\n"
6610 "vm_stats_swapped_objects:%llu\r\n"
6611 "vm_stats_swappin_count:%llu\r\n"
6612 "vm_stats_swappout_count:%llu\r\n"
6613 "vm_stats_io_newjobs_len:%lu\r\n"
6614 "vm_stats_io_processing_len:%lu\r\n"
6615 "vm_stats_io_processed_len:%lu\r\n"
6616 "vm_stats_io_active_threads:%lu\r\n"
6617 "vm_stats_blocked_clients:%lu\r\n"
6618 ,(unsigned long long) server
.vm_max_memory
,
6619 (unsigned long long) server
.vm_page_size
,
6620 (unsigned long long) server
.vm_pages
,
6621 (unsigned long long) server
.vm_stats_used_pages
,
6622 (unsigned long long) server
.vm_stats_swapped_objects
,
6623 (unsigned long long) server
.vm_stats_swapins
,
6624 (unsigned long long) server
.vm_stats_swapouts
,
6625 (unsigned long) listLength(server
.io_newjobs
),
6626 (unsigned long) listLength(server
.io_processing
),
6627 (unsigned long) listLength(server
.io_processed
),
6628 (unsigned long) server
.io_active_threads
,
6629 (unsigned long) server
.vm_blocked_clients
/* One line per database that has at least one key or one expire set. */
6633 for (j
= 0; j
< server
.dbnum
; j
++) {
6634 long long keys
, vkeys
;
6636 keys
= dictSize(server
.db
[j
].dict
);
6637 vkeys
= dictSize(server
.db
[j
].expires
);
6638 if (keys
|| vkeys
) {
6639 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
/* INFO handler: sends the string built by genRedisInfoString() as a bulk
 * reply: a "$<length>" header, the string itself (handed over to
 * addReplySds()), and a trailing CRLF. */
6646 static void infoCommand(redisClient
*c
) {
6647 sds info
= genRedisInfoString();
6648 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6649 (unsigned long)sdslen(info
)));
6650 addReplySds(c
,info
);
6651 addReply(c
,shared
.crlf
);
6654 static void monitorCommand(redisClient
*c
) {
    /* ignore MONITOR if already slave or in monitor mode */
6656 if (c
->flags
& REDIS_SLAVE
) return;
6658 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6660 listAddNodeTail(server
.monitors
,c
);
6661 addReply(c
,shared
.ok
);
6664 /* ================================= Expire ================================= */
6665 static int removeExpire(redisDb
*db
, robj
*key
) {
6666 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6673 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6674 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6682 /* Return the expire time of the specified key, or -1 if no expire
6683 * is associated with this key (i.e. the key is non volatile) */
6684 static time_t getExpire(redisDb
*db
, robj
*key
) {
6687 /* No expire? return ASAP */
6688 if (dictSize(db
->expires
) == 0 ||
6689 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6691 return (time_t) dictGetEntryVal(de
);
6694 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6698 /* No expire? return ASAP */
6699 if (dictSize(db
->expires
) == 0 ||
6700 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6702 /* Lookup the expire */
6703 when
= (time_t) dictGetEntryVal(de
);
6704 if (time(NULL
) <= when
) return 0;
6706 /* Delete the key */
6707 dictDelete(db
->expires
,key
);
6708 server
.stat_expiredkeys
++;
6709 return dictDelete(db
->dict
,key
) == DICT_OK
;
6712 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6715 /* No expire? return ASAP */
6716 if (dictSize(db
->expires
) == 0 ||
6717 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6719 /* Delete the key */
6721 server
.stat_expiredkeys
++;
6722 dictDelete(db
->expires
,key
);
6723 return dictDelete(db
->dict
,key
) == DICT_OK
;
6726 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6729 de
= dictFind(c
->db
->dict
,key
);
6731 addReply(c
,shared
.czero
);
6735 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6736 addReply(c
, shared
.cone
);
6739 time_t when
= time(NULL
)+seconds
;
6740 if (setExpire(c
->db
,key
,when
)) {
6741 addReply(c
,shared
.cone
);
6744 addReply(c
,shared
.czero
);
6750 static void expireCommand(redisClient
*c
) {
6751 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6754 static void expireatCommand(redisClient
*c
) {
6755 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6758 static void ttlCommand(redisClient
*c
) {
6762 expire
= getExpire(c
->db
,c
->argv
[1]);
6764 ttl
= (int) (expire
-time(NULL
));
6765 if (ttl
< 0) ttl
= -1;
6767 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6770 /* ================================ MULTI/EXEC ============================== */
6772 /* Client state initialization for MULTI/EXEC */
6773 static void initClientMultiState(redisClient
*c
) {
6774 c
->mstate
.commands
= NULL
;
6775 c
->mstate
.count
= 0;
6778 /* Release all the resources associated with MULTI/EXEC state */
6779 static void freeClientMultiState(redisClient
*c
) {
6782 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6784 multiCmd
*mc
= c
->mstate
.commands
+j
;
6786 for (i
= 0; i
< mc
->argc
; i
++)
6787 decrRefCount(mc
->argv
[i
]);
6790 zfree(c
->mstate
.commands
);
6793 /* Add a new command into the MULTI commands queue */
6794 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6798 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6799 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6800 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6803 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6804 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6805 for (j
= 0; j
< c
->argc
; j
++)
6806 incrRefCount(mc
->argv
[j
]);
6810 static void multiCommand(redisClient
*c
) {
6811 c
->flags
|= REDIS_MULTI
;
6812 addReply(c
,shared
.ok
);
6815 static void discardCommand(redisClient
*c
) {
6816 if (!(c
->flags
& REDIS_MULTI
)) {
6817 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6821 freeClientMultiState(c
);
6822 initClientMultiState(c
);
6823 c
->flags
&= (~REDIS_MULTI
);
6824 addReply(c
,shared
.ok
);
6827 static void execCommand(redisClient
*c
) {
6832 if (!(c
->flags
& REDIS_MULTI
)) {
6833 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6837 orig_argv
= c
->argv
;
6838 orig_argc
= c
->argc
;
6839 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6840 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6841 c
->argc
= c
->mstate
.commands
[j
].argc
;
6842 c
->argv
= c
->mstate
.commands
[j
].argv
;
6843 call(c
,c
->mstate
.commands
[j
].cmd
);
6845 c
->argv
= orig_argv
;
6846 c
->argc
= orig_argc
;
6847 freeClientMultiState(c
);
6848 initClientMultiState(c
);
6849 c
->flags
&= (~REDIS_MULTI
);
6852 /* =========================== Blocking Operations ========================= */
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
6883 /* Set a client in blocking mode for the specified key, with the specified
6885 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6890 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6891 c
->blockingkeysnum
= numkeys
;
6892 c
->blockingto
= timeout
;
6893 for (j
= 0; j
< numkeys
; j
++) {
6894 /* Add the key in the client structure, to map clients -> keys */
6895 c
->blockingkeys
[j
] = keys
[j
];
6896 incrRefCount(keys
[j
]);
6898 /* And in the other "side", to map keys -> clients */
6899 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
6903 /* For every key we take a list of clients blocked for it */
6905 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
6906 incrRefCount(keys
[j
]);
6907 assert(retval
== DICT_OK
);
6909 l
= dictGetEntryVal(de
);
6911 listAddNodeTail(l
,c
);
6913 /* Mark the client as a blocked client */
6914 c
->flags
|= REDIS_BLOCKED
;
6915 server
.blpop_blocked_clients
++;
6918 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
6919 static void unblockClientWaitingData(redisClient
*c
) {
6924 assert(c
->blockingkeys
!= NULL
);
6925 /* The client may wait for multiple keys, so unblock it for every key. */
6926 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
6927 /* Remove this client from the list of clients waiting for this key. */
6928 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6930 l
= dictGetEntryVal(de
);
6931 listDelNode(l
,listSearchKey(l
,c
));
6932 /* If the list is empty we need to remove it to avoid wasting memory */
6933 if (listLength(l
) == 0)
6934 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6935 decrRefCount(c
->blockingkeys
[j
]);
6937 /* Cleanup the client structure */
6938 zfree(c
->blockingkeys
);
6939 c
->blockingkeys
= NULL
;
6940 c
->flags
&= (~REDIS_BLOCKED
);
6941 server
.blpop_blocked_clients
--;
6942 /* We want to process data if there is some command waiting
6943 * in the input buffer. Note that this is safe even if
6944 * unblockClientWaitingData() gets called from freeClient() because
6945 * freeClient() will be smart enough to call this function
6946 * *after* c->querybuf was set to NULL. */
6947 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
6950 /* This should be called from any function PUSHing into lists.
6951 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
6952 * 'ele' is the element pushed.
6954 * If the function returns 0 there was no client waiting for a list push
6957 * If the function returns 1 there was a client waiting for a list push
6958 * against this key, the element was passed to this client thus it's not
6959 * needed to actually add it to the list and the caller should return asap. */
6960 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
6961 struct dictEntry
*de
;
6962 redisClient
*receiver
;
6966 de
= dictFind(c
->db
->blockingkeys
,key
);
6967 if (de
== NULL
) return 0;
6968 l
= dictGetEntryVal(de
);
6971 receiver
= ln
->value
;
6973 addReplySds(receiver
,sdsnew("*2\r\n"));
6974 addReplyBulk(receiver
,key
);
6975 addReplyBulk(receiver
,ele
);
6976 unblockClientWaitingData(receiver
);
6980 /* Blocking RPOP/LPOP */
6981 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
6986 for (j
= 1; j
< c
->argc
-1; j
++) {
6987 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6989 if (o
->type
!= REDIS_LIST
) {
6990 addReply(c
,shared
.wrongtypeerr
);
6993 list
*list
= o
->ptr
;
6994 if (listLength(list
) != 0) {
6995 /* If the list contains elements fall back to the usual
6996 * non-blocking POP operation */
6997 robj
*argv
[2], **orig_argv
;
7000 /* We need to alter the command arguments before to call
7001 * popGenericCommand() as the command takes a single key. */
7002 orig_argv
= c
->argv
;
7003 orig_argc
= c
->argc
;
7004 argv
[1] = c
->argv
[j
];
7008 /* Also the return value is different, we need to output
7009 * the multi bulk reply header and the key name. The
7010 * "real" command will add the last element (the value)
7011 * for us. If this souds like an hack to you it's just
7012 * because it is... */
7013 addReplySds(c
,sdsnew("*2\r\n"));
7014 addReplyBulk(c
,argv
[1]);
7015 popGenericCommand(c
,where
);
7017 /* Fix the client structure with the original stuff */
7018 c
->argv
= orig_argv
;
7019 c
->argc
= orig_argc
;
7025 /* If the list is empty or the key does not exists we must block */
7026 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7027 if (timeout
> 0) timeout
+= time(NULL
);
7028 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7031 static void blpopCommand(redisClient
*c
) {
7032 blockingPopGenericCommand(c
,REDIS_HEAD
);
7035 static void brpopCommand(redisClient
*c
) {
7036 blockingPopGenericCommand(c
,REDIS_TAIL
);
7039 /* =============================== Replication ============================= */
7041 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7042 ssize_t nwritten
, ret
= size
;
7043 time_t start
= time(NULL
);
7047 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7048 nwritten
= write(fd
,ptr
,size
);
7049 if (nwritten
== -1) return -1;
7053 if ((time(NULL
)-start
) > timeout
) {
7061 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7062 ssize_t nread
, totread
= 0;
7063 time_t start
= time(NULL
);
7067 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7068 nread
= read(fd
,ptr
,size
);
7069 if (nread
== -1) return -1;
7074 if ((time(NULL
)-start
) > timeout
) {
7082 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7089 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7092 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7103 static void syncCommand(redisClient
*c
) {
    /* ignore SYNC if already slave or in monitor mode */
7105 if (c
->flags
& REDIS_SLAVE
) return;
7107 /* SYNC can't be issued when the server has pending data to send to
7108 * the client about already issued commands. We need a fresh reply
7109 * buffer registering the differences between the BGSAVE and the current
7110 * dataset, so that we can copy to other slaves if needed. */
7111 if (listLength(c
->reply
) != 0) {
7112 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7116 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7117 /* Here we need to check if there is a background saving operation
7118 * in progress, or if it is required to start one */
7119 if (server
.bgsavechildpid
!= -1) {
7120 /* Ok a background save is in progress. Let's check if it is a good
7121 * one for replication, i.e. if there is another slave that is
7122 * registering differences since the server forked to save */
7127 listRewind(server
.slaves
,&li
);
7128 while((ln
= listNext(&li
))) {
7130 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7133 /* Perfect, the server is already registering differences for
7134 * another slave. Set the right state, and copy the buffer. */
7135 listRelease(c
->reply
);
7136 c
->reply
= listDup(slave
->reply
);
7137 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7138 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7140 /* No way, we need to wait for the next BGSAVE in order to
7141 * register differences */
7142 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7143 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7146 /* Ok we don't have a BGSAVE in progress, let's start one */
7147 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7148 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7149 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7150 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7153 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7156 c
->flags
|= REDIS_SLAVE
;
7158 listAddNodeTail(server
.slaves
,c
);
7162 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7163 redisClient
*slave
= privdata
;
7165 REDIS_NOTUSED(mask
);
7166 char buf
[REDIS_IOBUF_LEN
];
7167 ssize_t nwritten
, buflen
;
7169 if (slave
->repldboff
== 0) {
7170 /* Write the bulk write count before to transfer the DB. In theory here
7171 * we don't know how much room there is in the output buffer of the
7172 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7173 * operations) will never be smaller than the few bytes we need. */
7176 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7178 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7186 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7187 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7189 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7190 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7194 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7195 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7200 slave
->repldboff
+= nwritten
;
7201 if (slave
->repldboff
== slave
->repldbsize
) {
7202 close(slave
->repldbfd
);
7203 slave
->repldbfd
= -1;
7204 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7205 slave
->replstate
= REDIS_REPL_ONLINE
;
7206 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7207 sendReplyToClient
, slave
) == AE_ERR
) {
7211 addReplySds(slave
,sdsempty());
7212 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
/* This function is called at the end of every background saving.
7217 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7218 * otherwise REDIS_ERR is passed to the function.
7220 * The goal of this function is to handle slaves waiting for a successful
7221 * background saving in order to perform non-blocking synchronization. */
7222 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7224 int startbgsave
= 0;
7227 listRewind(server
.slaves
,&li
);
7228 while((ln
= listNext(&li
))) {
7229 redisClient
*slave
= ln
->value
;
7231 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7233 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7234 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7235 struct redis_stat buf
;
7237 if (bgsaveerr
!= REDIS_OK
) {
7239 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7242 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7243 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7245 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7248 slave
->repldboff
= 0;
7249 slave
->repldbsize
= buf
.st_size
;
7250 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7251 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7252 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7259 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7262 listRewind(server
.slaves
,&li
);
7263 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7264 while((ln
= listNext(&li
))) {
7265 redisClient
*slave
= ln
->value
;
7267 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7274 static int syncWithMaster(void) {
7275 char buf
[1024], tmpfile
[256], authcmd
[1024];
7277 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7278 int dfd
, maxtries
= 5;
7281 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7286 /* AUTH with the master if required. */
7287 if(server
.masterauth
) {
7288 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7289 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7291 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7295 /* Read the AUTH result. */
7296 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7298 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7302 if (buf
[0] != '+') {
7304 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7309 /* Issue the SYNC command */
7310 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7312 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7316 /* Read the bulk write count */
7317 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7319 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7323 if (buf
[0] != '$') {
7325 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7328 dumpsize
= strtol(buf
+1,NULL
,10);
7329 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7330 /* Read the bulk write data on a temp file */
7332 snprintf(tmpfile
,256,
7333 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7334 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7335 if (dfd
!= -1) break;
7340 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7344 int nread
, nwritten
;
7346 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7348 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7354 nwritten
= write(dfd
,buf
,nread
);
7355 if (nwritten
== -1) {
7356 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7364 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7365 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7371 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7372 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7376 server
.master
= createClient(fd
);
7377 server
.master
->flags
|= REDIS_MASTER
;
7378 server
.master
->authenticated
= 1;
7379 server
.replstate
= REDIS_REPL_CONNECTED
;
7383 static void slaveofCommand(redisClient
*c
) {
7384 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7385 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7386 if (server
.masterhost
) {
7387 sdsfree(server
.masterhost
);
7388 server
.masterhost
= NULL
;
7389 if (server
.master
) freeClient(server
.master
);
7390 server
.replstate
= REDIS_REPL_NONE
;
7391 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7394 sdsfree(server
.masterhost
);
7395 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7396 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7397 if (server
.master
) freeClient(server
.master
);
7398 server
.replstate
= REDIS_REPL_CONNECT
;
7399 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7400 server
.masterhost
, server
.masterport
);
7402 addReply(c
,shared
.ok
);
7405 /* ============================ Maxmemory directive ======================== */
/* Try to free one object from the pre-allocated objects free list.
7408 * This is useful under low mem conditions as by default we take 1 million
7409 * free objects allocated. On success REDIS_OK is returned, otherwise
7411 static int tryFreeOneObjectFromFreelist(void) {
7414 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7415 if (listLength(server
.objfreelist
)) {
7416 listNode
*head
= listFirst(server
.objfreelist
);
7417 o
= listNodeValue(head
);
7418 listDelNode(server
.objfreelist
,head
);
7419 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7423 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7428 /* This function gets called when 'maxmemory' is set on the config file to limit
7429 * the max memory used by the server, and we are out of memory.
7430 * This function will try to, in order:
7432 * - Free objects from the free list
7433 * - Try to remove keys with an EXPIRE set
 * If it is not possible to free enough memory to reach used-memory < maxmemory
7436 * the server will start refusing commands that will enlarge even more the
7439 static void freeMemoryIfNeeded(void) {
7440 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7441 int j
, k
, freed
= 0;
7443 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7444 for (j
= 0; j
< server
.dbnum
; j
++) {
7446 robj
*minkey
= NULL
;
7447 struct dictEntry
*de
;
7449 if (dictSize(server
.db
[j
].expires
)) {
7451 /* From a sample of three keys drop the one nearest to
7452 * the natural expire */
7453 for (k
= 0; k
< 3; k
++) {
7456 de
= dictGetRandomKey(server
.db
[j
].expires
);
7457 t
= (time_t) dictGetEntryVal(de
);
7458 if (minttl
== -1 || t
< minttl
) {
7459 minkey
= dictGetEntryKey(de
);
7463 deleteKey(server
.db
+j
,minkey
);
7466 if (!freed
) return; /* nothing to free... */
7470 /* ============================== Append Only file ========================== */
7472 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7473 sds buf
= sdsempty();
    /* The DB this command was targeting is not the same as the last command
     * we appended. A SELECT command needs to be issued first. */
7481 if (dictid
!= server
.appendseldb
) {
7484 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7485 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7486 (unsigned long)strlen(seldb
),seldb
);
7487 server
.appendseldb
= dictid
;
7490 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7491 * EXPIREs into EXPIREATs calls */
7492 if (cmd
->proc
== expireCommand
) {
7495 tmpargv
[0] = createStringObject("EXPIREAT",8);
7496 tmpargv
[1] = argv
[1];
7497 incrRefCount(argv
[1]);
7498 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7499 tmpargv
[2] = createObject(REDIS_STRING
,
7500 sdscatprintf(sdsempty(),"%ld",when
));
7504 /* Append the actual command */
7505 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7506 for (j
= 0; j
< argc
; j
++) {
7509 o
= getDecodedObject(o
);
7510 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7511 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7512 buf
= sdscatlen(buf
,"\r\n",2);
7516 /* Free the objects from the modified argv for EXPIREAT */
7517 if (cmd
->proc
== expireCommand
) {
7518 for (j
= 0; j
< 3; j
++)
7519 decrRefCount(argv
[j
]);
7522 /* We want to perform a single write. This should be guaranteed atomic
7523 * at least if the filesystem we are writing is a real physical one.
7524 * While this will save us against the server being killed I don't think
7525 * there is much to do about the whole server stopping for power problems
7527 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7528 if (nwritten
!= (signed)sdslen(buf
)) {
7529 /* Ooops, we are in troubles. The best thing to do for now is
7530 * to simply exit instead to give the illusion that everything is
7531 * working as expected. */
7532 if (nwritten
== -1) {
7533 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7535 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7539 /* If a background append only file rewriting is in progress we want to
7540 * accumulate the differences between the child DB and the current one
7541 * in a buffer, so that when the child process will do its work we
7542 * can append the differences to the new append only file. */
7543 if (server
.bgrewritechildpid
!= -1)
7544 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7548 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7549 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7550 now
-server
.lastfsync
> 1))
7552 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7553 server
.lastfsync
= now
;
7557 /* In Redis commands are always executed in the context of a client, so in
7558 * order to load the append only file we need to create a fake client. */
7559 static struct redisClient
*createFakeClient(void) {
7560 struct redisClient
*c
= zmalloc(sizeof(*c
));
7564 c
->querybuf
= sdsempty();
7568 /* We set the fake client as a slave waiting for the synchronization
7569 * so that Redis will not try to send replies to this client. */
7570 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7571 c
->reply
= listCreate();
7572 listSetFreeMethod(c
->reply
,decrRefCount
);
7573 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7577 static void freeFakeClient(struct redisClient
*c
) {
7578 sdsfree(c
->querybuf
);
7579 listRelease(c
->reply
);
/* Replay the append log file. On success REDIS_OK is returned. On non fatal
 * error (the append only file is zero-length) REDIS_ERR is returned. On
 * fatal error an error message is logged and the program exits. */
7586 int loadAppendOnlyFile(char *filename
) {
7587 struct redisClient
*fakeClient
;
7588 FILE *fp
= fopen(filename
,"r");
7589 struct redis_stat sb
;
7590 unsigned long long loadedkeys
= 0;
7592 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7596 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7600 fakeClient
= createFakeClient();
7607 struct redisCommand
*cmd
;
7609 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7615 if (buf
[0] != '*') goto fmterr
;
7617 argv
= zmalloc(sizeof(robj
*)*argc
);
7618 for (j
= 0; j
< argc
; j
++) {
7619 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7620 if (buf
[0] != '$') goto fmterr
;
7621 len
= strtol(buf
+1,NULL
,10);
7622 argsds
= sdsnewlen(NULL
,len
);
7623 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7624 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7625 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7628 /* Command lookup */
7629 cmd
= lookupCommand(argv
[0]->ptr
);
7631 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7634 /* Try object sharing and encoding */
7635 if (server
.shareobjects
) {
7637 for(j
= 1; j
< argc
; j
++)
7638 argv
[j
] = tryObjectSharing(argv
[j
]);
7640 if (cmd
->flags
& REDIS_CMD_BULK
)
7641 tryObjectEncoding(argv
[argc
-1]);
7642 /* Run the command in the context of a fake client */
7643 fakeClient
->argc
= argc
;
7644 fakeClient
->argv
= argv
;
7645 cmd
->proc(fakeClient
);
7646 /* Discard the reply objects list from the fake client */
7647 while(listLength(fakeClient
->reply
))
7648 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7649 /* Clean up, ready for the next command */
7650 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7652 /* Handle swapping while loading big datasets when VM is on */
7654 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7655 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7656 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7661 freeFakeClient(fakeClient
);
7666 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7668 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7672 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7676 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7677 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7681 /* Avoid the incr/decr ref count business if possible to help
7682 * copy-on-write (we are often in a child process when this function
7684 * Also makes sure that key objects don't get incrRefCount-ed when VM
7686 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7687 obj
= getDecodedObject(obj
);
7690 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7691 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7692 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7694 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7695 if (decrrc
) decrRefCount(obj
);
7698 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * fp  - stream to write to.
 * s   - start of the payload; exactly 'len' bytes are written, so embedded
 *       NUL bytes are preserved. Not touched when 'len' is 0.
 * len - payload length in bytes, also emitted as <count>.
 *
 * Returns 1 on success, 0 as soon as one of the fwrite() calls fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is an unsigned long, so it has to be formatted with %lu: using
     * %ld mismatches the argument type. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() with a zero size returns 0 even without errors, so the
     * payload write is skipped for empty strings. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted with "%.17g". Returns 1 on success, 0 as soon as
 * one of the fwrite() calls fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload buffer already carries its trailing CRLF, so the count
     * announced in the header is its length minus those two bytes. */
    snprintf(payload, sizeof(payload), "%.17g\r\n", d);
    paylen = strlen(payload);
    snprintf(header, sizeof(header), "$%lu\r\n", (unsigned long) paylen - 2);

    if (fwrite(header, strlen(header), 1, fp) == 0) return 0;
    if (fwrite(payload, paylen, 1, fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted in base 10 with "%ld". Returns 1 on success, 0 as
 * soon as one of the fwrite() calls fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload buffer already carries its trailing CRLF, so the count
     * announced in the header is its length minus those two bytes. */
    snprintf(payload, sizeof(payload), "%ld\r\n", l);
    paylen = strlen(payload);
    snprintf(header, sizeof(header), "$%lu\r\n", (unsigned long) paylen - 2);

    if (fwrite(header, strlen(header), 1, fp) == 0) return 0;
    if (fwrite(payload, paylen, 1, fp) == 0) return 0;
    return 1;
}
7736 /* Write a sequence of commands able to fully rebuild the dataset into
7737 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7738 static int rewriteAppendOnlyFile(char *filename
) {
7739 dictIterator
*di
= NULL
;
7744 time_t now
= time(NULL
);
7746 /* Note that we have to use a different temp name here compared to the
7747 * one used by rewriteAppendOnlyFileBackground() function. */
7748 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7749 fp
= fopen(tmpfile
,"w");
7751 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7754 for (j
= 0; j
< server
.dbnum
; j
++) {
7755 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7756 redisDb
*db
= server
.db
+j
;
7758 if (dictSize(d
) == 0) continue;
7759 di
= dictGetIterator(d
);
7765 /* SELECT the new DB */
7766 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7767 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7769 /* Iterate this DB writing every entry */
7770 while((de
= dictNext(di
)) != NULL
) {
7775 key
= dictGetEntryKey(de
);
7776 /* If the value for this key is swapped, load a preview in memory.
7777 * We use a "swapped" flag to remember if we need to free the
7778 * value object instead to just increment the ref count anyway
7779 * in order to avoid copy-on-write of pages if we are forked() */
7780 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7781 key
->storage
== REDIS_VM_SWAPPING
) {
7782 o
= dictGetEntryVal(de
);
7785 o
= vmPreviewObject(key
);
7788 expiretime
= getExpire(db
,key
);
7790 /* Save the key and associated value */
7791 if (o
->type
== REDIS_STRING
) {
7792 /* Emit a SET command */
7793 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7794 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7796 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7797 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7798 } else if (o
->type
== REDIS_LIST
) {
7799 /* Emit the RPUSHes needed to rebuild the list */
7800 list
*list
= o
->ptr
;
7804 listRewind(list
,&li
);
7805 while((ln
= listNext(&li
))) {
7806 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7807 robj
*eleobj
= listNodeValue(ln
);
7809 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7810 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7811 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7813 } else if (o
->type
== REDIS_SET
) {
7814 /* Emit the SADDs needed to rebuild the set */
7816 dictIterator
*di
= dictGetIterator(set
);
7819 while((de
= dictNext(di
)) != NULL
) {
7820 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7821 robj
*eleobj
= dictGetEntryKey(de
);
7823 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7824 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7825 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7827 dictReleaseIterator(di
);
7828 } else if (o
->type
== REDIS_ZSET
) {
7829 /* Emit the ZADDs needed to rebuild the sorted set */
7831 dictIterator
*di
= dictGetIterator(zs
->dict
);
7834 while((de
= dictNext(di
)) != NULL
) {
7835 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7836 robj
*eleobj
= dictGetEntryKey(de
);
7837 double *score
= dictGetEntryVal(de
);
7839 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7840 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7841 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7842 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7844 dictReleaseIterator(di
);
7845 } else if (o
->type
== REDIS_HASH
) {
7846 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7848 /* Emit the HSETs needed to rebuild the hash */
7849 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7850 unsigned char *p
= zipmapRewind(o
->ptr
);
7851 unsigned char *field
, *val
;
7852 unsigned int flen
, vlen
;
7854 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7855 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7856 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7857 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7859 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7863 dictIterator
*di
= dictGetIterator(o
->ptr
);
7866 while((de
= dictNext(di
)) != NULL
) {
7867 robj
*field
= dictGetEntryKey(de
);
7868 robj
*val
= dictGetEntryVal(de
);
7870 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7871 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7872 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7873 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7875 dictReleaseIterator(di
);
7880 /* Save the expire time */
7881 if (expiretime
!= -1) {
7882 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7883 /* If this key is already expired skip it */
7884 if (expiretime
< now
) continue;
7885 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7886 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7887 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7889 if (swapped
) decrRefCount(o
);
7891 dictReleaseIterator(di
);
7894 /* Make sure data will not remain on the OS's output buffers */
7899 /* Use RENAME to make sure the DB file is changed atomically only
7900 * if the generate DB file is ok. */
7901 if (rename(tmpfile
,filename
) == -1) {
7902 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
7906 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
7912 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
7913 if (di
) dictReleaseIterator(di
);
7917 /* This is how rewriting of the append only file in background works:
7919 * 1) The user calls BGREWRITEAOF
7920 * 2) Redis calls this function, that forks():
7921 * 2a) the child rewrite the append only file in a temp file.
7922 * 2b) the parent accumulates differences in server.bgrewritebuf.
7923 * 3) When the child finishes '2a' it exits.
7924 * 4) The parent will trap the exit code, if it's OK, will append the
7925 * data accumulated into server.bgrewritebuf into the temp file, and
7926 * finally will rename(2) the temp file in the actual file name.
7927 * Then the new file is reopened as the new append only file. Profit!
7929 static int rewriteAppendOnlyFileBackground(void) {
7932 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
7933 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
7934 if ((childpid
= fork()) == 0) {
7938 if (server
.vm_enabled
) vmReopenSwapFile();
7940 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
7941 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
7948 if (childpid
== -1) {
7949 redisLog(REDIS_WARNING
,
7950 "Can't rewrite append only file in background: fork: %s",
7954 redisLog(REDIS_NOTICE
,
7955 "Background append only file rewriting started by pid %d",childpid
);
7956 server
.bgrewritechildpid
= childpid
;
7957 /* We set appendseldb to -1 in order to force the next call to the
7958 * feedAppendOnlyFile() to issue a SELECT command, so the differences
7959 * accumulated by the parent into server.bgrewritebuf will start
7960 * with a SELECT statement and it will be safe to merge. */
7961 server
.appendseldb
= -1;
7964 return REDIS_OK
; /* unreached */
7967 static void bgrewriteaofCommand(redisClient
*c
) {
7968 if (server
.bgrewritechildpid
!= -1) {
7969 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
7972 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
7973 char *status
= "+Background append only file rewriting started\r\n";
7974 addReplySds(c
,sdsnew(status
));
7976 addReply(c
,shared
.err
);
/* Remove the temp file that the background rewrite child with pid
 * 'childpid' writes to. The name is built with the same
 * "temp-rewriteaof-bg-<pid>.aof" pattern used when the child is started. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];
    int pid = (int) childpid;

    snprintf(path,256,"temp-rewriteaof-bg-%d.aof", pid);
    unlink(path);
}
7987 /* Virtual Memory is composed mainly of two subsystems:
7988 * - Blocking Virtual Memory
7989 * - Threaded Virtual Memory I/O
7990 * The two parts are not fully decoupled, but functions are split among two
7991 * different sections of the source code (delimited by comments) in order to
7992 * make more clear what functionality is about the blocking VM and what about
7993 * the threaded (not blocking) VM.
7997 * Redis VM is a blocking VM (one that blocks reading swapped values from
7998 * disk into memory when a value swapped out is needed in memory) that is made
7999 * unblocking by trying to examine the command argument vector in order to
8000 * load in background values that will likely be needed in order to exec
8001 * the command. The command is executed only once all the relevant keys
8002 * are loaded into memory.
8004 * This is basically almost as simple as a blocking VM, but almost as parallel
8005 * as a fully non-blocking VM.
8008 /* =================== Virtual Memory - Blocking Side ====================== */
8010 /* substitute the first occurrence of '%p' with the process pid in the
8011 * swap file name. */
8012 static void expandVmSwapFilename(void) {
8013 char *p
= strstr(server
.vm_swap_file
,"%p");
8019 new = sdscat(new,server
.vm_swap_file
);
8020 new = sdscatprintf(new,"%ld",(long) getpid());
8021 new = sdscat(new,p
+2);
8022 zfree(server
.vm_swap_file
);
8023 server
.vm_swap_file
= new;
8026 static void vmInit(void) {
8031 if (server
.vm_max_threads
!= 0)
8032 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8034 expandVmSwapFilename();
8035 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8036 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8037 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8039 if (server
.vm_fp
== NULL
) {
8040 redisLog(REDIS_WARNING
,
8041 "Impossible to open the swap file: %s. Exiting.",
8045 server
.vm_fd
= fileno(server
.vm_fp
);
8046 server
.vm_next_page
= 0;
8047 server
.vm_near_pages
= 0;
8048 server
.vm_stats_used_pages
= 0;
8049 server
.vm_stats_swapped_objects
= 0;
8050 server
.vm_stats_swapouts
= 0;
8051 server
.vm_stats_swapins
= 0;
8052 totsize
= server
.vm_pages
*server
.vm_page_size
;
8053 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8054 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8055 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8059 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8061 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8062 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8063 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8064 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8066 /* Initialize threaded I/O (used by Virtual Memory) */
8067 server
.io_newjobs
= listCreate();
8068 server
.io_processing
= listCreate();
8069 server
.io_processed
= listCreate();
8070 server
.io_ready_clients
= listCreate();
8071 pthread_mutex_init(&server
.io_mutex
,NULL
);
8072 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8073 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8074 server
.io_active_threads
= 0;
8075 if (pipe(pipefds
) == -1) {
8076 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8080 server
.io_ready_pipe_read
= pipefds
[0];
8081 server
.io_ready_pipe_write
= pipefds
[1];
8082 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8083 /* LZF requires a lot of stack */
8084 pthread_attr_init(&server
.io_threads_attr
);
8085 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8086 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8087 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8088 /* Listen for events in the threaded I/O pipe */
8089 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8090 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8091 oom("creating file event");
8094 /* Mark the page as used */
8095 static void vmMarkPageUsed(off_t page
) {
8096 off_t byte
= page
/8;
8098 redisAssert(vmFreePage(page
) == 1);
8099 server
.vm_bitmap
[byte
] |= 1<<bit
;
8102 /* Mark N contiguous pages as used, with 'page' being the first. */
8103 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8106 for (j
= 0; j
< count
; j
++)
8107 vmMarkPageUsed(page
+j
);
8108 server
.vm_stats_used_pages
+= count
;
8109 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8110 (long long)count
, (long long)page
);
8113 /* Mark the page as free */
8114 static void vmMarkPageFree(off_t page
) {
8115 off_t byte
= page
/8;
8117 redisAssert(vmFreePage(page
) == 0);
8118 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8121 /* Mark N contiguous pages as free, with 'page' being the first. */
8122 static void vmMarkPagesFree(off_t page
, off_t count
) {
8125 for (j
= 0; j
< count
; j
++)
8126 vmMarkPageFree(page
+j
);
8127 server
.vm_stats_used_pages
-= count
;
8128 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8129 (long long)count
, (long long)page
);
8132 /* Test if the page is free */
8133 static int vmFreePage(off_t page
) {
8134 off_t byte
= page
/8;
8136 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8139 /* Find N contiguous free pages storing the first page of the cluster in *first.
8140 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8141 * REDIS_ERR is returned.
8143 * This function uses a simple algorithm: we try to allocate
8144 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8145 * again from the start of the swap file searching for free spaces.
8147 * If it looks pretty clear that there are no free pages near our offset
8148 * we try to find less populated places doing a forward jump of
8149 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8150 * without hurry, and then we jump again and so forth...
8152 * This function can be improved using a free list to avoid to guess
8153 * too much, since we could collect data about freed pages.
8155 * note: I implemented this function just after watching an episode of
8156 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8158 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8159 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8161 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8162 server
.vm_near_pages
= 0;
8163 server
.vm_next_page
= 0;
8165 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8166 base
= server
.vm_next_page
;
8168 while(offset
< server
.vm_pages
) {
8169 off_t
this = base
+offset
;
8171 /* If we overflow, restart from page zero */
8172 if (this >= server
.vm_pages
) {
8173 this -= server
.vm_pages
;
8175 /* Just overflowed, what we found on tail is no longer
8176 * interesting, as it's no longer contiguous. */
8180 if (vmFreePage(this)) {
8181 /* This is a free page */
8183 /* Already got N free pages? Return to the caller, with success */
8185 *first
= this-(n
-1);
8186 server
.vm_next_page
= this+1;
8187 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8191 /* The current one is not a free page */
8195 /* Fast-forward if the current page is not free and we already
8196 * searched enough near this place. */
8198 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8199 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8201 /* Note that even if we rewind after the jump, we don't need
8202 * to make sure numfree is set to zero as we only jump *if* it
8203 * is set to zero. */
8205 /* Otherwise just check the next page */
8212 /* Write the specified object at the specified page of the swap file */
8213 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8214 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8215 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8216 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8217 redisLog(REDIS_WARNING
,
8218 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8222 rdbSaveObject(server
.vm_fp
,o
);
8223 fflush(server
.vm_fp
);
8224 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8228 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8229 * needed to later retrieve the object into the key object.
8230 * If we can't find enough contiguous empty pages to swap the object on disk
8231 * REDIS_ERR is returned. */
8232 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8233 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8236 assert(key
->storage
== REDIS_VM_MEMORY
);
8237 assert(key
->refcount
== 1);
8238 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8239 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8240 key
->vm
.page
= page
;
8241 key
->vm
.usedpages
= pages
;
8242 key
->storage
= REDIS_VM_SWAPPED
;
8243 key
->vtype
= val
->type
;
8244 decrRefCount(val
); /* Deallocate the object from memory. */
8245 vmMarkPagesUsed(page
,pages
);
8246 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8247 (unsigned char*) key
->ptr
,
8248 (unsigned long long) page
, (unsigned long long) pages
);
8249 server
.vm_stats_swapped_objects
++;
8250 server
.vm_stats_swapouts
++;
8254 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8257 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8258 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8259 redisLog(REDIS_WARNING
,
8260 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8264 o
= rdbLoadObject(type
,server
.vm_fp
);
8266 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8269 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8273 /* Load the value object relative to the 'key' object from swap to memory.
8274 * The newly allocated object is returned.
8276 * If preview is true the unserialized object is returned to the caller but
8277 * no changes are made to the key object, nor the pages are marked as freed */
8278 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8281 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8282 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8284 key
->storage
= REDIS_VM_MEMORY
;
8285 key
->vm
.atime
= server
.unixtime
;
8286 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8287 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8288 (unsigned char*) key
->ptr
);
8289 server
.vm_stats_swapped_objects
--;
8291 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8292 (unsigned char*) key
->ptr
);
8294 server
.vm_stats_swapins
++;
8298 /* Plain object loading, from swap to memory */
8299 static robj
*vmLoadObject(robj
*key
) {
8300 /* If we are loading the object in background, stop it, we
8301 * need to load this object synchronously ASAP. */
8302 if (key
->storage
== REDIS_VM_LOADING
)
8303 vmCancelThreadedIOJob(key
);
8304 return vmGenericLoadObject(key
,0);
8307 /* Just load the value on disk, without to modify the key.
8308 * This is useful when we want to perform some operation on the value
8309 * without to really bring it from swap to memory, like while saving the
8310 * dataset or rewriting the append only log. */
8311 static robj
*vmPreviewObject(robj
*key
) {
8312 return vmGenericLoadObject(key
,1);
8315 /* How a good candidate is this object for swapping?
8316 * The better candidate it is, the greater the returned value.
8318 * Currently we try to perform a fast estimation of the object size in
8319 * memory, and combine it with aging informations.
8321 * Basically swappability = idle-time * log(estimated size)
8323 * Bigger objects are preferred over smaller objects, but not
8324 * proportionally, this is why we use the logarithm. This algorithm is
8325 * just a first try and will probably be tuned later. */
8326 static double computeObjectSwappability(robj
*o
) {
8327 time_t age
= server
.unixtime
- o
->vm
.atime
;
8331 struct dictEntry
*de
;
8334 if (age
<= 0) return 0;
8337 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8340 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8345 listNode
*ln
= listFirst(l
);
8347 asize
= sizeof(list
);
8349 robj
*ele
= ln
->value
;
8352 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8353 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8355 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8360 z
= (o
->type
== REDIS_ZSET
);
8361 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8363 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8364 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8369 de
= dictGetRandomKey(d
);
8370 ele
= dictGetEntryKey(de
);
8371 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8372 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8374 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8375 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8379 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8380 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8381 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8382 unsigned int klen
, vlen
;
8383 unsigned char *key
, *val
;
8385 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8389 asize
= len
*(klen
+vlen
+3);
8390 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8392 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8397 de
= dictGetRandomKey(d
);
8398 ele
= dictGetEntryKey(de
);
8399 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8400 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8402 ele
= dictGetEntryVal(de
);
8403 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8404 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8406 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8411 return (double)age
*log(1+asize
);
8414 /* Try to swap an object that's a good candidate for swapping.
8415 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8416 * to swap any object at all.
8418 * If 'usethreaded' is true, Redis will try to swap the object in background
8419 * using I/O threads. */
8420 static int vmSwapOneObject(int usethreads
) {
8422 struct dictEntry
*best
= NULL
;
8423 double best_swappability
= 0;
8424 redisDb
*best_db
= NULL
;
8427 for (j
= 0; j
< server
.dbnum
; j
++) {
8428 redisDb
*db
= server
.db
+j
;
8429 /* Why maxtries is set to 100?
8430 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8431 * are swappable objects */
8434 if (dictSize(db
->dict
) == 0) continue;
8435 for (i
= 0; i
< 5; i
++) {
8437 double swappability
;
8439 if (maxtries
) maxtries
--;
8440 de
= dictGetRandomKey(db
->dict
);
8441 key
= dictGetEntryKey(de
);
8442 val
= dictGetEntryVal(de
);
8443 /* Only swap objects that are currently in memory.
8445 * Also don't swap shared objects if threaded VM is on, as we
8446 * try to ensure that the main thread does not touch the
8447 * object while the I/O thread is using it, but we can't
8448 * control other keys without adding additional mutex. */
8449 if (key
->storage
!= REDIS_VM_MEMORY
||
8450 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8451 if (maxtries
) i
--; /* don't count this try */
8454 swappability
= computeObjectSwappability(val
);
8455 if (!best
|| swappability
> best_swappability
) {
8457 best_swappability
= swappability
;
8462 if (best
== NULL
) return REDIS_ERR
;
8463 key
= dictGetEntryKey(best
);
8464 val
= dictGetEntryVal(best
);
8466 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8467 key
->ptr
, best_swappability
);
8469 /* Unshare the key if needed */
8470 if (key
->refcount
> 1) {
8471 robj
*newkey
= dupStringObject(key
);
8473 key
= dictGetEntryKey(best
) = newkey
;
8477 vmSwapObjectThreaded(key
,val
,best_db
);
8480 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8481 dictGetEntryVal(best
) = NULL
;
/* Try to swap one object out without using the I/O threads: this is
 * vmSwapOneObject() called with 'usethreads' set to 0, and the return value
 * is the one of that call.
 *
 * Declared with (void): in C an empty parameter list does not declare a
 * prototype, so calls with stray arguments would not be diagnosed. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap one object out using the I/O threads: this is
 * vmSwapOneObject() called with 'usethreads' set to 1, and the return value
 * is the one of that call.
 *
 * Declared with (void): in C an empty parameter list does not declare a
 * prototype, so calls with stray arguments would not be diagnosed. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8497 /* Return true if it's safe to swap out objects in a given moment.
8498 * Basically we don't want to swap objects out while there is a BGSAVE
8499 * or a BGAEOREWRITE running in backgroud. */
8500 static int vmCanSwapOut(void) {
8501 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8504 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8505 * and was deleted. Otherwise 0 is returned. */
8506 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8510 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8511 foundkey
= dictGetEntryKey(de
);
8512 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8517 /* =================== Virtual Memory - Threaded I/O ======================= */
8519 static void freeIOJob(iojob
*j
) {
8520 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8521 j
->type
== REDIS_IOJOB_DO_SWAP
||
8522 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8523 decrRefCount(j
->val
);
8524 decrRefCount(j
->key
);
8528 /* Every time a thread finished a Job, it writes a byte into the write side
8529 * of an unix pipe in order to "awake" the main thread, and this function
8531 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8535 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8537 REDIS_NOTUSED(mask
);
8538 REDIS_NOTUSED(privdata
);
8540 /* For every byte we read in the read side of the pipe, there is one
8541 * I/O job completed to process. */
8542 while((retval
= read(fd
,buf
,1)) == 1) {
8546 struct dictEntry
*de
;
8548 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8550 /* Get the processed element (the oldest one) */
8552 assert(listLength(server
.io_processed
) != 0);
8553 if (toprocess
== -1) {
8554 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8555 if (toprocess
<= 0) toprocess
= 1;
8557 ln
= listFirst(server
.io_processed
);
8559 listDelNode(server
.io_processed
,ln
);
8561 /* If this job is marked as canceled, just ignore it */
8566 /* Post process it in the main thread, as there are things we
8567 * can do just here to avoid race conditions and/or invasive locks */
8568 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8569 de
= dictFind(j
->db
->dict
,j
->key
);
8571 key
= dictGetEntryKey(de
);
8572 if (j
->type
== REDIS_IOJOB_LOAD
) {
8575 /* Key loaded, bring it at home */
8576 key
->storage
= REDIS_VM_MEMORY
;
8577 key
->vm
.atime
= server
.unixtime
;
8578 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8579 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8580 (unsigned char*) key
->ptr
);
8581 server
.vm_stats_swapped_objects
--;
8582 server
.vm_stats_swapins
++;
8583 dictGetEntryVal(de
) = j
->val
;
8584 incrRefCount(j
->val
);
8587 /* Handle clients waiting for this key to be loaded. */
8588 handleClientsBlockedOnSwappedKey(db
,key
);
8589 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8590 /* Now we know the amount of pages required to swap this object.
8591 * Let's find some space for it, and queue this task again
8592 * rebranded as REDIS_IOJOB_DO_SWAP. */
8593 if (!vmCanSwapOut() ||
8594 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8596 /* Ooops... no space or we can't swap as there is
8597 * a fork()ed Redis trying to save stuff on disk. */
8599 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8601 /* Note that we need to mark this pages as used now,
8602 * if the job will be canceled, we'll mark them as freed
8604 vmMarkPagesUsed(j
->page
,j
->pages
);
8605 j
->type
= REDIS_IOJOB_DO_SWAP
;
8610 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8613 /* Key swapped. We can finally free some memory. */
8614 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8615 printf("key->storage: %d\n",key
->storage
);
8616 printf("key->name: %s\n",(char*)key
->ptr
);
8617 printf("key->refcount: %d\n",key
->refcount
);
8618 printf("val: %p\n",(void*)j
->val
);
8619 printf("val->type: %d\n",j
->val
->type
);
8620 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8622 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8623 val
= dictGetEntryVal(de
);
8624 key
->vm
.page
= j
->page
;
8625 key
->vm
.usedpages
= j
->pages
;
8626 key
->storage
= REDIS_VM_SWAPPED
;
8627 key
->vtype
= j
->val
->type
;
8628 decrRefCount(val
); /* Deallocate the object from memory. */
8629 dictGetEntryVal(de
) = NULL
;
8630 redisLog(REDIS_DEBUG
,
8631 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8632 (unsigned char*) key
->ptr
,
8633 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8634 server
.vm_stats_swapped_objects
++;
8635 server
.vm_stats_swapouts
++;
8637 /* Put a few more swap requests in queue if we are still
8639 if (trytoswap
&& vmCanSwapOut() &&
8640 zmalloc_used_memory() > server
.vm_max_memory
)
8645 more
= listLength(server
.io_newjobs
) <
8646 (unsigned) server
.vm_max_threads
;
8648 /* Don't waste CPU time if swappable objects are rare. */
8649 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8657 if (processed
== toprocess
) return;
8659 if (retval
< 0 && errno
!= EAGAIN
) {
8660 redisLog(REDIS_WARNING
,
8661 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8666 static void lockThreadedIO(void) {
8667 pthread_mutex_lock(&server
.io_mutex
);
8670 static void unlockThreadedIO(void) {
8671 pthread_mutex_unlock(&server
.io_mutex
);
8674 /* Remove the specified object from the threaded I/O queue if still not
8675 * processed, otherwise make sure to flag it as canceled. */
8676 static void vmCancelThreadedIOJob(robj
*o
) {
8678 server
.io_newjobs
, /* 0 */
8679 server
.io_processing
, /* 1 */
8680 server
.io_processed
/* 2 */
8684 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8687 /* Search for a matching key in one of the queues */
8688 for (i
= 0; i
< 3; i
++) {
8692 listRewind(lists
[i
],&li
);
8693 while ((ln
= listNext(&li
)) != NULL
) {
8694 iojob
*job
= ln
->value
;
8696 if (job
->canceled
) continue; /* Skip this, already canceled. */
8697 if (compareStringObjects(job
->key
,o
) == 0) {
8698 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8699 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8700 /* Mark the pages as free since the swap didn't happened
8701 * or happened but is now discarded. */
8702 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8703 vmMarkPagesFree(job
->page
,job
->pages
);
8704 /* Cancel the job. It depends on the list the job is
8707 case 0: /* io_newjobs */
8708 /* If the job was yet not processed the best thing to do
8709 * is to remove it from the queue at all */
8711 listDelNode(lists
[i
],ln
);
8713 case 1: /* io_processing */
8714 /* Oh Shi- the thread is messing with the Job:
8716 * Probably it's accessing the object if this is a
8717 * PREPARE_SWAP or DO_SWAP job.
8718 * If it's a LOAD job it may be reading from disk and
8719 * if we don't wait for the job to terminate before to
8720 * cancel it, maybe in a few microseconds data can be
8721 * corrupted in this pages. So the short story is:
8723 * Better to wait for the job to move into the
8724 * next queue (processed)... */
8726 /* We try again and again until the job is completed. */
8728 /* But let's wait some time for the I/O thread
8729 * to finish with this job. After all this condition
8730 * should be very rare. */
8733 case 2: /* io_processed */
8734 /* The job was already processed, that's easy...
8735 * just mark it as canceled so that we'll ignore it
8736 * when processing completed jobs. */
8740 /* Finally we have to adjust the storage type of the object
8741 * in order to "UNDO" the operation. */
8742 if (o
->storage
== REDIS_VM_LOADING
)
8743 o
->storage
= REDIS_VM_SWAPPED
;
8744 else if (o
->storage
== REDIS_VM_SWAPPING
)
8745 o
->storage
= REDIS_VM_MEMORY
;
8752 assert(1 != 1); /* We should never reach this */
/* Entry point of an I/O worker thread.
 *
 * The thread detaches itself, then takes the first job from
 * server.io_newjobs, parks it in server.io_processing while it is being
 * executed, and finally appends it to server.io_processed, writing a
 * single byte to server.io_ready_pipe_write to tell the main thread
 * there is something to collect. When server.io_newjobs is found empty
 * the thread decrements server.io_active_threads and exits.
 *
 * Job types handled here:
 *   REDIS_IOJOB_LOAD         - read the value back from the swap file
 *   REDIS_IOJOB_PREPARE_SWAP - compute how many swap pages the value needs
 *   REDIS_IOJOB_DO_SWAP      - write the value to the swap file
 *
 * 'arg' is whatever was handed to pthread_create(); spawnIOThread()
 * passes NULL. */
8755 static void *IOThreadEntryPoint(void *arg
) {
8760 pthread_detach(pthread_self());
8762 /* Get a new job to process */
8764 if (listLength(server
.io_newjobs
) == 0) {
8765 /* No new jobs in queue, exit. */
8766 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8767 (long) pthread_self());
8768 server
.io_active_threads
--;
/* Take the oldest queued job (head of io_newjobs) and unlink it. */
8772 ln
= listFirst(server
.io_newjobs
);
8774 listDelNode(server
.io_newjobs
,ln
);
8775 /* Add the job in the processing queue */
8776 j
->thread
= pthread_self();
8777 listAddNodeTail(server
.io_processing
,j
);
8778 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8780 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8781 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8783 /* Process the Job */
8784 if (j
->type
== REDIS_IOJOB_LOAD
) {
8785 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8786 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
/* The object is handed to rdbSavedObjectPages() together with a stream
 * opened on /dev/null; presumably the serialized form is only measured,
 * not kept -- TODO confirm.
 * NOTE(review): the fopen() result is not checked for NULL here. */
8787 FILE *fp
= fopen("/dev/null","w+");
8788 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8790 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8791 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8795 /* Done: insert the job into the processed queue */
8796 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8797 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8799 listDelNode(server
.io_processing
,ln
);
8800 listAddNodeTail(server
.io_processed
,j
);
8803 /* Signal the main thread there is new stuff to process */
/* NOTE(review): the write() call lives inside assert(); if assert is the
 * standard macro and NDEBUG is defined the notification byte would never
 * be written -- confirm how assert is defined for this build. */
8804 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8806 return NULL
; /* never reached */
/* Start one more I/O thread running IOThreadEntryPoint().
 *
 * SIGCHLD, SIGHUP and SIGPIPE are put in the signal mask that is
 * installed right before pthread_create() and the previous mask is
 * restored right after, so the new thread is created while those
 * signals are blocked. pthread_create() is retried in a loop until it
 * succeeds, logging every failure. On success
 * server.io_active_threads is incremented. */
8809 static void spawnIOThread(void) {
8811 sigset_t mask
, omask
;
8815 sigaddset(&mask
,SIGCHLD
);
8816 sigaddset(&mask
,SIGHUP
);
8817 sigaddset(&mask
,SIGPIPE
);
/* Install the temporary mask, remembering the old one in 'omask'. */
8818 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8819 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8820 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
/* Restore the signal mask the calling thread had before the spawn. */
8824 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8825 server
.io_active_threads
++;
/* Wait until there is no queued job, no job being processed and no
 * active I/O thread (server.io_newjobs and server.io_processing empty,
 * server.io_active_threads == 0). While waiting, already completed jobs
 * found in server.io_processed are handed to vmThreadedIOCompletedJob()
 * and the function sleeps briefly (1 or 10 milliseconds) between checks. */
8828 /* We need to wait for the last thread to exit before we are able to
8829 * fork() in order to BGSAVE or BGREWRITEAOF. */
8830 static void waitEmptyIOJobsQueue(void) {
8832 int io_processed_len
;
8835 if (listLength(server
.io_newjobs
) == 0 &&
8836 listLength(server
.io_processing
) == 0 &&
8837 server
.io_active_threads
== 0)
8842 /* While waiting for empty jobs queue condition we post-process some
8843 * finished job, as I/O threads may be hanging trying to write against
8844 * the io_ready_pipe_write FD but there are so much pending jobs that
8846 io_processed_len
= listLength(server
.io_processed
);
8848 if (io_processed_len
) {
8849 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8850 usleep(1000); /* 1 millisecond */
8852 usleep(10000); /* 10 milliseconds */
/* Re-open the VM swap file named by server.vm_swap_file in "r+b" mode
 * and refresh both server.vm_fp and server.vm_fd (the latter via
 * fileno()). A failure to open the file is logged as a warning; the log
 * message says the process is exiting. */
8857 static void vmReopenSwapFile(void) {
8858 /* Note: we don't close the old one as we are in the child process
8859 * and don't want to mess at all with the original file object. */
8860 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8861 if (server
.vm_fp
== NULL
) {
8862 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8863 server
.vm_swap_file
);
/* Keep the raw descriptor in sync with the freshly opened stream. */
8866 server
.vm_fd
= fileno(server
.vm_fp
);
8869 /* This function must be called while with threaded IO locked */
8870 static void queueIOJob(iojob
*j
) {
8871 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8872 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8873 listAddNodeTail(server
.io_newjobs
,j
);
8874 if (server
.io_active_threads
< server
.vm_max_threads
)
/* Start swapping out the value 'val' of 'key' using the I/O threads.
 *
 * The key must currently be in memory (storage == REDIS_VM_MEMORY) and
 * must not be shared (refcount == 1); both conditions are asserted.
 * A REDIS_IOJOB_PREPARE_SWAP job is allocated, carrying a private copy
 * of the key created with dupStringObject() and no owning thread yet
 * (thread == (pthread_t) -1), and the key is flagged as
 * REDIS_VM_SWAPPING.
 *
 * NOTE(review): how 'db' and 'val' are stored in the job, and how the
 * job reaches the queue, should be confirmed against the full source. */
8878 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8881 assert(key
->storage
== REDIS_VM_MEMORY
);
8882 assert(key
->refcount
== 1);
8884 j
= zmalloc(sizeof(*j
));
8885 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8887 j
->key
= dupStringObject(key
);
/* (pthread_t) -1 marks a job that no I/O thread has picked up so far;
 * IOThreadEntryPoint() overwrites it with pthread_self(). */
8891 j
->thread
= (pthread_t
) -1;
8892 key
->storage
= REDIS_VM_SWAPPING
;
8900 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
8902 /* This function makes the client 'c' wait for the key 'key' to be loaded.
8903 * If there is not already a job loading the key, it is created.
8904 * The key is added to the io_keys list in the client structure, and also
8905 * in the hash table mapping swapped keys to waiting clients, that is,
8906 * server.io_waited_keys. */
/* Returns 0 when the key is not present in c->db->dict.
 *
 * Storage states of the stored key object, as handled below:
 *   REDIS_VM_MEMORY   - nothing to load
 *   REDIS_VM_SWAPPING - the pending swap-out is cancelled with
 *                       vmCancelThreadedIOJob()
 *   REDIS_VM_SWAPPED  - flagged REDIS_VM_LOADING and a REDIS_IOJOB_LOAD
 *                       job is prepared
 * NOTE(review): the waiting-clients table is accessed below as
 * c->db->io_keys, while the comment above names server.io_waited_keys --
 * confirm which name is current. */
8907 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
8908 struct dictEntry
*de
;
8912 /* If the key does not exist or is already in RAM we don't need to
8913 * block the client at all. */
8914 de
= dictFind(c
->db
->dict
,key
);
8915 if (de
== NULL
) return 0;
/* 'o' is the key object stored in the dictionary: that object, not the
 * 'key' argument, carries the storage state inspected below. */
8916 o
= dictGetEntryKey(de
);
8917 if (o
->storage
== REDIS_VM_MEMORY
) {
8919 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
8920 /* We were swapping the key, undo it! */
8921 vmCancelThreadedIOJob(o
);
8925 /* OK: the key is either swapped, or being loaded just now. */
8927 /* Add the key to the list of keys this client is waiting for.
8928 * This maps clients to keys they are waiting for. */
8929 listAddNodeTail(c
->io_keys
,key
);
8932 /* Add the client to the swapped keys => clients waiting map. */
8933 de
= dictFind(c
->db
->io_keys
,key
);
8937 /* For every key we take a list of clients blocked for it */
8939 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
8941 assert(retval
== DICT_OK
);
8943 l
= dictGetEntryVal(de
);
8945 listAddNodeTail(l
,c
);
8947 /* Are we already loading the key from disk? If not create a job */
8948 if (o
->storage
== REDIS_VM_SWAPPED
) {
8951 o
->storage
= REDIS_VM_LOADING
;
8952 j
= zmalloc(sizeof(*j
));
8953 j
->type
= REDIS_IOJOB_LOAD
;
/* The job gets its own copy of the key; the value type and the swap
 * page are copied from the stored key object, and are the two values
 * the I/O thread passes to vmReadObjectFromSwap(). */
8955 j
->key
= dupStringObject(key
);
8956 j
->key
->vtype
= o
->vtype
;
8957 j
->page
= o
->vm
.page
;
8960 j
->thread
= (pthread_t
) -1;
8968 /* Preload keys needed for the ZUNION and ZINTER commands. */
8969 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
8971 num
= atoi(c
->argv
[2]->ptr
);
8972 for (i
= 0; i
< num
; i
++) {
8973 waitForSwappedKey(c
,c
->argv
[3+i
]);
8977 /* Is this client attempting to run a command against swapped keys?
8978 * If so, block it ASAP, load the keys in background, then resume it.
8980 * The important idea about this function is that it can fail! If keys will
8981 * still be swapped when the client is resumed, this key lookups will
8982 * just block loading keys from disk. In practical terms this should only
8983 * happen with SORT BY command or if there is a bug in this function.
8985 * Return 1 if the client is marked as blocked, 0 if the client can
8986 * continue as the keys it is going to access appear to be in memory. */
8987 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
/* A command may provide its own preload procedure, which is then called
 * with the client; otherwise the key positions come from the command's
 * vm_firstkey / vm_lastkey / vm_keystep fields. */
8990 if (cmd
->vm_preload_proc
!= NULL
) {
8991 cmd
->vm_preload_proc(c
);
/* vm_firstkey == 0: the command declares no key arguments to preload. */
8993 if (cmd
->vm_firstkey
== 0) return 0;
8994 last
= cmd
->vm_lastkey
;
/* A negative vm_lastkey is relative to the number of arguments. */
8995 if (last
< 0) last
= c
->argc
+last
;
/* NOTE(review): assumes vm_keystep > 0 and last < c->argc for every
 * command using this path -- verify against the command table. */
8996 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
8997 waitForSwappedKey(c
,c
->argv
[j
]);
9000 /* If the client was blocked for at least one key, mark it as blocked. */
9001 if (listLength(c
->io_keys
)) {
9002 c
->flags
|= REDIS_IO_WAIT
;
/* Stop watching the client socket for readable events while it waits. */
9003 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9004 server
.vm_blocked_clients
++;
9011 /* Remove the 'key' from the list of blocked keys for a given client.
9013 * The function returns 1 when there are no longer blocking keys after
9014 * the current one was removed (and the client can be unblocked). */
/* Both directions of the bookkeeping are handled: the key is searched in
 * the client's c->io_keys list, and the client is searched in the list
 * of waiting clients stored under 'key' in c->db->io_keys. When that
 * list of clients is left empty its dictionary entry is deleted too. */
9015 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9019 struct dictEntry
*de
;
9021 /* Remove the key from the list of keys this client is waiting for. */
9022 listRewind(c
->io_keys
,&li
);
9023 while ((ln
= listNext(&li
)) != NULL
) {
/* Keys are matched by content using compareStringObjects(). */
9024 if (compareStringObjects(ln
->value
,key
) == 0) {
9025 listDelNode(c
->io_keys
,ln
);
9031 /* Remove the client from the key => waiting clients map. */
9032 de
= dictFind(c
->db
->io_keys
,key
);
9034 l
= dictGetEntryVal(de
);
9035 ln
= listSearchKey(l
,c
);
9038 if (listLength(l
) == 0)
9039 dictDelete(c
->db
->io_keys
,key
);
9041 return listLength(c
->io_keys
) == 0;
/* Process the clients registered in db->io_keys as waiting for 'key'.
 *
 * For each such client dontWaitForSwappedKey() is called to drop 'key'
 * from the set of keys the client waits for; when it reports that the
 * client has no more keys to wait for, the client is appended to
 * server.io_ready_clients. */
9044 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9045 struct dictEntry
*de
;
9050 de
= dictFind(db
->io_keys
,key
);
9053 l
= dictGetEntryVal(de
);
/* The number of waiting clients is read once, up front (see the note
 * below on why the list length is not re-read). */
9054 len
= listLength(l
);
9055 /* Note: we can't use something like while(listLength(l)) as the list
9056 * can be freed by the calling function when we remove the last element. */
9059 redisClient
*c
= ln
->value
;
9061 if (dontWaitForSwappedKey(c
,key
)) {
9062 /* Put the client in the list of clients ready to go as we
9063 * loaded all the keys about it. */
9064 listAddNodeTail(server
.io_ready_clients
,c
);
9069 /* =========================== Remote Configuration ========================= */
9071 static void configSetCommand(redisClient
*c
) {
9072 robj
*o
= getDecodedObject(c
->argv
[3]);
9073 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9074 zfree(server
.dbfilename
);
9075 server
.dbfilename
= zstrdup(o
->ptr
);
9076 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9077 zfree(server
.requirepass
);
9078 server
.requirepass
= zstrdup(o
->ptr
);
9079 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9080 zfree(server
.masterauth
);
9081 server
.masterauth
= zstrdup(o
->ptr
);
9082 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9083 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9085 addReplySds(c
,sdscatprintf(sdsempty(),
9086 "-ERR not supported CONFIG parameter %s\r\n",
9087 (char*)c
->argv
[2]->ptr
));
9092 addReply(c
,shared
.ok
);
/* CONFIG GET <pattern>
 *
 * Reply with the name and the value of every supported parameter
 * (dbfilename, requirepass, masterauth, maxmemory) whose name matches
 * the glob-style pattern in argv[2], as tested with stringmatch().
 *
 * The multi bulk header is not known in advance: 'lenobj' is created
 * with a NULL payload and its content ("*<count>\r\n", two elements per
 * matching parameter) is only filled in at the end. */
9095 static void configGetCommand(redisClient
*c
) {
9096 robj
*o
= getDecodedObject(c
->argv
[2]);
9097 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9098 char *pattern
= o
->ptr
;
9102 decrRefCount(lenobj
);
9104 if (stringmatch(pattern
,"dbfilename",0)) {
9105 addReplyBulkCString(c
,"dbfilename");
9106 addReplyBulkCString(c
,server
.dbfilename
);
9109 if (stringmatch(pattern
,"requirepass",0)) {
9110 addReplyBulkCString(c
,"requirepass");
9111 addReplyBulkCString(c
,server
.requirepass
);
9114 if (stringmatch(pattern
,"masterauth",0)) {
9115 addReplyBulkCString(c
,"masterauth");
9116 addReplyBulkCString(c
,server
.masterauth
);
9119 if (stringmatch(pattern
,"maxmemory",0)) {
/* NOTE(review): the format string ends with "\n", so the value sent to
 * the client carries a trailing newline -- confirm whether that is
 * intended. */
9122 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9123 addReplyBulkCString(c
,"maxmemory");
9124 addReplyBulkCString(c
,buf
);
/* Now that the number of matches is known, fill in the header object. */
9128 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9131 static void configCommand(redisClient
*c
) {
9132 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9133 if (c
->argc
!= 4) goto badarity
;
9134 configSetCommand(c
);
9135 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9136 if (c
->argc
!= 3) goto badarity
;
9137 configGetCommand(c
);
9138 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9139 if (c
->argc
!= 2) goto badarity
;
9140 server
.stat_numcommands
= 0;
9141 server
.stat_numconnections
= 0;
9142 server
.stat_expiredkeys
= 0;
9143 server
.stat_starttime
= time(NULL
);
9144 addReply(c
,shared
.ok
);
9146 addReplySds(c
,sdscatprintf(sdsempty(),
9147 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9152 addReplySds(c
,sdscatprintf(sdsempty(),
9153 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9154 (char*) c
->argv
[1]->ptr
));
9157 /* ================================= Debugging ============================== */
/* DEBUG <subcommand> [<key>]
 *
 * Subcommands recognized here (argv[1], matched case-insensitively):
 *   SEGFAULT      - branch present, its body is not documented here
 *   RELOAD        - rdbSave() to server.dbfilename, then rdbLoad() it back
 *   LOADAOF       - loadAppendOnlyFile() on server.appendfilename
 *   OBJECT <key>  - describe the key and, when available in memory, its
 *                   value (addresses, refcounts, encoding, serialized
 *                   length), or the swap location of the value
 *   SWAPOUT <key> - swap the value of the key out with
 *                   vmSwapObjectBlocking()
 * Anything else is answered with a syntax error. */
9159 static void debugCommand(redisClient
*c
) {
9160 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9162 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9163 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9164 addReply(c
,shared
.err
);
9168 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9169 addReply(c
,shared
.err
);
9172 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9173 addReply(c
,shared
.ok
);
9174 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9176 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9177 addReply(c
,shared
.err
);
9180 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9181 addReply(c
,shared
.ok
);
9182 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9183 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9187 addReply(c
,shared
.nokeyerr
);
9190 key
= dictGetEntryKey(de
);
9191 val
= dictGetEntryVal(de
);
/* The value is inspected only when VM is disabled or the key's storage
 * is MEMORY or SWAPPING; in the other case the swap location recorded
 * in the key object is reported instead. */
9192 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9193 key
->storage
== REDIS_VM_SWAPPING
)) {
/* Translate the encoding into a name using the strencoding table, with a
 * formatted fallback for values outside the table. */
9197 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9198 strenc
= strencoding
[val
->encoding
];
9200 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9203 addReplySds(c
,sdscatprintf(sdsempty(),
9204 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9205 "encoding:%s serializedlength:%lld\r\n",
9206 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9207 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9209 addReplySds(c
,sdscatprintf(sdsempty(),
9210 "+Key at:%p refcount:%d, value swapped at: page %llu "
9211 "using %llu pages\r\n",
9212 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9213 (unsigned long long) key
->vm
.usedpages
));
9215 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9216 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9219 if (!server
.vm_enabled
) {
9220 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9224 addReply(c
,shared
.nokeyerr
);
9227 key
= dictGetEntryKey(de
);
9228 val
= dictGetEntryVal(de
);
9229 /* If the key is shared we want to create a copy */
9230 if (key
->refcount
> 1) {
9231 robj
*newkey
= dupStringObject(key
);
/* The private copy replaces the shared key object inside the
 * dictionary entry itself. */
9233 key
= dictGetEntryKey(de
) = newkey
;
9236 if (key
->storage
!= REDIS_VM_MEMORY
) {
9237 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9238 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
/* After a successful swap out the entry no longer points to a value. */
9239 dictGetEntryVal(de
) = NULL
;
9240 addReply(c
,shared
.ok
);
9242 addReply(c
,shared
.err
);
9245 addReplySds(c
,sdsnew(
9246 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
/* Report a failed assertion in the log.
 *
 * 'estr' is the text of the expression that was expected to be true,
 * 'file' and 'line' identify where the assertion is located. When
 * HAVE_BACKTRACE is defined an additional line is logged announcing that
 * a SIGSEGV is being forced in order to print the stack trace. */
9250 static void _redisAssert(char *estr
, char *file
, int line
) {
9251 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9252 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9253 #ifdef HAVE_BACKTRACE
9254 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9259 /* =================================== Main! ================================ */
/* Read the Linux overcommit policy from /proc/sys/vm/overcommit_memory.
 * A single line of at most 63 characters is read with fgets().
 * NOTE(review): the returned value is compared against 0 by
 * linuxOvercommitMemoryWarning(); what is returned when the file cannot
 * be opened or read should be confirmed against the full source. */
9262 int linuxOvercommitMemoryValue(void) {
9263 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9267 if (fgets(buf
,64,fp
) == NULL
) {
9276 void linuxOvercommitMemoryWarning(void) {
9277 if (linuxOvercommitMemoryValue() == 0) {
9278 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9281 #endif /* __linux__ */
9283 static void daemonize(void) {
9287 if (fork() != 0) exit(0); /* parent exits */
9288 setsid(); /* create a new session */
9290 /* Every output goes to /dev/null. If Redis is daemonized but
9291 * the 'logfile' is set to 'stdout' in the configuration file
9292 * it will not log at all. */
9293 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9294 dup2(fd
, STDIN_FILENO
);
9295 dup2(fd
, STDOUT_FILENO
);
9296 dup2(fd
, STDERR_FILENO
);
9297 if (fd
> STDERR_FILENO
) close(fd
);
9299 /* Try to write the pid file */
9300 fp
= fopen(server
.pidfile
,"w");
9302 fprintf(fp
,"%d\n",getpid());
/* Print the server version (REDIS_VERSION) on standard output.
 * Called by main() for the -v and --version options. */
9307 static void version() {
9308 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print a short usage message on standard error.
 * Called by main() for the --help option. */
9312 static void usage() {
9313 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9314 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
/* Server entry point.
 *
 * Visible steps, in order: handle the -v / --version / --help options or
 * load the configuration file named by argv[1] (a warning is logged when
 * no configuration file is given), daemonize if requested by the
 * configuration, log the startup banner, emit the Linux overcommit
 * warning, load the dataset (from the append only file when
 * server.appendonly is set, otherwise from the RDB file), announce the
 * listening port, install beforeSleep() as the event loop's before-sleep
 * procedure and, at the very end, release the event loop. */
9318 int main(int argc
, char **argv
) {
9323 if (strcmp(argv
[1], "-v") == 0 ||
9324 strcmp(argv
[1], "--version") == 0) version();
9325 if (strcmp(argv
[1], "--help") == 0) usage();
/* An explicit configuration file replaces the default save parameters. */
9326 resetServerSaveParams();
9327 loadServerConfig(argv
[1]);
9328 } else if ((argc
> 2)) {
9331 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9333 if (server
.daemonize
) daemonize();
9335 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9337 linuxOvercommitMemoryWarning();
/* Load the dataset and log how long it took ('start' is subtracted from
 * the current time). */
9340 if (server
.appendonly
) {
9341 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9342 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9344 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9345 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9347 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9348 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9350 aeDeleteEventLoop(server
.el
);
9354 /* ============================= Backtrace support ========================= */
9356 #ifdef HAVE_BACKTRACE
9357 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return the instruction pointer saved in the signal context 'uc', that
 * is, the address the program was executing when the signal arrived.
 *
 * The field holding that value depends on the platform, hence the
 * preprocessor ladder below: FreeBSD, dietlibc, Mac OS X (before and
 * from 10.6, using the 64 bit __rip or the 32 bit __eip register), Linux
 * on x86 / x86-64, and Linux on IA64. */
9359 static void *getMcontextEip(ucontext_t
*uc
) {
9360 #if defined(__FreeBSD__)
9361 return (void*) uc
->uc_mcontext
.mc_eip
;
9362 #elif defined(__dietlibc__)
9363 return (void*) uc
->uc_mcontext
.eip
;
9364 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9366 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9368 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9370 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9371 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9372 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9374 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9376 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
/* NOTE(review): REG_EIP is used for both 32 and 64 bit builds here;
 * confirm the constant is available when targeting x86-64. */
9377 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9378 #elif defined(__ia64__) /* Linux IA64 */
9379 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Handler for fatal signals, installed by setupSigSegvAction() with
 * SA_SIGINFO.
 *
 * 'sig' is the signal number, 'info' is unused, 'secret' is the
 * ucontext_t describing the interrupted context. The handler logs a
 * banner with the Redis version and the signal number, the output of
 * genRedisInfoString(), and a stack trace obtained with backtrace() /
 * backtrace_symbols() in which, where possible, frames are resolved with
 * findFuncName() so that static functions are named too.
 *
 * Memory is deliberately never released in here: the heap may be
 * corrupted and the process is about to terminate anyway. */
9385 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9387 char **messages
= NULL
;
9388 int i
, trace_size
= 0;
9389 unsigned long offset
=0;
9390 ucontext_t
*uc
= (ucontext_t
*) secret
;
9392 REDIS_NOTUSED(info
);
9394 redisLog(REDIS_WARNING
,
9395 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9396 infostring
= genRedisInfoString();
9397 redisLog(REDIS_WARNING
, "%s",infostring
);
9398 /* It's not safe to sdsfree() the returned string under memory
9399 * corruption conditions. Let it leak as we are going to abort */
9401 trace_size
= backtrace(trace
, 100);
9402 /* overwrite sigaction with caller's address */
9403 if (getMcontextEip(uc
) != NULL
) {
9404 trace
[1] = getMcontextEip(uc
);
9406 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is skipped: the loop starts from index 1. */
9408 for (i
=1; i
<trace_size
; ++i
) {
9409 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* Look for the "+<offset>" part of the symbolized frame: the line from
 * backtrace_symbols() is logged as it is when no function name was found
 * or when the offset it reports is smaller than the one computed by
 * findFuncName(). */
9411 p
= strchr(messages
[i
],'+');
9412 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9413 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9415 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9418 /* free(messages); Don't call free() with possibly corrupted memory. */
9422 static void setupSigSegvAction(void) {
9423 struct sigaction act
;
9425 sigemptyset (&act
.sa_mask
);
9426 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9427 * is used. Otherwise, sa_handler is used */
9428 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9429 act
.sa_sigaction
= segvHandler
;
9430 sigaction (SIGSEGV
, &act
, NULL
);
9431 sigaction (SIGBUS
, &act
, NULL
);
9432 sigaction (SIGFPE
, &act
, NULL
);
9433 sigaction (SIGILL
, &act
, NULL
);
9434 sigaction (SIGBUS
, &act
, NULL
);
9438 #include "staticsymbols.h"
9439 /* This function tries to convert a pointer into a function name. It's used in
9440 * order to provide a backtrace under segmentation fault that's able to
9441 * display functions declared as static (otherwise the backtrace is useless). */
/* The symbol table 'symsTable' is scanned up to its terminating entry
 * (the one with a zero 'pointer' field), looking for the symbol located
 * at or before 'pointer' with the smallest distance from it.
 *
 * Returns the name of that symbol, or NULL when no symbol qualifies.
 * NOTE(review): 'offset' is presumably set to the distance from the
 * start of the returned symbol -- confirm against the full source. */
9442 static char *findFuncName(void *pointer
, unsigned long *offset
){
9444 unsigned long off
, minoff
= 0;
9446 /* Try to match against the Symbol with the smallest offset */
9447 for (i
=0; symsTable
[i
].pointer
; i
++) {
9448 unsigned long lp
= (unsigned long) pointer
;
/* Only symbols starting at or before the address can contain it; the
 * all-ones address is never matched. */
9450 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9451 off
=lp
-symsTable
[i
].pointer
;
9452 if (ret
< 0 || off
< minoff
) {
9458 if (ret
== -1) return NULL
;
9460 return symsTable
[ret
].name
;
9462 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined. */
9463 static void setupSigSegvAction(void) {
9465 #endif /* HAVE_BACKTRACE */