2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.7"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 4
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short, these commands are denied in low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Objects encoding. Some kinds of objects, like Strings and Hashes, can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. To store 32 bits lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specify the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte, most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0
153 #define REDIS_RDB_14BITLEN 1
154 #define REDIS_RDB_32BITLEN 2
155 #define REDIS_RDB_ENCVAL 3
156 #define REDIS_RDB_LENERR UINT_MAX
158 /* When a length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * according to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
166 /* Virtual memory object->where field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
222 /* Anti-warning macro... */
223 #define REDIS_NOTUSED(V) ((void) V)
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e) evaluates _e once. When it is false, _redisAssert() is
 * called with the stringified expression, the current file name and the
 * current line number, and then the process is terminated via _exit(1).
 * When _e is true the macro expands to a no-op expression of type void. */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Failure handler used by redisAssert(): receives the failed expression as
 * text plus the file/line where it failed. Only the prototype is visible
 * here; the definition is not in this part of the file. */
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure */
246 struct redisObjectVM
{
247 off_t page
; /* the page at which the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
;
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
337 /* Global server state structure */
342 dict
*sharingpool
; /* Pool used for object sharing */
343 unsigned int sharingpoolsize
;
344 long long dirty
; /* changes to DB from the last save */
346 list
*slaves
, *monitors
;
347 char neterr
[ANET_ERR_LEN
];
349 int cronloops
; /* number of times the cron function run */
350 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
351 time_t lastsave
; /* Unix time of the last successful save */
352 /* Fields used only for stats */
353 time_t stat_starttime
; /* server start time */
354 long long stat_numcommands
; /* number of processed commands */
355 long long stat_numconnections
; /* number of connections received */
356 long long stat_expiredkeys
; /* number of expired keys */
369 pid_t bgsavechildpid
;
370 pid_t bgrewritechildpid
;
371 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
372 struct saveparam
*saveparams
;
377 char *appendfilename
;
381 /* Replication related */
386 redisClient
*master
; /* client that is master for this slave */
388 unsigned int maxclients
;
389 unsigned long long maxmemory
;
390 unsigned int blpop_blocked_clients
;
391 unsigned int vm_blocked_clients
;
392 /* Sort parameters - qsort_r() is only available under BSD so we
393 * have to keep this state global, in order to pass it to sortCompare() */
397 /* Virtual memory configuration */
402 unsigned long long vm_max_memory
;
404 size_t hash_max_zipmap_entries
;
405 size_t hash_max_zipmap_value
;
406 /* Virtual memory state */
409 off_t vm_next_page
; /* Next probably empty page */
410 off_t vm_near_pages
; /* Number of pages allocated sequentially */
411 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
412 time_t unixtime
; /* Unix time sampled every second. */
413 /* Virtual memory I/O threads stuff */
414 /* An I/O thread processes an element taken from the io_newjobs queue and
415 * puts the result of the operation in the io_processed list. While the
416 * job is being processed, it's put on the io_processing queue. */
417 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
418 list
*io_processing
; /* List of VM I/O jobs being processed */
419 list
*io_processed
; /* List of VM I/O jobs already processed */
420 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
421 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
422 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
423 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
424 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
425 int io_active_threads
; /* Number of running I/O threads */
426 int vm_max_threads
; /* Max number of I/O threads running at the same time */
427 /* Our main thread is blocked on the event loop, looking for sockets ready
428 * to be read or written, so when a threaded I/O operation is ready to be
429 * processed by the main thread, the I/O thread will use a unix pipe to
430 * awake the main thread. The following are the two pipe FDs. */
431 int io_ready_pipe_read
;
432 int io_ready_pipe_write
;
433 /* Virtual memory stats */
434 unsigned long long vm_stats_used_pages
;
435 unsigned long long vm_stats_swapped_objects
;
436 unsigned long long vm_stats_swapouts
;
437 unsigned long long vm_stats_swapins
;
441 typedef void redisCommandProc(redisClient
*c
);
442 struct redisCommand
{
444 redisCommandProc
*proc
;
447 /* Use a function to determine which keys need to be loaded
448 * in the background prior to executing this command. Takes precedence
449 * over vm_firstkey and others, ignored when NULL */
450 redisCommandProc
*vm_preload_proc
;
451 /* What keys should be loaded in background when calling this command? */
452 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
453 int vm_lastkey
; /* The last argument that's a key */
454 int vm_keystep
; /* The step between first and last key */
457 struct redisFunctionSym
{
459 unsigned long pointer
;
462 typedef struct _redisSortObject
{
470 typedef struct _redisSortOperation
{
473 } redisSortOperation
;
475 /* ZSETs use a specialized version of Skiplists */
477 typedef struct zskiplistNode
{
478 struct zskiplistNode
**forward
;
479 struct zskiplistNode
*backward
;
485 typedef struct zskiplist
{
486 struct zskiplistNode
*header
, *tail
;
487 unsigned long length
;
491 typedef struct zset
{
496 /* Our shared "common" objects */
498 struct sharedObjectsStruct
{
499 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
500 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
501 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
502 *outofrangeerr
, *plus
,
503 *select0
, *select1
, *select2
, *select3
, *select4
,
504 *select5
, *select6
, *select7
, *select8
, *select9
;
507 /* Global vars that are actually used as constants. The following double
508 * values are used for double on-disk serialization, and are initialized
509 * at runtime to avoid strange compiler optimizations. */
511 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
513 /* VM threaded I/O request message */
514 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
515 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
516 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
517 typedef struct iojob
{
518 int type
; /* Request type, REDIS_IOJOB_* */
519 redisDb
*db
;/* Redis database */
520 robj
*key
; /* This I/O request is about swapping this key */
521 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
522 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
523 off_t page
; /* Swap page where to read/write the object */
524 off_t pages
; /* Swap pages needed to save the object. PREPARE_SWAP return val */
525 int canceled
; /* True if this command was canceled by blocking side of VM */
526 pthread_t thread
; /* ID of the thread processing this entry */
529 /*================================ Prototypes =============================== */
531 static void freeStringObject(robj
*o
);
532 static void freeListObject(robj
*o
);
533 static void freeSetObject(robj
*o
);
534 static void decrRefCount(void *o
);
535 static robj
*createObject(int type
, void *ptr
);
536 static void freeClient(redisClient
*c
);
537 static int rdbLoad(char *filename
);
538 static void addReply(redisClient
*c
, robj
*obj
);
539 static void addReplySds(redisClient
*c
, sds s
);
540 static void incrRefCount(robj
*o
);
541 static int rdbSaveBackground(char *filename
);
542 static robj
*createStringObject(char *ptr
, size_t len
);
543 static robj
*dupStringObject(robj
*o
);
544 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
545 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
546 static int syncWithMaster(void);
547 static robj
*tryObjectSharing(robj
*o
);
548 static int tryObjectEncoding(robj
*o
);
549 static robj
*getDecodedObject(robj
*o
);
550 static int removeExpire(redisDb
*db
, robj
*key
);
551 static int expireIfNeeded(redisDb
*db
, robj
*key
);
552 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
553 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
554 static int deleteKey(redisDb
*db
, robj
*key
);
555 static time_t getExpire(redisDb
*db
, robj
*key
);
556 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
557 static void updateSlavesWaitingBgsave(int bgsaveerr
);
558 static void freeMemoryIfNeeded(void);
559 static int processCommand(redisClient
*c
);
560 static void setupSigSegvAction(void);
561 static void rdbRemoveTempFile(pid_t childpid
);
562 static void aofRemoveTempFile(pid_t childpid
);
563 static size_t stringObjectLen(robj
*o
);
564 static void processInputBuffer(redisClient
*c
);
565 static zskiplist
*zslCreate(void);
566 static void zslFree(zskiplist
*zsl
);
567 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
568 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
569 static void initClientMultiState(redisClient
*c
);
570 static void freeClientMultiState(redisClient
*c
);
571 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
572 static void unblockClientWaitingData(redisClient
*c
);
573 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
574 static void vmInit(void);
575 static void vmMarkPagesFree(off_t page
, off_t count
);
576 static robj
*vmLoadObject(robj
*key
);
577 static robj
*vmPreviewObject(robj
*key
);
578 static int vmSwapOneObjectBlocking(void);
579 static int vmSwapOneObjectThreaded(void);
580 static int vmCanSwapOut(void);
581 static int tryFreeOneObjectFromFreelist(void);
582 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
583 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
584 static void vmCancelThreadedIOJob(robj
*o
);
585 static void lockThreadedIO(void);
586 static void unlockThreadedIO(void);
587 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
588 static void freeIOJob(iojob
*j
);
589 static void queueIOJob(iojob
*j
);
590 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
591 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
592 static void waitEmptyIOJobsQueue(void);
593 static void vmReopenSwapFile(void);
594 static int vmFreePage(off_t page
);
595 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
596 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
597 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
598 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
599 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static struct redisCommand
*lookupCommand(char *name
);
601 static void call(redisClient
*c
, struct redisCommand
*cmd
);
602 static void resetClient(redisClient
*c
);
603 static void convertToRealHash(robj
*o
);
605 static void authCommand(redisClient
*c
);
606 static void pingCommand(redisClient
*c
);
607 static void echoCommand(redisClient
*c
);
608 static void setCommand(redisClient
*c
);
609 static void setnxCommand(redisClient
*c
);
610 static void getCommand(redisClient
*c
);
611 static void delCommand(redisClient
*c
);
612 static void existsCommand(redisClient
*c
);
613 static void incrCommand(redisClient
*c
);
614 static void decrCommand(redisClient
*c
);
615 static void incrbyCommand(redisClient
*c
);
616 static void decrbyCommand(redisClient
*c
);
617 static void selectCommand(redisClient
*c
);
618 static void randomkeyCommand(redisClient
*c
);
619 static void keysCommand(redisClient
*c
);
620 static void dbsizeCommand(redisClient
*c
);
621 static void lastsaveCommand(redisClient
*c
);
622 static void saveCommand(redisClient
*c
);
623 static void bgsaveCommand(redisClient
*c
);
624 static void bgrewriteaofCommand(redisClient
*c
);
625 static void shutdownCommand(redisClient
*c
);
626 static void moveCommand(redisClient
*c
);
627 static void renameCommand(redisClient
*c
);
628 static void renamenxCommand(redisClient
*c
);
629 static void lpushCommand(redisClient
*c
);
630 static void rpushCommand(redisClient
*c
);
631 static void lpopCommand(redisClient
*c
);
632 static void rpopCommand(redisClient
*c
);
633 static void llenCommand(redisClient
*c
);
634 static void lindexCommand(redisClient
*c
);
635 static void lrangeCommand(redisClient
*c
);
636 static void ltrimCommand(redisClient
*c
);
637 static void typeCommand(redisClient
*c
);
638 static void lsetCommand(redisClient
*c
);
639 static void saddCommand(redisClient
*c
);
640 static void sremCommand(redisClient
*c
);
641 static void smoveCommand(redisClient
*c
);
642 static void sismemberCommand(redisClient
*c
);
643 static void scardCommand(redisClient
*c
);
644 static void spopCommand(redisClient
*c
);
645 static void srandmemberCommand(redisClient
*c
);
646 static void sinterCommand(redisClient
*c
);
647 static void sinterstoreCommand(redisClient
*c
);
648 static void sunionCommand(redisClient
*c
);
649 static void sunionstoreCommand(redisClient
*c
);
650 static void sdiffCommand(redisClient
*c
);
651 static void sdiffstoreCommand(redisClient
*c
);
652 static void syncCommand(redisClient
*c
);
653 static void flushdbCommand(redisClient
*c
);
654 static void flushallCommand(redisClient
*c
);
655 static void sortCommand(redisClient
*c
);
656 static void lremCommand(redisClient
*c
);
657 static void rpoplpushcommand(redisClient
*c
);
658 static void infoCommand(redisClient
*c
);
659 static void mgetCommand(redisClient
*c
);
660 static void monitorCommand(redisClient
*c
);
661 static void expireCommand(redisClient
*c
);
662 static void expireatCommand(redisClient
*c
);
663 static void getsetCommand(redisClient
*c
);
664 static void ttlCommand(redisClient
*c
);
665 static void slaveofCommand(redisClient
*c
);
666 static void debugCommand(redisClient
*c
);
667 static void msetCommand(redisClient
*c
);
668 static void msetnxCommand(redisClient
*c
);
669 static void zaddCommand(redisClient
*c
);
670 static void zincrbyCommand(redisClient
*c
);
671 static void zrangeCommand(redisClient
*c
);
672 static void zrangebyscoreCommand(redisClient
*c
);
673 static void zcountCommand(redisClient
*c
);
674 static void zrevrangeCommand(redisClient
*c
);
675 static void zcardCommand(redisClient
*c
);
676 static void zremCommand(redisClient
*c
);
677 static void zscoreCommand(redisClient
*c
);
678 static void zremrangebyscoreCommand(redisClient
*c
);
679 static void multiCommand(redisClient
*c
);
680 static void execCommand(redisClient
*c
);
681 static void discardCommand(redisClient
*c
);
682 static void blpopCommand(redisClient
*c
);
683 static void brpopCommand(redisClient
*c
);
684 static void appendCommand(redisClient
*c
);
685 static void substrCommand(redisClient
*c
);
686 static void zrankCommand(redisClient
*c
);
687 static void zrevrankCommand(redisClient
*c
);
688 static void hsetCommand(redisClient
*c
);
689 static void hgetCommand(redisClient
*c
);
690 static void hdelCommand(redisClient
*c
);
691 static void hlenCommand(redisClient
*c
);
692 static void zremrangebyrankCommand(redisClient
*c
);
693 static void zunionCommand(redisClient
*c
);
694 static void zinterCommand(redisClient
*c
);
695 static void hkeysCommand(redisClient
*c
);
696 static void hvalsCommand(redisClient
*c
);
697 static void hgetallCommand(redisClient
*c
);
698 static void hexistsCommand(redisClient
*c
);
699 static void configCommand(redisClient
*c
);
701 /*================================= Globals ================================= */
704 static struct redisServer server
; /* server global state */
705 static struct redisCommand cmdTable
[] = {
706 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
707 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
708 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
709 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
710 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
711 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
712 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
713 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
714 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
715 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
716 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
717 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
718 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
719 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
720 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
721 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
723 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
725 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
726 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
727 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
728 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
729 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
730 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
731 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
732 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
733 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
737 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
738 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
739 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
740 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
741 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
742 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
745 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
746 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
749 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
750 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
758 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
759 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
760 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
761 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
766 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
770 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
771 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
772 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
773 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
777 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
778 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
779 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
780 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
781 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
782 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
783 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
785 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
786 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
787 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
788 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
791 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
795 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
802 {NULL
,NULL
,0,0,NULL
,0,0,0}
807 /*============================ Utility functions ============================ */
/* Glob-style pattern matching.
 *
 * Matches the first stringLen bytes of 'string' against the first
 * patternLen bytes of 'pattern'. Bytes past the given lengths are never
 * inspected, so neither buffer needs to be NUL terminated.
 *
 * Supported syntax:
 *   *        any sequence of characters, including the empty one
 *   ?        exactly one character
 *   [abc]    one character out of the set
 *   [^abc]   one character not in the set
 *   [a-z]    one character in the range (bounds in any order)
 *   \x       the character x taken literally
 *
 * When 'nocase' is non-zero characters are compared after tolower(),
 * except for backslash-escaped characters inside a [...] set that are
 * always compared exactly.
 *
 * Returns 1 on match, 0 otherwise. */
static int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while(patternLen) {
        switch(pattern[0]) {
        case '*':
            /* Collapse runs of '*' without looking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try to match the rest of the pattern at every offset. */
            while(stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int negate, match;

            /* A set always consumes one character of the string. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            negate = patternLen && pattern[0] == '^';
            if (negate) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (patternLen == 0) {
                    /* Unterminated set: step back so that the advance
                     * after the switch lands exactly on the pattern end. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == ']') {
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        /* tolower() is only defined for values that fit
                         * an unsigned char: bytes that are negative as
                         * plain char are left untouched. */
                        if (start >= 0) start = tolower(start);
                        if (end >= 0) end = tolower(end);
                        if (c >= 0) c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (negate)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall thru */
        default:
            /* A literal needs a character to compare against. */
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is over: only trailing '*' may still match. */
            while(patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
/* Convenience wrapper around stringmatchlen() for NUL terminated C
 * strings: both lengths are taken with strlen(). */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);

    return stringmatchlen(pattern,plen,string,slen,nocase);
}
936 static void redisLog(int level
, const char *fmt
, ...) {
940 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
944 if (level
>= server
.verbosity
) {
950 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
951 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
952 vfprintf(fp
, fmt
, ap
);
958 if (server
.logfile
) fclose(fp
);
961 /*====================== Hash table type implementation ==================== */
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and redis objects as values (objects can hold SDS strings,
 * lists, sets). */
/* Dict value destructor for values that are plain zmalloc() allocations:
 * the value is released with zfree(). 'privdata' is not used. */
static void dictVanillaFree(void *privdata, void *val)
{
    (void) privdata;
    zfree(val);
}
973 static void dictListDestructor(void *privdata
, void *val
)
975 DICT_NOTUSED(privdata
);
976 listRelease((list
*)val
);
979 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
983 DICT_NOTUSED(privdata
);
985 l1
= sdslen((sds
)key1
);
986 l2
= sdslen((sds
)key2
);
987 if (l1
!= l2
) return 0;
988 return memcmp(key1
, key2
, l1
) == 0;
/* Dict key/value destructor for redis objects: drops one reference.
 * NULL is accepted and ignored because the values of swapped out keys
 * are set to NULL. 'privdata' is not used. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    (void) privdata;

    if (val != NULL)
        decrRefCount(val);
}
999 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1002 const robj
*o1
= key1
, *o2
= key2
;
1003 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1006 static unsigned int dictObjHash(const void *key
) {
1007 const robj
*o
= key
;
1008 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1011 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1014 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1017 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1018 o2
->encoding
== REDIS_ENCODING_INT
&&
1019 o1
->ptr
== o2
->ptr
) return 1;
1021 o1
= getDecodedObject(o1
);
1022 o2
= getDecodedObject(o2
);
1023 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1029 static unsigned int dictEncObjHash(const void *key
) {
1030 robj
*o
= (robj
*) key
;
1032 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1033 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1035 if (o
->encoding
== REDIS_ENCODING_INT
) {
1039 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1040 return dictGenHashFunction((unsigned char*)buf
, len
);
1044 o
= getDecodedObject(o
);
1045 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1052 /* Sets type and expires */
1053 static dictType setDictType
= {
1054 dictEncObjHash
, /* hash function */
1057 dictEncObjKeyCompare
, /* key compare */
1058 dictRedisObjectDestructor
, /* key destructor */
1059 NULL
/* val destructor */
1062 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1063 static dictType zsetDictType
= {
1064 dictEncObjHash
, /* hash function */
1067 dictEncObjKeyCompare
, /* key compare */
1068 dictRedisObjectDestructor
, /* key destructor */
1069 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1073 static dictType dbDictType
= {
1074 dictObjHash
, /* hash function */
1077 dictObjKeyCompare
, /* key compare */
1078 dictRedisObjectDestructor
, /* key destructor */
1079 dictRedisObjectDestructor
/* val destructor */
1083 static dictType keyptrDictType
= {
1084 dictObjHash
, /* hash function */
1087 dictObjKeyCompare
, /* key compare */
1088 dictRedisObjectDestructor
, /* key destructor */
1089 NULL
/* val destructor */
1092 /* Hash type hash table (note that small hashes are represented with zimpaps) */
1093 static dictType hashDictType
= {
1094 dictEncObjHash
, /* hash function */
1097 dictEncObjKeyCompare
, /* key compare */
1098 dictRedisObjectDestructor
, /* key destructor */
1099 dictRedisObjectDestructor
/* val destructor */
1102 /* Keylist hash table type has unencoded redis objects as keys and
1103 * lists as values. It's used for blocking operations (BLPOP) and to
1104 * map swapped keys to a list of clients waiting for this keys to be loaded. */
1105 static dictType keylistDictType
= {
1106 dictObjHash
, /* hash function */
1109 dictObjKeyCompare
, /* key compare */
1110 dictRedisObjectDestructor
, /* key destructor */
1111 dictListDestructor
/* val destructor */
1114 static void version();
1116 /* ========================= Random utility functions ======================= */
1118 /* Redis generally does not try to recover from out of memory conditions
1119 * when allocating objects or strings, it is not clear if it will be possible
1120 * to report this condition to the client since the networking layer itself
1121 * is based on heap allocation for send buffers, so we simply abort.
1122 * At least the code will be simpler to read... */
1123 static void oom(const char *msg
) {
1124 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1129 /* ====================== Redis server networking stuff ===================== */
1130 static void closeTimedoutClients(void) {
1133 time_t now
= time(NULL
);
1136 listRewind(server
.clients
,&li
);
1137 while ((ln
= listNext(&li
)) != NULL
) {
1138 c
= listNodeValue(ln
);
1139 if (server
.maxidletime
&&
1140 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1141 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1142 (now
- c
->lastinteraction
> server
.maxidletime
))
1144 redisLog(REDIS_VERBOSE
,"Closing idle client");
1146 } else if (c
->flags
& REDIS_BLOCKED
) {
1147 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1148 addReply(c
,shared
.nullmultibulk
);
1149 unblockClientWaitingData(c
);
1155 static int htNeedsResize(dict
*dict
) {
1156 long long size
, used
;
1158 size
= dictSlots(dict
);
1159 used
= dictSize(dict
);
1160 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1161 (used
*100/size
< REDIS_HT_MINFILL
));
1164 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1165 * we resize the hash table to save memory */
1166 static void tryResizeHashTables(void) {
1169 for (j
= 0; j
< server
.dbnum
; j
++) {
1170 if (htNeedsResize(server
.db
[j
].dict
)) {
1171 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1172 dictResize(server
.db
[j
].dict
);
1173 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1175 if (htNeedsResize(server
.db
[j
].expires
))
1176 dictResize(server
.db
[j
].expires
);
1180 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1181 void backgroundSaveDoneHandler(int statloc
) {
1182 int exitcode
= WEXITSTATUS(statloc
);
1183 int bysignal
= WIFSIGNALED(statloc
);
1185 if (!bysignal
&& exitcode
== 0) {
1186 redisLog(REDIS_NOTICE
,
1187 "Background saving terminated with success");
1189 server
.lastsave
= time(NULL
);
1190 } else if (!bysignal
&& exitcode
!= 0) {
1191 redisLog(REDIS_WARNING
, "Background saving error");
1193 redisLog(REDIS_WARNING
,
1194 "Background saving terminated by signal");
1195 rdbRemoveTempFile(server
.bgsavechildpid
);
1197 server
.bgsavechildpid
= -1;
1198 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1199 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1200 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1203 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1205 void backgroundRewriteDoneHandler(int statloc
) {
1206 int exitcode
= WEXITSTATUS(statloc
);
1207 int bysignal
= WIFSIGNALED(statloc
);
1209 if (!bysignal
&& exitcode
== 0) {
1213 redisLog(REDIS_NOTICE
,
1214 "Background append only file rewriting terminated with success");
1215 /* Now it's time to flush the differences accumulated by the parent */
1216 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1217 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1219 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1222 /* Flush our data... */
1223 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1224 (signed) sdslen(server
.bgrewritebuf
)) {
1225 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1229 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1230 /* Now our work is to rename the temp file into the stable file. And
1231 * switch the file descriptor used by the server for append only. */
1232 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1233 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1237 /* Mission completed... almost */
1238 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1239 if (server
.appendfd
!= -1) {
1240 /* If append only is actually enabled... */
1241 close(server
.appendfd
);
1242 server
.appendfd
= fd
;
1244 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1245 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1247 /* If append only is disabled we just generate a dump in this
1248 * format. Why not? */
1251 } else if (!bysignal
&& exitcode
!= 0) {
1252 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1254 redisLog(REDIS_WARNING
,
1255 "Background append only file rewriting terminated by signal");
1258 sdsfree(server
.bgrewritebuf
);
1259 server
.bgrewritebuf
= sdsempty();
1260 aofRemoveTempFile(server
.bgrewritechildpid
);
1261 server
.bgrewritechildpid
= -1;
1264 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1265 int j
, loops
= server
.cronloops
++;
1266 REDIS_NOTUSED(eventLoop
);
1268 REDIS_NOTUSED(clientData
);
1270 /* We take a cached value of the unix time in the global state because
1271 * with virtual memory and aging there is to store the current time
1272 * in objects at every object access, and accuracy is not needed.
1273 * To access a global var is faster than calling time(NULL) */
1274 server
.unixtime
= time(NULL
);
1276 /* Show some info about non-empty databases */
1277 for (j
= 0; j
< server
.dbnum
; j
++) {
1278 long long size
, used
, vkeys
;
1280 size
= dictSlots(server
.db
[j
].dict
);
1281 used
= dictSize(server
.db
[j
].dict
);
1282 vkeys
= dictSize(server
.db
[j
].expires
);
1283 if (!(loops
% 50) && (used
|| vkeys
)) {
1284 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1285 /* dictPrintStats(server.dict); */
1289 /* We don't want to resize the hash tables while a bacground saving
1290 * is in progress: the saving child is created using fork() that is
1291 * implemented with a copy-on-write semantic in most modern systems, so
1292 * if we resize the HT while there is the saving child at work actually
1293 * a lot of memory movements in the parent will cause a lot of pages
1295 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1297 /* Show information about connected clients */
1298 if (!(loops
% 50)) {
1299 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1300 listLength(server
.clients
)-listLength(server
.slaves
),
1301 listLength(server
.slaves
),
1302 zmalloc_used_memory(),
1303 dictSize(server
.sharingpool
));
1306 /* Close connections of timedout clients */
1307 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1308 closeTimedoutClients();
1310 /* Check if a background saving or AOF rewrite in progress terminated */
1311 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1315 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1316 if (pid
== server
.bgsavechildpid
) {
1317 backgroundSaveDoneHandler(statloc
);
1319 backgroundRewriteDoneHandler(statloc
);
1323 /* If there is not a background saving in progress check if
1324 * we have to save now */
1325 time_t now
= time(NULL
);
1326 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1327 struct saveparam
*sp
= server
.saveparams
+j
;
1329 if (server
.dirty
>= sp
->changes
&&
1330 now
-server
.lastsave
> sp
->seconds
) {
1331 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1332 sp
->changes
, sp
->seconds
);
1333 rdbSaveBackground(server
.dbfilename
);
1339 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1340 * will use few CPU cycles if there are few expiring keys, otherwise
1341 * it will get more aggressive to avoid that too much memory is used by
1342 * keys that can be removed from the keyspace. */
1343 for (j
= 0; j
< server
.dbnum
; j
++) {
1345 redisDb
*db
= server
.db
+j
;
1347 /* Continue to expire if at the end of the cycle more than 25%
1348 * of the keys were expired. */
1350 long num
= dictSize(db
->expires
);
1351 time_t now
= time(NULL
);
1354 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1355 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1360 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1361 t
= (time_t) dictGetEntryVal(de
);
1363 deleteKey(db
,dictGetEntryKey(de
));
1365 server
.stat_expiredkeys
++;
1368 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1371 /* Swap a few keys on disk if we are over the memory limit and VM
1372 * is enbled. Try to free objects from the free list first. */
1373 if (vmCanSwapOut()) {
1374 while (server
.vm_enabled
&& zmalloc_used_memory() >
1375 server
.vm_max_memory
)
1379 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1380 retval
= (server
.vm_max_threads
== 0) ?
1381 vmSwapOneObjectBlocking() :
1382 vmSwapOneObjectThreaded();
1383 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1384 zmalloc_used_memory() >
1385 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1387 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1389 /* Note that when using threade I/O we free just one object,
1390 * because anyway when the I/O thread in charge to swap this
1391 * object out will finish, the handler of completed jobs
1392 * will try to swap more objects if we are still out of memory. */
1393 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1397 /* Check if we should connect to a MASTER */
1398 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1399 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1400 if (syncWithMaster() == REDIS_OK
) {
1401 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1407 /* This function gets called every time Redis is entering the
1408 * main loop of the event driven library, that is, before to sleep
1409 * for ready file descriptors. */
1410 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1411 REDIS_NOTUSED(eventLoop
);
1413 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1417 listRewind(server
.io_ready_clients
,&li
);
1418 while((ln
= listNext(&li
))) {
1419 redisClient
*c
= ln
->value
;
1420 struct redisCommand
*cmd
;
1422 /* Resume the client. */
1423 listDelNode(server
.io_ready_clients
,ln
);
1424 c
->flags
&= (~REDIS_IO_WAIT
);
1425 server
.vm_blocked_clients
--;
1426 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1427 readQueryFromClient
, c
);
1428 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1429 assert(cmd
!= NULL
);
1432 /* There may be more data to process in the input buffer. */
1433 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1434 processInputBuffer(c
);
1439 static void createSharedObjects(void) {
1440 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1441 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1442 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1443 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1444 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1445 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1446 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1447 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1448 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1449 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1450 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1451 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1452 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1453 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1454 "-ERR no such key\r\n"));
1455 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1456 "-ERR syntax error\r\n"));
1457 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1458 "-ERR source and destination objects are the same\r\n"));
1459 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1460 "-ERR index out of range\r\n"));
1461 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1462 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1463 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1464 shared
.select0
= createStringObject("select 0\r\n",10);
1465 shared
.select1
= createStringObject("select 1\r\n",10);
1466 shared
.select2
= createStringObject("select 2\r\n",10);
1467 shared
.select3
= createStringObject("select 3\r\n",10);
1468 shared
.select4
= createStringObject("select 4\r\n",10);
1469 shared
.select5
= createStringObject("select 5\r\n",10);
1470 shared
.select6
= createStringObject("select 6\r\n",10);
1471 shared
.select7
= createStringObject("select 7\r\n",10);
1472 shared
.select8
= createStringObject("select 8\r\n",10);
1473 shared
.select9
= createStringObject("select 9\r\n",10);
1476 static void appendServerSaveParams(time_t seconds
, int changes
) {
1477 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1478 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1479 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1480 server
.saveparamslen
++;
1483 static void resetServerSaveParams() {
1484 zfree(server
.saveparams
);
1485 server
.saveparams
= NULL
;
1486 server
.saveparamslen
= 0;
1489 static void initServerConfig() {
1490 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1491 server
.port
= REDIS_SERVERPORT
;
1492 server
.verbosity
= REDIS_VERBOSE
;
1493 server
.maxidletime
= REDIS_MAXIDLETIME
;
1494 server
.saveparams
= NULL
;
1495 server
.logfile
= NULL
; /* NULL = log on standard output */
1496 server
.bindaddr
= NULL
;
1497 server
.glueoutputbuf
= 1;
1498 server
.daemonize
= 0;
1499 server
.appendonly
= 0;
1500 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1501 server
.lastfsync
= time(NULL
);
1502 server
.appendfd
= -1;
1503 server
.appendseldb
= -1; /* Make sure the first time will not match */
1504 server
.pidfile
= zstrdup("/var/run/redis.pid");
1505 server
.dbfilename
= zstrdup("dump.rdb");
1506 server
.appendfilename
= zstrdup("appendonly.aof");
1507 server
.requirepass
= NULL
;
1508 server
.shareobjects
= 0;
1509 server
.rdbcompression
= 1;
1510 server
.sharingpoolsize
= 1024;
1511 server
.maxclients
= 0;
1512 server
.blpop_blocked_clients
= 0;
1513 server
.maxmemory
= 0;
1514 server
.vm_enabled
= 0;
1515 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1516 server
.vm_page_size
= 256; /* 256 bytes per page */
1517 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1518 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1519 server
.vm_max_threads
= 4;
1520 server
.vm_blocked_clients
= 0;
1521 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1522 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1524 resetServerSaveParams();
1526 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1527 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1528 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1529 /* Replication related */
1531 server
.masterauth
= NULL
;
1532 server
.masterhost
= NULL
;
1533 server
.masterport
= 6379;
1534 server
.master
= NULL
;
1535 server
.replstate
= REDIS_REPL_NONE
;
1537 /* Double constants initialization */
1539 R_PosInf
= 1.0/R_Zero
;
1540 R_NegInf
= -1.0/R_Zero
;
1541 R_Nan
= R_Zero
/R_Zero
;
1544 static void initServer() {
1547 signal(SIGHUP
, SIG_IGN
);
1548 signal(SIGPIPE
, SIG_IGN
);
1549 setupSigSegvAction();
1551 server
.devnull
= fopen("/dev/null","w");
1552 if (server
.devnull
== NULL
) {
1553 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1556 server
.clients
= listCreate();
1557 server
.slaves
= listCreate();
1558 server
.monitors
= listCreate();
1559 server
.objfreelist
= listCreate();
1560 createSharedObjects();
1561 server
.el
= aeCreateEventLoop();
1562 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1563 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1564 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1565 if (server
.fd
== -1) {
1566 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1569 for (j
= 0; j
< server
.dbnum
; j
++) {
1570 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1571 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1572 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1573 if (server
.vm_enabled
)
1574 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1575 server
.db
[j
].id
= j
;
1577 server
.cronloops
= 0;
1578 server
.bgsavechildpid
= -1;
1579 server
.bgrewritechildpid
= -1;
1580 server
.bgrewritebuf
= sdsempty();
1581 server
.lastsave
= time(NULL
);
1583 server
.stat_numcommands
= 0;
1584 server
.stat_numconnections
= 0;
1585 server
.stat_expiredkeys
= 0;
1586 server
.stat_starttime
= time(NULL
);
1587 server
.unixtime
= time(NULL
);
1588 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1589 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1590 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1592 if (server
.appendonly
) {
1593 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1594 if (server
.appendfd
== -1) {
1595 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1601 if (server
.vm_enabled
) vmInit();
1604 /* Empty the whole database */
1605 static long long emptyDb() {
1607 long long removed
= 0;
1609 for (j
= 0; j
< server
.dbnum
; j
++) {
1610 removed
+= dictSize(server
.db
[j
].dict
);
1611 dictEmpty(server
.db
[j
].dict
);
1612 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes" / "no" string (compared ignoring case) into 1 / 0.
 * Returns -1 for any other input. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1623 /* I agree, this is a very rudimental way to load a configuration...
1624 will improve later if the config gets more complex */
1625 static void loadServerConfig(char *filename
) {
1627 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1630 char *errormsg
= "Fatal error, can't open config file '%s'";
1631 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1632 sprintf(errorbuf
, errormsg
, filename
);
1634 if (filename
[0] == '-' && filename
[1] == '\0')
1637 if ((fp
= fopen(filename
,"r")) == NULL
) {
1638 redisLog(REDIS_WARNING
, errorbuf
);
1643 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1649 line
= sdstrim(line
," \t\r\n");
1651 /* Skip comments and blank lines*/
1652 if (line
[0] == '#' || line
[0] == '\0') {
1657 /* Split into arguments */
1658 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1659 sdstolower(argv
[0]);
1661 /* Execute config directives */
1662 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1663 server
.maxidletime
= atoi(argv
[1]);
1664 if (server
.maxidletime
< 0) {
1665 err
= "Invalid timeout value"; goto loaderr
;
1667 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1668 server
.port
= atoi(argv
[1]);
1669 if (server
.port
< 1 || server
.port
> 65535) {
1670 err
= "Invalid port"; goto loaderr
;
1672 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1673 server
.bindaddr
= zstrdup(argv
[1]);
1674 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1675 int seconds
= atoi(argv
[1]);
1676 int changes
= atoi(argv
[2]);
1677 if (seconds
< 1 || changes
< 0) {
1678 err
= "Invalid save parameters"; goto loaderr
;
1680 appendServerSaveParams(seconds
,changes
);
1681 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1682 if (chdir(argv
[1]) == -1) {
1683 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1684 argv
[1], strerror(errno
));
1687 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1688 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1689 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1690 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1691 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1693 err
= "Invalid log level. Must be one of debug, notice, warning";
1696 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1699 server
.logfile
= zstrdup(argv
[1]);
1700 if (!strcasecmp(server
.logfile
,"stdout")) {
1701 zfree(server
.logfile
);
1702 server
.logfile
= NULL
;
1704 if (server
.logfile
) {
1705 /* Test if we are able to open the file. The server will not
1706 * be able to abort just for this problem later... */
1707 logfp
= fopen(server
.logfile
,"a");
1708 if (logfp
== NULL
) {
1709 err
= sdscatprintf(sdsempty(),
1710 "Can't open the log file: %s", strerror(errno
));
1715 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1716 server
.dbnum
= atoi(argv
[1]);
1717 if (server
.dbnum
< 1) {
1718 err
= "Invalid number of databases"; goto loaderr
;
1720 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1721 loadServerConfig(argv
[1]);
1722 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1723 server
.maxclients
= atoi(argv
[1]);
1724 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1725 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1726 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1727 server
.masterhost
= sdsnew(argv
[1]);
1728 server
.masterport
= atoi(argv
[2]);
1729 server
.replstate
= REDIS_REPL_CONNECT
;
1730 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1731 server
.masterauth
= zstrdup(argv
[1]);
1732 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1733 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1734 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1736 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1737 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1738 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1740 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1741 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1742 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1744 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1745 server
.sharingpoolsize
= atoi(argv
[1]);
1746 if (server
.sharingpoolsize
< 1) {
1747 err
= "invalid object sharing pool size"; goto loaderr
;
1749 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1750 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1751 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1753 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1754 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1755 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1757 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1758 if (!strcasecmp(argv
[1],"no")) {
1759 server
.appendfsync
= APPENDFSYNC_NO
;
1760 } else if (!strcasecmp(argv
[1],"always")) {
1761 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1762 } else if (!strcasecmp(argv
[1],"everysec")) {
1763 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1765 err
= "argument must be 'no', 'always' or 'everysec'";
1768 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1769 server
.requirepass
= zstrdup(argv
[1]);
1770 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1771 zfree(server
.pidfile
);
1772 server
.pidfile
= zstrdup(argv
[1]);
1773 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1774 zfree(server
.dbfilename
);
1775 server
.dbfilename
= zstrdup(argv
[1]);
1776 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1777 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1778 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1780 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1781 zfree(server
.vm_swap_file
);
1782 server
.vm_swap_file
= zstrdup(argv
[1]);
1783 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1784 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1785 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1786 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1787 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1788 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1789 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1790 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1791 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1792 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1793 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1794 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1795 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1796 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1798 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1800 for (j
= 0; j
< argc
; j
++)
1805 if (fp
!= stdin
) fclose(fp
);
1809 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1810 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1811 fprintf(stderr
, ">>> '%s'\n", line
);
1812 fprintf(stderr
, "%s\n", err
);
/* Drop the references the client holds on its parsed arguments.
 *
 * Calls decrRefCount() on each of the c->argc objects in c->argv and then
 * on each of the c->mbargc objects in c->mbargv (the multi-bulk vector).
 * NOTE(review): any statements following the two loops are not visible in
 * this view of the file. */
1816 static void freeClientArgv(redisClient
*c
) {
1819 for (j
= 0; j
< c
->argc
; j
++)
1820 decrRefCount(c
->argv
[j
]);
1821 for (j
= 0; j
< c
->mbargc
; j
++)
1822 decrRefCount(c
->mbargv
[j
]);
1827 static void freeClient(redisClient
*c
) {
1830 /* Note that if the client we are freeing is blocked in a blocking
1831 * call, we have to set querybuf to NULL *before* calling
1832 * unblockClientWaitingData(), so that processInputBuffer() does not get
1833 * called. Also it is important to remove the file events after
1834 * this, because this call adds the READABLE event. */
1835 sdsfree(c
->querybuf
);
1837 if (c
->flags
& REDIS_BLOCKED
)
1838 unblockClientWaitingData(c
);
1840 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1841 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1842 listRelease(c
->reply
);
1845 /* Remove from the list of clients */
1846 ln
= listSearchKey(server
.clients
,c
);
1847 redisAssert(ln
!= NULL
);
1848 listDelNode(server
.clients
,ln
);
1849 /* Remove from the list of clients waiting for swapped keys */
1850 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1851 ln
= listSearchKey(server
.io_ready_clients
,c
);
1853 listDelNode(server
.io_ready_clients
,ln
);
1854 server
.vm_blocked_clients
--;
1857 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1858 ln
= listFirst(c
->io_keys
);
1859 dontWaitForSwappedKey(c
,ln
->value
);
1861 listRelease(c
->io_keys
);
1863 if (c
->flags
& REDIS_SLAVE
) {
1864 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1866 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1867 ln
= listSearchKey(l
,c
);
1868 redisAssert(ln
!= NULL
);
1871 if (c
->flags
& REDIS_MASTER
) {
1872 server
.master
= NULL
;
1873 server
.replstate
= REDIS_REPL_CONNECT
;
1877 freeClientMultiState(c
);
1881 #define GLUEREPLY_UP_TO (1024)
1882 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1884 char buf
[GLUEREPLY_UP_TO
];
1889 listRewind(c
->reply
,&li
);
1890 while((ln
= listNext(&li
))) {
1894 objlen
= sdslen(o
->ptr
);
1895 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1896 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1898 listDelNode(c
->reply
,ln
);
1900 if (copylen
== 0) return;
1904 /* Now the output buffer is empty, add the new single element */
1905 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1906 listAddNodeHead(c
->reply
,o
);
1909 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1910 redisClient
*c
= privdata
;
1911 int nwritten
= 0, totwritten
= 0, objlen
;
1914 REDIS_NOTUSED(mask
);
1916 /* Use writev() if we have enough buffers to send */
1917 if (!server
.glueoutputbuf
&&
1918 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1919 !(c
->flags
& REDIS_MASTER
))
1921 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1925 while(listLength(c
->reply
)) {
1926 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1927 glueReplyBuffersIfNeeded(c
);
1929 o
= listNodeValue(listFirst(c
->reply
));
1930 objlen
= sdslen(o
->ptr
);
1933 listDelNode(c
->reply
,listFirst(c
->reply
));
1937 if (c
->flags
& REDIS_MASTER
) {
1938 /* Don't reply to a master */
1939 nwritten
= objlen
- c
->sentlen
;
1941 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1942 if (nwritten
<= 0) break;
1944 c
->sentlen
+= nwritten
;
1945 totwritten
+= nwritten
;
1946 /* If we fully sent the object on head go to the next one */
1947 if (c
->sentlen
== objlen
) {
1948 listDelNode(c
->reply
,listFirst(c
->reply
));
1951 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
1952 * bytes: in a single threaded server it's a good idea to serve
1953 * other clients as well, even if a very large request comes from a
1954 * super fast link that is always able to accept data (in a real world
1955 * scenario think about 'KEYS *' against the loopback interface) */
1956 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1958 if (nwritten
== -1) {
1959 if (errno
== EAGAIN
) {
1962 redisLog(REDIS_VERBOSE
,
1963 "Error writing to client: %s", strerror(errno
));
1968 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1969 if (listLength(c
->reply
) == 0) {
1971 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1975 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
1977 redisClient
*c
= privdata
;
1978 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
1980 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
1981 int offset
, ion
= 0;
1983 REDIS_NOTUSED(mask
);
1986 while (listLength(c
->reply
)) {
1987 offset
= c
->sentlen
;
1991 /* fill-in the iov[] array */
1992 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
1993 o
= listNodeValue(node
);
1994 objlen
= sdslen(o
->ptr
);
1996 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
1999 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2000 break; /* no more iovecs */
2002 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2003 iov
[ion
].iov_len
= objlen
- offset
;
2004 willwrite
+= objlen
- offset
;
2005 offset
= 0; /* just for the first item */
2012 /* write all collected blocks at once */
2013 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2014 if (errno
!= EAGAIN
) {
2015 redisLog(REDIS_VERBOSE
,
2016 "Error writing to client: %s", strerror(errno
));
2023 totwritten
+= nwritten
;
2024 offset
= c
->sentlen
;
2026 /* remove written robjs from c->reply */
2027 while (nwritten
&& listLength(c
->reply
)) {
2028 o
= listNodeValue(listFirst(c
->reply
));
2029 objlen
= sdslen(o
->ptr
);
2031 if(nwritten
>= objlen
- offset
) {
2032 listDelNode(c
->reply
, listFirst(c
->reply
));
2033 nwritten
-= objlen
- offset
;
2037 c
->sentlen
+= nwritten
;
2045 c
->lastinteraction
= time(NULL
);
2047 if (listLength(c
->reply
) == 0) {
2049 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* Find a command table entry by name.
 *
 * Walks cmdTable until the entry whose name is NULL, comparing 'name'
 * against each entry with strcasecmp() (so the match is case-insensitive),
 * and returns a pointer to the first matching entry.
 * NOTE(review): the value returned when nothing matches is not visible in
 * this view; callers appear to expect NULL, confirm. */
2053 static struct redisCommand
*lookupCommand(char *name
) {
2055 while(cmdTable
[j
].name
!= NULL
) {
2056 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2062 /* resetClient prepare the client to process the next command */
2063 static void resetClient(redisClient
*c
) {
2069 /* Call() is the core of Redis execution of a command */
2070 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2073 dirty
= server
.dirty
;
2075 if (server
.appendonly
&& server
.dirty
-dirty
)
2076 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2077 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2078 replicationFeedSlaves(server
.slaves
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2079 if (listLength(server
.monitors
))
2080 replicationFeedSlaves(server
.monitors
,cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2081 server
.stat_numcommands
++;
2084 /* If this function gets called we already read a whole
2085 * command, arguments are in the client argv/argc fields.
2086 * processCommand() executes the command or prepares the
2087 * server for a bulk read from the client.
2089 * If 1 is returned the client is still alive and valid and
2090 * other operations can be performed by the caller. Otherwise
2091 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2092 static int processCommand(redisClient
*c
) {
2093 struct redisCommand
*cmd
;
2095 /* Free some memory if needed (maxmemory setting) */
2096 if (server
.maxmemory
) freeMemoryIfNeeded();
2098 /* Handle the multi bulk command type. This is an alternative protocol
2099 * supported by Redis in order to receive commands that are composed of
2100 * multiple binary-safe "bulk" arguments. The latency of processing is
2101 * a bit higher but this allows things like multi-sets, so if this
2102 * protocol is used only for MSET and similar commands this is a big win. */
2103 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2104 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2105 if (c
->multibulk
<= 0) {
2109 decrRefCount(c
->argv
[c
->argc
-1]);
2113 } else if (c
->multibulk
) {
2114 if (c
->bulklen
== -1) {
2115 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2116 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2120 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2121 decrRefCount(c
->argv
[0]);
2122 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2124 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2129 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2133 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2134 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2138 if (c
->multibulk
== 0) {
2142 /* Here we need to swap the multi-bulk argc/argv with the
2143 * normal argc/argv of the client structure. */
2145 c
->argv
= c
->mbargv
;
2146 c
->mbargv
= auxargv
;
2149 c
->argc
= c
->mbargc
;
2150 c
->mbargc
= auxargc
;
2152 /* We need to set bulklen to something different than -1
2153 * in order for the code below to process the command without
2154 * trying to read the last argument of a bulk command as
2155 * a special argument. */
2157 /* continue below and process the command */
2164 /* -- end of multi bulk commands processing -- */
2166 /* The QUIT command is handled as a special case. Normal command
2167 * procs are unable to close the client connection safely */
2168 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2173 /* Now lookup the command and check ASAP about trivial error conditions
2174 * such as wrong arity, bad command name and so forth. */
2175 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2178 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2179 (char*)c
->argv
[0]->ptr
));
2182 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2183 (c
->argc
< -cmd
->arity
)) {
2185 sdscatprintf(sdsempty(),
2186 "-ERR wrong number of arguments for '%s' command\r\n",
2190 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2191 /* This is a bulk command, we have to read the last argument yet. */
2192 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2194 decrRefCount(c
->argv
[c
->argc
-1]);
2195 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2197 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2202 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2203 /* It is possible that the bulk read is already in the
2204 * buffer. Check this condition and handle it accordingly.
2205 * This is just a fast path, an alternative to calling processInputBuffer().
2206 * It's a good idea since the code is small and this condition
2207 * happens most of the time. */
2208 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2209 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2211 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2213 /* Otherwise return... the last argument still has to be read
2214 * from the socket. */
2218 /* Let's try to share objects on the command arguments vector */
2219 if (server
.shareobjects
) {
2221 for(j
= 1; j
< c
->argc
; j
++)
2222 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2224 /* Let's try to encode the bulk object to save space. */
2225 if (cmd
->flags
& REDIS_CMD_BULK
)
2226 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2228 /* Check if the user is authenticated */
2229 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2230 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2235 /* Handle the maxmemory directive */
2236 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2237 zmalloc_used_memory() > server
.maxmemory
)
2239 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2244 /* Exec the command */
2245 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2246 queueMultiCommand(c
,cmd
);
2247 addReply(c
,shared
.queued
);
2249 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2250 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2254 /* Prepare the client for the next command */
2259 static void replicationFeedSlaves(list
*slaves
, struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
2264 /* (args*2)+1 is enough room for args, spaces, newlines */
2265 robj
*static_outv
[REDIS_STATIC_ARGS
*2+1];
2267 if (argc
<= REDIS_STATIC_ARGS
) {
2270 outv
= zmalloc(sizeof(robj
*)*(argc
*2+1));
2273 for (j
= 0; j
< argc
; j
++) {
2274 if (j
!= 0) outv
[outc
++] = shared
.space
;
2275 if ((cmd
->flags
& REDIS_CMD_BULK
) && j
== argc
-1) {
2278 lenobj
= createObject(REDIS_STRING
,
2279 sdscatprintf(sdsempty(),"%lu\r\n",
2280 (unsigned long) stringObjectLen(argv
[j
])));
2281 lenobj
->refcount
= 0;
2282 outv
[outc
++] = lenobj
;
2284 outv
[outc
++] = argv
[j
];
2286 outv
[outc
++] = shared
.crlf
;
2288 /* Increment all the refcounts at start and decrement at end in order to
2289 * be sure to free objects if there is no slave in a replication state
2290 * able to be fed with commands */
2291 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2292 listRewind(slaves
,&li
);
2293 while((ln
= listNext(&li
))) {
2294 redisClient
*slave
= ln
->value
;
2296 /* Don't feed slaves that are still waiting for BGSAVE to start */
2297 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2299 /* Feed all the other slaves, MONITORs and so on */
2300 if (slave
->slaveseldb
!= dictid
) {
2304 case 0: selectcmd
= shared
.select0
; break;
2305 case 1: selectcmd
= shared
.select1
; break;
2306 case 2: selectcmd
= shared
.select2
; break;
2307 case 3: selectcmd
= shared
.select3
; break;
2308 case 4: selectcmd
= shared
.select4
; break;
2309 case 5: selectcmd
= shared
.select5
; break;
2310 case 6: selectcmd
= shared
.select6
; break;
2311 case 7: selectcmd
= shared
.select7
; break;
2312 case 8: selectcmd
= shared
.select8
; break;
2313 case 9: selectcmd
= shared
.select9
; break;
2315 selectcmd
= createObject(REDIS_STRING
,
2316 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2317 selectcmd
->refcount
= 0;
2320 addReply(slave
,selectcmd
);
2321 slave
->slaveseldb
= dictid
;
2323 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2325 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2326 if (outv
!= static_outv
) zfree(outv
);
2329 static void processInputBuffer(redisClient
*c
) {
2331 /* Before processing the input buffer, make sure the client is not
2332 * waiting for a blocking operation such as BLPOP. Note that on the first
2333 * iteration the client is never blocked, otherwise processInputBuffer()
2334 * would not be called at all, but after the execution of the first commands
2335 * in the input buffer the client may be blocked, and the "goto again"
2336 * will try to reiterate. The following line will make it return asap. */
2337 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2338 if (c
->bulklen
== -1) {
2339 /* Read the first line of the query */
2340 char *p
= strchr(c
->querybuf
,'\n');
2347 query
= c
->querybuf
;
2348 c
->querybuf
= sdsempty();
2349 querylen
= 1+(p
-(query
));
2350 if (sdslen(query
) > querylen
) {
2351 /* leave data after the first line of the query in the buffer */
2352 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2354 *p
= '\0'; /* remove "\n" */
2355 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2356 sdsupdatelen(query
);
2358 /* Now we can split the query in arguments */
2359 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2362 if (c
->argv
) zfree(c
->argv
);
2363 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2365 for (j
= 0; j
< argc
; j
++) {
2366 if (sdslen(argv
[j
])) {
2367 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2375 /* Execute the command. If the client is still valid
2376 * after processCommand() returns and there is something
2377 * on the query buffer try to process the next command. */
2378 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2380 /* Nothing to process, argc == 0. Just process the query
2381 * buffer if it's not empty or return to the caller */
2382 if (sdslen(c
->querybuf
)) goto again
;
2385 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2386 redisLog(REDIS_VERBOSE
, "Client protocol error");
2391 /* Bulk read handling. Note that if we are at this point
2392 the client already sent a command terminated with a newline,
2393 we are reading the bulk data that is actually the last
2394 argument of the command. */
2395 int qbl
= sdslen(c
->querybuf
);
2397 if (c
->bulklen
<= qbl
) {
2398 /* Copy everything but the final CRLF as final argument */
2399 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2401 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2402 /* Process the command. If the client is still valid after
2403 * the processing and there is more data in the buffer
2404 * try to parse it. */
2405 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2411 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2412 redisClient
*c
= (redisClient
*) privdata
;
2413 char buf
[REDIS_IOBUF_LEN
];
2416 REDIS_NOTUSED(mask
);
2418 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2420 if (errno
== EAGAIN
) {
2423 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2427 } else if (nread
== 0) {
2428 redisLog(REDIS_VERBOSE
, "Client closed connection");
2433 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2434 c
->lastinteraction
= time(NULL
);
2438 if (!(c
->flags
& REDIS_BLOCKED
))
2439 processInputBuffer(c
);
/* Switch client 'c' to the database with index 'id'.
 *
 * The index is range-checked against 0 .. server.dbnum-1; for an index in
 * range c->db is pointed at &server.db[id].
 * NOTE(review): the statements taken on the out-of-range path and the
 * return values are not visible in this view (presumably REDIS_ERR /
 * REDIS_OK, confirm). */
2442 static int selectDb(redisClient
*c
, int id
) {
2443 if (id
< 0 || id
>= server
.dbnum
)
2445 c
->db
= &server
.db
[id
];
/* Value-duplication callback for client reply lists (installed with
 * listSetDupMethod() on c->reply). Instead of copying the object it takes
 * one more reference on it via incrRefCount().
 * NOTE(review): the return statement is not visible in this view;
 * presumably the same pointer 'o' is returned, confirm. */
2449 static void *dupClientReplyValue(void *o
) {
2450 incrRefCount((robj
*)o
);
2454 static redisClient
*createClient(int fd
) {
2455 redisClient
*c
= zmalloc(sizeof(*c
));
2457 anetNonBlock(NULL
,fd
);
2458 anetTcpNoDelay(NULL
,fd
);
2459 if (!c
) return NULL
;
2462 c
->querybuf
= sdsempty();
2471 c
->lastinteraction
= time(NULL
);
2472 c
->authenticated
= 0;
2473 c
->replstate
= REDIS_REPL_NONE
;
2474 c
->reply
= listCreate();
2475 listSetFreeMethod(c
->reply
,decrRefCount
);
2476 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2477 c
->blockingkeys
= NULL
;
2478 c
->blockingkeysnum
= 0;
2479 c
->io_keys
= listCreate();
2480 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2481 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2482 readQueryFromClient
, c
) == AE_ERR
) {
2486 listAddNodeTail(server
.clients
,c
);
2487 initClientMultiState(c
);
2491 static void addReply(redisClient
*c
, robj
*obj
) {
2492 if (listLength(c
->reply
) == 0 &&
2493 (c
->replstate
== REDIS_REPL_NONE
||
2494 c
->replstate
== REDIS_REPL_ONLINE
) &&
2495 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2496 sendReplyToClient
, c
) == AE_ERR
) return;
2498 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2499 obj
= dupStringObject(obj
);
2500 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2502 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2505 static void addReplySds(redisClient
*c
, sds s
) {
2506 robj
*o
= createObject(REDIS_STRING
,s
);
/* Queue the double 'd' to client 'c' as a bulk reply.
 *
 * The value is rendered into 'buf' with the "%.17g" format, then a single
 * sds of the form "$<length>\r\n<text>\r\n" is built with sdscatprintf()
 * and handed to addReplySds(). The length is strlen(buf), i.e. the number
 * of characters in the rendered text. */
2511 static void addReplyDouble(redisClient
*c
, double d
) {
2514 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2515 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2516 (unsigned long) strlen(buf
),buf
));
2519 static void addReplyLong(redisClient
*c
, long l
) {
2524 addReply(c
,shared
.czero
);
2526 } else if (l
== 1) {
2527 addReply(c
,shared
.cone
);
2530 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2531 addReplySds(c
,sdsnewlen(buf
,len
));
2534 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2539 addReply(c
,shared
.czero
);
2541 } else if (ul
== 1) {
2542 addReply(c
,shared
.cone
);
2545 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2546 addReplySds(c
,sdsnewlen(buf
,len
));
2549 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2552 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2553 len
= sdslen(obj
->ptr
);
2555 long n
= (long)obj
->ptr
;
2557 /* Compute how many bytes will take this integer as a radix 10 string */
2563 while((n
= n
/10) != 0) {
2567 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2570 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2571 addReplyBulkLen(c
,obj
);
2573 addReply(c
,shared
.crlf
);
2576 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2577 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2579 addReply(c
,shared
.nullbulk
);
2581 robj
*o
= createStringObject(s
,strlen(s
));
2587 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2592 REDIS_NOTUSED(mask
);
2593 REDIS_NOTUSED(privdata
);
2595 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2596 if (cfd
== AE_ERR
) {
2597 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2600 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2601 if ((c
= createClient(cfd
)) == NULL
) {
2602 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2603 close(cfd
); /* May be already closed, just ingore errors */
2606 /* If maxclient directive is set and this is one client more... close the
2607 * connection. Note that we create the client instead of checking for this
2608 * condition beforehand, since now the socket is already set in nonblocking
2609 * mode and we can send an error for free using the Kernel I/O */
2610 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2611 char *err
= "-ERR max number of clients reached\r\n";
2613 /* That's a best effort error message, don't check write errors */
2614 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2615 /* Nothing to do, Just to avoid the warning... */
2620 server
.stat_numconnections
++;
2623 /* ======================= Redis objects implementation ===================== */
2625 static robj
*createObject(int type
, void *ptr
) {
2628 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2629 if (listLength(server
.objfreelist
)) {
2630 listNode
*head
= listFirst(server
.objfreelist
);
2631 o
= listNodeValue(head
);
2632 listDelNode(server
.objfreelist
,head
);
2633 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2635 if (server
.vm_enabled
) {
2636 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2637 o
= zmalloc(sizeof(*o
));
2639 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2643 o
->encoding
= REDIS_ENCODING_RAW
;
2646 if (server
.vm_enabled
) {
2647 /* Note that this code may run in the context of an I/O thread
2648 * and accessing server.unixtime in theory is an error
2649 * (no locks). But in practice this is safe, and even if we read
2650 * garbage Redis will not fail, as it's just a statistical info */
2651 o
->vm
.atime
= server
.unixtime
;
2652 o
->storage
= REDIS_VM_MEMORY
;
/* Create a REDIS_STRING object holding a copy of the 'len' bytes at 'ptr'.
 * The bytes are copied into a new sds with sdsnewlen() and the sds is
 * wrapped with createObject(); the new object is returned. */
2657 static robj
*createStringObject(char *ptr
, size_t len
) {
2658 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
/* Return a new string object with the same content as 'o'.
 * 'o' must be raw-encoded (checked with assert()): its o->ptr is read as an
 * sds, and sdslen(o->ptr) bytes of it are copied via createStringObject(). */
2661 static robj
*dupStringObject(robj
*o
) {
2662 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2663 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
/* Create an empty REDIS_LIST object.
 * The underlying list gets decrRefCount as its free method, so the list
 * calls decrRefCount() on a node's value when that node is freed. */
2666 static robj
*createListObject(void) {
2667 list
*l
= listCreate();
2669 listSetFreeMethod(l
,decrRefCount
);
2670 return createObject(REDIS_LIST
,l
);
/* Create an empty REDIS_SET object backed by a dict created with the
 * setDictType callbacks (no private data). */
2673 static robj
*createSetObject(void) {
2674 dict
*d
= dictCreate(&setDictType
,NULL
);
2675 return createObject(REDIS_SET
,d
);
2678 static robj
*createHashObject(void) {
2679 /* All the Hashes start as zipmaps. Will be automatically converted
2680 * into hash tables if there are enough elements or big elements
2682 unsigned char *zm
= zipmapNew();
2683 robj
*o
= createObject(REDIS_HASH
,zm
);
2684 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
/* Create an empty REDIS_ZSET object.
 * Allocates the zset structure and fills in its two members: a dict
 * created with zsetDictType and a skiplist from zslCreate(). The zset is
 * then wrapped with createObject() and returned. */
2688 static robj
*createZsetObject(void) {
2689 zset
*zs
= zmalloc(sizeof(*zs
));
2691 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2692 zs
->zsl
= zslCreate();
2693 return createObject(REDIS_ZSET
,zs
);
2696 static void freeStringObject(robj
*o
) {
2697 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2702 static void freeListObject(robj
*o
) {
2703 listRelease((list
*) o
->ptr
);
2706 static void freeSetObject(robj
*o
) {
2707 dictRelease((dict
*) o
->ptr
);
2710 static void freeZsetObject(robj
*o
) {
2713 dictRelease(zs
->dict
);
2718 static void freeHashObject(robj
*o
) {
2719 switch (o
->encoding
) {
2720 case REDIS_ENCODING_HT
:
2721 dictRelease((dict
*) o
->ptr
);
2723 case REDIS_ENCODING_ZIPMAP
:
2732 static void incrRefCount(robj
*o
) {
2733 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2737 static void decrRefCount(void *obj
) {
2740 /* Object is a key of a swapped out value, or in the process of being
2742 if (server
.vm_enabled
&&
2743 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2745 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2746 redisAssert(o
->refcount
== 1);
2748 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2749 redisAssert(o
->type
== REDIS_STRING
);
2750 freeStringObject(o
);
2751 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2752 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2753 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2754 !listAddNodeHead(server
.objfreelist
,o
))
2756 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2757 server
.vm_stats_swapped_objects
--;
2760 /* Object is in memory, or in the process of being swapped out. */
2761 if (--(o
->refcount
) == 0) {
2762 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2763 vmCancelThreadedIOJob(obj
);
2765 case REDIS_STRING
: freeStringObject(o
); break;
2766 case REDIS_LIST
: freeListObject(o
); break;
2767 case REDIS_SET
: freeSetObject(o
); break;
2768 case REDIS_ZSET
: freeZsetObject(o
); break;
2769 case REDIS_HASH
: freeHashObject(o
); break;
2770 default: redisAssert(0); break;
2772 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2773 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2774 !listAddNodeHead(server
.objfreelist
,o
))
2776 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2780 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2781 dictEntry
*de
= dictFind(db
->dict
,key
);
2783 robj
*key
= dictGetEntryKey(de
);
2784 robj
*val
= dictGetEntryVal(de
);
2786 if (server
.vm_enabled
) {
2787 if (key
->storage
== REDIS_VM_MEMORY
||
2788 key
->storage
== REDIS_VM_SWAPPING
)
2790 /* If we were swapping the object out, stop it, this key
2792 if (key
->storage
== REDIS_VM_SWAPPING
)
2793 vmCancelThreadedIOJob(key
);
2794 /* Update the access time of the key for the aging algorithm. */
2795 key
->vm
.atime
= server
.unixtime
;
2797 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2799 /* Our value was swapped on disk. Bring it at home. */
2800 redisAssert(val
== NULL
);
2801 val
= vmLoadObject(key
);
2802 dictGetEntryVal(de
) = val
;
2804 /* Clients blocked by the VM subsystem may be waiting for
2806 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
/* Lookup used by read paths: first calls expireIfNeeded() on the key, then
 * returns whatever lookupKey() returns for it. */
2815 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2816 expireIfNeeded(db
,key
);
2817 return lookupKey(db
,key
);
/* Lookup used by write paths: first calls deleteIfVolatile() on the key,
 * then returns whatever lookupKey() returns for it. */
2820 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2821 deleteIfVolatile(db
,key
);
2822 return lookupKey(db
,key
);
/* Look up 'key' in the client's current db with lookupKeyRead(); when the
 * lookup yields NULL, queue 'reply' to the client with addReply().
 * NOTE(review): the return statement is not visible in this view;
 * presumably the looked-up object (or NULL) is returned, confirm. */
2825 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2826 robj
*o
= lookupKeyRead(c
->db
, key
);
2827 if (!o
) addReply(c
,reply
);
/* Look up 'key' in the client's current db with lookupKeyWrite(); when the
 * lookup yields NULL, queue 'reply' to the client with addReply().
 * NOTE(review): the return statement is not visible in this view;
 * presumably the looked-up object (or NULL) is returned, confirm. */
2831 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2832 robj
*o
= lookupKeyWrite(c
->db
, key
);
2833 if (!o
) addReply(c
,reply
);
/* Check that object 'o' has the expected 'type'. On a mismatch the shared
 * wrong-type error (shared.wrongtypeerr) is queued to client 'c'.
 * NOTE(review): the return values are not visible in this view; presumably
 * non-zero on mismatch and 0 otherwise, confirm against callers. */
2837 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2838 if (o
->type
!= type
) {
2839 addReply(c
,shared
.wrongtypeerr
);
2845 static int deleteKey(redisDb
*db
, robj
*key
) {
2848 /* We need to protect key from destruction: after the first dictDelete()
2849 * it may happen that 'key' is no longer valid if we don't increment
2850 * its count. This may happen when we get the object reference directly
2851 * from the hash table with dictRandomKey() or dict iterators */
2853 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2854 retval
= dictDelete(db
->dict
,key
);
2857 return retval
== DICT_OK
;
2860 /* Try to share an object against the shared objects pool */
2861 static robj
*tryObjectSharing(robj
*o
) {
2862 struct dictEntry
*de
;
2865 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2867 redisAssert(o
->type
== REDIS_STRING
);
2868 de
= dictFind(server
.sharingpool
,o
);
2870 robj
*shared
= dictGetEntryKey(de
);
2872 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2873 dictGetEntryVal(de
) = (void*) c
;
2874 incrRefCount(shared
);
2878 /* Here we are using a stream algorithm: Every time an object is
2879 * shared we increment its count, every time there is a miss we
2880 * decrement the counter of a random object. If this object reaches
2881 * zero we remove the object and put the current object instead. */
2882 if (dictSize(server
.sharingpool
) >=
2883 server
.sharingpoolsize
) {
2884 de
= dictGetRandomKey(server
.sharingpool
);
2885 redisAssert(de
!= NULL
);
2886 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2887 dictGetEntryVal(de
) = (void*) c
;
2889 dictDelete(server
.sharingpool
,de
->key
);
2892 c
= 0; /* If the pool is empty we want to add this object */
2897 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2898 redisAssert(retval
== DICT_OK
);
2905 /* Check if the nul-terminated string 's' can be represented by a long
2906 * (that is, is a number that fits into long without any other space or
2907 * character before or after the digits).
2909 * If so, the function returns REDIS_OK and *longval is set to the value
2910 * of the number. Otherwise REDIS_ERR is returned */
2911 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2912 char buf
[32], *endptr
;
2916 value
= strtol(s
, &endptr
, 10);
2917 if (endptr
[0] != '\0') return REDIS_ERR
;
2918 slen
= snprintf(buf
,32,"%ld",value
);
2920 /* If the number converted back into a string is not identical
2921 * then it's not possible to encode the string as integer */
2922 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2923 if (longval
) *longval
= value
;
2927 /* Try to encode a string object in order to save space */
2928 static int tryObjectEncoding(robj
*o
) {
2932 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2933 return REDIS_ERR
; /* Already encoded */
2935 /* It's not safe to encode shared objects: shared objects can be shared
2936 * everywhere in the "object space" of Redis. Encoded objects can only
2937 * appear as "values" (and not, for instance, as keys) */
2938 if (o
->refcount
> 1) return REDIS_ERR
;
2940 /* Currently we try to encode only strings */
2941 redisAssert(o
->type
== REDIS_STRING
);
2943 /* Check if we can represent this string as a long integer */
2944 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2946 /* Ok, this object can be encoded */
2947 o
->encoding
= REDIS_ENCODING_INT
;
2949 o
->ptr
= (void*) value
;
2953 /* Get a decoded version of an encoded object (returned as a new object).
2954 * If the object is already raw-encoded just increment the ref count. */
2955 static robj
*getDecodedObject(robj
*o
) {
2958 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2962 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2965 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2966 dec
= createStringObject(buf
,strlen(buf
));
2969 redisAssert(1 != 1);
2973 /* Compare two string objects via strcmp() or alike.
2974 * Note that the objects may be integer-encoded. In such a case we
2975 * use snprintf() to get a string representation of the numbers on the stack
2976 * and compare the strings, it's much faster than calling getDecodedObject().
2978 * Important note: if objects are not integer encoded, but binary-safe strings,
2979 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
2981 static int compareStringObjects(robj
*a
, robj
*b
) {
2982 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
2983 char bufa
[128], bufb
[128], *astr
, *bstr
;
2986 if (a
== b
) return 0;
2987 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
2988 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
2994 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
2995 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3001 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3004 static size_t stringObjectLen(robj
*o
) {
3005 redisAssert(o
->type
== REDIS_STRING
);
3006 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3007 return sdslen(o
->ptr
);
3011 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3015 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write 't' to 'fp' as a 4 byte signed integer in host byte order (the
 * value is narrowed to int32_t first).
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;

    return (fwrite(&stamp,4,1,fp) == 0) ? -1 : 0;
}
3028 /* check rdbLoadLen() comments for more info */
3029 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3030 unsigned char buf
[2];
3033 /* Save a 6 bit len */
3034 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3035 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3036 } else if (len
< (1<<14)) {
3037 /* Save a 14 bit len */
3038 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3040 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3042 /* Save a 32 bit len */
3043 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3044 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3046 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3051 /* String objects in the form "2391" "-100" without any space and with a
3052 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3053 * encoded as integers to save space */
3054 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3056 char *endptr
, buf
[32];
3058 /* Check if it's possible to encode this value as a number */
3059 value
= strtoll(s
, &endptr
, 10);
3060 if (endptr
[0] != '\0') return 0;
3061 snprintf(buf
,32,"%lld",value
);
3063 /* If the number converted back into a string is not identical
3064 * then it's not possible to encode the string as integer */
3065 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3067 /* Finally check if it fits in our ranges */
3068 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3069 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3070 enc
[1] = value
&0xFF;
3072 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3073 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3074 enc
[1] = value
&0xFF;
3075 enc
[2] = (value
>>8)&0xFF;
3077 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3078 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3079 enc
[1] = value
&0xFF;
3080 enc
[2] = (value
>>8)&0xFF;
3081 enc
[3] = (value
>>16)&0xFF;
3082 enc
[4] = (value
>>24)&0xFF;
3089 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3090 size_t comprlen
, outlen
;
3094 /* We require at least four bytes compression for this to be worth it */
3095 if (len
<= 4) return 0;
3097 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3098 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3099 if (comprlen
== 0) {
3103 /* Data compressed! Let's save it on disk */
3104 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3105 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3106 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3107 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3108 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3117 /* Save a string object as [len][data] on disk. If the object is a string
3118 * representation of an integer value we try to save it in a special form */
3119 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3122 /* Try integer encoding */
3124 unsigned char buf
[5];
3125 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3126 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3131 /* Try LZF compression - under 20 bytes it's unable to compress even
3132 * aaaaaaaaaaaaaaaaaa so skip it */
3133 if (server
.rdbcompression
&& len
> 20) {
3136 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3137 if (retval
== -1) return -1;
3138 if (retval
> 0) return 0;
3139 /* retval == 0 means data can't be compressed, save the old way */
3142 /* Store verbatim */
3143 if (rdbSaveLen(fp
,len
) == -1) return -1;
3144 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3148 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3149 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3152 /* Avoid incr/decr ref count business when possible.
3153 * This plays well with copy-on-write given that we are probably
3154 * in a child process (BGSAVE). Also this makes sure key objects
3155 * of swapped objects are not incRefCount-ed (an assert does not allow
3156 * this in order to avoid bugs) */
3157 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3158 obj
= getDecodedObject(obj
);
3159 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3162 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions, in which case no string follows:
 * 253: not a number
 * 254: positive infinity
 * 255: negative infinity
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len = 1; /* the prefix byte is always written */

    if (isnan(val)) {
        buf[0] = 253;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        char *text = (char*)buf+1;

        snprintf(text,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen(text);
        len += buf[0];
    }
    return (fwrite(buf,len,1,fp) == 0) ? -1 : 0;
}
3194 /* Save a Redis object. */
3195 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3196 if (o
->type
== REDIS_STRING
) {
3197 /* Save a string value */
3198 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3199 } else if (o
->type
== REDIS_LIST
) {
3200 /* Save a list value */
3201 list
*list
= o
->ptr
;
3205 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3206 listRewind(list
,&li
);
3207 while((ln
= listNext(&li
))) {
3208 robj
*eleobj
= listNodeValue(ln
);
3210 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3212 } else if (o
->type
== REDIS_SET
) {
3213 /* Save a set value */
3215 dictIterator
*di
= dictGetIterator(set
);
3218 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3219 while((de
= dictNext(di
)) != NULL
) {
3220 robj
*eleobj
= dictGetEntryKey(de
);
3222 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3224 dictReleaseIterator(di
);
3225 } else if (o
->type
== REDIS_ZSET
) {
3226 /* Save a sorted set value */
3228 dictIterator
*di
= dictGetIterator(zs
->dict
);
3231 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3232 while((de
= dictNext(di
)) != NULL
) {
3233 robj
*eleobj
= dictGetEntryKey(de
);
3234 double *score
= dictGetEntryVal(de
);
3236 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3237 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3239 dictReleaseIterator(di
);
3240 } else if (o
->type
== REDIS_HASH
) {
3241 /* Save a hash value */
3242 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3243 unsigned char *p
= zipmapRewind(o
->ptr
);
3244 unsigned int count
= zipmapLen(o
->ptr
);
3245 unsigned char *key
, *val
;
3246 unsigned int klen
, vlen
;
3248 if (rdbSaveLen(fp
,count
) == -1) return -1;
3249 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3250 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3251 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3254 dictIterator
*di
= dictGetIterator(o
->ptr
);
3257 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3258 while((de
= dictNext(di
)) != NULL
) {
3259 robj
*key
= dictGetEntryKey(de
);
3260 robj
*val
= dictGetEntryVal(de
);
3262 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3263 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3265 dictReleaseIterator(di
);
3273 /* Return the length the object will have on disk if saved with
3274 * the rdbSaveObject() function. Currently we use a trick to get
3275 * this length with very little changes to the code. In the future
3276 * we could switch to a faster solution. */
3277 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3278 if (fp
== NULL
) fp
= server
.devnull
;
3280 assert(rdbSaveObject(fp
,o
) != 1);
3284 /* Return the number of pages required to save this object in the swap file */
3285 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3286 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3288 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3291 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3292 static int rdbSave(char *filename
) {
3293 dictIterator
*di
= NULL
;
3298 time_t now
= time(NULL
);
3300 /* Wait for I/O threads to terminate, just in case this is a
3301 * foreground-saving, to avoid seeking the swap file descriptor at the
3303 if (server
.vm_enabled
)
3304 waitEmptyIOJobsQueue();
3306 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3307 fp
= fopen(tmpfile
,"w");
3309 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3312 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3313 for (j
= 0; j
< server
.dbnum
; j
++) {
3314 redisDb
*db
= server
.db
+j
;
3316 if (dictSize(d
) == 0) continue;
3317 di
= dictGetIterator(d
);
3323 /* Write the SELECT DB opcode */
3324 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3325 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3327 /* Iterate this DB writing every entry */
3328 while((de
= dictNext(di
)) != NULL
) {
3329 robj
*key
= dictGetEntryKey(de
);
3330 robj
*o
= dictGetEntryVal(de
);
3331 time_t expiretime
= getExpire(db
,key
);
3333 /* Save the expire time */
3334 if (expiretime
!= -1) {
3335 /* If this key is already expired skip it */
3336 if (expiretime
< now
) continue;
3337 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3338 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3340 /* Save the key and associated value. This requires special
3341 * handling if the value is swapped out. */
3342 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3343 key
->storage
== REDIS_VM_SWAPPING
) {
3344 /* Save type, key, value */
3345 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3346 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3347 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3349 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3351 /* Get a preview of the object in memory */
3352 po
= vmPreviewObject(key
);
3353 /* Save type, key, value */
3354 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3355 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3356 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3357 /* Remove the loaded object from memory */
3361 dictReleaseIterator(di
);
3364 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3366 /* Make sure data will not remain on the OS's output buffers */
3371 /* Use RENAME to make sure the DB file is changed atomically only
3372 * if the generate DB file is ok. */
3373 if (rename(tmpfile
,filename
) == -1) {
3374 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3378 redisLog(REDIS_NOTICE
,"DB saved on disk");
3380 server
.lastsave
= time(NULL
);
3386 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3387 if (di
) dictReleaseIterator(di
);
3391 static int rdbSaveBackground(char *filename
) {
3394 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3395 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3396 if ((childpid
= fork()) == 0) {
3398 if (server
.vm_enabled
) vmReopenSwapFile();
3400 if (rdbSave(filename
) == REDIS_OK
) {
3407 if (childpid
== -1) {
3408 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3412 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3413 server
.bgsavechildpid
= childpid
;
3416 return REDIS_OK
; /* unreached */
/* Unlink "temp-<pid>.rdb" for the given pid, the same name pattern rdbSave()
 * uses for its temporary file. The result of unlink() is not checked. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read one byte from 'fp' and return it as a non negative int.
 * Returns -1 if the byte cannot be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a 4 byte signed integer (host byte order, as written by
 * rdbSaveTime()) and return it as a time_t. Returns -1 on a short read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) == 0) return -1;
    return (time_t) stamp;
}
3438 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3439 * of this file for a description of how this are stored on disk.
3441 * isencoded is set to 1 if the readed length is not actually a length but
3442 * an "encoding type", check the above comments for more info */
3443 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3444 unsigned char buf
[2];
3448 if (isencoded
) *isencoded
= 0;
3449 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3450 type
= (buf
[0]&0xC0)>>6;
3451 if (type
== REDIS_RDB_6BITLEN
) {
3452 /* Read a 6 bit len */
3454 } else if (type
== REDIS_RDB_ENCVAL
) {
3455 /* Read a 6 bit len encoding type */
3456 if (isencoded
) *isencoded
= 1;
3458 } else if (type
== REDIS_RDB_14BITLEN
) {
3459 /* Read a 14 bit len */
3460 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3461 return ((buf
[0]&0x3F)<<8)|buf
[1];
3463 /* Read a 32 bit len */
3464 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3469 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3470 unsigned char enc
[4];
3473 if (enctype
== REDIS_RDB_ENC_INT8
) {
3474 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3475 val
= (signed char)enc
[0];
3476 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3478 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3479 v
= enc
[0]|(enc
[1]<<8);
3481 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3483 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3484 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3487 val
= 0; /* anti-warning */
3490 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3493 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3494 unsigned int len
, clen
;
3495 unsigned char *c
= NULL
;
3498 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3499 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3500 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3501 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3502 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3503 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3505 return createObject(REDIS_STRING
,val
);
3512 static robj
*rdbLoadStringObject(FILE*fp
) {
3517 len
= rdbLoadLen(fp
,&isencoded
);
3520 case REDIS_RDB_ENC_INT8
:
3521 case REDIS_RDB_ENC_INT16
:
3522 case REDIS_RDB_ENC_INT32
:
3523 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3524 case REDIS_RDB_ENC_LZF
:
3525 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3531 if (len
== REDIS_RDB_LENERR
) return NULL
;
3532 val
= sdsnewlen(NULL
,len
);
3533 if (len
&& fread(val
,len
,1,fp
) == 0) {
3537 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3540 /* For information about double serialization check rdbSaveDoubleValue() */
3541 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3545 if (fread(&len
,1,1,fp
) == 0) return -1;
3547 case 255: *val
= R_NegInf
; return 0;
3548 case 254: *val
= R_PosInf
; return 0;
3549 case 253: *val
= R_Nan
; return 0;
3551 if (fread(buf
,len
,1,fp
) == 0) return -1;
3553 sscanf(buf
, "%lg", val
);
3558 /* Load a Redis object of the specified type from the specified file.
3559 * On success a newly allocated object is returned, otherwise NULL. */
3560 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3563 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3564 if (type
== REDIS_STRING
) {
3565 /* Read string value */
3566 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3567 tryObjectEncoding(o
);
3568 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3569 /* Read list/set value */
3572 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3573 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3574 /* It's faster to expand the dict to the right size asap in order
3575 * to avoid rehashing */
3576 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3577 dictExpand(o
->ptr
,listlen
);
3578 /* Load every single element of the list/set */
3582 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3583 tryObjectEncoding(ele
);
3584 if (type
== REDIS_LIST
) {
3585 listAddNodeTail((list
*)o
->ptr
,ele
);
3587 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3590 } else if (type
== REDIS_ZSET
) {
3591 /* Read list/set value */
3595 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3596 o
= createZsetObject();
3598 /* Load every single element of the list/set */
3601 double *score
= zmalloc(sizeof(double));
3603 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3604 tryObjectEncoding(ele
);
3605 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3606 dictAdd(zs
->dict
,ele
,score
);
3607 zslInsert(zs
->zsl
,*score
,ele
);
3608 incrRefCount(ele
); /* added to skiplist */
3610 } else if (type
== REDIS_HASH
) {
3613 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3614 o
= createHashObject();
3615 /* Too many entries? Use an hash table. */
3616 if (hashlen
> server
.hash_max_zipmap_entries
)
3617 convertToRealHash(o
);
3618 /* Load every key/value, then set it into the zipmap or hash
3619 * table, as needed. */
3623 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3624 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3625 /* If we are using a zipmap and there are too big values
3626 * the object is converted to real hash table encoding. */
3627 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3628 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3629 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3631 convertToRealHash(o
);
3634 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3635 unsigned char *zm
= o
->ptr
;
3637 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3638 val
->ptr
,sdslen(val
->ptr
),NULL
);
3643 tryObjectEncoding(key
);
3644 tryObjectEncoding(val
);
3645 dictAdd((dict
*)o
->ptr
,key
,val
);
3654 static int rdbLoad(char *filename
) {
3656 robj
*keyobj
= NULL
;
3658 int type
, retval
, rdbver
;
3659 dict
*d
= server
.db
[0].dict
;
3660 redisDb
*db
= server
.db
+0;
3662 time_t expiretime
= -1, now
= time(NULL
);
3663 long long loadedkeys
= 0;
3665 fp
= fopen(filename
,"r");
3666 if (!fp
) return REDIS_ERR
;
3667 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3669 if (memcmp(buf
,"REDIS",5) != 0) {
3671 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3674 rdbver
= atoi(buf
+5);
3677 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3684 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3685 if (type
== REDIS_EXPIRETIME
) {
3686 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3687 /* We read the time so we need to read the object type again */
3688 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3690 if (type
== REDIS_EOF
) break;
3691 /* Handle SELECT DB opcode as a special case */
3692 if (type
== REDIS_SELECTDB
) {
3693 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3695 if (dbid
>= (unsigned)server
.dbnum
) {
3696 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3699 db
= server
.db
+dbid
;
3704 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3706 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3707 /* Add the new object in the hash table */
3708 retval
= dictAdd(d
,keyobj
,o
);
3709 if (retval
== DICT_ERR
) {
3710 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3713 /* Set the expire time if needed */
3714 if (expiretime
!= -1) {
3715 setExpire(db
,keyobj
,expiretime
);
3716 /* Delete this key if already expired */
3717 if (expiretime
< now
) deleteKey(db
,keyobj
);
3721 /* Handle swapping while loading big datasets when VM is on */
3723 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3724 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3725 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3732 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3733 if (keyobj
) decrRefCount(keyobj
);
3734 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3736 return REDIS_ERR
; /* Just to avoid warning */
3739 /*================================== Commands =============================== */
3741 static void authCommand(redisClient
*c
) {
3742 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3743 c
->authenticated
= 1;
3744 addReply(c
,shared
.ok
);
3746 c
->authenticated
= 0;
3747 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3751 static void pingCommand(redisClient
*c
) {
3752 addReply(c
,shared
.pong
);
3755 static void echoCommand(redisClient
*c
) {
3756 addReplyBulk(c
,c
->argv
[1]);
3759 /*=================================== Strings =============================== */
3761 static void setGenericCommand(redisClient
*c
, int nx
) {
3764 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3765 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3766 if (retval
== DICT_ERR
) {
3768 /* If the key is about a swapped value, we want a new key object
3769 * to overwrite the old. So we delete the old key in the database.
3770 * This will also make sure that swap pages about the old object
3771 * will be marked as free. */
3772 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3773 incrRefCount(c
->argv
[1]);
3774 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3775 incrRefCount(c
->argv
[2]);
3777 addReply(c
,shared
.czero
);
3781 incrRefCount(c
->argv
[1]);
3782 incrRefCount(c
->argv
[2]);
3785 removeExpire(c
->db
,c
->argv
[1]);
3786 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3789 static void setCommand(redisClient
*c
) {
3790 setGenericCommand(c
,0);
3793 static void setnxCommand(redisClient
*c
) {
3794 setGenericCommand(c
,1);
3797 static int getGenericCommand(redisClient
*c
) {
3800 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3803 if (o
->type
!= REDIS_STRING
) {
3804 addReply(c
,shared
.wrongtypeerr
);
3812 static void getCommand(redisClient
*c
) {
3813 getGenericCommand(c
);
3816 static void getsetCommand(redisClient
*c
) {
3817 if (getGenericCommand(c
) == REDIS_ERR
) return;
3818 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3819 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3821 incrRefCount(c
->argv
[1]);
3823 incrRefCount(c
->argv
[2]);
3825 removeExpire(c
->db
,c
->argv
[1]);
3828 static void mgetCommand(redisClient
*c
) {
3831 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3832 for (j
= 1; j
< c
->argc
; j
++) {
3833 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3835 addReply(c
,shared
.nullbulk
);
3837 if (o
->type
!= REDIS_STRING
) {
3838 addReply(c
,shared
.nullbulk
);
3846 static void msetGenericCommand(redisClient
*c
, int nx
) {
3847 int j
, busykeys
= 0;
3849 if ((c
->argc
% 2) == 0) {
3850 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3853 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3854 * set nothing at all if at least one already key exists. */
3856 for (j
= 1; j
< c
->argc
; j
+= 2) {
3857 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3863 addReply(c
, shared
.czero
);
3867 for (j
= 1; j
< c
->argc
; j
+= 2) {
3870 tryObjectEncoding(c
->argv
[j
+1]);
3871 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3872 if (retval
== DICT_ERR
) {
3873 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3874 incrRefCount(c
->argv
[j
+1]);
3876 incrRefCount(c
->argv
[j
]);
3877 incrRefCount(c
->argv
[j
+1]);
3879 removeExpire(c
->db
,c
->argv
[j
]);
3881 server
.dirty
+= (c
->argc
-1)/2;
3882 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3885 static void msetCommand(redisClient
*c
) {
3886 msetGenericCommand(c
,0);
3889 static void msetnxCommand(redisClient
*c
) {
3890 msetGenericCommand(c
,1);
3893 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3898 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3902 if (o
->type
!= REDIS_STRING
) {
3907 if (o
->encoding
== REDIS_ENCODING_RAW
)
3908 value
= strtoll(o
->ptr
, &eptr
, 10);
3909 else if (o
->encoding
== REDIS_ENCODING_INT
)
3910 value
= (long)o
->ptr
;
3912 redisAssert(1 != 1);
3917 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3918 tryObjectEncoding(o
);
3919 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3920 if (retval
== DICT_ERR
) {
3921 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3922 removeExpire(c
->db
,c
->argv
[1]);
3924 incrRefCount(c
->argv
[1]);
3927 addReply(c
,shared
.colon
);
3929 addReply(c
,shared
.crlf
);
3932 static void incrCommand(redisClient
*c
) {
3933 incrDecrCommand(c
,1);
3936 static void decrCommand(redisClient
*c
) {
3937 incrDecrCommand(c
,-1);
3940 static void incrbyCommand(redisClient
*c
) {
3941 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3942 incrDecrCommand(c
,incr
);
3945 static void decrbyCommand(redisClient
*c
) {
3946 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3947 incrDecrCommand(c
,-incr
);
3950 static void appendCommand(redisClient
*c
) {
3955 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3957 /* Create the key */
3958 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3959 incrRefCount(c
->argv
[1]);
3960 incrRefCount(c
->argv
[2]);
3961 totlen
= stringObjectLen(c
->argv
[2]);
3965 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3968 o
= dictGetEntryVal(de
);
3969 if (o
->type
!= REDIS_STRING
) {
3970 addReply(c
,shared
.wrongtypeerr
);
3973 /* If the object is specially encoded or shared we have to make
3975 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
3976 robj
*decoded
= getDecodedObject(o
);
3978 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
3979 decrRefCount(decoded
);
3980 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3983 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
3984 o
->ptr
= sdscatlen(o
->ptr
,
3985 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
3987 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
3988 (unsigned long) c
->argv
[2]->ptr
);
3990 totlen
= sdslen(o
->ptr
);
3993 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
3996 static void substrCommand(redisClient
*c
) {
3998 long start
= atoi(c
->argv
[2]->ptr
);
3999 long end
= atoi(c
->argv
[3]->ptr
);
4000 size_t rangelen
, strlen
;
4003 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4004 checkType(c
,o
,REDIS_STRING
)) return;
4006 o
= getDecodedObject(o
);
4007 strlen
= sdslen(o
->ptr
);
4009 /* convert negative indexes */
4010 if (start
< 0) start
= strlen
+start
;
4011 if (end
< 0) end
= strlen
+end
;
4012 if (start
< 0) start
= 0;
4013 if (end
< 0) end
= 0;
4015 /* indexes sanity checks */
4016 if (start
> end
|| (size_t)start
>= strlen
) {
4017 /* Out of range start or start > end result in null reply */
4018 addReply(c
,shared
.nullbulk
);
4022 if ((size_t)end
>= strlen
) end
= strlen
-1;
4023 rangelen
= (end
-start
)+1;
4025 /* Return the result */
4026 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4027 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4028 addReplySds(c
,range
);
4029 addReply(c
,shared
.crlf
);
4033 /* ========================= Type agnostic commands ========================= */
4035 static void delCommand(redisClient
*c
) {
4038 for (j
= 1; j
< c
->argc
; j
++) {
4039 if (deleteKey(c
->db
,c
->argv
[j
])) {
4044 addReplyLong(c
,deleted
);
4047 static void existsCommand(redisClient
*c
) {
4048 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4051 static void selectCommand(redisClient
*c
) {
4052 int id
= atoi(c
->argv
[1]->ptr
);
4054 if (selectDb(c
,id
) == REDIS_ERR
) {
4055 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4057 addReply(c
,shared
.ok
);
4061 static void randomkeyCommand(redisClient
*c
) {
4065 de
= dictGetRandomKey(c
->db
->dict
);
4066 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4069 addReply(c
,shared
.plus
);
4070 addReply(c
,shared
.crlf
);
4072 addReply(c
,shared
.plus
);
4073 addReply(c
,dictGetEntryKey(de
));
4074 addReply(c
,shared
.crlf
);
4078 static void keysCommand(redisClient
*c
) {
4081 sds pattern
= c
->argv
[1]->ptr
;
4082 int plen
= sdslen(pattern
);
4083 unsigned long numkeys
= 0;
4084 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4086 di
= dictGetIterator(c
->db
->dict
);
4088 decrRefCount(lenobj
);
4089 while((de
= dictNext(di
)) != NULL
) {
4090 robj
*keyobj
= dictGetEntryKey(de
);
4092 sds key
= keyobj
->ptr
;
4093 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4094 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4095 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4096 addReplyBulk(c
,keyobj
);
4101 dictReleaseIterator(di
);
4102 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4105 static void dbsizeCommand(redisClient
*c
) {
4107 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4110 static void lastsaveCommand(redisClient
*c
) {
4112 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4115 static void typeCommand(redisClient
*c
) {
4119 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4124 case REDIS_STRING
: type
= "+string"; break;
4125 case REDIS_LIST
: type
= "+list"; break;
4126 case REDIS_SET
: type
= "+set"; break;
4127 case REDIS_ZSET
: type
= "+zset"; break;
4128 case REDIS_HASH
: type
= "+hash"; break;
4129 default: type
= "+unknown"; break;
4132 addReplySds(c
,sdsnew(type
));
4133 addReply(c
,shared
.crlf
);
4136 static void saveCommand(redisClient
*c
) {
4137 if (server
.bgsavechildpid
!= -1) {
4138 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4141 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4142 addReply(c
,shared
.ok
);
4144 addReply(c
,shared
.err
);
4148 static void bgsaveCommand(redisClient
*c
) {
4149 if (server
.bgsavechildpid
!= -1) {
4150 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4153 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4154 char *status
= "+Background saving started\r\n";
4155 addReplySds(c
,sdsnew(status
));
4157 addReply(c
,shared
.err
);
/* SHUTDOWN command handler. Visible steps:
 *  - logs the request;
 *  - if a background save child exists (server.bgsavechildpid != -1) it is
 *    sent SIGKILL and its temp file is removed with rdbRemoveTempFile();
 *  - with server.appendonly set, the AOF descriptor is fsync()ed and the VM
 *    swap file is unlinked when VM is enabled;
 *  - in the snapshotting path rdbSave() is called; on REDIS_OK the pid file
 *    is unlinked when daemonized, memory usage and a goodbye message are
 *    logged, and the VM swap file is unlinked when VM is enabled;
 *  - a warning is logged and a "-ERR can't quit" reply is built when the
 *    save fails. */
4161 static void shutdownCommand(redisClient
*c
) {
4162 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4163 /* Kill the saving child if there is a background saving in progress.
4164 We want to avoid race conditions, for instance our saving child may
4165 overwrite the synchronous saving done by SHUTDOWN. */
4166 if (server
.bgsavechildpid
!= -1) {
4167 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4168 kill(server
.bgsavechildpid
,SIGKILL
);
4169 rdbRemoveTempFile(server
.bgsavechildpid
);
4171 if (server
.appendonly
) {
4172 /* Append only file: fsync() the AOF and exit */
4173 fsync(server
.appendfd
);
4174 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4177 /* Snapshotting. Perform a SYNC SAVE and exit */
4178 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4179 if (server
.daemonize
)
4180 unlink(server
.pidfile
);
4181 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4182 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4183 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4186 /* Ooops.. error saving! The best we can do is to continue
4187 * operating. Note that if there was a background saving process,
4188 * in the next cron() Redis will be notified that the background
4189 * saving aborted, handling special stuff like slaves pending for
4190 * synchronization... */
4191 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4193 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
/* Shared implementation of RENAME (nx == 0) and RENAMENX (nx != 0).
 *
 * Visible behaviour: identical source and destination names (sdscmp() == 0)
 * reply shared.sameobjecterr; a missing source key replies shared.nokeyerr
 * through lookupKeyWriteOrReply(). The destination is passed to
 * deleteIfVolatile(), then dictAdd() is tried. On DICT_ERR the visible
 * code contains both a shared.czero reply and a dictReplace() call
 * (presumably selected by 'nx'). The destination key's refcount is
 * incremented, the source key is deleted, and the final reply is
 * shared.cone when nx is set, shared.ok otherwise. */
4198 static void renameGenericCommand(redisClient
*c
, int nx
) {
4201 /* To use the same key as src and dst is probably an error */
4202 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4203 addReply(c
,shared
.sameobjecterr
);
4207 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4211 deleteIfVolatile(c
->db
,c
->argv
[2]);
4212 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4215 addReply(c
,shared
.czero
);
4218 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4220 incrRefCount(c
->argv
[2]);
4222 deleteKey(c
->db
,c
->argv
[1]);
4224 addReply(c
,nx
? shared
.cone
: shared
.ok
);
/* RENAME command handler: calls renameGenericCommand() with nx = 0. */
4227 static void renameCommand(redisClient
*c
) {
4228 renameGenericCommand(c
,0);
/* RENAMENX command handler: calls renameGenericCommand() with nx = 1. */
4231 static void renamenxCommand(redisClient
*c
) {
4232 renameGenericCommand(c
,1);
/* MOVE command handler: moves key c->argv[1] to the DB whose index is
 * atoi(c->argv[2]->ptr).
 *
 * Visible behaviour: selectDb() failing on the target index replies
 * shared.outofrangeerr; the client is switched back to the source DB;
 * source == target replies shared.sameobjecterr; a missing key replies
 * shared.czero. The target key is passed to deleteIfVolatile(), then
 * dictAdd() into the target DB is tried and replies shared.czero on
 * DICT_ERR. On success the key's refcount is incremented, the key is
 * deleted from the source DB and shared.cone is sent. */
4235 static void moveCommand(redisClient
*c
) {
4240 /* Obtain source and target DB pointers */
4243 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4244 addReply(c
,shared
.outofrangeerr
);
4248 selectDb(c
,srcid
); /* Back to the source DB */
4250 /* If the user is moving using as target the same
4251 * DB as the source DB it is probably an error. */
4253 addReply(c
,shared
.sameobjecterr
);
4257 /* Check if the element exists and get a reference */
4258 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4260 addReply(c
,shared
.czero
);
4264 /* Try to add the element to the target DB */
4265 deleteIfVolatile(dst
,c
->argv
[1]);
4266 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4267 addReply(c
,shared
.czero
);
4270 incrRefCount(c
->argv
[1]);
4273 /* OK! key moved, free the entry in the source DB */
4274 deleteKey(src
,c
->argv
[1]);
4276 addReply(c
,shared
.cone
);
4279 /* =================================== Lists ================================ */
/* Shared implementation of LPUSH / RPUSH: pushes c->argv[2] onto the list
 * stored at c->argv[1], at the end selected by 'where' (REDIS_HEAD uses
 * listAddNodeHead(); the other visible call is listAddNodeTail()).
 *
 * Two paths are visible. In one a new list object is created with
 * createListObject() and added to the DB under the key; in the other the
 * looked-up object must be of type REDIS_LIST or shared.wrongtypeerr is
 * sent. In both paths handleClientsWaitingListPush() is consulted first
 * and, when it returns non-zero, shared.cone is sent. The pushed value's
 * refcount is incremented, and the final reply is ":<listLength>\r\n". */
4280 static void pushGenericCommand(redisClient
*c
, int where
) {
4284 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4286 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4287 addReply(c
,shared
.cone
);
4290 lobj
= createListObject();
4292 if (where
== REDIS_HEAD
) {
4293 listAddNodeHead(list
,c
->argv
[2]);
4295 listAddNodeTail(list
,c
->argv
[2]);
4297 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4298 incrRefCount(c
->argv
[1]);
4299 incrRefCount(c
->argv
[2]);
4301 if (lobj
->type
!= REDIS_LIST
) {
4302 addReply(c
,shared
.wrongtypeerr
);
4305 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4306 addReply(c
,shared
.cone
);
4310 if (where
== REDIS_HEAD
) {
4311 listAddNodeHead(list
,c
->argv
[2]);
4313 listAddNodeTail(list
,c
->argv
[2]);
4315 incrRefCount(c
->argv
[2]);
4318 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
/* LPUSH command handler: calls pushGenericCommand() with REDIS_HEAD. */
4321 static void lpushCommand(redisClient
*c
) {
4322 pushGenericCommand(c
,REDIS_HEAD
);
/* RPUSH command handler: calls pushGenericCommand() with REDIS_TAIL. */
4325 static void rpushCommand(redisClient
*c
) {
4326 pushGenericCommand(c
,REDIS_TAIL
);
/* LLEN command handler. A missing key replies shared.czero (through
 * lookupKeyReadOrReply()); a non-list value is rejected by checkType().
 * Otherwise the list length is sent with addReplyUlong(). */
4329 static void llenCommand(redisClient
*c
) {
4333 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4334 checkType(c
,o
,REDIS_LIST
)) return;
4337 addReplyUlong(c
,listLength(l
));
/* LINDEX command handler. The index is parsed from c->argv[2] with atoi().
 * A missing key replies shared.nullbulk; a non-list value is rejected by
 * checkType(). The node is fetched with listIndex(); the visible replies
 * are shared.nullbulk (presumably when no node is found) and the node's
 * value as a bulk reply. */
4340 static void lindexCommand(redisClient
*c
) {
4342 int index
= atoi(c
->argv
[2]->ptr
);
4346 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4347 checkType(c
,o
,REDIS_LIST
)) return;
4350 ln
= listIndex(list
, index
);
4352 addReply(c
,shared
.nullbulk
);
4354 robj
*ele
= listNodeValue(ln
);
4355 addReplyBulk(c
,ele
);
/* LSET command handler. The index is parsed from c->argv[2] with atoi().
 * A missing key replies shared.nokeyerr; a non-list value is rejected by
 * checkType(). The node is fetched with listIndex(); shared.outofrangeerr
 * is the error reply visible here (presumably when no node is found).
 * Otherwise the node's value is replaced by c->argv[3], whose refcount is
 * incremented, and shared.ok is sent. */
4359 static void lsetCommand(redisClient
*c
) {
4361 int index
= atoi(c
->argv
[2]->ptr
);
4365 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4366 checkType(c
,o
,REDIS_LIST
)) return;
4369 ln
= listIndex(list
, index
);
4371 addReply(c
,shared
.outofrangeerr
);
4373 robj
*ele
= listNodeValue(ln
);
4376 listNodeValue(ln
) = c
->argv
[3];
4377 incrRefCount(c
->argv
[3]);
4378 addReply(c
,shared
.ok
);
/* Shared implementation of LPOP / RPOP. A missing key replies
 * shared.nullbulk; a non-list value is rejected by checkType(). The node is
 * taken with listFirst() when 'where' is REDIS_HEAD; the other visible
 * choice is listLast(). The visible replies are shared.nullbulk
 * (presumably for an empty list) and the node's value as a bulk reply,
 * after which the node is removed and the key is deleted once the list
 * length reaches 0. */
4383 static void popGenericCommand(redisClient
*c
, int where
) {
4388 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4389 checkType(c
,o
,REDIS_LIST
)) return;
4392 if (where
== REDIS_HEAD
)
4393 ln
= listFirst(list
);
4395 ln
= listLast(list
);
4398 addReply(c
,shared
.nullbulk
);
4400 robj
*ele
= listNodeValue(ln
);
4401 addReplyBulk(c
,ele
);
4402 listDelNode(list
,ln
);
4403 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* LPOP command handler: calls popGenericCommand() with REDIS_HEAD. */
4408 static void lpopCommand(redisClient
*c
) {
4409 popGenericCommand(c
,REDIS_HEAD
);
/* RPOP command handler: calls popGenericCommand() with REDIS_TAIL. */
4412 static void rpopCommand(redisClient
*c
) {
4413 popGenericCommand(c
,REDIS_TAIL
);
/* LRANGE command handler. 'start' and 'end' are parsed from c->argv[2] and
 * c->argv[3] with atoi(). A missing key replies shared.nullmultibulk; a
 * non-list value is rejected by checkType().
 *
 * Negative indexes are offset by the list length and then clamped to 0.
 * When start > end or start >= length, shared.emptymultibulk is sent.
 * 'end' is clamped to length-1, the reply header "*<rangelen>\r\n" is
 * emitted, and rangelen elements starting at listIndex(list,start) are
 * sent as bulk replies. */
4416 static void lrangeCommand(redisClient
*c
) {
4418 int start
= atoi(c
->argv
[2]->ptr
);
4419 int end
= atoi(c
->argv
[3]->ptr
);
4426 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4427 checkType(c
,o
,REDIS_LIST
)) return;
4429 llen
= listLength(list
);
4431 /* convert negative indexes */
4432 if (start
< 0) start
= llen
+start
;
4433 if (end
< 0) end
= llen
+end
;
4434 if (start
< 0) start
= 0;
4435 if (end
< 0) end
= 0;
4437 /* indexes sanity checks */
4438 if (start
> end
|| start
>= llen
) {
4439 /* Out of range start or start > end result in empty list */
4440 addReply(c
,shared
.emptymultibulk
);
4443 if (end
>= llen
) end
= llen
-1;
4444 rangelen
= (end
-start
)+1;
4446 /* Return the result in form of a multi-bulk reply */
4447 ln
= listIndex(list
, start
);
4448 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4449 for (j
= 0; j
< rangelen
; j
++) {
4450 ele
= listNodeValue(ln
);
4451 addReplyBulk(c
,ele
);
/* LTRIM command handler. 'start' and 'end' are parsed from c->argv[2] and
 * c->argv[3] with atoi(). A missing key replies shared.ok; a non-list value
 * is rejected by checkType().
 *
 * Negative indexes are offset by the list length and then clamped to 0;
 * 'end' is clamped to length-1. 'ltrim' nodes are then removed from the
 * head and 'rtrim' nodes from the tail (their computation is not part of
 * this listing). The key is deleted when the list becomes empty, and
 * shared.ok is sent. */
4456 static void ltrimCommand(redisClient
*c
) {
4458 int start
= atoi(c
->argv
[2]->ptr
);
4459 int end
= atoi(c
->argv
[3]->ptr
);
4461 int j
, ltrim
, rtrim
;
4465 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4466 checkType(c
,o
,REDIS_LIST
)) return;
4468 llen
= listLength(list
);
4470 /* convert negative indexes */
4471 if (start
< 0) start
= llen
+start
;
4472 if (end
< 0) end
= llen
+end
;
4473 if (start
< 0) start
= 0;
4474 if (end
< 0) end
= 0;
4476 /* indexes sanity checks */
4477 if (start
> end
|| start
>= llen
) {
4478 /* Out of range start or start > end result in empty list */
4482 if (end
>= llen
) end
= llen
-1;
4487 /* Remove list elements to perform the trim */
4488 for (j
= 0; j
< ltrim
; j
++) {
4489 ln
= listFirst(list
);
4490 listDelNode(list
,ln
);
4492 for (j
= 0; j
< rtrim
; j
++) {
4493 ln
= listLast(list
);
4494 listDelNode(list
,ln
);
4496 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4498 addReply(c
,shared
.ok
);
/* LREM command handler. 'toremove' is parsed from c->argv[2] with atoi();
 * the visible code negates it in one branch and walks the list from the
 * tail when 'fromtail' is set, from the head otherwise. A missing key
 * replies shared.czero; a non-list value is rejected by checkType().
 *
 * Each node whose value compares equal to c->argv[3]
 * (compareStringObjects() == 0) is deleted; the walk stops once 'toremove'
 * is non-zero and 'removed' has reached it. The key is deleted when the
 * list becomes empty and the reply is ":<removed>\r\n". */
4501 static void lremCommand(redisClient
*c
) {
4504 listNode
*ln
, *next
;
4505 int toremove
= atoi(c
->argv
[2]->ptr
);
4509 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4510 checkType(c
,o
,REDIS_LIST
)) return;
4514 toremove
= -toremove
;
4517 ln
= fromtail
? list
->tail
: list
->head
;
4519 robj
*ele
= listNodeValue(ln
);
4521 next
= fromtail
? ln
->prev
: ln
->next
;
4522 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4523 listDelNode(list
,ln
);
4526 if (toremove
&& removed
== toremove
) break;
4530 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4531 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
/* RPOPLPUSH command handler (summary of the visible code): the source list
 * at c->argv[1] must exist (else shared.nullbulk) and be a list. Its last
 * node is taken with listLast(); shared.nullbulk is the visible reply for
 * the no-element case. A destination at c->argv[2] that exists but is not
 * a list replies shared.wrongtypeerr. Unless handleClientsWaitingListPush()
 * consumes the element, the destination list is created when missing and
 * the element is added at its head. The element is also sent to the caller
 * as a bulk reply, then removed from the source list, whose key is deleted
 * once the list is empty. */
4534 /* This is the semantic of this command:
4535 * RPOPLPUSH srclist dstlist:
4536 * IF LLEN(srclist) > 0
4537 * element = RPOP srclist
4538 * LPUSH dstlist element
4545 * The idea is to be able to get an element from a list in a reliable way
4546 * since the element is not just returned but pushed against another list
4547 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4549 static void rpoplpushcommand(redisClient
*c
) {
4554 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4555 checkType(c
,sobj
,REDIS_LIST
)) return;
4556 srclist
= sobj
->ptr
;
4557 ln
= listLast(srclist
);
4560 addReply(c
,shared
.nullbulk
);
4562 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4563 robj
*ele
= listNodeValue(ln
);
4566 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4567 addReply(c
,shared
.wrongtypeerr
);
4571 /* Add the element to the target list (unless it's directly
4572 * passed to some BLPOP-ing client) */
4573 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4575 /* Create the list if the key does not exist */
4576 dobj
= createListObject();
4577 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4578 incrRefCount(c
->argv
[2]);
4580 dstlist
= dobj
->ptr
;
4581 listAddNodeHead(dstlist
,ele
);
4585 /* Send the element to the client as reply as well */
4586 addReplyBulk(c
,ele
);
4588 /* Finally remove the element from the source list */
4589 listDelNode(srclist
,ln
);
4590 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4595 /* ==================================== Sets ================================ */
/* SADD command handler. The set at c->argv[1] is looked up for writing; the
 * visible code creates it with createSetObject() and adds it to the DB in
 * one branch, and replies shared.wrongtypeerr when an existing value is
 * not of type REDIS_SET. The member c->argv[2] is added with dictAdd():
 * on DICT_OK its refcount is incremented and shared.cone is sent;
 * shared.czero is the other visible reply (presumably for an existing
 * member). */
4597 static void saddCommand(redisClient
*c
) {
4600 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4602 set
= createSetObject();
4603 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4604 incrRefCount(c
->argv
[1]);
4606 if (set
->type
!= REDIS_SET
) {
4607 addReply(c
,shared
.wrongtypeerr
);
4611 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4612 incrRefCount(c
->argv
[2]);
4614 addReply(c
,shared
.cone
);
4616 addReply(c
,shared
.czero
);
/* SREM command handler. A missing key replies shared.czero; a non-set value
 * is rejected by checkType(). When dictDelete() of c->argv[2] returns
 * DICT_OK the hash table is resized if htNeedsResize() says so, the key is
 * deleted once the set is empty, and shared.cone is sent; shared.czero is
 * the other visible reply (presumably when the member was absent). */
4620 static void sremCommand(redisClient
*c
) {
4623 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4624 checkType(c
,set
,REDIS_SET
)) return;
4626 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4628 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4629 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4630 addReply(c
,shared
.cone
);
4632 addReply(c
,shared
.czero
);
/* SMOVE command handler: moves member c->argv[3] from the set at
 * c->argv[1] to the set at c->argv[2].
 *
 * Visible behaviour: a missing source replies shared.czero and a non-set
 * source replies shared.wrongtypeerr; an existing non-set destination
 * replies shared.wrongtypeerr. If dictDelete() on the source returns
 * DICT_ERR, shared.czero is sent. The source key is deleted when it
 * becomes empty and is not the same object as the destination. The
 * destination set is created with createSetObject() in one branch; the
 * member is then added to it (its refcount is incremented only when
 * dictAdd() returns DICT_OK) and shared.cone is sent. */
4636 static void smoveCommand(redisClient
*c
) {
4637 robj
*srcset
, *dstset
;
4639 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4640 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4642 /* If the source key does not exist return 0, if it's of the wrong type
4644 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4645 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4648 /* Error if the destination key is not a set as well */
4649 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4650 addReply(c
,shared
.wrongtypeerr
);
4653 /* Remove the element from the source set */
4654 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4655 /* Key not found in the src set! return zero */
4656 addReply(c
,shared
.czero
);
4659 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4660 deleteKey(c
->db
,c
->argv
[1]);
4662 /* Add the element to the destination set */
4664 dstset
= createSetObject();
4665 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4666 incrRefCount(c
->argv
[2]);
4668 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4669 incrRefCount(c
->argv
[3]);
4670 addReply(c
,shared
.cone
);
/* SISMEMBER command handler. A missing key replies shared.czero; a non-set
 * value is rejected by checkType(). shared.cone is sent when dictFind()
 * locates c->argv[2] in the set; shared.czero is the other visible reply. */
4673 static void sismemberCommand(redisClient
*c
) {
4676 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4677 checkType(c
,set
,REDIS_SET
)) return;
4679 if (dictFind(set
->ptr
,c
->argv
[2]))
4680 addReply(c
,shared
.cone
);
4682 addReply(c
,shared
.czero
);
/* SCARD command handler. A missing key replies shared.czero; a non-set
 * value is rejected by checkType(). Otherwise dictSize() of the set is
 * sent with addReplyUlong(). */
4685 static void scardCommand(redisClient
*c
) {
4689 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4690 checkType(c
,o
,REDIS_SET
)) return;
4693 addReplyUlong(c
,dictSize(s
));
/* SPOP command handler. A missing key replies shared.nullbulk; a non-set
 * value is rejected by checkType(). An entry is picked with
 * dictGetRandomKey(); shared.nullbulk is the visible reply for the
 * no-entry case. Otherwise the entry's key is sent as a bulk reply and
 * deleted from the set, the hash table is resized if needed, and the DB
 * key is deleted once the set is empty. */
4696 static void spopCommand(redisClient
*c
) {
4700 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4701 checkType(c
,set
,REDIS_SET
)) return;
4703 de
= dictGetRandomKey(set
->ptr
);
4705 addReply(c
,shared
.nullbulk
);
4707 robj
*ele
= dictGetEntryKey(de
);
4709 addReplyBulk(c
,ele
);
4710 dictDelete(set
->ptr
,ele
);
4711 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4712 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* SRANDMEMBER command handler. A missing key replies shared.nullbulk; a
 * non-set value is rejected by checkType(). An entry is picked with
 * dictGetRandomKey(); shared.nullbulk is the visible reply for the
 * no-entry case, otherwise the entry's key is sent as a bulk reply. No
 * deletion call is visible in this handler. */
4717 static void srandmemberCommand(redisClient
*c
) {
4721 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4722 checkType(c
,set
,REDIS_SET
)) return;
4724 de
= dictGetRandomKey(set
->ptr
);
4726 addReply(c
,shared
.nullbulk
);
4728 robj
*ele
= dictGetEntryKey(de
);
4730 addReplyBulk(c
,ele
);
4734 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4735 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4737 return dictSize(*d1
)-dictSize(*d2
);
/* Shared implementation of SINTER (dstkey == NULL at the call site) and
 * SINTERSTORE (dstkey set).
 *
 * Visible flow: an array of 'setsnum' dict pointers is allocated and each
 * key is looked up (lookupKeyWrite() or lookupKeyRead(), selected by a
 * condition not shown in this listing). The visible replies for the
 * missing-key case are shared.czero and shared.nullmultibulk, with the
 * destination key deleted first in one branch; a non-set value replies
 * shared.wrongtypeerr. The dictionaries are sorted by cardinality, then
 * every element of the first (smallest) one is tested against all the
 * others with dictFind(); surviving elements are sent as bulk replies or
 * added to 'dstset'. In the store path the destination key is replaced and
 * the resulting cardinality (or shared.czero for an empty result) is
 * sent; otherwise the placeholder 'lenobj' is filled with the final
 * "*<cardinality>\r\n" header. */
4740 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4741 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4744 robj
*lenobj
= NULL
, *dstset
= NULL
;
4745 unsigned long j
, cardinality
= 0;
4747 for (j
= 0; j
< setsnum
; j
++) {
4751 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4752 lookupKeyRead(c
->db
,setskeys
[j
]);
4756 if (deleteKey(c
->db
,dstkey
))
4758 addReply(c
,shared
.czero
);
4760 addReply(c
,shared
.nullmultibulk
);
4764 if (setobj
->type
!= REDIS_SET
) {
4766 addReply(c
,shared
.wrongtypeerr
);
4769 dv
[j
] = setobj
->ptr
;
4771 /* Sort sets from the smallest to largest, this will improve our
4772 * algorithm's performance */
4773 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4775 /* The first thing we should output is the total number of elements...
4776 * since this is a multi-bulk write, but at this stage we don't know
4777 * the intersection set size, so we use a trick, append an empty object
4778 * to the output list and save the pointer to later modify it with the
4781 lenobj
= createObject(REDIS_STRING
,NULL
);
4783 decrRefCount(lenobj
);
4785 /* If we have a target key where to store the resulting set
4786 * create this key with an empty set inside */
4787 dstset
= createSetObject();
4790 /* Iterate all the elements of the first (smallest) set, and test
4791 * the element against all the other sets, if at least one set does
4792 * not include the element it is discarded */
4793 di
= dictGetIterator(dv
[0]);
4795 while((de
= dictNext(di
)) != NULL
) {
4798 for (j
= 1; j
< setsnum
; j
++)
4799 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4801 continue; /* at least one set does not contain the member */
4802 ele
= dictGetEntryKey(de
);
4804 addReplyBulk(c
,ele
);
4807 dictAdd(dstset
->ptr
,ele
,NULL
);
4811 dictReleaseIterator(di
);
4814 /* Store the resulting set into the target, if the intersection
4815 * is not an empty set. */
4816 deleteKey(c
->db
,dstkey
);
4817 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4818 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4819 incrRefCount(dstkey
);
4820 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4822 decrRefCount(dstset
);
4823 addReply(c
,shared
.czero
);
4827 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
/* SINTER command handler: intersects the keys c->argv[1..argc-1] by calling
 * sinterGenericCommand() with a NULL destination key. */
4832 static void sinterCommand(redisClient
*c
) {
4833 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
/* SINTERSTORE command handler: intersects the keys c->argv[2..argc-1] and
 * passes c->argv[1] to sinterGenericCommand() as the destination key. */
4836 static void sinterstoreCommand(redisClient
*c
) {
4837 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Operation selectors. REDIS_OP_UNION and REDIS_OP_DIFF are the values
 * passed as 'op' to sunionDiffGenericCommand() by the SUNION and SDIFF
 * command handlers. */
4840 #define REDIS_OP_UNION 0
4841 #define REDIS_OP_DIFF 1
4842 #define REDIS_OP_INTER 2
/* Shared implementation of SUNION / SUNIONSTORE / SDIFF / SDIFFSTORE,
 * selected by 'op' (REDIS_OP_UNION or REDIS_OP_DIFF) and by 'dstkey'
 * (NULL at the non-store call sites).
 *
 * Visible flow: an array of 'setsnum' dict pointers is filled from the
 * looked-up keys; a non-set value replies shared.wrongtypeerr. A temporary
 * set 'dstset' is created. For a union, or for the first set of a diff,
 * elements are added to 'dstset'; for later sets of a diff they are
 * deleted from it. Missing keys count as empty sets, and a diff stops
 * early when its first set is missing or the running cardinality reaches
 * 0. Without a destination the result is sent as "*<cardinality>\r\n"
 * followed by bulk replies and 'dstset' is released; with a destination
 * the key is replaced and the stored size (or shared.czero for an empty
 * result) is sent. */
4844 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4845 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4848 robj
*dstset
= NULL
;
4849 int j
, cardinality
= 0;
4851 for (j
= 0; j
< setsnum
; j
++) {
4855 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4856 lookupKeyRead(c
->db
,setskeys
[j
]);
4861 if (setobj
->type
!= REDIS_SET
) {
4863 addReply(c
,shared
.wrongtypeerr
);
4866 dv
[j
] = setobj
->ptr
;
4869 /* We need a temp set object to store our union. If the dstkey
4870 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4871 * this set object will be the resulting object to set into the target key*/
4872 dstset
= createSetObject();
4874 /* Iterate all the elements of all the sets, add every element a single
4875 * time to the result set */
4876 for (j
= 0; j
< setsnum
; j
++) {
4877 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4878 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4880 di
= dictGetIterator(dv
[j
]);
4882 while((de
= dictNext(di
)) != NULL
) {
4885 /* dictAdd will not add the same element multiple times */
4886 ele
= dictGetEntryKey(de
);
4887 if (op
== REDIS_OP_UNION
|| j
== 0) {
4888 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4892 } else if (op
== REDIS_OP_DIFF
) {
4893 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4898 dictReleaseIterator(di
);
4900 /* result set is empty? Exit asap. */
4901 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4904 /* Output the content of the resulting set, if not in STORE mode */
4906 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4907 di
= dictGetIterator(dstset
->ptr
);
4908 while((de
= dictNext(di
)) != NULL
) {
4911 ele
= dictGetEntryKey(de
);
4912 addReplyBulk(c
,ele
);
4914 dictReleaseIterator(di
);
4915 decrRefCount(dstset
);
4917 /* If we have a target key where to store the resulting set
4918 * create this key with the result set inside */
4919 deleteKey(c
->db
,dstkey
);
4920 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4921 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4922 incrRefCount(dstkey
);
4923 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4925 decrRefCount(dstset
);
4926 addReply(c
,shared
.czero
);
/* SUNION command handler: unions the keys c->argv[1..argc-1] by calling
 * sunionDiffGenericCommand() with a NULL destination and REDIS_OP_UNION. */
4933 static void sunionCommand(redisClient
*c
) {
4934 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
/* SUNIONSTORE command handler: unions the keys c->argv[2..argc-1] and
 * passes c->argv[1] as the destination key, with REDIS_OP_UNION. */
4937 static void sunionstoreCommand(redisClient
*c
) {
4938 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
/* SDIFF command handler: diffs the keys c->argv[1..argc-1] by calling
 * sunionDiffGenericCommand() with a NULL destination and REDIS_OP_DIFF. */
4941 static void sdiffCommand(redisClient
*c
) {
4942 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
/* SDIFFSTORE command handler: diffs the keys c->argv[2..argc-1] and passes
 * c->argv[1] as the destination key, with REDIS_OP_DIFF. */
4945 static void sdiffstoreCommand(redisClient
*c
) {
4946 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4949 /* ==================================== ZSets =============================== */
4951 /* ZSETs are ordered sets using two data structures to hold the same elements
4952 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4955 * The elements are added to a hash table mapping Redis objects to scores.
4956 * At the same time the elements are added to a skip list mapping scores
4957 * to Redis objects (so objects are sorted by scores in this "view"). */
4959 /* This skiplist implementation is almost a C translation of the original
4960 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4961 * Alternative to Balanced Trees", modified in three ways:
4962 * a) this implementation allows for repeated values.
4963 * b) the comparison is not just by key (our 'score') but by satellite data.
4964 * c) there is a back pointer, so it's a doubly linked list with the back
4965 * pointers being only at "level 1". This allows traversing the list
4966 * from tail to head, useful for ZREVRANGE. */
/* Allocates a skiplist node for 'level' levels. The visible code allocates
 * the node itself, a 'forward' array of 'level' node pointers, and a
 * 'span' array of (level - 1) unsigned ints.
 * NOTE(review): with level == 1 the span allocation size is 0; whether a
 * guard precedes it cannot be seen in this listing - confirm. */
4968 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4969 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4971 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4973 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
/* Allocates a skiplist. The header node is created with
 * ZSKIPLIST_MAXLEVEL levels, score 0 and a NULL object; all of its forward
 * pointers are set to NULL, its spans (ZSKIPLIST_MAXLEVEL-1 of them) are
 * set to 0, and its backward pointer is set to NULL. */
4979 static zskiplist
*zslCreate(void) {
4983 zsl
= zmalloc(sizeof(*zsl
));
4986 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
4987 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
4988 zsl
->header
->forward
[j
] = NULL
;
4990 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
4991 if (j
< ZSKIPLIST_MAXLEVEL
-1)
4992 zsl
->header
->span
[j
] = 0;
4994 zsl
->header
->backward
= NULL
;
/* Releases a skiplist node: the visible code drops one reference on the
 * node's object and frees the node's 'forward' array. */
4999 static void zslFreeNode(zskiplistNode
*node
) {
5000 decrRefCount(node
->obj
);
5001 zfree(node
->forward
);
/* Releases a whole skiplist. The visible code starts from the first
 * level-0 node after the header, frees the header's 'forward' and 'span'
 * arrays, and advances through the level-0 chain via node->forward[0]. */
5006 static void zslFree(zskiplist
*zsl
) {
5007 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5009 zfree(zsl
->header
->forward
);
5010 zfree(zsl
->header
->span
);
5013 next
= node
->forward
[0];
/* Picks a random level for a new skiplist node. The visible loop keeps
 * going while the low 16 bits of random() are below ZSKIPLIST_P * 0xFFFF;
 * the loop body is not part of this listing (presumably it increments the
 * level). */
5020 static int zslRandomLevel(void) {
5022 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
/* Inserts 'obj' with 'score' into the skiplist.
 *
 * Visible flow: walking from the top level down, the search advances while
 * the next node has a lower score, or an equal score and a smaller object
 * (compareStringObjects() < 0), accumulating in rank[i] the rank crossed
 * at each level. A random level is drawn; levels above the current list
 * level use the header as predecessor, with span set to the list length.
 * The new node is linked after update[i] on each of its levels and the
 * spans of the node and its predecessors are recomputed from rank[];
 * predecessors on untouched higher levels get their span incremented.
 * Finally the backward pointers around the new node are fixed up. */
5027 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5028 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5029 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5033 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5034 /* store rank that is crossed to reach the insert position */
5035 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5037 while (x
->forward
[i
] &&
5038 (x
->forward
[i
]->score
< score
||
5039 (x
->forward
[i
]->score
== score
&&
5040 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5041 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5046 /* we assume the key is not already inside, since we allow duplicated
5047 * scores, and the re-insertion of score and redis object should never
5048 * happen since the caller of zslInsert() should test in the hash table
5049 * if the element is already inside or not. */
5050 level
= zslRandomLevel();
5051 if (level
> zsl
->level
) {
5052 for (i
= zsl
->level
; i
< level
; i
++) {
5054 update
[i
] = zsl
->header
;
5055 update
[i
]->span
[i
-1] = zsl
->length
;
5059 x
= zslCreateNode(level
,score
,obj
);
5060 for (i
= 0; i
< level
; i
++) {
5061 x
->forward
[i
] = update
[i
]->forward
[i
];
5062 update
[i
]->forward
[i
] = x
;
5064 /* update span covered by update[i] as x is inserted here */
5066 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5067 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5071 /* increment span for untouched levels */
5072 for (i
= level
; i
< zsl
->level
; i
++) {
5073 update
[i
]->span
[i
-1]++;
5076 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5078 x
->forward
[0]->backward
= x
;
/* Unlinks node 'x' from the skiplist given the per-level predecessor array
 * 'update'. On levels where update[i] points directly at x, the
 * predecessor absorbs x's span (minus one) and its forward pointer skips
 * x; on the other levels the predecessor's span is decremented. The
 * backward pointer of x's successor (or the list tail when x was last) is
 * fixed up, and the visible loop condition trims empty top levels. */
5084 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5085 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5087 for (i
= 0; i
< zsl
->level
; i
++) {
5088 if (update
[i
]->forward
[i
] == x
) {
5090 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5092 update
[i
]->forward
[i
] = x
->forward
[i
];
5094 /* invariant: i > 0, because update[0]->forward[0]
5095 * is always equal to x */
5096 update
[i
]->span
[i
-1] -= 1;
5099 if (x
->forward
[0]) {
5100 x
->forward
[0]->backward
= x
->backward
;
5102 zsl
->tail
= x
->backward
;
5104 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
/* Searches top-down for the position of (score, obj), using the same
 * ordering as insertion: lower score first, then smaller object on equal
 * scores. When the candidate node has exactly this score and an equal
 * object, it is removed with zslDeleteNode(). Returns 0 when nothing
 * matched; the success return value is not part of this listing. */
5109 /* Delete an element with matching score/object from the skiplist. */
5110 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5111 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5115 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5116 while (x
->forward
[i
] &&
5117 (x
->forward
[i
]->score
< score
||
5118 (x
->forward
[i
]->score
== score
&&
5119 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5123 /* We may have multiple elements with the same score, what we need
5124 * is to find the element with both the right score and object. */
5126 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5127 zslDeleteNode(zsl
, x
, update
);
5131 return 0; /* not found */
5133 return 0; /* not found */
/* Returns the number of removed elements ('removed'). The search skips
 * nodes whose score is below 'min'; nodes are then unlinked with
 * zslDeleteNode() and removed from 'dict' while their score is <= 'max'. */
5136 /* Delete all the elements with score between min and max from the skiplist.
5137 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5138 * Note that this function takes the reference to the hash table view of the
5139 * sorted set, in order to remove the elements from the hash table too. */
5140 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5141 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5142 unsigned long removed
= 0;
5146 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5147 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5151 /* We may have multiple elements with the same score, what we need
5152 * is to find the element with both the right score and object. */
5154 while (x
&& x
->score
<= max
) {
5155 zskiplistNode
*next
= x
->forward
[0];
5156 zslDeleteNode(zsl
, x
, update
);
5157 dictDelete(dict
,x
->obj
);
5162 return removed
; /* not found */
/* The search advances on each level while the rank reached so far plus the
 * next span stays below 'start' (level 0 counts 1 per step). Nodes are
 * then unlinked with zslDeleteNode() and removed from 'dict' while
 * 'traversed' is <= 'end'. */
5165 /* Delete all the elements with rank between start and end from the skiplist.
5166 * Start and end are inclusive. Note that start and end need to be 1-based */
5167 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5168 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5169 unsigned long traversed
= 0, removed
= 0;
5173 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5174 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5175 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5183 while (x
&& traversed
<= end
) {
5184 zskiplistNode
*next
= x
->forward
[0];
5185 zslDeleteNode(zsl
, x
, update
);
5186 dictDelete(dict
,x
->obj
);
/* The search walks top-down, advancing while the next node's score is
 * below 'score', and returns the level-0 successor of the node where it
 * stopped. */
5195 /* Find the first node having a score equal or greater than the specified one.
5196 * Returns NULL if there is no match. */
5197 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5202 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5203 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5206 /* We may have multiple elements with the same score, what we need
5207 * is to find the element with both the right score and object. */
5208 return x
->forward
[0];
/* Rank lookup (summary of the visible code): the search walks top-down,
 * advancing while the next node has a lower score, or an equal score and
 * an object comparing <= 'o', and adds the crossed span to 'rank' (level 0
 * counts 1 per step). A match is recognised when the node reached has a
 * non-NULL object equal to 'o'. */
5211 /* Find the rank for an element by both score and key.
5212 * Returns 0 when the element cannot be found, rank otherwise.
5213 * Note that the rank is 1-based due to the span of zsl->header to the
5215 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5217 unsigned long rank
= 0;
5221 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5222 while (x
->forward
[i
] &&
5223 (x
->forward
[i
]->score
< score
||
5224 (x
->forward
[i
]->score
== score
&&
5225 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5226 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5230 /* x might be equal to zsl->header, so test if obj is non-NULL */
5231 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
/* The search walks top-down, advancing while the rank reached so far plus
 * the next span does not exceed 'rank' (level 0 counts 1 per step), and
 * checks for traversed == rank to recognise the target node. */
5238 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5239 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5241 unsigned long traversed
= 0;
5245 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5246 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5248 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5251 if (traversed
== rank
) {
5258 /* The actual Z-commands implementations */
/* Visible flow: the sorted set at 'key' is looked up for writing and
 * created with createZsetObject() when missing; a value of another type
 * replies shared.wrongtypeerr. The new score is heap-allocated; in the
 * increment path it is the old score (read from the hash table) plus
 * 'scoreval'.
 *  - New element (dictAdd() returns DICT_OK): 'ele' is referenced once for
 *    the hash table and once for the skiplist, and the visible replies are
 *    the score as a double or shared.cone.
 *  - Existing element: when the score changed, the element is removed from
 *    the skiplist under its old score and re-inserted with the new one,
 *    then the hash table value is replaced; the visible replies are the
 *    score as a double or shared.czero.
 * Which reply belongs to ZADD and which to ZINCRBY is presumably selected
 * by 'doincrement'. */
5260 /* This generic command implements both ZADD and ZINCRBY.
5261 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5262 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5263 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5268 zsetobj
= lookupKeyWrite(c
->db
,key
);
5269 if (zsetobj
== NULL
) {
5270 zsetobj
= createZsetObject();
5271 dictAdd(c
->db
->dict
,key
,zsetobj
);
5274 if (zsetobj
->type
!= REDIS_ZSET
) {
5275 addReply(c
,shared
.wrongtypeerr
);
5281 /* Ok now since we implement both ZADD and ZINCRBY here the code
5282 * needs to handle the two different conditions. It's all about setting
5283 * '*score', that is, the new score to set, to the right value. */
5284 score
= zmalloc(sizeof(double));
5288 /* Read the old score. If the element was not present starts from 0 */
5289 de
= dictFind(zs
->dict
,ele
);
5291 double *oldscore
= dictGetEntryVal(de
);
5292 *score
= *oldscore
+ scoreval
;
5300 /* What follows is a simple remove and re-insert operation that is common
5301 * to both ZADD and ZINCRBY... */
5302 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5303 /* case 1: New element */
5304 incrRefCount(ele
); /* added to hash */
5305 zslInsert(zs
->zsl
,*score
,ele
);
5306 incrRefCount(ele
); /* added to skiplist */
5309 addReplyDouble(c
,*score
);
5311 addReply(c
,shared
.cone
);
5316 /* case 2: Score update operation */
5317 de
= dictFind(zs
->dict
,ele
);
5318 redisAssert(de
!= NULL
);
5319 oldscore
= dictGetEntryVal(de
);
5320 if (*score
!= *oldscore
) {
5323 /* Remove and insert the element in the skip list with new score */
5324 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5325 redisAssert(deleted
!= 0);
5326 zslInsert(zs
->zsl
,*score
,ele
);
5328 /* Update the score in the hash table */
5329 dictReplace(zs
->dict
,ele
,score
);
5335 addReplyDouble(c
,*score
);
5337 addReply(c
,shared
.czero
);
/* ZADD command handler: parses the score from c->argv[2] with strtod()
 * (no end-pointer check) and calls zaddGenericCommand() with key
 * c->argv[1], element c->argv[3] and doincrement = 0. */
5341 static void zaddCommand(redisClient
*c
) {
5344 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5345 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
/* ZINCRBY command handler: parses the increment from c->argv[2] with
 * strtod() (no end-pointer check) and calls zaddGenericCommand() with key
 * c->argv[1], element c->argv[3] and doincrement = 1. */
5348 static void zincrbyCommand(redisClient
*c
) {
5351 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5352 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* ZREM command handler. A missing key replies shared.czero; a non-zset
 * value is rejected by checkType(). The member c->argv[2] is looked up in
 * the hash table; shared.czero is the visible reply for the not-found
 * case. Otherwise the member is deleted from the skiplist under its stored
 * score (asserted to succeed) and from the hash table, the table is
 * resized if needed, the key is deleted once the set is empty, and
 * shared.cone is sent. */
5355 static void zremCommand(redisClient
*c
) {
5362 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5363 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5366 de
= dictFind(zs
->dict
,c
->argv
[2]);
5368 addReply(c
,shared
.czero
);
5371 /* Delete from the skiplist */
5372 oldscore
= dictGetEntryVal(de
);
5373 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5374 redisAssert(deleted
!= 0);
5376 /* Delete from the hash table */
5377 dictDelete(zs
->dict
,c
->argv
[2]);
5378 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5379 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5381 addReply(c
,shared
.cone
);
/* ZREMRANGEBYSCORE command handler. 'min' and 'max' are parsed from
 * c->argv[2] and c->argv[3] with strtod(). A missing key replies
 * shared.czero; a non-zset value is rejected by checkType(). The range is
 * removed with zslDeleteRangeByScore(), the hash table is resized if
 * needed, the key is deleted once the set is empty, server.dirty is
 * increased by the number of deleted elements, and that count is sent. */
5384 static void zremrangebyscoreCommand(redisClient
*c
) {
5385 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5386 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5391 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5392 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5395 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5396 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5397 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5398 server
.dirty
+= deleted
;
5399 addReplyLong(c
,deleted
);
/* ZREMRANGEBYRANK command handler. 'start' and 'end' are parsed from
 * c->argv[2] and c->argv[3] with atoi(). A missing key replies
 * shared.czero; a non-zset value is rejected by checkType().
 *
 * Negative indexes are offset by the skiplist length and then clamped to
 * 0; start > end or start >= length replies shared.czero; 'end' is clamped
 * to length-1. The 0-based range is converted to 1-based ranks for
 * zslDeleteRangeByRank(). The hash table is resized if needed, the key is
 * deleted once the set is empty, server.dirty is increased by the number
 * of deleted elements, and that count is sent. */
5402 static void zremrangebyrankCommand(redisClient
*c
) {
5403 int start
= atoi(c
->argv
[2]->ptr
);
5404 int end
= atoi(c
->argv
[3]->ptr
);
5410 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5411 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5413 llen
= zs
->zsl
->length
;
5415 /* convert negative indexes */
5416 if (start
< 0) start
= llen
+start
;
5417 if (end
< 0) end
= llen
+end
;
5418 if (start
< 0) start
= 0;
5419 if (end
< 0) end
= 0;
5421 /* indexes sanity checks */
5422 if (start
> end
|| start
>= llen
) {
5423 addReply(c
,shared
.czero
);
5426 if (end
>= llen
) end
= llen
-1;
5428 /* increment start and end because zsl*Rank functions
5429 * use 1-based rank */
5430 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5431 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5432 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5433 server
.dirty
+= deleted
;
5434 addReplyLong(c
, deleted
);
5442 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5443 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5444 unsigned long size1
, size2
;
5445 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5446 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5447 return size1
- size2
;
/* Aggregation modes accepted by zunionInterAggregate(): add the value,
 * keep the minimum, or keep the maximum. */
5450 #define REDIS_AGGR_SUM 1
5451 #define REDIS_AGGR_MIN 2
5452 #define REDIS_AGGR_MAX 3
/* Folds 'val' into '*target' according to 'aggregate':
 *   REDIS_AGGR_SUM - *target becomes *target + val
 *   REDIS_AGGR_MIN - *target becomes the smaller of val and *target
 *   REDIS_AGGR_MAX - *target becomes the larger of val and *target
 * The trailing redisAssert(0 != 0) always fails when reached (presumably
 * the branch for an unknown 'aggregate' value). */
5454 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5455 if (aggregate
== REDIS_AGGR_SUM
) {
5456 *target
= *target
+ val
;
5457 } else if (aggregate
== REDIS_AGGR_MIN
) {
5458 *target
= val
< *target
? val
: *target
;
5459 } else if (aggregate
== REDIS_AGGR_MAX
) {
5460 *target
= val
> *target
? val
: *target
;
5463 redisAssert(0 != 0);
5467 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5469 int aggregate
= REDIS_AGGR_SUM
;
5476 /* expect zsetnum input keys to be given */
5477 zsetnum
= atoi(c
->argv
[2]->ptr
);
5479 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5483 /* test if the expected number of keys would overflow */
5484 if (3+zsetnum
> c
->argc
) {
5485 addReply(c
,shared
.syntaxerr
);
5489 /* read keys to be used for input */
5490 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5491 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5492 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5496 if (zsetobj
->type
!= REDIS_ZSET
) {
5498 addReply(c
,shared
.wrongtypeerr
);
5501 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5504 /* default all weights to 1 */
5505 src
[i
].weight
= 1.0;
5508 /* parse optional extra arguments */
5510 int remaining
= c
->argc
- j
;
5513 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5515 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5516 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5518 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5520 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5521 aggregate
= REDIS_AGGR_SUM
;
5522 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5523 aggregate
= REDIS_AGGR_MIN
;
5524 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5525 aggregate
= REDIS_AGGR_MAX
;
5528 addReply(c
,shared
.syntaxerr
);
5534 addReply(c
,shared
.syntaxerr
);
5540 /* sort sets from the smallest to largest, this will improve our
5541 * algorithm's performance */
5542 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5544 dstobj
= createZsetObject();
5545 dstzset
= dstobj
->ptr
;
5547 if (op
== REDIS_OP_INTER
) {
5548 /* skip going over all entries if the smallest zset is NULL or empty */
5549 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5550 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5551 * from small to large, all src[i > 0].dict are non-empty too */
5552 di
= dictGetIterator(src
[0].dict
);
5553 while((de
= dictNext(di
)) != NULL
) {
5554 double *score
= zmalloc(sizeof(double)), value
;
5555 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5557 for (j
= 1; j
< zsetnum
; j
++) {
5558 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5560 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5561 zunionInterAggregate(score
, value
, aggregate
);
5567 /* skip entry when not present in every source dict */
5571 robj
*o
= dictGetEntryKey(de
);
5572 dictAdd(dstzset
->dict
,o
,score
);
5573 incrRefCount(o
); /* added to dictionary */
5574 zslInsert(dstzset
->zsl
,*score
,o
);
5575 incrRefCount(o
); /* added to skiplist */
5578 dictReleaseIterator(di
);
5580 } else if (op
== REDIS_OP_UNION
) {
5581 for (i
= 0; i
< zsetnum
; i
++) {
5582 if (!src
[i
].dict
) continue;
5584 di
= dictGetIterator(src
[i
].dict
);
5585 while((de
= dictNext(di
)) != NULL
) {
5586 /* skip key when already processed */
5587 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5589 double *score
= zmalloc(sizeof(double)), value
;
5590 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5592 /* because the zsets are sorted by size, it's only possible
5593 * for sets at larger indices to hold this entry */
5594 for (j
= (i
+1); j
< zsetnum
; j
++) {
5595 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5597 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5598 zunionInterAggregate(score
, value
, aggregate
);
5602 robj
*o
= dictGetEntryKey(de
);
5603 dictAdd(dstzset
->dict
,o
,score
);
5604 incrRefCount(o
); /* added to dictionary */
5605 zslInsert(dstzset
->zsl
,*score
,o
);
5606 incrRefCount(o
); /* added to skiplist */
5608 dictReleaseIterator(di
);
5611 /* unknown operator */
5612 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5615 deleteKey(c
->db
,dstkey
);
5616 if (dstzset
->zsl
->length
) {
5617 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5618 incrRefCount(dstkey
);
5619 addReplyLong(c
, dstzset
->zsl
->length
);
5622 decrRefCount(dstzset
);
5623 addReply(c
, shared
.czero
);
5628 static void zunionCommand(redisClient
*c
) {
5629 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5632 static void zinterCommand(redisClient
*c
) {
5633 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5636 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5638 int start
= atoi(c
->argv
[2]->ptr
);
5639 int end
= atoi(c
->argv
[3]->ptr
);
5648 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5650 } else if (c
->argc
>= 5) {
5651 addReply(c
,shared
.syntaxerr
);
5655 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5656 checkType(c
,o
,REDIS_ZSET
)) return;
5661 /* convert negative indexes */
5662 if (start
< 0) start
= llen
+start
;
5663 if (end
< 0) end
= llen
+end
;
5664 if (start
< 0) start
= 0;
5665 if (end
< 0) end
= 0;
5667 /* indexes sanity checks */
5668 if (start
> end
|| start
>= llen
) {
5669 /* Out of range start or start > end result in empty list */
5670 addReply(c
,shared
.emptymultibulk
);
5673 if (end
>= llen
) end
= llen
-1;
5674 rangelen
= (end
-start
)+1;
5676 /* check if starting point is trivial, before searching
5677 * the element in log(N) time */
5679 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5682 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5685 /* Return the result in form of a multi-bulk reply */
5686 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5687 withscores
? (rangelen
*2) : rangelen
));
5688 for (j
= 0; j
< rangelen
; j
++) {
5690 addReplyBulk(c
,ele
);
5692 addReplyDouble(c
,ln
->score
);
5693 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5697 static void zrangeCommand(redisClient
*c
) {
5698 zrangeGenericCommand(c
,0);
5701 static void zrevrangeCommand(redisClient
*c
) {
5702 zrangeGenericCommand(c
,1);
5705 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5706 * If justcount is non-zero, just the count is returned. */
5707 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5710 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5711 int offset
= 0, limit
= -1;
5715 /* Parse the min-max interval. If one of the values is prefixed
5716 * by the "(" character, it's considered "open". For instance
5717 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5718 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5719 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5720 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5723 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5725 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5726 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5729 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5732 /* Parse "WITHSCORES": note that if the command was called with
5733 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5734 * enter the following paths to parse WITHSCORES and LIMIT. */
5735 if (c
->argc
== 5 || c
->argc
== 8) {
5736 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5741 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5745 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5750 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5751 addReply(c
,shared
.syntaxerr
);
5753 } else if (c
->argc
== (7 + withscores
)) {
5754 offset
= atoi(c
->argv
[5]->ptr
);
5755 limit
= atoi(c
->argv
[6]->ptr
);
5756 if (offset
< 0) offset
= 0;
5759 /* Ok, lookup the key and get the range */
5760 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5762 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5764 if (o
->type
!= REDIS_ZSET
) {
5765 addReply(c
,shared
.wrongtypeerr
);
5767 zset
*zsetobj
= o
->ptr
;
5768 zskiplist
*zsl
= zsetobj
->zsl
;
5770 robj
*ele
, *lenobj
= NULL
;
5771 unsigned long rangelen
= 0;
5773 /* Get the first node with the score >= min, or with
5774 * score > min if 'minex' is true. */
5775 ln
= zslFirstWithScore(zsl
,min
);
5776 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5779 /* No element matching the speciifed interval */
5780 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5784 /* We don't know in advance how many matching elements there
5785 * are in the list, so we push this object that will represent
5786 * the multi-bulk length in the output buffer, and will "fix"
5789 lenobj
= createObject(REDIS_STRING
,NULL
);
5791 decrRefCount(lenobj
);
5794 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5797 ln
= ln
->forward
[0];
5800 if (limit
== 0) break;
5803 addReplyBulk(c
,ele
);
5805 addReplyDouble(c
,ln
->score
);
5807 ln
= ln
->forward
[0];
5809 if (limit
> 0) limit
--;
5812 addReplyLong(c
,(long)rangelen
);
5814 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5815 withscores
? (rangelen
*2) : rangelen
);
5821 static void zrangebyscoreCommand(redisClient
*c
) {
5822 genericZrangebyscoreCommand(c
,0);
5825 static void zcountCommand(redisClient
*c
) {
5826 genericZrangebyscoreCommand(c
,1);
5829 static void zcardCommand(redisClient
*c
) {
5833 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5834 checkType(c
,o
,REDIS_ZSET
)) return;
5837 addReplyUlong(c
,zs
->zsl
->length
);
5840 static void zscoreCommand(redisClient
*c
) {
5845 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5846 checkType(c
,o
,REDIS_ZSET
)) return;
5849 de
= dictFind(zs
->dict
,c
->argv
[2]);
5851 addReply(c
,shared
.nullbulk
);
5853 double *score
= dictGetEntryVal(de
);
5855 addReplyDouble(c
,*score
);
5859 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5867 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5868 checkType(c
,o
,REDIS_ZSET
)) return;
5872 de
= dictFind(zs
->dict
,c
->argv
[2]);
5874 addReply(c
,shared
.nullbulk
);
5878 score
= dictGetEntryVal(de
);
5879 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5882 addReplyLong(c
, zsl
->length
- rank
);
5884 addReplyLong(c
, rank
-1);
5887 addReply(c
,shared
.nullbulk
);
5891 static void zrankCommand(redisClient
*c
) {
5892 zrankGenericCommand(c
, 0);
5895 static void zrevrankCommand(redisClient
*c
) {
5896 zrankGenericCommand(c
, 1);
5899 /* =================================== Hashes =============================== */
5900 static void hsetCommand(redisClient
*c
) {
5902 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5905 o
= createHashObject();
5906 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5907 incrRefCount(c
->argv
[1]);
5909 if (o
->type
!= REDIS_HASH
) {
5910 addReply(c
,shared
.wrongtypeerr
);
5914 /* We want to convert the zipmap into an hash table right now if the
5915 * entry to be added is too big. Note that we check if the object
5916 * is integer encoded before to try fetching the length in the test below.
5917 * This is because integers are small, but currently stringObjectLen()
5918 * performs a slow conversion: not worth it. */
5919 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5920 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5921 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5922 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5923 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5925 convertToRealHash(o
);
5928 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5929 unsigned char *zm
= o
->ptr
;
5930 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5932 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5933 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5934 decrRefCount(valobj
);
5937 /* And here there is the second check for hash conversion...
5938 * we want to do it only if the operation was not just an update as
5939 * zipmapLen() is O(N). */
5940 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5941 convertToRealHash(o
);
5943 tryObjectEncoding(c
->argv
[2]);
5944 /* note that c->argv[3] is already encoded, as the latest arg
5945 * of a bulk command is always integer encoded if possible. */
5946 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5947 incrRefCount(c
->argv
[2]);
5951 incrRefCount(c
->argv
[3]);
5954 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5957 static void hgetCommand(redisClient
*c
) {
5960 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5961 checkType(c
,o
,REDIS_HASH
)) return;
5963 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5964 unsigned char *zm
= o
->ptr
;
5969 field
= getDecodedObject(c
->argv
[2]);
5970 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
5971 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
5972 addReplySds(c
,sdsnewlen(val
,vlen
));
5973 addReply(c
,shared
.crlf
);
5974 decrRefCount(field
);
5977 addReply(c
,shared
.nullbulk
);
5978 decrRefCount(field
);
5982 struct dictEntry
*de
;
5984 de
= dictFind(o
->ptr
,c
->argv
[2]);
5986 addReply(c
,shared
.nullbulk
);
5988 robj
*e
= dictGetEntryVal(de
);
5995 static void hdelCommand(redisClient
*c
) {
5999 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6000 checkType(c
,o
,REDIS_HASH
)) return;
6002 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6003 robj
*field
= getDecodedObject(c
->argv
[2]);
6005 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6006 (unsigned char*) field
->ptr
,
6007 sdslen(field
->ptr
), &deleted
);
6008 decrRefCount(field
);
6009 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6010 deleteKey(c
->db
,c
->argv
[1]);
6012 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6013 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6014 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6016 if (deleted
) server
.dirty
++;
6017 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6020 static void hlenCommand(redisClient
*c
) {
6024 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6025 checkType(c
,o
,REDIS_HASH
)) return;
6027 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6028 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6029 addReplyUlong(c
,len
);
6032 #define REDIS_GETALL_KEYS 1
6033 #define REDIS_GETALL_VALS 2
6034 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6036 unsigned long count
= 0;
6038 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6039 || checkType(c
,o
,REDIS_HASH
)) return;
6041 lenobj
= createObject(REDIS_STRING
,NULL
);
6043 decrRefCount(lenobj
);
6045 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6046 unsigned char *p
= zipmapRewind(o
->ptr
);
6047 unsigned char *field
, *val
;
6048 unsigned int flen
, vlen
;
6050 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6053 if (flags
& REDIS_GETALL_KEYS
) {
6054 aux
= createStringObject((char*)field
,flen
);
6055 addReplyBulk(c
,aux
);
6059 if (flags
& REDIS_GETALL_VALS
) {
6060 aux
= createStringObject((char*)val
,vlen
);
6061 addReplyBulk(c
,aux
);
6067 dictIterator
*di
= dictGetIterator(o
->ptr
);
6070 while((de
= dictNext(di
)) != NULL
) {
6071 robj
*fieldobj
= dictGetEntryKey(de
);
6072 robj
*valobj
= dictGetEntryVal(de
);
6074 if (flags
& REDIS_GETALL_KEYS
) {
6075 addReplyBulk(c
,fieldobj
);
6078 if (flags
& REDIS_GETALL_VALS
) {
6079 addReplyBulk(c
,valobj
);
6083 dictReleaseIterator(di
);
6085 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6088 static void hkeysCommand(redisClient
*c
) {
6089 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6092 static void hvalsCommand(redisClient
*c
) {
6093 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6096 static void hgetallCommand(redisClient
*c
) {
6097 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6100 static void hexistsCommand(redisClient
*c
) {
6104 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6105 checkType(c
,o
,REDIS_HASH
)) return;
6107 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6109 unsigned char *zm
= o
->ptr
;
6111 field
= getDecodedObject(c
->argv
[2]);
6112 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6113 decrRefCount(field
);
6115 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6117 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6120 static void convertToRealHash(robj
*o
) {
6121 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6122 unsigned int klen
, vlen
;
6123 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6125 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6126 p
= zipmapRewind(zm
);
6127 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6128 robj
*keyobj
, *valobj
;
6130 keyobj
= createStringObject((char*)key
,klen
);
6131 valobj
= createStringObject((char*)val
,vlen
);
6132 tryObjectEncoding(keyobj
);
6133 tryObjectEncoding(valobj
);
6134 dictAdd(dict
,keyobj
,valobj
);
6136 o
->encoding
= REDIS_ENCODING_HT
;
6141 /* ========================= Non type-specific commands ==================== */
6143 static void flushdbCommand(redisClient
*c
) {
6144 server
.dirty
+= dictSize(c
->db
->dict
);
6145 dictEmpty(c
->db
->dict
);
6146 dictEmpty(c
->db
->expires
);
6147 addReply(c
,shared
.ok
);
6150 static void flushallCommand(redisClient
*c
) {
6151 server
.dirty
+= emptyDb();
6152 addReply(c
,shared
.ok
);
6153 if (server
.bgsavechildpid
!= -1) {
6154 kill(server
.bgsavechildpid
,SIGKILL
);
6155 rdbRemoveTempFile(server
.bgsavechildpid
);
6157 rdbSave(server
.dbfilename
);
6161 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6162 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6164 so
->pattern
= pattern
;
6168 /* Return the value associated to the key with a name obtained
6169 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
6170 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6174 int prefixlen
, sublen
, postfixlen
;
6175 /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */
6179 char buf
[REDIS_SORTKEY_MAX
+1];
6182 /* If the pattern is "#" return the substitution object itself in order
6183 * to implement the "SORT ... GET #" feature. */
6184 spat
= pattern
->ptr
;
6185 if (spat
[0] == '#' && spat
[1] == '\0') {
6189 /* The substitution object may be specially encoded. If so we create
6190 * a decoded object on the fly. Otherwise getDecodedObject will just
6191 * increment the ref count, that we'll decrement later. */
6192 subst
= getDecodedObject(subst
);
6195 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6196 p
= strchr(spat
,'*');
6198 decrRefCount(subst
);
6203 sublen
= sdslen(ssub
);
6204 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6205 memcpy(keyname
.buf
,spat
,prefixlen
);
6206 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6207 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6208 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6209 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6211 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6212 decrRefCount(subst
);
6214 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6215 return lookupKeyRead(db
,&keyobj
);
6218 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6219 * the additional parameter is not standard but a BSD-specific extension, we
6220 * have to pass sorting parameters via the global 'server' structure */
6221 static int sortCompare(const void *s1
, const void *s2
) {
6222 const redisSortObject
*so1
= s1
, *so2
= s2
;
6225 if (!server
.sort_alpha
) {
6226 /* Numeric sorting. Here it's trivial as we precomputed scores */
6227 if (so1
->u
.score
> so2
->u
.score
) {
6229 } else if (so1
->u
.score
< so2
->u
.score
) {
6235 /* Alphanumeric sorting */
6236 if (server
.sort_bypattern
) {
6237 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6238 /* At least one compare object is NULL */
6239 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6241 else if (so1
->u
.cmpobj
== NULL
)
6246 /* We have both the objects, use strcoll */
6247 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6250 /* Compare elements directly */
6253 dec1
= getDecodedObject(so1
->obj
);
6254 dec2
= getDecodedObject(so2
->obj
);
6255 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6260 return server
.sort_desc
? -cmp
: cmp
;
6263 /* The SORT command is the most complex command in Redis. Warning: this code
6264 * is optimized for speed and a bit less for readability */
6265 static void sortCommand(redisClient
*c
) {
6268 int desc
= 0, alpha
= 0;
6269 int limit_start
= 0, limit_count
= -1, start
, end
;
6270 int j
, dontsort
= 0, vectorlen
;
6271 int getop
= 0; /* GET operation counter */
6272 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6273 redisSortObject
*vector
; /* Resulting vector to sort */
6275 /* Lookup the key to sort. It must be of the right types */
6276 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6277 if (sortval
== NULL
) {
6278 addReply(c
,shared
.nullmultibulk
);
6281 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6282 sortval
->type
!= REDIS_ZSET
)
6284 addReply(c
,shared
.wrongtypeerr
);
6288 /* Create a list of operations to perform for every sorted element.
6289 * Operations can be GET/DEL/INCR/DECR */
6290 operations
= listCreate();
6291 listSetFreeMethod(operations
,zfree
);
6294 /* Now we need to protect sortval incrementing its count, in the future
6295 * SORT may have options able to overwrite/delete keys during the sorting
6296 * and the sorted key itself may get destroyed */
6297 incrRefCount(sortval
);
6299 /* The SORT command has an SQL-alike syntax, parse it */
6300 while(j
< c
->argc
) {
6301 int leftargs
= c
->argc
-j
-1;
6302 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6304 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6306 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6308 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6309 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6310 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6312 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6313 storekey
= c
->argv
[j
+1];
6315 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6316 sortby
= c
->argv
[j
+1];
6317 /* If the BY pattern does not contain '*', i.e. it is constant,
6318 * we don't need to sort nor to lookup the weight keys. */
6319 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6321 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6322 listAddNodeTail(operations
,createSortOperation(
6323 REDIS_SORT_GET
,c
->argv
[j
+1]));
6327 decrRefCount(sortval
);
6328 listRelease(operations
);
6329 addReply(c
,shared
.syntaxerr
);
6335 /* Load the sorting vector with all the objects to sort */
6336 switch(sortval
->type
) {
6337 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6338 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6339 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6340 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6342 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6345 if (sortval
->type
== REDIS_LIST
) {
6346 list
*list
= sortval
->ptr
;
6350 listRewind(list
,&li
);
6351 while((ln
= listNext(&li
))) {
6352 robj
*ele
= ln
->value
;
6353 vector
[j
].obj
= ele
;
6354 vector
[j
].u
.score
= 0;
6355 vector
[j
].u
.cmpobj
= NULL
;
6363 if (sortval
->type
== REDIS_SET
) {
6366 zset
*zs
= sortval
->ptr
;
6370 di
= dictGetIterator(set
);
6371 while((setele
= dictNext(di
)) != NULL
) {
6372 vector
[j
].obj
= dictGetEntryKey(setele
);
6373 vector
[j
].u
.score
= 0;
6374 vector
[j
].u
.cmpobj
= NULL
;
6377 dictReleaseIterator(di
);
6379 redisAssert(j
== vectorlen
);
6381 /* Now it's time to load the right scores in the sorting vector */
6382 if (dontsort
== 0) {
6383 for (j
= 0; j
< vectorlen
; j
++) {
6387 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6388 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6390 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6392 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6393 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6395 /* Don't need to decode the object if it's
6396 * integer-encoded (the only encoding supported) so
6397 * far. We can just cast it */
6398 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6399 vector
[j
].u
.score
= (long)byval
->ptr
;
6401 redisAssert(1 != 1);
6406 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6407 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6409 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6410 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6412 redisAssert(1 != 1);
6419 /* We are ready to sort the vector... perform a bit of sanity check
6420 * on the LIMIT option too. We'll use a partial version of quicksort. */
6421 start
= (limit_start
< 0) ? 0 : limit_start
;
6422 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6423 if (start
>= vectorlen
) {
6424 start
= vectorlen
-1;
6427 if (end
>= vectorlen
) end
= vectorlen
-1;
6429 if (dontsort
== 0) {
6430 server
.sort_desc
= desc
;
6431 server
.sort_alpha
= alpha
;
6432 server
.sort_bypattern
= sortby
? 1 : 0;
6433 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6434 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6436 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6439 /* Send command output to the output buffer, performing the specified
6440 * GET/DEL/INCR/DECR operations if any. */
6441 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6442 if (storekey
== NULL
) {
6443 /* STORE option not specified, sent the sorting result to client */
6444 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6445 for (j
= start
; j
<= end
; j
++) {
6449 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6450 listRewind(operations
,&li
);
6451 while((ln
= listNext(&li
))) {
6452 redisSortOperation
*sop
= ln
->value
;
6453 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6456 if (sop
->type
== REDIS_SORT_GET
) {
6457 if (!val
|| val
->type
!= REDIS_STRING
) {
6458 addReply(c
,shared
.nullbulk
);
6460 addReplyBulk(c
,val
);
6463 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6468 robj
*listObject
= createListObject();
6469 list
*listPtr
= (list
*) listObject
->ptr
;
6471 /* STORE option specified, set the sorting result as a List object */
6472 for (j
= start
; j
<= end
; j
++) {
6477 listAddNodeTail(listPtr
,vector
[j
].obj
);
6478 incrRefCount(vector
[j
].obj
);
6480 listRewind(operations
,&li
);
6481 while((ln
= listNext(&li
))) {
6482 redisSortOperation
*sop
= ln
->value
;
6483 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6486 if (sop
->type
== REDIS_SORT_GET
) {
6487 if (!val
|| val
->type
!= REDIS_STRING
) {
6488 listAddNodeTail(listPtr
,createStringObject("",0));
6490 listAddNodeTail(listPtr
,val
);
6494 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6498 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6499 incrRefCount(storekey
);
6501 /* Note: we add 1 because the DB is dirty anyway since even if the
6502 * SORT result is empty a new key is set and maybe the old content
6504 server
.dirty
+= 1+outputlen
;
6505 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6509 decrRefCount(sortval
);
6510 listRelease(operations
);
6511 for (j
= 0; j
< vectorlen
; j
++) {
6512 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6513 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' receives the NUL-terminated result; 'n' is the amount in bytes.
 * Values below 1024 are printed as an integer followed by 'B'; larger
 * values are divided by the matching power of 1024 and printed with two
 * decimals followed by K, M, G, T or P.
 *
 * Every value of 'n' writes something into 's': amounts of 1 TiB and
 * above used to fall through all branches and leave the buffer untouched.
 * The longest possible output is 9 characters plus the terminator
 * ("16384.00P"); the caller must supply a buffer at least that large. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    }
}
6539 /* Create the string returned by the INFO command. This is decoupled
6540 * from the INFO command itself as we need to report the same information
6541 * on memory corruption problems. */
6542 static sds
genRedisInfoString(void) {
6544 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6548 bytesToHuman(hmem
,zmalloc_used_memory());
6549 info
= sdscatprintf(sdsempty(),
6550 "redis_version:%s\r\n"
6552 "multiplexing_api:%s\r\n"
6553 "process_id:%ld\r\n"
6554 "uptime_in_seconds:%ld\r\n"
6555 "uptime_in_days:%ld\r\n"
6556 "connected_clients:%d\r\n"
6557 "connected_slaves:%d\r\n"
6558 "blocked_clients:%d\r\n"
6559 "used_memory:%zu\r\n"
6560 "used_memory_human:%s\r\n"
6561 "changes_since_last_save:%lld\r\n"
6562 "bgsave_in_progress:%d\r\n"
6563 "last_save_time:%ld\r\n"
6564 "bgrewriteaof_in_progress:%d\r\n"
6565 "total_connections_received:%lld\r\n"
6566 "total_commands_processed:%lld\r\n"
6567 "expired_keys:%lld\r\n"
6568 "hash_max_zipmap_entries:%ld\r\n"
6569 "hash_max_zipmap_value:%ld\r\n"
6573 (sizeof(long) == 8) ? "64" : "32",
6578 listLength(server
.clients
)-listLength(server
.slaves
),
6579 listLength(server
.slaves
),
6580 server
.blpop_blocked_clients
,
6581 zmalloc_used_memory(),
6584 server
.bgsavechildpid
!= -1,
6586 server
.bgrewritechildpid
!= -1,
6587 server
.stat_numconnections
,
6588 server
.stat_numcommands
,
6589 server
.stat_expiredkeys
,
6590 server
.hash_max_zipmap_entries
,
6591 server
.hash_max_zipmap_value
,
6592 server
.vm_enabled
!= 0,
6593 server
.masterhost
== NULL
? "master" : "slave"
6595 if (server
.masterhost
) {
6596 info
= sdscatprintf(info
,
6597 "master_host:%s\r\n"
6598 "master_port:%d\r\n"
6599 "master_link_status:%s\r\n"
6600 "master_last_io_seconds_ago:%d\r\n"
6603 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6605 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6608 if (server
.vm_enabled
) {
6610 info
= sdscatprintf(info
,
6611 "vm_conf_max_memory:%llu\r\n"
6612 "vm_conf_page_size:%llu\r\n"
6613 "vm_conf_pages:%llu\r\n"
6614 "vm_stats_used_pages:%llu\r\n"
6615 "vm_stats_swapped_objects:%llu\r\n"
6616 "vm_stats_swappin_count:%llu\r\n"
6617 "vm_stats_swappout_count:%llu\r\n"
6618 "vm_stats_io_newjobs_len:%lu\r\n"
6619 "vm_stats_io_processing_len:%lu\r\n"
6620 "vm_stats_io_processed_len:%lu\r\n"
6621 "vm_stats_io_active_threads:%lu\r\n"
6622 "vm_stats_blocked_clients:%lu\r\n"
6623 ,(unsigned long long) server
.vm_max_memory
,
6624 (unsigned long long) server
.vm_page_size
,
6625 (unsigned long long) server
.vm_pages
,
6626 (unsigned long long) server
.vm_stats_used_pages
,
6627 (unsigned long long) server
.vm_stats_swapped_objects
,
6628 (unsigned long long) server
.vm_stats_swapins
,
6629 (unsigned long long) server
.vm_stats_swapouts
,
6630 (unsigned long) listLength(server
.io_newjobs
),
6631 (unsigned long) listLength(server
.io_processing
),
6632 (unsigned long) listLength(server
.io_processed
),
6633 (unsigned long) server
.io_active_threads
,
6634 (unsigned long) server
.vm_blocked_clients
6638 for (j
= 0; j
< server
.dbnum
; j
++) {
6639 long long keys
, vkeys
;
6641 keys
= dictSize(server
.db
[j
].dict
);
6642 vkeys
= dictSize(server
.db
[j
].expires
);
6643 if (keys
|| vkeys
) {
6644 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6651 static void infoCommand(redisClient
*c
) {
6652 sds info
= genRedisInfoString();
6653 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6654 (unsigned long)sdslen(info
)));
6655 addReplySds(c
,info
);
6656 addReply(c
,shared
.crlf
);
6659 static void monitorCommand(redisClient
*c
) {
6660 /* ignore MONITOR if already slave or in monitor mode */
6661 if (c
->flags
& REDIS_SLAVE
) return;
6663 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6665 listAddNodeTail(server
.monitors
,c
);
6666 addReply(c
,shared
.ok
);
6669 /* ================================= Expire ================================= */
6670 static int removeExpire(redisDb
*db
, robj
*key
) {
6671 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6678 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6679 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6687 /* Return the expire time of the specified key, or -1 if no expire
6688 * is associated with this key (i.e. the key is non volatile) */
6689 static time_t getExpire(redisDb
*db
, robj
*key
) {
6692 /* No expire? return ASAP */
6693 if (dictSize(db
->expires
) == 0 ||
6694 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6696 return (time_t) dictGetEntryVal(de
);
6699 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6703 /* No expire? return ASAP */
6704 if (dictSize(db
->expires
) == 0 ||
6705 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6707 /* Lookup the expire */
6708 when
= (time_t) dictGetEntryVal(de
);
6709 if (time(NULL
) <= when
) return 0;
6711 /* Delete the key */
6712 dictDelete(db
->expires
,key
);
6713 server
.stat_expiredkeys
++;
6714 return dictDelete(db
->dict
,key
) == DICT_OK
;
6717 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6720 /* No expire? return ASAP */
6721 if (dictSize(db
->expires
) == 0 ||
6722 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6724 /* Delete the key */
6726 server
.stat_expiredkeys
++;
6727 dictDelete(db
->expires
,key
);
6728 return dictDelete(db
->dict
,key
) == DICT_OK
;
6731 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6734 de
= dictFind(c
->db
->dict
,key
);
6736 addReply(c
,shared
.czero
);
6740 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6741 addReply(c
, shared
.cone
);
6744 time_t when
= time(NULL
)+seconds
;
6745 if (setExpire(c
->db
,key
,when
)) {
6746 addReply(c
,shared
.cone
);
6749 addReply(c
,shared
.czero
);
6755 static void expireCommand(redisClient
*c
) {
6756 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6759 static void expireatCommand(redisClient
*c
) {
6760 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6763 static void ttlCommand(redisClient
*c
) {
6767 expire
= getExpire(c
->db
,c
->argv
[1]);
6769 ttl
= (int) (expire
-time(NULL
));
6770 if (ttl
< 0) ttl
= -1;
6772 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6775 /* ================================ MULTI/EXEC ============================== */
6777 /* Client state initialization for MULTI/EXEC */
6778 static void initClientMultiState(redisClient
*c
) {
6779 c
->mstate
.commands
= NULL
;
6780 c
->mstate
.count
= 0;
6783 /* Release all the resources associated with MULTI/EXEC state */
6784 static void freeClientMultiState(redisClient
*c
) {
6787 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6789 multiCmd
*mc
= c
->mstate
.commands
+j
;
6791 for (i
= 0; i
< mc
->argc
; i
++)
6792 decrRefCount(mc
->argv
[i
]);
6795 zfree(c
->mstate
.commands
);
6798 /* Add a new command into the MULTI commands queue */
6799 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6803 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6804 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6805 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6808 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6809 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6810 for (j
= 0; j
< c
->argc
; j
++)
6811 incrRefCount(mc
->argv
[j
]);
6815 static void multiCommand(redisClient
*c
) {
6816 c
->flags
|= REDIS_MULTI
;
6817 addReply(c
,shared
.ok
);
6820 static void discardCommand(redisClient
*c
) {
6821 if (!(c
->flags
& REDIS_MULTI
)) {
6822 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6826 freeClientMultiState(c
);
6827 initClientMultiState(c
);
6828 c
->flags
&= (~REDIS_MULTI
);
6829 addReply(c
,shared
.ok
);
6832 static void execCommand(redisClient
*c
) {
6837 if (!(c
->flags
& REDIS_MULTI
)) {
6838 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6842 orig_argv
= c
->argv
;
6843 orig_argc
= c
->argc
;
6844 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6845 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6846 c
->argc
= c
->mstate
.commands
[j
].argc
;
6847 c
->argv
= c
->mstate
.commands
[j
].argv
;
6848 call(c
,c
->mstate
.commands
[j
].cmd
);
6850 c
->argv
= orig_argv
;
6851 c
->argc
= orig_argc
;
6852 freeClientMultiState(c
);
6853 initClientMultiState(c
);
6854 c
->flags
&= (~REDIS_MULTI
);
6857 /* =========================== Blocking Operations ========================= */
6859 /* Currently Redis blocking operations support is limited to list POP ops,
6860 * so the current implementation is not fully generic, but it is also not
6861 * completely specific so it will not require a rewrite to support new
6862 * kind of blocking operations in the future.
6864 * Still it's important to note that list blocking operations can be already
6865 * used as a notification mechanism in order to implement other blocking
6866 * operations at application level, so there must be a very strong evidence
6867 * of usefulness and generality before new blocking operations are implemented.
6869 * This is how the current blocking POP works, we use BLPOP as example:
6870 * - If the user calls BLPOP and the key exists and contains a non-empty list
6871 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6872 * if there is no need to block.
6873 * - If instead BLPOP is called and the key does not exist or the list is
6874 * empty we need to block. In order to do so we remove the notification for
6875 * new data to read in the client socket (so that we'll not serve new
6876 * requests if the blocking request is not served). Also we put the client
6877 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6878 * blocked on those keys.
6879 * - If a PUSH operation against a key with blocked clients waiting is
6880 * performed, we serve the first in the list: basically instead of pushing
6881 * the new element inside the list we return it to the (first / oldest)
6882 * blocking client, unblock the client, and remove it from the list.
6884 * The above comment and the source code should be enough in order to understand
6885 * the implementation and modify / fix it later.
6888 /* Set a client in blocking mode for the specified key, with the specified
6890 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6895 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6896 c
->blockingkeysnum
= numkeys
;
6897 c
->blockingto
= timeout
;
6898 for (j
= 0; j
< numkeys
; j
++) {
6899 /* Add the key in the client structure, to map clients -> keys */
6900 c
->blockingkeys
[j
] = keys
[j
];
6901 incrRefCount(keys
[j
]);
6903 /* And in the other "side", to map keys -> clients */
6904 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
6908 /* For every key we take a list of clients blocked for it */
6910 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
6911 incrRefCount(keys
[j
]);
6912 assert(retval
== DICT_OK
);
6914 l
= dictGetEntryVal(de
);
6916 listAddNodeTail(l
,c
);
6918 /* Mark the client as a blocked client */
6919 c
->flags
|= REDIS_BLOCKED
;
6920 server
.blpop_blocked_clients
++;
6923 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
6924 static void unblockClientWaitingData(redisClient
*c
) {
6929 assert(c
->blockingkeys
!= NULL
);
6930 /* The client may wait for multiple keys, so unblock it for every key. */
6931 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
6932 /* Remove this client from the list of clients waiting for this key. */
6933 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6935 l
= dictGetEntryVal(de
);
6936 listDelNode(l
,listSearchKey(l
,c
));
6937 /* If the list is empty we need to remove it to avoid wasting memory */
6938 if (listLength(l
) == 0)
6939 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6940 decrRefCount(c
->blockingkeys
[j
]);
6942 /* Cleanup the client structure */
6943 zfree(c
->blockingkeys
);
6944 c
->blockingkeys
= NULL
;
6945 c
->flags
&= (~REDIS_BLOCKED
);
6946 server
.blpop_blocked_clients
--;
6947 /* We want to process data if there is some command waiting
6948 * in the input buffer. Note that this is safe even if
6949 * unblockClientWaitingData() gets called from freeClient() because
6950 * freeClient() will be smart enough to call this function
6951 * *after* c->querybuf was set to NULL. */
6952 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
6955 /* This should be called from any function PUSHing into lists.
6956 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
6957 * 'ele' is the element pushed.
6959 * If the function returns 0 there was no client waiting for a list push
6962 * If the function returns 1 there was a client waiting for a list push
6963 * against this key, the element was passed to this client thus it's not
6964 * needed to actually add it to the list and the caller should return asap. */
6965 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
6966 struct dictEntry
*de
;
6967 redisClient
*receiver
;
6971 de
= dictFind(c
->db
->blockingkeys
,key
);
6972 if (de
== NULL
) return 0;
6973 l
= dictGetEntryVal(de
);
6976 receiver
= ln
->value
;
6978 addReplySds(receiver
,sdsnew("*2\r\n"));
6979 addReplyBulk(receiver
,key
);
6980 addReplyBulk(receiver
,ele
);
6981 unblockClientWaitingData(receiver
);
6985 /* Blocking RPOP/LPOP */
6986 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
6991 for (j
= 1; j
< c
->argc
-1; j
++) {
6992 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6994 if (o
->type
!= REDIS_LIST
) {
6995 addReply(c
,shared
.wrongtypeerr
);
6998 list
*list
= o
->ptr
;
6999 if (listLength(list
) != 0) {
7000 /* If the list contains elements fall back to the usual
7001 * non-blocking POP operation */
7002 robj
*argv
[2], **orig_argv
;
7005 /* We need to alter the command arguments before to call
7006 * popGenericCommand() as the command takes a single key. */
7007 orig_argv
= c
->argv
;
7008 orig_argc
= c
->argc
;
7009 argv
[1] = c
->argv
[j
];
7013 /* Also the return value is different, we need to output
7014 * the multi bulk reply header and the key name. The
7015 * "real" command will add the last element (the value)
7016 * for us. If this sounds like a hack to you it's just
7017 * because it is... */
7018 addReplySds(c
,sdsnew("*2\r\n"));
7019 addReplyBulk(c
,argv
[1]);
7020 popGenericCommand(c
,where
);
7022 /* Fix the client structure with the original stuff */
7023 c
->argv
= orig_argv
;
7024 c
->argc
= orig_argc
;
7030 /* If the list is empty or the key does not exists we must block */
7031 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7032 if (timeout
> 0) timeout
+= time(NULL
);
7033 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7036 static void blpopCommand(redisClient
*c
) {
7037 blockingPopGenericCommand(c
,REDIS_HEAD
);
7040 static void brpopCommand(redisClient
*c
) {
7041 blockingPopGenericCommand(c
,REDIS_TAIL
);
7044 /* =============================== Replication ============================= */
7046 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7047 ssize_t nwritten
, ret
= size
;
7048 time_t start
= time(NULL
);
7052 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7053 nwritten
= write(fd
,ptr
,size
);
7054 if (nwritten
== -1) return -1;
7058 if ((time(NULL
)-start
) > timeout
) {
7066 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7067 ssize_t nread
, totread
= 0;
7068 time_t start
= time(NULL
);
7072 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7073 nread
= read(fd
,ptr
,size
);
7074 if (nread
== -1) return -1;
7079 if ((time(NULL
)-start
) > timeout
) {
7087 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7094 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7097 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7108 static void syncCommand(redisClient
*c
) {
7109 /* ignore SYNC if already slave or in monitor mode */
7110 if (c
->flags
& REDIS_SLAVE
) return;
7112 /* SYNC can't be issued when the server has pending data to send to
7113 * the client about already issued commands. We need a fresh reply
7114 * buffer registering the differences between the BGSAVE and the current
7115 * dataset, so that we can copy to other slaves if needed. */
7116 if (listLength(c
->reply
) != 0) {
7117 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7121 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7122 /* Here we need to check if there is a background saving operation
7123 * in progress, or if it is required to start one */
7124 if (server
.bgsavechildpid
!= -1) {
7125 /* Ok a background save is in progress. Let's check if it is a good
7126 * one for replication, i.e. if there is another slave that is
7127 * registering differences since the server forked to save */
7132 listRewind(server
.slaves
,&li
);
7133 while((ln
= listNext(&li
))) {
7135 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7138 /* Perfect, the server is already registering differences for
7139 * another slave. Set the right state, and copy the buffer. */
7140 listRelease(c
->reply
);
7141 c
->reply
= listDup(slave
->reply
);
7142 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7143 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7145 /* No way, we need to wait for the next BGSAVE in order to
7146 * register differences */
7147 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7148 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7151 /* Ok we don't have a BGSAVE in progress, let's start one */
7152 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7153 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7154 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7155 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7158 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7161 c
->flags
|= REDIS_SLAVE
;
7163 listAddNodeTail(server
.slaves
,c
);
7167 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7168 redisClient
*slave
= privdata
;
7170 REDIS_NOTUSED(mask
);
7171 char buf
[REDIS_IOBUF_LEN
];
7172 ssize_t nwritten
, buflen
;
7174 if (slave
->repldboff
== 0) {
7175 /* Write the bulk write count before transferring the DB. In theory here
7176 * we don't know how much room there is in the output buffer of the
7177 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7178 * operations) will never be smaller than the few bytes we need. */
7181 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7183 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7191 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7192 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7194 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7195 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7199 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7200 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7205 slave
->repldboff
+= nwritten
;
7206 if (slave
->repldboff
== slave
->repldbsize
) {
7207 close(slave
->repldbfd
);
7208 slave
->repldbfd
= -1;
7209 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7210 slave
->replstate
= REDIS_REPL_ONLINE
;
7211 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7212 sendReplyToClient
, slave
) == AE_ERR
) {
7216 addReplySds(slave
,sdsempty());
7217 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7221 /* This function is called at the end of every background saving.
7222 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7223 * otherwise REDIS_ERR is passed to the function.
7225 * The goal of this function is to handle slaves waiting for a successful
7226 * background saving in order to perform non-blocking synchronization. */
7227 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7229 int startbgsave
= 0;
7232 listRewind(server
.slaves
,&li
);
7233 while((ln
= listNext(&li
))) {
7234 redisClient
*slave
= ln
->value
;
7236 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7238 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7239 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7240 struct redis_stat buf
;
7242 if (bgsaveerr
!= REDIS_OK
) {
7244 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7247 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7248 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7250 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7253 slave
->repldboff
= 0;
7254 slave
->repldbsize
= buf
.st_size
;
7255 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7256 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7257 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7264 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7267 listRewind(server
.slaves
,&li
);
7268 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7269 while((ln
= listNext(&li
))) {
7270 redisClient
*slave
= ln
->value
;
7272 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7279 static int syncWithMaster(void) {
7280 char buf
[1024], tmpfile
[256], authcmd
[1024];
7282 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7283 int dfd
, maxtries
= 5;
7286 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7291 /* AUTH with the master if required. */
7292 if(server
.masterauth
) {
7293 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7294 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7296 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7300 /* Read the AUTH result. */
7301 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7303 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7307 if (buf
[0] != '+') {
7309 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7314 /* Issue the SYNC command */
7315 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7317 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7321 /* Read the bulk write count */
7322 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7324 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7328 if (buf
[0] != '$') {
7330 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7333 dumpsize
= strtol(buf
+1,NULL
,10);
7334 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7335 /* Read the bulk write data on a temp file */
7337 snprintf(tmpfile
,256,
7338 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7339 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7340 if (dfd
!= -1) break;
7345 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7349 int nread
, nwritten
;
7351 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7353 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7359 nwritten
= write(dfd
,buf
,nread
);
7360 if (nwritten
== -1) {
7361 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7369 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7370 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7376 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7377 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7381 server
.master
= createClient(fd
);
7382 server
.master
->flags
|= REDIS_MASTER
;
7383 server
.master
->authenticated
= 1;
7384 server
.replstate
= REDIS_REPL_CONNECTED
;
7388 static void slaveofCommand(redisClient
*c
) {
7389 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7390 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7391 if (server
.masterhost
) {
7392 sdsfree(server
.masterhost
);
7393 server
.masterhost
= NULL
;
7394 if (server
.master
) freeClient(server
.master
);
7395 server
.replstate
= REDIS_REPL_NONE
;
7396 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7399 sdsfree(server
.masterhost
);
7400 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7401 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7402 if (server
.master
) freeClient(server
.master
);
7403 server
.replstate
= REDIS_REPL_CONNECT
;
7404 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7405 server
.masterhost
, server
.masterport
);
7407 addReply(c
,shared
.ok
);
7410 /* ============================ Maxmemory directive ======================== */
7412 /* Try to free one object from the pre-allocated objects free list.
7413 * This is useful under low mem conditions as by default we take 1 million
7414 * free objects allocated. On success REDIS_OK is returned, otherwise
7416 static int tryFreeOneObjectFromFreelist(void) {
7419 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7420 if (listLength(server
.objfreelist
)) {
7421 listNode
*head
= listFirst(server
.objfreelist
);
7422 o
= listNodeValue(head
);
7423 listDelNode(server
.objfreelist
,head
);
7424 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7428 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7433 /* This function gets called when 'maxmemory' is set on the config file to limit
7434 * the max memory used by the server, and we are out of memory.
7435 * This function will try to, in order:
7437 * - Free objects from the free list
7438 * - Try to remove keys with an EXPIRE set
7440 * If it is not possible to free enough memory to reach used-memory < maxmemory
7441 * the server will start refusing commands that will enlarge even more the
7444 static void freeMemoryIfNeeded(void) {
7445 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7446 int j
, k
, freed
= 0;
7448 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7449 for (j
= 0; j
< server
.dbnum
; j
++) {
7451 robj
*minkey
= NULL
;
7452 struct dictEntry
*de
;
7454 if (dictSize(server
.db
[j
].expires
)) {
7456 /* From a sample of three keys drop the one nearest to
7457 * the natural expire */
7458 for (k
= 0; k
< 3; k
++) {
7461 de
= dictGetRandomKey(server
.db
[j
].expires
);
7462 t
= (time_t) dictGetEntryVal(de
);
7463 if (minttl
== -1 || t
< minttl
) {
7464 minkey
= dictGetEntryKey(de
);
7468 deleteKey(server
.db
+j
,minkey
);
7471 if (!freed
) return; /* nothing to free... */
7475 /* ============================== Append Only file ========================== */
7477 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7478 sds buf
= sdsempty();
7484 /* The DB this command was targeting is not the same as the last command
7485 * we appended. A SELECT command must be issued first. */
7486 if (dictid
!= server
.appendseldb
) {
7489 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7490 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7491 (unsigned long)strlen(seldb
),seldb
);
7492 server
.appendseldb
= dictid
;
7495 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7496 * EXPIREs into EXPIREATs calls */
7497 if (cmd
->proc
== expireCommand
) {
7500 tmpargv
[0] = createStringObject("EXPIREAT",8);
7501 tmpargv
[1] = argv
[1];
7502 incrRefCount(argv
[1]);
7503 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7504 tmpargv
[2] = createObject(REDIS_STRING
,
7505 sdscatprintf(sdsempty(),"%ld",when
));
7509 /* Append the actual command */
7510 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7511 for (j
= 0; j
< argc
; j
++) {
7514 o
= getDecodedObject(o
);
7515 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7516 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7517 buf
= sdscatlen(buf
,"\r\n",2);
7521 /* Free the objects from the modified argv for EXPIREAT */
7522 if (cmd
->proc
== expireCommand
) {
7523 for (j
= 0; j
< 3; j
++)
7524 decrRefCount(argv
[j
]);
7527 /* We want to perform a single write. This should be guaranteed atomic
7528 * at least if the filesystem we are writing is a real physical one.
7529 * While this will save us against the server being killed I don't think
7530 * there is much to do about the whole server stopping for power problems
7532 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7533 if (nwritten
!= (signed)sdslen(buf
)) {
7534 /* Ooops, we are in troubles. The best thing to do for now is
7535 * to simply exit instead to give the illusion that everything is
7536 * working as expected. */
7537 if (nwritten
== -1) {
7538 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7540 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7544 /* If a background append only file rewriting is in progress we want to
7545 * accumulate the differences between the child DB and the current one
7546 * in a buffer, so that when the child process will do its work we
7547 * can append the differences to the new append only file. */
7548 if (server
.bgrewritechildpid
!= -1)
7549 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7553 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7554 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7555 now
-server
.lastfsync
> 1))
7557 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7558 server
.lastfsync
= now
;
7562 /* In Redis commands are always executed in the context of a client, so in
7563 * order to load the append only file we need to create a fake client. */
7564 static struct redisClient
*createFakeClient(void) {
7565 struct redisClient
*c
= zmalloc(sizeof(*c
));
7569 c
->querybuf
= sdsempty();
7573 /* We set the fake client as a slave waiting for the synchronization
7574 * so that Redis will not try to send replies to this client. */
7575 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7576 c
->reply
= listCreate();
7577 listSetFreeMethod(c
->reply
,decrRefCount
);
7578 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7582 static void freeFakeClient(struct redisClient
*c
) {
7583 sdsfree(c
->querybuf
);
7584 listRelease(c
->reply
);
7588 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7589 * error (the append only file is zero-length) REDIS_ERR is returned. On
7590 * fatal error an error message is logged and the program exits. */
7591 int loadAppendOnlyFile(char *filename
) {
7592 struct redisClient
*fakeClient
;
7593 FILE *fp
= fopen(filename
,"r");
7594 struct redis_stat sb
;
7595 unsigned long long loadedkeys
= 0;
7597 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7601 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7605 fakeClient
= createFakeClient();
7612 struct redisCommand
*cmd
;
7614 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7620 if (buf
[0] != '*') goto fmterr
;
7622 argv
= zmalloc(sizeof(robj
*)*argc
);
7623 for (j
= 0; j
< argc
; j
++) {
7624 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7625 if (buf
[0] != '$') goto fmterr
;
7626 len
= strtol(buf
+1,NULL
,10);
7627 argsds
= sdsnewlen(NULL
,len
);
7628 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7629 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7630 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7633 /* Command lookup */
7634 cmd
= lookupCommand(argv
[0]->ptr
);
7636 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7639 /* Try object sharing and encoding */
7640 if (server
.shareobjects
) {
7642 for(j
= 1; j
< argc
; j
++)
7643 argv
[j
] = tryObjectSharing(argv
[j
]);
7645 if (cmd
->flags
& REDIS_CMD_BULK
)
7646 tryObjectEncoding(argv
[argc
-1]);
7647 /* Run the command in the context of a fake client */
7648 fakeClient
->argc
= argc
;
7649 fakeClient
->argv
= argv
;
7650 cmd
->proc(fakeClient
);
7651 /* Discard the reply objects list from the fake client */
7652 while(listLength(fakeClient
->reply
))
7653 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7654 /* Clean up, ready for the next command */
7655 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7657 /* Handle swapping while loading big datasets when VM is on */
7659 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7660 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7661 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7666 freeFakeClient(fakeClient
);
7671 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7673 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7677 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7681 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7682 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7686 /* Avoid the incr/decr ref count business if possible to help
7687 * copy-on-write (we are often in a child process when this function
7689 * Also makes sure that key objects don't get incrRefCount-ed when VM
7691 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7692 obj
= getDecodedObject(obj
);
7695 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7696 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7697 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7699 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7700 if (decrrc
) decrRefCount(obj
);
7703 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (it may contain any byte value; a
 * zero 'len' produces an empty payload and 's' is not read).
 * Returns 1 on success, 0 as soon as one of the fwrite() calls fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned long, so the matching conversion is %lu. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write the double 'd' to 'fp' in bulk format $<count>\r\n<payload>\r\n.
 * The payload is 'd' printed with "%.17g"; <count> is the payload length
 * without its trailing CRLF. Returns 1 on success, 0 if a fwrite() fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    size_t paylen;

    /* Format the payload first: the header needs its length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write the long 'l' to 'fp' in bulk format $<count>\r\n<payload>\r\n.
 * The payload is the base-10 text of 'l'; <count> is the payload length
 * without its trailing CRLF. Returns 1 on success, 0 if a fwrite() fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    size_t paylen;

    /* Format the payload first: the header needs its length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
7741 /* Write a sequence of commands able to fully rebuild the dataset into
7742 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7743 static int rewriteAppendOnlyFile(char *filename
) {
7744 dictIterator
*di
= NULL
;
7749 time_t now
= time(NULL
);
7751 /* Note that we have to use a different temp name here compared to the
7752 * one used by rewriteAppendOnlyFileBackground() function. */
7753 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7754 fp
= fopen(tmpfile
,"w");
7756 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7759 for (j
= 0; j
< server
.dbnum
; j
++) {
7760 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7761 redisDb
*db
= server
.db
+j
;
7763 if (dictSize(d
) == 0) continue;
7764 di
= dictGetIterator(d
);
7770 /* SELECT the new DB */
7771 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7772 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7774 /* Iterate this DB writing every entry */
7775 while((de
= dictNext(di
)) != NULL
) {
7780 key
= dictGetEntryKey(de
);
7781 /* If the value for this key is swapped, load a preview in memory.
7782 * We use a "swapped" flag to remember if we need to free the
7783 * value object instead to just increment the ref count anyway
7784 * in order to avoid copy-on-write of pages if we are forked() */
7785 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7786 key
->storage
== REDIS_VM_SWAPPING
) {
7787 o
= dictGetEntryVal(de
);
7790 o
= vmPreviewObject(key
);
7793 expiretime
= getExpire(db
,key
);
7795 /* Save the key and associated value */
7796 if (o
->type
== REDIS_STRING
) {
7797 /* Emit a SET command */
7798 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7799 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7801 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7802 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7803 } else if (o
->type
== REDIS_LIST
) {
7804 /* Emit the RPUSHes needed to rebuild the list */
7805 list
*list
= o
->ptr
;
7809 listRewind(list
,&li
);
7810 while((ln
= listNext(&li
))) {
7811 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7812 robj
*eleobj
= listNodeValue(ln
);
7814 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7815 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7816 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7818 } else if (o
->type
== REDIS_SET
) {
7819 /* Emit the SADDs needed to rebuild the set */
7821 dictIterator
*di
= dictGetIterator(set
);
7824 while((de
= dictNext(di
)) != NULL
) {
7825 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7826 robj
*eleobj
= dictGetEntryKey(de
);
7828 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7829 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7830 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7832 dictReleaseIterator(di
);
7833 } else if (o
->type
== REDIS_ZSET
) {
7834 /* Emit the ZADDs needed to rebuild the sorted set */
7836 dictIterator
*di
= dictGetIterator(zs
->dict
);
7839 while((de
= dictNext(di
)) != NULL
) {
7840 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7841 robj
*eleobj
= dictGetEntryKey(de
);
7842 double *score
= dictGetEntryVal(de
);
7844 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7845 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7846 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7847 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7849 dictReleaseIterator(di
);
7850 } else if (o
->type
== REDIS_HASH
) {
7851 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7853 /* Emit the HSETs needed to rebuild the hash */
7854 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7855 unsigned char *p
= zipmapRewind(o
->ptr
);
7856 unsigned char *field
, *val
;
7857 unsigned int flen
, vlen
;
7859 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7860 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7861 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7862 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7864 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7868 dictIterator
*di
= dictGetIterator(o
->ptr
);
7871 while((de
= dictNext(di
)) != NULL
) {
7872 robj
*field
= dictGetEntryKey(de
);
7873 robj
*val
= dictGetEntryVal(de
);
7875 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7876 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7877 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7878 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7880 dictReleaseIterator(di
);
7885 /* Save the expire time */
7886 if (expiretime
!= -1) {
7887 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7888 /* If this key is already expired skip it */
7889 if (expiretime
< now
) continue;
7890 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7891 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7892 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7894 if (swapped
) decrRefCount(o
);
7896 dictReleaseIterator(di
);
7899 /* Make sure data will not remain on the OS's output buffers */
7904 /* Use RENAME to make sure the DB file is changed atomically only
7905 * if the generate DB file is ok. */
7906 if (rename(tmpfile
,filename
) == -1) {
7907 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
7911 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
7917 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
7918 if (di
) dictReleaseIterator(di
);
7922 /* This is how rewriting of the append only file in background works:
7924 * 1) The user calls BGREWRITEAOF
7925 * 2) Redis calls this function, that forks():
7926 * 2a) the child rewrite the append only file in a temp file.
7927 * 2b) the parent accumulates differences in server.bgrewritebuf.
7928 * 3) When the child finished '2a' exists.
7929 * 4) The parent will trap the exit code, if it's OK, will append the
7930 * data accumulated into server.bgrewritebuf into the temp file, and
7931 * finally will rename(2) the temp file in the actual file name.
7932 * The the new file is reopened as the new append only file. Profit!
7934 static int rewriteAppendOnlyFileBackground(void) {
7937 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
7938 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
7939 if ((childpid
= fork()) == 0) {
7943 if (server
.vm_enabled
) vmReopenSwapFile();
7945 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
7946 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
7953 if (childpid
== -1) {
7954 redisLog(REDIS_WARNING
,
7955 "Can't rewrite append only file in background: fork: %s",
7959 redisLog(REDIS_NOTICE
,
7960 "Background append only file rewriting started by pid %d",childpid
);
7961 server
.bgrewritechildpid
= childpid
;
7962 /* We set appendseldb to -1 in order to force the next call to the
7963 * feedAppendOnlyFile() to issue a SELECT command, so the differences
7964 * accumulated by the parent into server.bgrewritebuf will start
7965 * with a SELECT statement and it will be safe to merge. */
7966 server
.appendseldb
= -1;
7969 return REDIS_OK
; /* unreached */
7972 static void bgrewriteaofCommand(redisClient
*c
) {
7973 if (server
.bgrewritechildpid
!= -1) {
7974 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
7977 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
7978 char *status
= "+Background append only file rewriting started\r\n";
7979 addReplySds(c
,sdsnew(status
));
7981 addReply(c
,shared
.err
);
/* Remove the temp file used by the background append only file rewrite
 * performed by the child having the specified pid. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
7992 /* Virtual Memory is composed mainly of two subsystems:
7993 * - Blocking Virtual Memory
7994 * - Threaded Virtual Memory I/O
7995 * The two parts are not fully decoupled, but functions are split among two
7996 * different sections of the source code (delimited by comments) in order to
7997 * make more clear what functionality is about the blocking VM and what about
7998 * the threaded (not blocking) VM.
8002 * Redis VM is a blocking VM (one that blocks reading swapped values from
8003 * disk into memory when a value swapped out is needed in memory) that is made
8004 * unblocking by trying to examine the command argument vector in order to
8005 * load in background values that will likely be needed in order to exec
8006 * the command. The command is executed only once all the relevant keys
8007 * are loaded into memory.
8009 * This is basically almost as simple as a blocking VM, but almost as parallel
8010 * as a fully non-blocking VM.
8013 /* =================== Virtual Memory - Blocking Side ====================== */
8015 /* substitute the first occurrence of '%p' with the process pid in the
8016 * swap file name. */
8017 static void expandVmSwapFilename(void) {
8018 char *p
= strstr(server
.vm_swap_file
,"%p");
8024 new = sdscat(new,server
.vm_swap_file
);
8025 new = sdscatprintf(new,"%ld",(long) getpid());
8026 new = sdscat(new,p
+2);
8027 zfree(server
.vm_swap_file
);
8028 server
.vm_swap_file
= new;
8031 static void vmInit(void) {
8036 if (server
.vm_max_threads
!= 0)
8037 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8039 expandVmSwapFilename();
8040 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8041 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8042 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8044 if (server
.vm_fp
== NULL
) {
8045 redisLog(REDIS_WARNING
,
8046 "Impossible to open the swap file: %s. Exiting.",
8050 server
.vm_fd
= fileno(server
.vm_fp
);
8051 server
.vm_next_page
= 0;
8052 server
.vm_near_pages
= 0;
8053 server
.vm_stats_used_pages
= 0;
8054 server
.vm_stats_swapped_objects
= 0;
8055 server
.vm_stats_swapouts
= 0;
8056 server
.vm_stats_swapins
= 0;
8057 totsize
= server
.vm_pages
*server
.vm_page_size
;
8058 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8059 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8060 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8064 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8066 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8067 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8068 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8069 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8071 /* Initialize threaded I/O (used by Virtual Memory) */
8072 server
.io_newjobs
= listCreate();
8073 server
.io_processing
= listCreate();
8074 server
.io_processed
= listCreate();
8075 server
.io_ready_clients
= listCreate();
8076 pthread_mutex_init(&server
.io_mutex
,NULL
);
8077 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8078 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8079 server
.io_active_threads
= 0;
8080 if (pipe(pipefds
) == -1) {
8081 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8085 server
.io_ready_pipe_read
= pipefds
[0];
8086 server
.io_ready_pipe_write
= pipefds
[1];
8087 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8088 /* LZF requires a lot of stack */
8089 pthread_attr_init(&server
.io_threads_attr
);
8090 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8091 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8092 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8093 /* Listen for events in the threaded I/O pipe */
8094 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8095 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8096 oom("creating file event");
8099 /* Mark the page as used */
8100 static void vmMarkPageUsed(off_t page
) {
8101 off_t byte
= page
/8;
8103 redisAssert(vmFreePage(page
) == 1);
8104 server
.vm_bitmap
[byte
] |= 1<<bit
;
8107 /* Mark N contiguous pages as used, with 'page' being the first. */
8108 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8111 for (j
= 0; j
< count
; j
++)
8112 vmMarkPageUsed(page
+j
);
8113 server
.vm_stats_used_pages
+= count
;
8114 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8115 (long long)count
, (long long)page
);
8118 /* Mark the page as free */
8119 static void vmMarkPageFree(off_t page
) {
8120 off_t byte
= page
/8;
8122 redisAssert(vmFreePage(page
) == 0);
8123 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8126 /* Mark N contiguous pages as free, with 'page' being the first. */
8127 static void vmMarkPagesFree(off_t page
, off_t count
) {
8130 for (j
= 0; j
< count
; j
++)
8131 vmMarkPageFree(page
+j
);
8132 server
.vm_stats_used_pages
-= count
;
8133 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8134 (long long)count
, (long long)page
);
8137 /* Test if the page is free */
8138 static int vmFreePage(off_t page
) {
8139 off_t byte
= page
/8;
8141 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8144 /* Find N contiguous free pages storing the first page of the cluster in *first.
8145 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8146 * REDIS_ERR is returned.
8148 * This function uses a simple algorithm: we try to allocate
8149 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8150 * again from the start of the swap file searching for free spaces.
8152 * If it looks pretty clear that there are no free pages near our offset
8153 * we try to find less populated places doing a forward jump of
8154 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8155 * without hurry, and then we jump again and so forth...
8157 * This function can be improved using a free list to avoid to guess
8158 * too much, since we could collect data about freed pages.
8160 * note: I implemented this function just after watching an episode of
8161 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8163 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8164 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8166 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8167 server
.vm_near_pages
= 0;
8168 server
.vm_next_page
= 0;
8170 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8171 base
= server
.vm_next_page
;
8173 while(offset
< server
.vm_pages
) {
8174 off_t
this = base
+offset
;
8176 /* If we overflow, restart from page zero */
8177 if (this >= server
.vm_pages
) {
8178 this -= server
.vm_pages
;
8180 /* Just overflowed, what we found on tail is no longer
8181 * interesting, as it's no longer contiguous. */
8185 if (vmFreePage(this)) {
8186 /* This is a free page */
8188 /* Already got N free pages? Return to the caller, with success */
8190 *first
= this-(n
-1);
8191 server
.vm_next_page
= this+1;
8192 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8196 /* The current one is not a free page */
8200 /* Fast-forward if the current page is not free and we already
8201 * searched enough near this place. */
8203 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8204 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8206 /* Note that even if we rewind after the jump, we are don't need
8207 * to make sure numfree is set to zero as we only jump *if* it
8208 * is set to zero. */
8210 /* Otherwise just check the next page */
8217 /* Write the specified object at the specified page of the swap file */
8218 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8219 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8220 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8221 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8222 redisLog(REDIS_WARNING
,
8223 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8227 rdbSaveObject(server
.vm_fp
,o
);
8228 fflush(server
.vm_fp
);
8229 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8233 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8234 * needed to later retrieve the object into the key object.
8235 * If we can't find enough contiguous empty pages to swap the object on disk
8236 * REDIS_ERR is returned. */
8237 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8238 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8241 assert(key
->storage
== REDIS_VM_MEMORY
);
8242 assert(key
->refcount
== 1);
8243 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8244 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8245 key
->vm
.page
= page
;
8246 key
->vm
.usedpages
= pages
;
8247 key
->storage
= REDIS_VM_SWAPPED
;
8248 key
->vtype
= val
->type
;
8249 decrRefCount(val
); /* Deallocate the object from memory. */
8250 vmMarkPagesUsed(page
,pages
);
8251 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8252 (unsigned char*) key
->ptr
,
8253 (unsigned long long) page
, (unsigned long long) pages
);
8254 server
.vm_stats_swapped_objects
++;
8255 server
.vm_stats_swapouts
++;
8259 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8262 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8263 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8264 redisLog(REDIS_WARNING
,
8265 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8269 o
= rdbLoadObject(type
,server
.vm_fp
);
8271 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8274 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8278 /* Load the value object relative to the 'key' object from swap to memory.
8279 * The newly allocated object is returned.
8281 * If preview is true the unserialized object is returned to the caller but
8282 * no changes are made to the key object, nor the pages are marked as freed */
8283 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8286 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8287 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8289 key
->storage
= REDIS_VM_MEMORY
;
8290 key
->vm
.atime
= server
.unixtime
;
8291 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8292 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8293 (unsigned char*) key
->ptr
);
8294 server
.vm_stats_swapped_objects
--;
8296 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8297 (unsigned char*) key
->ptr
);
8299 server
.vm_stats_swapins
++;
8303 /* Plain object loading, from swap to memory */
8304 static robj
*vmLoadObject(robj
*key
) {
8305 /* If we are loading the object in background, stop it, we
8306 * need to load this object synchronously ASAP. */
8307 if (key
->storage
== REDIS_VM_LOADING
)
8308 vmCancelThreadedIOJob(key
);
8309 return vmGenericLoadObject(key
,0);
8312 /* Just load the value on disk, without to modify the key.
8313 * This is useful when we want to perform some operation on the value
8314 * without to really bring it from swap to memory, like while saving the
8315 * dataset or rewriting the append only log. */
8316 static robj
*vmPreviewObject(robj
*key
) {
8317 return vmGenericLoadObject(key
,1);
8320 /* How a good candidate is this object for swapping?
8321 * The better candidate it is, the greater the returned value.
8323 * Currently we try to perform a fast estimation of the object size in
8324 * memory, and combine it with aging informations.
8326 * Basically swappability = idle-time * log(estimated size)
8328 * Bigger objects are preferred over smaller objects, but not
8329 * proportionally, this is why we use the logarithm. This algorithm is
8330 * just a first try and will probably be tuned later. */
8331 static double computeObjectSwappability(robj
*o
) {
8332 time_t age
= server
.unixtime
- o
->vm
.atime
;
8336 struct dictEntry
*de
;
8339 if (age
<= 0) return 0;
8342 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8345 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8350 listNode
*ln
= listFirst(l
);
8352 asize
= sizeof(list
);
8354 robj
*ele
= ln
->value
;
8357 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8358 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8360 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8365 z
= (o
->type
== REDIS_ZSET
);
8366 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8368 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8369 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8374 de
= dictGetRandomKey(d
);
8375 ele
= dictGetEntryKey(de
);
8376 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8377 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8379 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8380 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8384 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8385 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8386 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8387 unsigned int klen
, vlen
;
8388 unsigned char *key
, *val
;
8390 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8394 asize
= len
*(klen
+vlen
+3);
8395 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8397 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8402 de
= dictGetRandomKey(d
);
8403 ele
= dictGetEntryKey(de
);
8404 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8405 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8407 ele
= dictGetEntryVal(de
);
8408 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8409 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8411 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8416 return (double)age
*log(1+asize
);
8419 /* Try to swap an object that's a good candidate for swapping.
8420 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8421 * to swap any object at all.
8423 * If 'usethreaded' is true, Redis will try to swap the object in background
8424 * using I/O threads. */
8425 static int vmSwapOneObject(int usethreads
) {
8427 struct dictEntry
*best
= NULL
;
8428 double best_swappability
= 0;
8429 redisDb
*best_db
= NULL
;
8432 for (j
= 0; j
< server
.dbnum
; j
++) {
8433 redisDb
*db
= server
.db
+j
;
8434 /* Why maxtries is set to 100?
8435 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8436 * are swappable objects */
8439 if (dictSize(db
->dict
) == 0) continue;
8440 for (i
= 0; i
< 5; i
++) {
8442 double swappability
;
8444 if (maxtries
) maxtries
--;
8445 de
= dictGetRandomKey(db
->dict
);
8446 key
= dictGetEntryKey(de
);
8447 val
= dictGetEntryVal(de
);
8448 /* Only swap objects that are currently in memory.
8450 * Also don't swap shared objects if threaded VM is on, as we
8451 * try to ensure that the main thread does not touch the
8452 * object while the I/O thread is using it, but we can't
8453 * control other keys without adding additional mutex. */
8454 if (key
->storage
!= REDIS_VM_MEMORY
||
8455 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8456 if (maxtries
) i
--; /* don't count this try */
8459 swappability
= computeObjectSwappability(val
);
8460 if (!best
|| swappability
> best_swappability
) {
8462 best_swappability
= swappability
;
8467 if (best
== NULL
) return REDIS_ERR
;
8468 key
= dictGetEntryKey(best
);
8469 val
= dictGetEntryVal(best
);
8471 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8472 key
->ptr
, best_swappability
);
8474 /* Unshare the key if needed */
8475 if (key
->refcount
> 1) {
8476 robj
*newkey
= dupStringObject(key
);
8478 key
= dictGetEntryKey(best
) = newkey
;
8482 vmSwapObjectThreaded(key
,val
,best_db
);
8485 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8486 dictGetEntryVal(best
) = NULL
;
/* Swap one object out synchronously. Returns what vmSwapOneObject()
 * returns. The parameter list is declared as (void), like the other
 * functions without arguments of this file, so that calls passing
 * arguments are rejected by the compiler. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads. Returns what
 * vmSwapOneObject() returns. The parameter list is declared as (void),
 * like the other functions without arguments of this file, so that calls
 * passing arguments are rejected by the compiler. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8502 /* Return true if it's safe to swap out objects in a given moment.
8503 * Basically we don't want to swap objects out while there is a BGSAVE
8504 * or a BGAEOREWRITE running in backgroud. */
8505 static int vmCanSwapOut(void) {
8506 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8509 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8510 * and was deleted. Otherwise 0 is returned. */
8511 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8515 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8516 foundkey
= dictGetEntryKey(de
);
8517 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8522 /* =================== Virtual Memory - Threaded I/O ======================= */
8524 static void freeIOJob(iojob
*j
) {
8525 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8526 j
->type
== REDIS_IOJOB_DO_SWAP
||
8527 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8528 decrRefCount(j
->val
);
8529 decrRefCount(j
->key
);
8533 /* Every time a thread finished a Job, it writes a byte into the write side
8534 * of an unix pipe in order to "awake" the main thread, and this function
8536 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8540 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8542 REDIS_NOTUSED(mask
);
8543 REDIS_NOTUSED(privdata
);
8545 /* For every byte we read in the read side of the pipe, there is one
8546 * I/O job completed to process. */
8547 while((retval
= read(fd
,buf
,1)) == 1) {
8551 struct dictEntry
*de
;
8553 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8555 /* Get the processed element (the oldest one) */
8557 assert(listLength(server
.io_processed
) != 0);
8558 if (toprocess
== -1) {
8559 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8560 if (toprocess
<= 0) toprocess
= 1;
8562 ln
= listFirst(server
.io_processed
);
8564 listDelNode(server
.io_processed
,ln
);
8566 /* If this job is marked as canceled, just ignore it */
8571 /* Post process it in the main thread, as there are things we
8572 * can do just here to avoid race conditions and/or invasive locks */
8573 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8574 de
= dictFind(j
->db
->dict
,j
->key
);
8576 key
= dictGetEntryKey(de
);
8577 if (j
->type
== REDIS_IOJOB_LOAD
) {
8580 /* Key loaded, bring it at home */
8581 key
->storage
= REDIS_VM_MEMORY
;
8582 key
->vm
.atime
= server
.unixtime
;
8583 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8584 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8585 (unsigned char*) key
->ptr
);
8586 server
.vm_stats_swapped_objects
--;
8587 server
.vm_stats_swapins
++;
8588 dictGetEntryVal(de
) = j
->val
;
8589 incrRefCount(j
->val
);
8592 /* Handle clients waiting for this key to be loaded. */
8593 handleClientsBlockedOnSwappedKey(db
,key
);
8594 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8595 /* Now we know the amount of pages required to swap this object.
8596 * Let's find some space for it, and queue this task again
8597 * rebranded as REDIS_IOJOB_DO_SWAP. */
8598 if (!vmCanSwapOut() ||
8599 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8601 /* Ooops... no space or we can't swap as there is
8602 * a fork()ed Redis trying to save stuff on disk. */
8604 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8606 /* Note that we need to mark this pages as used now,
8607 * if the job will be canceled, we'll mark them as freed
8609 vmMarkPagesUsed(j
->page
,j
->pages
);
8610 j
->type
= REDIS_IOJOB_DO_SWAP
;
8615 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8618 /* Key swapped. We can finally free some memory. */
8619 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8620 printf("key->storage: %d\n",key
->storage
);
8621 printf("key->name: %s\n",(char*)key
->ptr
);
8622 printf("key->refcount: %d\n",key
->refcount
);
8623 printf("val: %p\n",(void*)j
->val
);
8624 printf("val->type: %d\n",j
->val
->type
);
8625 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8627 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8628 val
= dictGetEntryVal(de
);
8629 key
->vm
.page
= j
->page
;
8630 key
->vm
.usedpages
= j
->pages
;
8631 key
->storage
= REDIS_VM_SWAPPED
;
8632 key
->vtype
= j
->val
->type
;
8633 decrRefCount(val
); /* Deallocate the object from memory. */
8634 dictGetEntryVal(de
) = NULL
;
8635 redisLog(REDIS_DEBUG
,
8636 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8637 (unsigned char*) key
->ptr
,
8638 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8639 server
.vm_stats_swapped_objects
++;
8640 server
.vm_stats_swapouts
++;
8642 /* Put a few more swap requests in queue if we are still
8644 if (trytoswap
&& vmCanSwapOut() &&
8645 zmalloc_used_memory() > server
.vm_max_memory
)
8650 more
= listLength(server
.io_newjobs
) <
8651 (unsigned) server
.vm_max_threads
;
8653 /* Don't waste CPU time if swappable objects are rare. */
8654 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8662 if (processed
== toprocess
) return;
8664 if (retval
< 0 && errno
!= EAGAIN
) {
8665 redisLog(REDIS_WARNING
,
8666 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8671 static void lockThreadedIO(void) {
8672 pthread_mutex_lock(&server
.io_mutex
);
8675 static void unlockThreadedIO(void) {
8676 pthread_mutex_unlock(&server
.io_mutex
);
8679 /* Remove the specified object from the threaded I/O queue if still not
8680 * processed, otherwise make sure to flag it as canceled. */
8681 static void vmCancelThreadedIOJob(robj
*o
) {
8683 server
.io_newjobs
, /* 0 */
8684 server
.io_processing
, /* 1 */
8685 server
.io_processed
/* 2 */
8689 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8692 /* Search for a matching key in one of the queues */
8693 for (i
= 0; i
< 3; i
++) {
8697 listRewind(lists
[i
],&li
);
8698 while ((ln
= listNext(&li
)) != NULL
) {
8699 iojob
*job
= ln
->value
;
8701 if (job
->canceled
) continue; /* Skip this, already canceled. */
8702 if (compareStringObjects(job
->key
,o
) == 0) {
8703 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8704 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8705 /* Mark the pages as free since the swap didn't happened
8706 * or happened but is now discarded. */
8707 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8708 vmMarkPagesFree(job
->page
,job
->pages
);
8709 /* Cancel the job. It depends on the list the job is
8712 case 0: /* io_newjobs */
8713 /* If the job was yet not processed the best thing to do
8714 * is to remove it from the queue at all */
8716 listDelNode(lists
[i
],ln
);
8718 case 1: /* io_processing */
8719 /* Oh Shi- the thread is messing with the Job:
8721 * Probably it's accessing the object if this is a
8722 * PREPARE_SWAP or DO_SWAP job.
8723 * If it's a LOAD job it may be reading from disk and
8724 * if we don't wait for the job to terminate before to
8725 * cancel it, maybe in a few microseconds data can be
8726 * corrupted in this pages. So the short story is:
8728 * Better to wait for the job to move into the
8729 * next queue (processed)... */
8731 /* We try again and again until the job is completed. */
8733 /* But let's wait some time for the I/O thread
8734 * to finish with this job. After all this condition
8735 * should be very rare. */
8738 case 2: /* io_processed */
8739 /* The job was already processed, that's easy...
8740 * just mark it as canceled so that we'll ignore it
8741 * when processing completed jobs. */
8745 /* Finally we have to adjust the storage type of the object
8746 * in order to "UNDO" the operaiton. */
8747 if (o
->storage
== REDIS_VM_LOADING
)
8748 o
->storage
= REDIS_VM_SWAPPED
;
8749 else if (o
->storage
== REDIS_VM_SWAPPING
)
8750 o
->storage
= REDIS_VM_MEMORY
;
8757 assert(1 != 1); /* We should never reach this */
8760 static void *IOThreadEntryPoint(void *arg
) {
8765 pthread_detach(pthread_self());
8767 /* Get a new job to process */
8769 if (listLength(server
.io_newjobs
) == 0) {
8770 /* No new jobs in queue, exit. */
8771 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8772 (long) pthread_self());
8773 server
.io_active_threads
--;
8777 ln
= listFirst(server
.io_newjobs
);
8779 listDelNode(server
.io_newjobs
,ln
);
8780 /* Add the job in the processing queue */
8781 j
->thread
= pthread_self();
8782 listAddNodeTail(server
.io_processing
,j
);
8783 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8785 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8786 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8788 /* Process the Job */
8789 if (j
->type
== REDIS_IOJOB_LOAD
) {
8790 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8791 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8792 FILE *fp
= fopen("/dev/null","w+");
8793 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8795 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8796 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8800 /* Done: insert the job into the processed queue */
8801 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8802 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8804 listDelNode(server
.io_processing
,ln
);
8805 listAddNodeTail(server
.io_processed
,j
);
8808 /* Signal the main thread there is new stuff to process */
8809 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8811 return NULL
; /* never reached */
8814 static void spawnIOThread(void) {
8816 sigset_t mask
, omask
;
8820 sigaddset(&mask
,SIGCHLD
);
8821 sigaddset(&mask
,SIGHUP
);
8822 sigaddset(&mask
,SIGPIPE
);
8823 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8824 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8825 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8829 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8830 server
.io_active_threads
++;
8833 /* We need to wait for the last thread to exit before we are able to
8834 * fork() in order to BGSAVE or BGREWRITEAOF. */
8835 static void waitEmptyIOJobsQueue(void) {
8837 int io_processed_len
;
8840 if (listLength(server
.io_newjobs
) == 0 &&
8841 listLength(server
.io_processing
) == 0 &&
8842 server
.io_active_threads
== 0)
8847 /* While waiting for empty jobs queue condition we post-process some
8848 * finshed job, as I/O threads may be hanging trying to write against
8849 * the io_ready_pipe_write FD but there are so much pending jobs that
8851 io_processed_len
= listLength(server
.io_processed
);
8853 if (io_processed_len
) {
8854 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8855 usleep(1000); /* 1 millisecond */
8857 usleep(10000); /* 10 milliseconds */
8862 static void vmReopenSwapFile(void) {
8863 /* Note: we don't close the old one as we are in the child process
8864 * and don't want to mess at all with the original file object. */
8865 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8866 if (server
.vm_fp
== NULL
) {
8867 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8868 server
.vm_swap_file
);
8871 server
.vm_fd
= fileno(server
.vm_fp
);
8874 /* This function must be called while with threaded IO locked */
8875 static void queueIOJob(iojob
*j
) {
8876 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8877 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8878 listAddNodeTail(server
.io_newjobs
,j
);
8879 if (server
.io_active_threads
< server
.vm_max_threads
)
8883 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8886 assert(key
->storage
== REDIS_VM_MEMORY
);
8887 assert(key
->refcount
== 1);
8889 j
= zmalloc(sizeof(*j
));
8890 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8892 j
->key
= dupStringObject(key
);
8896 j
->thread
= (pthread_t
) -1;
8897 key
->storage
= REDIS_VM_SWAPPING
;
8905 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
8907 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
8908 * If there is not already a job loading the key, it is craeted.
8909 * The key is added to the io_keys list in the client structure, and also
8910 * in the hash table mapping swapped keys to waiting clients, that is,
8911 * server.io_waited_keys. */
8912 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
8913 struct dictEntry
*de
;
8917 /* If the key does not exist or is already in RAM we don't need to
8918 * block the client at all. */
8919 de
= dictFind(c
->db
->dict
,key
);
8920 if (de
== NULL
) return 0;
8921 o
= dictGetEntryKey(de
);
8922 if (o
->storage
== REDIS_VM_MEMORY
) {
8924 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
8925 /* We were swapping the key, undo it! */
8926 vmCancelThreadedIOJob(o
);
8930 /* OK: the key is either swapped, or being loaded just now. */
8932 /* Add the key to the list of keys this client is waiting for.
8933 * This maps clients to keys they are waiting for. */
8934 listAddNodeTail(c
->io_keys
,key
);
8937 /* Add the client to the swapped keys => clients waiting map. */
8938 de
= dictFind(c
->db
->io_keys
,key
);
8942 /* For every key we take a list of clients blocked for it */
8944 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
8946 assert(retval
== DICT_OK
);
8948 l
= dictGetEntryVal(de
);
8950 listAddNodeTail(l
,c
);
8952 /* Are we already loading the key from disk? If not create a job */
8953 if (o
->storage
== REDIS_VM_SWAPPED
) {
8956 o
->storage
= REDIS_VM_LOADING
;
8957 j
= zmalloc(sizeof(*j
));
8958 j
->type
= REDIS_IOJOB_LOAD
;
8960 j
->key
= dupStringObject(key
);
8961 j
->key
->vtype
= o
->vtype
;
8962 j
->page
= o
->vm
.page
;
8965 j
->thread
= (pthread_t
) -1;
8973 /* Preload keys needed for the ZUNION and ZINTER commands. */
8974 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
8976 num
= atoi(c
->argv
[2]->ptr
);
8977 for (i
= 0; i
< num
; i
++) {
8978 waitForSwappedKey(c
,c
->argv
[3+i
]);
8982 /* Is this client attempting to run a command against swapped keys?
8983 * If so, block it ASAP, load the keys in background, then resume it.
8985 * The important idea about this function is that it can fail! If keys will
8986 * still be swapped when the client is resumed, this key lookups will
8987 * just block loading keys from disk. In practical terms this should only
8988 * happen with SORT BY command or if there is a bug in this function.
8990 * Return 1 if the client is marked as blocked, 0 if the client can
8991 * continue as the keys it is going to access appear to be in memory. */
8992 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
8995 if (cmd
->vm_preload_proc
!= NULL
) {
8996 cmd
->vm_preload_proc(c
);
8998 if (cmd
->vm_firstkey
== 0) return 0;
8999 last
= cmd
->vm_lastkey
;
9000 if (last
< 0) last
= c
->argc
+last
;
9001 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9002 waitForSwappedKey(c
,c
->argv
[j
]);
9005 /* If the client was blocked for at least one key, mark it as blocked. */
9006 if (listLength(c
->io_keys
)) {
9007 c
->flags
|= REDIS_IO_WAIT
;
9008 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9009 server
.vm_blocked_clients
++;
9016 /* Remove the 'key' from the list of blocked keys for a given client.
9018 * The function returns 1 when there are no longer blocking keys after
9019 * the current one was removed (and the client can be unblocked). */
9020 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9024 struct dictEntry
*de
;
9026 /* Remove the key from the list of keys this client is waiting for. */
9027 listRewind(c
->io_keys
,&li
);
9028 while ((ln
= listNext(&li
)) != NULL
) {
9029 if (compareStringObjects(ln
->value
,key
) == 0) {
9030 listDelNode(c
->io_keys
,ln
);
9036 /* Remove the client form the key => waiting clients map. */
9037 de
= dictFind(c
->db
->io_keys
,key
);
9039 l
= dictGetEntryVal(de
);
9040 ln
= listSearchKey(l
,c
);
9043 if (listLength(l
) == 0)
9044 dictDelete(c
->db
->io_keys
,key
);
9046 return listLength(c
->io_keys
) == 0;
9049 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9050 struct dictEntry
*de
;
9055 de
= dictFind(db
->io_keys
,key
);
9058 l
= dictGetEntryVal(de
);
9059 len
= listLength(l
);
9060 /* Note: we can't use something like while(listLength(l)) as the list
9061 * can be freed by the calling function when we remove the last element. */
9064 redisClient
*c
= ln
->value
;
9066 if (dontWaitForSwappedKey(c
,key
)) {
9067 /* Put the client in the list of clients ready to go as we
9068 * loaded all the keys about it. */
9069 listAddNodeTail(server
.io_ready_clients
,c
);
9074 /* =========================== Remote Configuration ========================= */
9076 static void configSetCommand(redisClient
*c
) {
9077 robj
*o
= getDecodedObject(c
->argv
[3]);
9078 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9079 zfree(server
.dbfilename
);
9080 server
.dbfilename
= zstrdup(o
->ptr
);
9081 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9082 zfree(server
.requirepass
);
9083 server
.requirepass
= zstrdup(o
->ptr
);
9084 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9085 zfree(server
.masterauth
);
9086 server
.masterauth
= zstrdup(o
->ptr
);
9087 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9088 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9090 addReplySds(c
,sdscatprintf(sdsempty(),
9091 "-ERR not supported CONFIG parameter %s\r\n",
9092 (char*)c
->argv
[2]->ptr
));
9097 addReply(c
,shared
.ok
);
9100 static void configGetCommand(redisClient
*c
) {
9101 robj
*o
= getDecodedObject(c
->argv
[2]);
9102 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9103 char *pattern
= o
->ptr
;
9107 decrRefCount(lenobj
);
9109 if (stringmatch(pattern
,"dbfilename",0)) {
9110 addReplyBulkCString(c
,"dbfilename");
9111 addReplyBulkCString(c
,server
.dbfilename
);
9114 if (stringmatch(pattern
,"requirepass",0)) {
9115 addReplyBulkCString(c
,"requirepass");
9116 addReplyBulkCString(c
,server
.requirepass
);
9119 if (stringmatch(pattern
,"masterauth",0)) {
9120 addReplyBulkCString(c
,"masterauth");
9121 addReplyBulkCString(c
,server
.masterauth
);
9124 if (stringmatch(pattern
,"maxmemory",0)) {
9127 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9128 addReplyBulkCString(c
,"maxmemory");
9129 addReplyBulkCString(c
,buf
);
9133 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9136 static void configCommand(redisClient
*c
) {
9137 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9138 if (c
->argc
!= 4) goto badarity
;
9139 configSetCommand(c
);
9140 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9141 if (c
->argc
!= 3) goto badarity
;
9142 configGetCommand(c
);
9143 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9144 if (c
->argc
!= 2) goto badarity
;
9145 server
.stat_numcommands
= 0;
9146 server
.stat_numconnections
= 0;
9147 server
.stat_expiredkeys
= 0;
9148 server
.stat_starttime
= time(NULL
);
9149 addReply(c
,shared
.ok
);
9151 addReplySds(c
,sdscatprintf(sdsempty(),
9152 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9157 addReplySds(c
,sdscatprintf(sdsempty(),
9158 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9159 (char*) c
->argv
[1]->ptr
));
9162 /* ================================= Debugging ============================== */
9164 static void debugCommand(redisClient
*c
) {
9165 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9167 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9168 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9169 addReply(c
,shared
.err
);
9173 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9174 addReply(c
,shared
.err
);
9177 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9178 addReply(c
,shared
.ok
);
9179 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9181 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9182 addReply(c
,shared
.err
);
9185 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9186 addReply(c
,shared
.ok
);
9187 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9188 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9192 addReply(c
,shared
.nokeyerr
);
9195 key
= dictGetEntryKey(de
);
9196 val
= dictGetEntryVal(de
);
9197 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9198 key
->storage
== REDIS_VM_SWAPPING
)) {
9202 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9203 strenc
= strencoding
[val
->encoding
];
9205 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9208 addReplySds(c
,sdscatprintf(sdsempty(),
9209 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9210 "encoding:%s serializedlength:%lld\r\n",
9211 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9212 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9214 addReplySds(c
,sdscatprintf(sdsempty(),
9215 "+Key at:%p refcount:%d, value swapped at: page %llu "
9216 "using %llu pages\r\n",
9217 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9218 (unsigned long long) key
->vm
.usedpages
));
9220 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9221 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9224 if (!server
.vm_enabled
) {
9225 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9229 addReply(c
,shared
.nokeyerr
);
9232 key
= dictGetEntryKey(de
);
9233 val
= dictGetEntryVal(de
);
9234 /* If the key is shared we want to create a copy */
9235 if (key
->refcount
> 1) {
9236 robj
*newkey
= dupStringObject(key
);
9238 key
= dictGetEntryKey(de
) = newkey
;
9241 if (key
->storage
!= REDIS_VM_MEMORY
) {
9242 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9243 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9244 dictGetEntryVal(de
) = NULL
;
9245 addReply(c
,shared
.ok
);
9247 addReply(c
,shared
.err
);
9250 addReplySds(c
,sdsnew(
9251 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
9255 static void _redisAssert(char *estr
, char *file
, int line
) {
9256 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9257 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9258 #ifdef HAVE_BACKTRACE
9259 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9264 /* =================================== Main! ================================ */
/* Return the integer found in the first line of
 * /proc/sys/vm/overcommit_memory, or -1 if the file can't be opened or
 * read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,64,fp) != NULL) value = atoi(line);
    fclose(fp);

    return value;
}
9281 void linuxOvercommitMemoryWarning(void) {
9282 if (linuxOvercommitMemoryValue() == 0) {
9283 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9286 #endif /* __linux__ */
9288 static void daemonize(void) {
9292 if (fork() != 0) exit(0); /* parent exits */
9293 setsid(); /* create a new session */
9295 /* Every output goes to /dev/null. If Redis is daemonized but
9296 * the 'logfile' is set to 'stdout' in the configuration file
9297 * it will not log at all. */
9298 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9299 dup2(fd
, STDIN_FILENO
);
9300 dup2(fd
, STDOUT_FILENO
);
9301 dup2(fd
, STDERR_FILENO
);
9302 if (fd
> STDERR_FILENO
) close(fd
);
9304 /* Try to write the pid file */
9305 fp
= fopen(server
.pidfile
,"w");
9307 fprintf(fp
,"%d\n",getpid());
9312 static void version() {
9313 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line usage on stderr and exit with status 1.
 * Declared with a (void) parameter list so that it is a real prototype. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
9323 int main(int argc
, char **argv
) {
9328 if (strcmp(argv
[1], "-v") == 0 ||
9329 strcmp(argv
[1], "--version") == 0) version();
9330 if (strcmp(argv
[1], "--help") == 0) usage();
9331 resetServerSaveParams();
9332 loadServerConfig(argv
[1]);
9333 } else if ((argc
> 2)) {
9336 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9338 if (server
.daemonize
) daemonize();
9340 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9342 linuxOvercommitMemoryWarning();
9345 if (server
.appendonly
) {
9346 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9347 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9349 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9350 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9352 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9353 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9355 aeDeleteEventLoop(server
.el
);
9359 /* ============================= Backtrace support ========================= */
9361 #ifdef HAVE_BACKTRACE
9362 static char *findFuncName(void *pointer
, unsigned long *offset
);
9364 static void *getMcontextEip(ucontext_t
*uc
) {
9365 #if defined(__FreeBSD__)
9366 return (void*) uc
->uc_mcontext
.mc_eip
;
9367 #elif defined(__dietlibc__)
9368 return (void*) uc
->uc_mcontext
.eip
;
9369 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9371 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9373 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9375 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9376 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9377 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9379 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9381 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9382 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9383 #elif defined(__ia64__) /* Linux IA64 */
9384 return (void*) uc
->uc_mcontext
.sc_ip
;
9390 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9392 char **messages
= NULL
;
9393 int i
, trace_size
= 0;
9394 unsigned long offset
=0;
9395 ucontext_t
*uc
= (ucontext_t
*) secret
;
9397 REDIS_NOTUSED(info
);
9399 redisLog(REDIS_WARNING
,
9400 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9401 infostring
= genRedisInfoString();
9402 redisLog(REDIS_WARNING
, "%s",infostring
);
9403 /* It's not safe to sdsfree() the returned string under memory
9404 * corruption conditions. Let it leak as we are going to abort */
9406 trace_size
= backtrace(trace
, 100);
9407 /* overwrite sigaction with caller's address */
9408 if (getMcontextEip(uc
) != NULL
) {
9409 trace
[1] = getMcontextEip(uc
);
9411 messages
= backtrace_symbols(trace
, trace_size
);
9413 for (i
=1; i
<trace_size
; ++i
) {
9414 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9416 p
= strchr(messages
[i
],'+');
9417 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9418 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9420 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9423 /* free(messages); Don't call free() with possibly corrupted memory. */
9427 static void setupSigSegvAction(void) {
9428 struct sigaction act
;
9430 sigemptyset (&act
.sa_mask
);
9431 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9432 * is used. Otherwise, sa_handler is used */
9433 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9434 act
.sa_sigaction
= segvHandler
;
9435 sigaction (SIGSEGV
, &act
, NULL
);
9436 sigaction (SIGBUS
, &act
, NULL
);
9437 sigaction (SIGFPE
, &act
, NULL
);
9438 sigaction (SIGILL
, &act
, NULL
);
9439 sigaction (SIGBUS
, &act
, NULL
);
9443 #include "staticsymbols.h"
9444 /* This function try to convert a pointer into a function name. It's used in
9445 * oreder to provide a backtrace under segmentation fault that's able to
9446 * display functions declared as static (otherwise the backtrace is useless). */
9447 static char *findFuncName(void *pointer
, unsigned long *offset
){
9449 unsigned long off
, minoff
= 0;
9451 /* Try to match against the Symbol with the smallest offset */
9452 for (i
=0; symsTable
[i
].pointer
; i
++) {
9453 unsigned long lp
= (unsigned long) pointer
;
9455 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9456 off
=lp
-symsTable
[i
].pointer
;
9457 if (ret
< 0 || off
< minoff
) {
9463 if (ret
== -1) return NULL
;
9465 return symsTable
[ret
].name
;
9467 #else /* HAVE_BACKTRACE */
/* Version used when HAVE_BACKTRACE is not defined: there is no handler to
 * install, so the function intentionally does nothing. */
static void setupSigSegvAction(void) {
}
9470 #endif /* HAVE_BACKTRACE */