2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.7"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. To store 32 bits lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specify the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte, most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0
153 #define REDIS_RDB_14BITLEN 1
154 #define REDIS_RDB_32BITLEN 2
155 #define REDIS_RDB_ENCVAL 3
156 #define REDIS_RDB_LENERR UINT_MAX
158 /* When a length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * accordingly to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
166 /* Virtual memory object->where field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * intentionally unused variable or parameter can be referenced without
 * producing an "unused" compiler warning.
 *
 * The argument is fully parenthesized so that any expression may be passed:
 * without the inner parentheses REDIS_NOTUSED(a + b) would expand to
 * ((void) a + b), which applies '+' to a void operand and fails to compile. */
#define REDIS_NOTUSED(V) ((void)(V))
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way:
 * redisAssert(_e) evaluates _e and does nothing when it is true; when it is
 * false it calls _redisAssert() with the stringified expression, the source
 * file name and the line number, and then ends the process with _exit(1).
 * The whole expansion is a single expression, so it can be used wherever a
 * statement expression is allowed. */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure */
246 struct redisObjectVM
{
247 off_t page
; /* the page at which the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
;
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
337 /* Global server state structure */
342 dict
*sharingpool
; /* Pool used for object sharing */
343 unsigned int sharingpoolsize
;
344 long long dirty
; /* changes to DB from the last save */
346 list
*slaves
, *monitors
;
347 char neterr
[ANET_ERR_LEN
];
349 int cronloops
; /* number of times the cron function run */
350 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
351 time_t lastsave
; /* Unix time of last successful save */
352 /* Fields used only for stats */
353 time_t stat_starttime
; /* server start time */
354 long long stat_numcommands
; /* number of processed commands */
355 long long stat_numconnections
; /* number of connections received */
356 long long stat_expiredkeys
; /* number of expired keys */
369 pid_t bgsavechildpid
;
370 pid_t bgrewritechildpid
;
371 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
372 struct saveparam
*saveparams
;
377 char *appendfilename
;
381 /* Replication related */
386 redisClient
*master
; /* client that is master for this slave */
388 unsigned int maxclients
;
389 unsigned long long maxmemory
;
390 unsigned int blpop_blocked_clients
;
391 unsigned int vm_blocked_clients
;
392 /* Sort parameters - qsort_r() is only available under BSD so we
393 * have to take this state global, in order to pass it to sortCompare() */
397 /* Virtual memory configuration */
402 unsigned long long vm_max_memory
;
404 size_t hash_max_zipmap_entries
;
405 size_t hash_max_zipmap_value
;
406 /* Virtual memory state */
409 off_t vm_next_page
; /* Next probably empty page */
410 off_t vm_near_pages
; /* Number of pages allocated sequentially */
411 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
412 time_t unixtime
; /* Unix time sampled every second. */
413 /* Virtual memory I/O threads stuff */
414 /* An I/O thread processes an element taken from the io_newjobs queue and
415 * puts the result of the operation in the io_processed list. While the
416 * job is being processed, it's put on the io_processing queue. */
417 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
418 list
*io_processing
; /* List of VM I/O jobs being processed */
419 list
*io_processed
; /* List of VM I/O jobs already processed */
420 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
421 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
422 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
423 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
424 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
425 int io_active_threads
; /* Number of running I/O threads */
426 int vm_max_threads
; /* Max number of I/O threads running at the same time */
427 /* Our main thread is blocked on the event loop, looking for sockets ready
428 * to be read or written, so when a threaded I/O operation is ready to be
429 * processed by the main thread, the I/O thread will use a unix pipe to
430 * awake the main thread. The following are the two pipe FDs. */
431 int io_ready_pipe_read
;
432 int io_ready_pipe_write
;
433 /* Virtual memory stats */
434 unsigned long long vm_stats_used_pages
;
435 unsigned long long vm_stats_swapped_objects
;
436 unsigned long long vm_stats_swapouts
;
437 unsigned long long vm_stats_swapins
;
441 typedef void redisCommandProc(redisClient
*c
);
442 struct redisCommand
{
444 redisCommandProc
*proc
;
447 /* Use a function to determine which keys need to be loaded
448 * in the background prior to executing this command. Takes precedence
449 * over vm_firstkey and others, ignored when NULL */
450 redisCommandProc
*vm_preload_proc
;
451 /* What keys should be loaded in background when calling this command? */
452 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
453 int vm_lastkey
; /* The last argument that's a key */
454 int vm_keystep
; /* The step between first and last key */
457 struct redisFunctionSym
{
459 unsigned long pointer
;
462 typedef struct _redisSortObject
{
470 typedef struct _redisSortOperation
{
473 } redisSortOperation
;
475 /* ZSETs use a specialized version of Skiplists */
477 typedef struct zskiplistNode
{
478 struct zskiplistNode
**forward
;
479 struct zskiplistNode
*backward
;
485 typedef struct zskiplist
{
486 struct zskiplistNode
*header
, *tail
;
487 unsigned long length
;
491 typedef struct zset
{
496 /* Our shared "common" objects */
498 struct sharedObjectsStruct
{
499 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
500 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
501 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
502 *outofrangeerr
, *plus
,
503 *select0
, *select1
, *select2
, *select3
, *select4
,
504 *select5
, *select6
, *select7
, *select8
, *select9
;
507 /* Global vars that are actually used as constants. The following double
508 * values are used for double on-disk serialization, and are initialized
509 * at runtime to avoid strange compiler optimizations. */
511 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
513 /* VM threaded I/O request message */
514 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
515 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
516 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
517 typedef struct iojob
{
518 int type
; /* Request type, REDIS_IOJOB_* */
519 redisDb
*db
;/* Redis database */
520 robj
*key
; /* This I/O request is about swapping this key */
521 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
522 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
523 off_t page
; /* Swap page where to read/write the object */
524 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
525 int canceled
; /* True if this command was canceled by blocking side of VM */
526 pthread_t thread
; /* ID of the thread processing this entry */
529 /*================================ Prototypes =============================== */
531 static void freeStringObject(robj
*o
);
532 static void freeListObject(robj
*o
);
533 static void freeSetObject(robj
*o
);
534 static void decrRefCount(void *o
);
535 static robj
*createObject(int type
, void *ptr
);
536 static void freeClient(redisClient
*c
);
537 static int rdbLoad(char *filename
);
538 static void addReply(redisClient
*c
, robj
*obj
);
539 static void addReplySds(redisClient
*c
, sds s
);
540 static void incrRefCount(robj
*o
);
541 static int rdbSaveBackground(char *filename
);
542 static robj
*createStringObject(char *ptr
, size_t len
);
543 static robj
*dupStringObject(robj
*o
);
544 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
545 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
546 static int syncWithMaster(void);
547 static robj
*tryObjectSharing(robj
*o
);
548 static int tryObjectEncoding(robj
*o
);
549 static robj
*getDecodedObject(robj
*o
);
550 static int removeExpire(redisDb
*db
, robj
*key
);
551 static int expireIfNeeded(redisDb
*db
, robj
*key
);
552 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
553 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
554 static int deleteKey(redisDb
*db
, robj
*key
);
555 static time_t getExpire(redisDb
*db
, robj
*key
);
556 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
557 static void updateSlavesWaitingBgsave(int bgsaveerr
);
558 static void freeMemoryIfNeeded(void);
559 static int processCommand(redisClient
*c
);
560 static void setupSigSegvAction(void);
561 static void rdbRemoveTempFile(pid_t childpid
);
562 static void aofRemoveTempFile(pid_t childpid
);
563 static size_t stringObjectLen(robj
*o
);
564 static void processInputBuffer(redisClient
*c
);
565 static zskiplist
*zslCreate(void);
566 static void zslFree(zskiplist
*zsl
);
567 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
568 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
569 static void initClientMultiState(redisClient
*c
);
570 static void freeClientMultiState(redisClient
*c
);
571 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
572 static void unblockClientWaitingData(redisClient
*c
);
573 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
574 static void vmInit(void);
575 static void vmMarkPagesFree(off_t page
, off_t count
);
576 static robj
*vmLoadObject(robj
*key
);
577 static robj
*vmPreviewObject(robj
*key
);
578 static int vmSwapOneObjectBlocking(void);
579 static int vmSwapOneObjectThreaded(void);
580 static int vmCanSwapOut(void);
581 static int tryFreeOneObjectFromFreelist(void);
582 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
583 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
584 static void vmCancelThreadedIOJob(robj
*o
);
585 static void lockThreadedIO(void);
586 static void unlockThreadedIO(void);
587 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
588 static void freeIOJob(iojob
*j
);
589 static void queueIOJob(iojob
*j
);
590 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
591 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
592 static void waitEmptyIOJobsQueue(void);
593 static void vmReopenSwapFile(void);
594 static int vmFreePage(off_t page
);
595 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
596 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
597 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
598 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
599 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static struct redisCommand
*lookupCommand(char *name
);
601 static void call(redisClient
*c
, struct redisCommand
*cmd
);
602 static void resetClient(redisClient
*c
);
603 static void convertToRealHash(robj
*o
);
605 static void authCommand(redisClient
*c
);
606 static void pingCommand(redisClient
*c
);
607 static void echoCommand(redisClient
*c
);
608 static void setCommand(redisClient
*c
);
609 static void setnxCommand(redisClient
*c
);
610 static void getCommand(redisClient
*c
);
611 static void delCommand(redisClient
*c
);
612 static void existsCommand(redisClient
*c
);
613 static void incrCommand(redisClient
*c
);
614 static void decrCommand(redisClient
*c
);
615 static void incrbyCommand(redisClient
*c
);
616 static void decrbyCommand(redisClient
*c
);
617 static void selectCommand(redisClient
*c
);
618 static void randomkeyCommand(redisClient
*c
);
619 static void keysCommand(redisClient
*c
);
620 static void dbsizeCommand(redisClient
*c
);
621 static void lastsaveCommand(redisClient
*c
);
622 static void saveCommand(redisClient
*c
);
623 static void bgsaveCommand(redisClient
*c
);
624 static void bgrewriteaofCommand(redisClient
*c
);
625 static void shutdownCommand(redisClient
*c
);
626 static void moveCommand(redisClient
*c
);
627 static void renameCommand(redisClient
*c
);
628 static void renamenxCommand(redisClient
*c
);
629 static void lpushCommand(redisClient
*c
);
630 static void rpushCommand(redisClient
*c
);
631 static void lpopCommand(redisClient
*c
);
632 static void rpopCommand(redisClient
*c
);
633 static void llenCommand(redisClient
*c
);
634 static void lindexCommand(redisClient
*c
);
635 static void lrangeCommand(redisClient
*c
);
636 static void ltrimCommand(redisClient
*c
);
637 static void typeCommand(redisClient
*c
);
638 static void lsetCommand(redisClient
*c
);
639 static void saddCommand(redisClient
*c
);
640 static void sremCommand(redisClient
*c
);
641 static void smoveCommand(redisClient
*c
);
642 static void sismemberCommand(redisClient
*c
);
643 static void scardCommand(redisClient
*c
);
644 static void spopCommand(redisClient
*c
);
645 static void srandmemberCommand(redisClient
*c
);
646 static void sinterCommand(redisClient
*c
);
647 static void sinterstoreCommand(redisClient
*c
);
648 static void sunionCommand(redisClient
*c
);
649 static void sunionstoreCommand(redisClient
*c
);
650 static void sdiffCommand(redisClient
*c
);
651 static void sdiffstoreCommand(redisClient
*c
);
652 static void syncCommand(redisClient
*c
);
653 static void flushdbCommand(redisClient
*c
);
654 static void flushallCommand(redisClient
*c
);
655 static void sortCommand(redisClient
*c
);
656 static void lremCommand(redisClient
*c
);
657 static void rpoplpushcommand(redisClient
*c
);
658 static void infoCommand(redisClient
*c
);
659 static void mgetCommand(redisClient
*c
);
660 static void monitorCommand(redisClient
*c
);
661 static void expireCommand(redisClient
*c
);
662 static void expireatCommand(redisClient
*c
);
663 static void getsetCommand(redisClient
*c
);
664 static void ttlCommand(redisClient
*c
);
665 static void slaveofCommand(redisClient
*c
);
666 static void debugCommand(redisClient
*c
);
667 static void msetCommand(redisClient
*c
);
668 static void msetnxCommand(redisClient
*c
);
669 static void zaddCommand(redisClient
*c
);
670 static void zincrbyCommand(redisClient
*c
);
671 static void zrangeCommand(redisClient
*c
);
672 static void zrangebyscoreCommand(redisClient
*c
);
673 static void zcountCommand(redisClient
*c
);
674 static void zrevrangeCommand(redisClient
*c
);
675 static void zcardCommand(redisClient
*c
);
676 static void zremCommand(redisClient
*c
);
677 static void zscoreCommand(redisClient
*c
);
678 static void zremrangebyscoreCommand(redisClient
*c
);
679 static void multiCommand(redisClient
*c
);
680 static void execCommand(redisClient
*c
);
681 static void discardCommand(redisClient
*c
);
682 static void blpopCommand(redisClient
*c
);
683 static void brpopCommand(redisClient
*c
);
684 static void appendCommand(redisClient
*c
);
685 static void substrCommand(redisClient
*c
);
686 static void zrankCommand(redisClient
*c
);
687 static void zrevrankCommand(redisClient
*c
);
688 static void hsetCommand(redisClient
*c
);
689 static void hgetCommand(redisClient
*c
);
690 static void hdelCommand(redisClient
*c
);
691 static void hlenCommand(redisClient
*c
);
692 static void zremrangebyrankCommand(redisClient
*c
);
693 static void zunionCommand(redisClient
*c
);
694 static void zinterCommand(redisClient
*c
);
695 static void hkeysCommand(redisClient
*c
);
696 static void hvalsCommand(redisClient
*c
);
697 static void hgetallCommand(redisClient
*c
);
698 static void hexistsCommand(redisClient
*c
);
699 static void configCommand(redisClient
*c
);
701 /*================================= Globals ================================= */
704 static struct redisServer server
; /* server global state */
705 static struct redisCommand cmdTable
[] = {
706 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
707 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
708 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
709 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
710 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
711 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
712 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
713 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
714 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
715 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
716 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
717 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
718 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
719 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
720 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
721 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
723 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
725 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
726 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
727 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
728 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
729 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
730 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
731 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
732 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
733 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
737 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
738 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
739 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
740 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
741 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
742 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
745 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
746 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
749 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
750 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
758 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
759 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
760 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
761 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
766 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
768 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
770 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
771 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
772 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
773 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
777 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
778 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
779 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
780 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
781 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
782 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
783 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
785 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
786 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
787 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
788 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
791 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
795 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
802 {NULL
,NULL
,0,0,NULL
,0,0,0}
807 /*============================ Utility functions ============================ */
809 /* Glob-style pattern matching. */
810 static int stringmatchlen(const char *pattern
, int patternLen
,
811 const char *string
, int stringLen
, int nocase
)
816 while (pattern
[1] == '*') {
821 return 1; /* match */
823 if (stringmatchlen(pattern
+1, patternLen
-1,
824 string
, stringLen
, nocase
))
825 return 1; /* match */
829 return 0; /* no match */
833 return 0; /* no match */
843 not = pattern
[0] == '^';
850 if (pattern
[0] == '\\') {
853 if (pattern
[0] == string
[0])
855 } else if (pattern
[0] == ']') {
857 } else if (patternLen
== 0) {
861 } else if (pattern
[1] == '-' && patternLen
>= 3) {
862 int start
= pattern
[0];
863 int end
= pattern
[2];
871 start
= tolower(start
);
877 if (c
>= start
&& c
<= end
)
881 if (pattern
[0] == string
[0])
884 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
894 return 0; /* no match */
900 if (patternLen
>= 2) {
907 if (pattern
[0] != string
[0])
908 return 0; /* no match */
910 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
911 return 0; /* no match */
919 if (stringLen
== 0) {
920 while(*pattern
== '*') {
927 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match of two NUL-terminated strings.
 *
 * Measures 'pattern' and 'string' with strlen() and delegates the actual
 * matching to stringmatchlen(), forwarding 'nocase' unchanged.
 * Returns whatever stringmatchlen() returns. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    /* stringmatchlen() takes int lengths: make the narrowing explicit. */
    return stringmatchlen(pattern,(int)strlen(pattern),
                          string,(int)strlen(string),nocase);
}
936 static void redisLog(int level
, const char *fmt
, ...) {
940 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
944 if (level
>= server
.verbosity
) {
950 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
951 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
952 vfprintf(fp
, fmt
, ap
);
958 if (server
.logfile
) fclose(fp
);
961 /*====================== Hash table type implementation ==================== */
963 /* This is a hash table type that uses the SDS dynamic strings library as
964 * keys and redis objects as values (objects can hold SDS strings,
967 static void dictVanillaFree(void *privdata
, void *val
)
969 DICT_NOTUSED(privdata
);
973 static void dictListDestructor(void *privdata
, void *val
)
975 DICT_NOTUSED(privdata
);
976 listRelease((list
*)val
);
979 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
983 DICT_NOTUSED(privdata
);
985 l1
= sdslen((sds
)key1
);
986 l2
= sdslen((sds
)key2
);
987 if (l1
!= l2
) return 0;
988 return memcmp(key1
, key2
, l1
) == 0;
991 static void dictRedisObjectDestructor(void *privdata
, void *val
)
993 DICT_NOTUSED(privdata
);
995 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
999 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1002 const robj
*o1
= key1
, *o2
= key2
;
1003 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1006 static unsigned int dictObjHash(const void *key
) {
1007 const robj
*o
= key
;
1008 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1011 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1014 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1017 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1018 o2
->encoding
== REDIS_ENCODING_INT
&&
1019 o1
->ptr
== o2
->ptr
) return 1;
1021 o1
= getDecodedObject(o1
);
1022 o2
= getDecodedObject(o2
);
1023 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1029 static unsigned int dictEncObjHash(const void *key
) {
1030 robj
*o
= (robj
*) key
;
1032 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1033 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1035 if (o
->encoding
== REDIS_ENCODING_INT
) {
1039 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1040 return dictGenHashFunction((unsigned char*)buf
, len
);
1044 o
= getDecodedObject(o
);
1045 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1052 /* Sets type and expires */
1053 static dictType setDictType
= {
1054 dictEncObjHash
, /* hash function */
1057 dictEncObjKeyCompare
, /* key compare */
1058 dictRedisObjectDestructor
, /* key destructor */
1059 NULL
/* val destructor */
1062 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1063 static dictType zsetDictType
= {
1064 dictEncObjHash
, /* hash function */
1067 dictEncObjKeyCompare
, /* key compare */
1068 dictRedisObjectDestructor
, /* key destructor */
1069 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1073 static dictType dbDictType
= {
1074 dictObjHash
, /* hash function */
1077 dictObjKeyCompare
, /* key compare */
1078 dictRedisObjectDestructor
, /* key destructor */
1079 dictRedisObjectDestructor
/* val destructor */
1083 static dictType keyptrDictType
= {
1084 dictObjHash
, /* hash function */
1087 dictObjKeyCompare
, /* key compare */
1088 dictRedisObjectDestructor
, /* key destructor */
1089 NULL
/* val destructor */
1092 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1093 static dictType hashDictType
= {
1094 dictEncObjHash
, /* hash function */
1097 dictEncObjKeyCompare
, /* key compare */
1098 dictRedisObjectDestructor
, /* key destructor */
1099 dictRedisObjectDestructor
/* val destructor */
1102 /* Keylist hash table type has unencoded redis objects as keys and
1103 * lists as values. It's used for blocking operations (BLPOP) and to
1104 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1105 static dictType keylistDictType
= {
1106 dictObjHash
, /* hash function */
1109 dictObjKeyCompare
, /* key compare */
1110 dictRedisObjectDestructor
, /* key destructor */
1111 dictListDestructor
/* val destructor */
1114 static void version();
1116 /* ========================= Random utility functions ======================= */
1118 /* Redis generally does not try to recover from out of memory conditions
1119 * when allocating objects or strings, it is not clear if it will be possible
1120 * to report this condition to the client since the networking layer itself
1121 * is based on heap allocation for send buffers, so we simply abort.
1122 * At least the code will be simpler to read... */
1123 static void oom(const char *msg
) {
1124 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1129 /* ====================== Redis server networking stuff ===================== */
1130 static void closeTimedoutClients(void) {
1133 time_t now
= time(NULL
);
1136 listRewind(server
.clients
,&li
);
1137 while ((ln
= listNext(&li
)) != NULL
) {
1138 c
= listNodeValue(ln
);
1139 if (server
.maxidletime
&&
1140 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1141 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1142 (now
- c
->lastinteraction
> server
.maxidletime
))
1144 redisLog(REDIS_VERBOSE
,"Closing idle client");
1146 } else if (c
->flags
& REDIS_BLOCKED
) {
1147 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1148 addReply(c
,shared
.nullmultibulk
);
1149 unblockClientWaitingData(c
);
1155 static int htNeedsResize(dict
*dict
) {
1156 long long size
, used
;
1158 size
= dictSlots(dict
);
1159 used
= dictSize(dict
);
1160 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1161 (used
*100/size
< REDIS_HT_MINFILL
));
1164 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1165 * we resize the hash table to save memory */
1166 static void tryResizeHashTables(void) {
1169 for (j
= 0; j
< server
.dbnum
; j
++) {
1170 if (htNeedsResize(server
.db
[j
].dict
)) {
1171 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1172 dictResize(server
.db
[j
].dict
);
1173 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1175 if (htNeedsResize(server
.db
[j
].expires
))
1176 dictResize(server
.db
[j
].expires
);
1180 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1181 void backgroundSaveDoneHandler(int statloc
) {
1182 int exitcode
= WEXITSTATUS(statloc
);
1183 int bysignal
= WIFSIGNALED(statloc
);
1185 if (!bysignal
&& exitcode
== 0) {
1186 redisLog(REDIS_NOTICE
,
1187 "Background saving terminated with success");
1189 server
.lastsave
= time(NULL
);
1190 } else if (!bysignal
&& exitcode
!= 0) {
1191 redisLog(REDIS_WARNING
, "Background saving error");
1193 redisLog(REDIS_WARNING
,
1194 "Background saving terminated by signal");
1195 rdbRemoveTempFile(server
.bgsavechildpid
);
1197 server
.bgsavechildpid
= -1;
1198 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1199 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1200 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1203 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1205 void backgroundRewriteDoneHandler(int statloc
) {
1206 int exitcode
= WEXITSTATUS(statloc
);
1207 int bysignal
= WIFSIGNALED(statloc
);
1209 if (!bysignal
&& exitcode
== 0) {
1213 redisLog(REDIS_NOTICE
,
1214 "Background append only file rewriting terminated with success");
1215 /* Now it's time to flush the differences accumulated by the parent */
1216 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1217 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1219 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1222 /* Flush our data... */
1223 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1224 (signed) sdslen(server
.bgrewritebuf
)) {
1225 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1229 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1230 /* Now our work is to rename the temp file into the stable file. And
1231 * switch the file descriptor used by the server for append only. */
1232 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1233 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1237 /* Mission completed... almost */
1238 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1239 if (server
.appendfd
!= -1) {
1240 /* If append only is actually enabled... */
1241 close(server
.appendfd
);
1242 server
.appendfd
= fd
;
1244 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1245 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1247 /* If append only is disabled we just generate a dump in this
1248 * format. Why not? */
1251 } else if (!bysignal
&& exitcode
!= 0) {
1252 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1254 redisLog(REDIS_WARNING
,
1255 "Background append only file rewriting terminated by signal");
1258 sdsfree(server
.bgrewritebuf
);
1259 server
.bgrewritebuf
= sdsempty();
1260 aofRemoveTempFile(server
.bgrewritechildpid
);
1261 server
.bgrewritechildpid
= -1;
1264 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1265 int j
, loops
= server
.cronloops
++;
1266 REDIS_NOTUSED(eventLoop
);
1268 REDIS_NOTUSED(clientData
);
1270 /* We take a cached value of the unix time in the global state because
1271 * with virtual memory and aging we need to store the current time
1272 * in objects at every object access, and accuracy is not needed.
1273 * Accessing a global var is faster than calling time(NULL) */
1274 server
.unixtime
= time(NULL
);
1276 /* Show some info about non-empty databases */
1277 for (j
= 0; j
< server
.dbnum
; j
++) {
1278 long long size
, used
, vkeys
;
1280 size
= dictSlots(server
.db
[j
].dict
);
1281 used
= dictSize(server
.db
[j
].dict
);
1282 vkeys
= dictSize(server
.db
[j
].expires
);
1283 if (!(loops
% 50) && (used
|| vkeys
)) {
1284 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1285 /* dictPrintStats(server.dict); */
1289 /* We don't want to resize the hash tables while a background saving
1290 * is in progress: the saving child is created using fork() that is
1291 * implemented with copy-on-write semantics in most modern systems, so
1292 * if we resize the HT while the saving child is at work,
1293 * a lot of memory movements in the parent will cause a lot of pages
1295 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1297 /* Show information about connected clients */
1298 if (!(loops
% 50)) {
1299 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1300 listLength(server
.clients
)-listLength(server
.slaves
),
1301 listLength(server
.slaves
),
1302 zmalloc_used_memory(),
1303 dictSize(server
.sharingpool
));
1306 /* Close connections of timedout clients */
1307 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1308 closeTimedoutClients();
1310 /* Check if a background saving or AOF rewrite in progress terminated */
1311 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1315 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1316 if (pid
== server
.bgsavechildpid
) {
1317 backgroundSaveDoneHandler(statloc
);
1319 backgroundRewriteDoneHandler(statloc
);
1323 /* If there is not a background saving in progress check if
1324 * we have to save now */
1325 time_t now
= time(NULL
);
1326 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1327 struct saveparam
*sp
= server
.saveparams
+j
;
1329 if (server
.dirty
>= sp
->changes
&&
1330 now
-server
.lastsave
> sp
->seconds
) {
1331 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1332 sp
->changes
, sp
->seconds
);
1333 rdbSaveBackground(server
.dbfilename
);
1339 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1340 * will use few CPU cycles if there are few expiring keys, otherwise
1341 * it will get more aggressive to avoid that too much memory is used by
1342 * keys that can be removed from the keyspace. */
1343 for (j
= 0; j
< server
.dbnum
; j
++) {
1345 redisDb
*db
= server
.db
+j
;
1347 /* Continue to expire if at the end of the cycle more than 25%
1348 * of the keys were expired. */
1350 long num
= dictSize(db
->expires
);
1351 time_t now
= time(NULL
);
1354 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1355 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1360 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1361 t
= (time_t) dictGetEntryVal(de
);
1363 deleteKey(db
,dictGetEntryKey(de
));
1365 server
.stat_expiredkeys
++;
1368 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1371 /* Swap a few keys on disk if we are over the memory limit and VM
1372 * is enabled. Try to free objects from the free list first. */
1373 if (vmCanSwapOut()) {
1374 while (server
.vm_enabled
&& zmalloc_used_memory() >
1375 server
.vm_max_memory
)
1379 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1380 retval
= (server
.vm_max_threads
== 0) ?
1381 vmSwapOneObjectBlocking() :
1382 vmSwapOneObjectThreaded();
1383 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1384 zmalloc_used_memory() >
1385 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1387 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1389 /* Note that when using threaded I/O we free just one object,
1390 * because anyway when the I/O thread in charge of swapping this
1391 * object out finishes, the handler of completed jobs
1392 * will try to swap more objects if we are still out of memory. */
1393 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1397 /* Check if we should connect to a MASTER */
1398 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1399 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1400 if (syncWithMaster() == REDIS_OK
) {
1401 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1407 /* This function gets called every time Redis is entering the
1408 * main loop of the event driven library, that is, before sleeping
1409 * for ready file descriptors. */
1410 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1411 REDIS_NOTUSED(eventLoop
);
1413 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1417 listRewind(server
.io_ready_clients
,&li
);
1418 while((ln
= listNext(&li
))) {
1419 redisClient
*c
= ln
->value
;
1420 struct redisCommand
*cmd
;
1422 /* Resume the client. */
1423 listDelNode(server
.io_ready_clients
,ln
);
1424 c
->flags
&= (~REDIS_IO_WAIT
);
1425 server
.vm_blocked_clients
--;
1426 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1427 readQueryFromClient
, c
);
1428 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1429 assert(cmd
!= NULL
);
1432 /* There may be more data to process in the input buffer. */
1433 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1434 processInputBuffer(c
);
1439 static void createSharedObjects(void) {
1440 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1441 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1442 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1443 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1444 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1445 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1446 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1447 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1448 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1449 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1450 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1451 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1452 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1453 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1454 "-ERR no such key\r\n"));
1455 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1456 "-ERR syntax error\r\n"));
1457 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1458 "-ERR source and destination objects are the same\r\n"));
1459 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1460 "-ERR index out of range\r\n"));
1461 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1462 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1463 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1464 shared
.select0
= createStringObject("select 0\r\n",10);
1465 shared
.select1
= createStringObject("select 1\r\n",10);
1466 shared
.select2
= createStringObject("select 2\r\n",10);
1467 shared
.select3
= createStringObject("select 3\r\n",10);
1468 shared
.select4
= createStringObject("select 4\r\n",10);
1469 shared
.select5
= createStringObject("select 5\r\n",10);
1470 shared
.select6
= createStringObject("select 6\r\n",10);
1471 shared
.select7
= createStringObject("select 7\r\n",10);
1472 shared
.select8
= createStringObject("select 8\r\n",10);
1473 shared
.select9
= createStringObject("select 9\r\n",10);
1476 static void appendServerSaveParams(time_t seconds
, int changes
) {
1477 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1478 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1479 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1480 server
.saveparamslen
++;
1483 static void resetServerSaveParams() {
1484 zfree(server
.saveparams
);
1485 server
.saveparams
= NULL
;
1486 server
.saveparamslen
= 0;
1489 static void initServerConfig() {
1490 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1491 server
.port
= REDIS_SERVERPORT
;
1492 server
.verbosity
= REDIS_VERBOSE
;
1493 server
.maxidletime
= REDIS_MAXIDLETIME
;
1494 server
.saveparams
= NULL
;
1495 server
.logfile
= NULL
; /* NULL = log on standard output */
1496 server
.bindaddr
= NULL
;
1497 server
.glueoutputbuf
= 1;
1498 server
.daemonize
= 0;
1499 server
.appendonly
= 0;
1500 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1501 server
.lastfsync
= time(NULL
);
1502 server
.appendfd
= -1;
1503 server
.appendseldb
= -1; /* Make sure the first time will not match */
1504 server
.pidfile
= zstrdup("/var/run/redis.pid");
1505 server
.dbfilename
= zstrdup("dump.rdb");
1506 server
.appendfilename
= zstrdup("appendonly.aof");
1507 server
.requirepass
= NULL
;
1508 server
.shareobjects
= 0;
1509 server
.rdbcompression
= 1;
1510 server
.sharingpoolsize
= 1024;
1511 server
.maxclients
= 0;
1512 server
.blpop_blocked_clients
= 0;
1513 server
.maxmemory
= 0;
1514 server
.vm_enabled
= 0;
1515 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1516 server
.vm_page_size
= 256; /* 256 bytes per page */
1517 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1518 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1519 server
.vm_max_threads
= 4;
1520 server
.vm_blocked_clients
= 0;
1521 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1522 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1524 resetServerSaveParams();
1526 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1527 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1528 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1529 /* Replication related */
1531 server
.masterauth
= NULL
;
1532 server
.masterhost
= NULL
;
1533 server
.masterport
= 6379;
1534 server
.master
= NULL
;
1535 server
.replstate
= REDIS_REPL_NONE
;
1537 /* Double constants initialization */
1539 R_PosInf
= 1.0/R_Zero
;
1540 R_NegInf
= -1.0/R_Zero
;
1541 R_Nan
= R_Zero
/R_Zero
;
1544 static void initServer() {
1547 signal(SIGHUP
, SIG_IGN
);
1548 signal(SIGPIPE
, SIG_IGN
);
1549 setupSigSegvAction();
1551 server
.devnull
= fopen("/dev/null","w");
1552 if (server
.devnull
== NULL
) {
1553 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1556 server
.clients
= listCreate();
1557 server
.slaves
= listCreate();
1558 server
.monitors
= listCreate();
1559 server
.objfreelist
= listCreate();
1560 createSharedObjects();
1561 server
.el
= aeCreateEventLoop();
1562 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1563 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1564 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1565 if (server
.fd
== -1) {
1566 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1569 for (j
= 0; j
< server
.dbnum
; j
++) {
1570 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1571 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1572 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1573 if (server
.vm_enabled
)
1574 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1575 server
.db
[j
].id
= j
;
1577 server
.cronloops
= 0;
1578 server
.bgsavechildpid
= -1;
1579 server
.bgrewritechildpid
= -1;
1580 server
.bgrewritebuf
= sdsempty();
1581 server
.lastsave
= time(NULL
);
1583 server
.stat_numcommands
= 0;
1584 server
.stat_numconnections
= 0;
1585 server
.stat_expiredkeys
= 0;
1586 server
.stat_starttime
= time(NULL
);
1587 server
.unixtime
= time(NULL
);
1588 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1589 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1590 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1592 if (server
.appendonly
) {
1593 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1594 if (server
.appendfd
== -1) {
1595 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1601 if (server
.vm_enabled
) vmInit();
1604 /* Empty the whole database */
1605 static long long emptyDb() {
1607 long long removed
= 0;
1609 for (j
= 0; j
< server
.dbnum
; j
++) {
1610 removed
+= dictSize(server
.db
[j
].dict
);
1611 dictEmpty(server
.db
[j
].dict
);
1612 dictEmpty(server
.db
[j
].expires
);
/* Converts a "yes"/"no" string to an integer.
 *
 * The comparison is case insensitive. Returns 1 for "yes", 0 for "no" and
 * -1 for any other string, so callers can detect an invalid value. */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
1623 /* I agree, this is a very rudimentary way to load a configuration...
1624 will improve later if the config gets more complex */
1625 static void loadServerConfig(char *filename
) {
1627 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1630 char *errormsg
= "Fatal error, can't open config file '%s'";
1631 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1632 sprintf(errorbuf
, errormsg
, filename
);
1634 if (filename
[0] == '-' && filename
[1] == '\0')
1637 if ((fp
= fopen(filename
,"r")) == NULL
) {
1638 redisLog(REDIS_WARNING
, errorbuf
);
1643 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1649 line
= sdstrim(line
," \t\r\n");
1651 /* Skip comments and blank lines*/
1652 if (line
[0] == '#' || line
[0] == '\0') {
1657 /* Split into arguments */
1658 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1659 sdstolower(argv
[0]);
1661 /* Execute config directives */
1662 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1663 server
.maxidletime
= atoi(argv
[1]);
1664 if (server
.maxidletime
< 0) {
1665 err
= "Invalid timeout value"; goto loaderr
;
1667 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1668 server
.port
= atoi(argv
[1]);
1669 if (server
.port
< 1 || server
.port
> 65535) {
1670 err
= "Invalid port"; goto loaderr
;
1672 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1673 server
.bindaddr
= zstrdup(argv
[1]);
1674 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1675 int seconds
= atoi(argv
[1]);
1676 int changes
= atoi(argv
[2]);
1677 if (seconds
< 1 || changes
< 0) {
1678 err
= "Invalid save parameters"; goto loaderr
;
1680 appendServerSaveParams(seconds
,changes
);
1681 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1682 if (chdir(argv
[1]) == -1) {
1683 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1684 argv
[1], strerror(errno
));
1687 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1688 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1689 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1690 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1691 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1693 err
= "Invalid log level. Must be one of debug, notice, warning";
1696 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1699 server
.logfile
= zstrdup(argv
[1]);
1700 if (!strcasecmp(server
.logfile
,"stdout")) {
1701 zfree(server
.logfile
);
1702 server
.logfile
= NULL
;
1704 if (server
.logfile
) {
1705 /* Test if we are able to open the file. The server will not
1706 * be able to abort just for this problem later... */
1707 logfp
= fopen(server
.logfile
,"a");
1708 if (logfp
== NULL
) {
1709 err
= sdscatprintf(sdsempty(),
1710 "Can't open the log file: %s", strerror(errno
));
1715 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1716 server
.dbnum
= atoi(argv
[1]);
1717 if (server
.dbnum
< 1) {
1718 err
= "Invalid number of databases"; goto loaderr
;
1720 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1721 loadServerConfig(argv
[1]);
1722 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1723 server
.maxclients
= atoi(argv
[1]);
1724 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1725 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1726 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1727 server
.masterhost
= sdsnew(argv
[1]);
1728 server
.masterport
= atoi(argv
[2]);
1729 server
.replstate
= REDIS_REPL_CONNECT
;
1730 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1731 server
.masterauth
= zstrdup(argv
[1]);
1732 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1733 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1734 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1736 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1737 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1738 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1740 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1741 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1742 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1744 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1745 server
.sharingpoolsize
= atoi(argv
[1]);
1746 if (server
.sharingpoolsize
< 1) {
1747 err
= "invalid object sharing pool size"; goto loaderr
;
1749 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1750 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1751 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1753 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1754 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1755 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1757 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1758 if (!strcasecmp(argv
[1],"no")) {
1759 server
.appendfsync
= APPENDFSYNC_NO
;
1760 } else if (!strcasecmp(argv
[1],"always")) {
1761 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1762 } else if (!strcasecmp(argv
[1],"everysec")) {
1763 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1765 err
= "argument must be 'no', 'always' or 'everysec'";
1768 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1769 server
.requirepass
= zstrdup(argv
[1]);
1770 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1771 zfree(server
.pidfile
);
1772 server
.pidfile
= zstrdup(argv
[1]);
1773 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1774 zfree(server
.dbfilename
);
1775 server
.dbfilename
= zstrdup(argv
[1]);
1776 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1777 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1778 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1780 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1781 zfree(server
.vm_swap_file
);
1782 server
.vm_swap_file
= zstrdup(argv
[1]);
1783 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1784 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1785 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1786 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1787 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1788 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1789 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1790 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1791 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1792 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1793 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1794 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1795 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1796 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1798 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1800 for (j
= 0; j
< argc
; j
++)
1805 if (fp
!= stdin
) fclose(fp
);
1809 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1810 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1811 fprintf(stderr
, ">>> '%s'\n", line
);
1812 fprintf(stderr
, "%s\n", err
);
1816 static void freeClientArgv(redisClient
*c
) {
1819 for (j
= 0; j
< c
->argc
; j
++)
1820 decrRefCount(c
->argv
[j
]);
1821 for (j
= 0; j
< c
->mbargc
; j
++)
1822 decrRefCount(c
->mbargv
[j
]);
/* Release the resources attached to client 'c'. The visible steps, in
 * order: free the query buffer, unblock the client if it is flagged
 * REDIS_BLOCKED, delete its readable/writable file events, release the
 * reply list, unlink it from server.clients (its presence is asserted),
 * drop it from the VM I/O bookkeeping (io_ready_clients / io_keys), unlink
 * it from the monitors or slaves list when it is flagged REDIS_SLAVE, reset
 * the replication state when it is flagged REDIS_MASTER, and free the
 * MULTI state. The ordering of the first steps matters, see the note
 * below. */
1827 static void freeClient(redisClient
*c
) {
1830 /* Note that if the client we are freeing is blocked into a blocking
1831 * call, we have to set querybuf to NULL *before* calling
1832 * unblockClientWaitingData() so that processInputBuffer() does not get
1833 * called. Also it is important to remove the file events after
1834 * this, because this call adds the READABLE event. */
1835 sdsfree(c
->querybuf
);
1837 if (c
->flags
& REDIS_BLOCKED
)
1838 unblockClientWaitingData(c
);
1840 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1841 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1842 listRelease(c
->reply
);
1845 /* Remove from the list of clients */
1846 ln
= listSearchKey(server
.clients
,c
);
1847 redisAssert(ln
!= NULL
);
1848 listDelNode(server
.clients
,ln
);
1849 /* Remove from the list of clients waiting for swapped keys */
1850 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1851 ln
= listSearchKey(server
.io_ready_clients
,c
);
1853 listDelNode(server
.io_ready_clients
,ln
);
1854 server
.vm_blocked_clients
--;
/* Stop waiting for every key still listed in io_keys (only when VM is
 * enabled); the loop ends when the list is empty. */
1857 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1858 ln
= listFirst(c
->io_keys
);
1859 dontWaitForSwappedKey(c
,ln
->value
);
1861 listRelease(c
->io_keys
);
1863 if (c
->flags
& REDIS_SLAVE
) {
1864 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
/* A client flagged REDIS_MONITOR is looked up in server.monitors, any
 * other REDIS_SLAVE client in server.slaves. */
1866 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1867 ln
= listSearchKey(l
,c
);
1868 redisAssert(ln
!= NULL
);
1871 if (c
->flags
& REDIS_MASTER
) {
1872 server
.master
= NULL
;
1873 server
.replstate
= REDIS_REPL_CONNECT
;
1877 freeClientMultiState(c
);
1881 #define GLUEREPLY_UP_TO (1024)
1882 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1884 char buf
[GLUEREPLY_UP_TO
];
1889 listRewind(c
->reply
,&li
);
1890 while((ln
= listNext(&li
))) {
1894 objlen
= sdslen(o
->ptr
);
1895 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1896 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1898 listDelNode(c
->reply
,ln
);
1900 if (copylen
== 0) return;
1904 /* Now the output buffer is empty, add the new single element */
1905 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1906 listAddNodeHead(c
->reply
,o
);
/* Writable-event handler: send the pending reply objects of the client
 * passed as 'privdata' to descriptor 'fd'.
 *
 * When output gluing is disabled, the reply list is longer than
 * REDIS_WRITEV_THRESHOLD and the client is not flagged REDIS_MASTER, the
 * work is handed to sendReplyToClientWritev(). Otherwise the head object
 * is written starting at offset c->sentlen and removed from the list once
 * c->sentlen reaches its length. For a REDIS_MASTER client nothing is
 * written, the bytes are only accounted as sent. The loop stops after more
 * than REDIS_MAX_WRITE_PER_EVENT bytes. When the list becomes empty the
 * writable event is deleted. */
1909 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1910 redisClient
*c
= privdata
;
1911 int nwritten
= 0, totwritten
= 0, objlen
;
1914 REDIS_NOTUSED(mask
);
1916 /* Use writev() if we have enough buffers to send */
1917 if (!server
.glueoutputbuf
&&
1918 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1919 !(c
->flags
& REDIS_MASTER
))
1921 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1925 while(listLength(c
->reply
)) {
1926 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1927 glueReplyBuffersIfNeeded(c
);
1929 o
= listNodeValue(listFirst(c
->reply
));
1930 objlen
= sdslen(o
->ptr
);
1933 listDelNode(c
->reply
,listFirst(c
->reply
));
1937 if (c
->flags
& REDIS_MASTER
) {
1938 /* Don't reply to a master */
1939 nwritten
= objlen
- c
->sentlen
;
1941 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1942 if (nwritten
<= 0) break;
1944 c
->sentlen
+= nwritten
;
1945 totwritten
+= nwritten
;
1946 /* If we fully sent the object on head go to the next one */
1947 if (c
->sentlen
== objlen
) {
1948 listDelNode(c
->reply
,listFirst(c
->reply
));
1951 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
1952 * bytes, in a single threaded server it's a good idea to serve
1953 * other clients as well, even if a very large request comes from
1954 * super fast link that is always able to accept data (in real world
1955 * scenario think about 'KEYS *' against the loopback interface) */
1956 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1958 if (nwritten
== -1) {
1959 if (errno
== EAGAIN
) {
1962 redisLog(REDIS_VERBOSE
,
1963 "Error writing to client: %s", strerror(errno
));
/* Refresh the last interaction time only if something was sent. */
1968 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1969 if (listLength(c
->reply
) == 0) {
1971 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* writev()-based variant of the reply sender for the client passed as
 * 'privdata'.
 *
 * Each round fills iov[] with the pending reply objects, starting at offset
 * c->sentlen inside the first one, up to REDIS_WRITEV_IOVEC_COUNT entries
 * and bounded by REDIS_MAX_WRITE_PER_EVENT. The collected blocks are sent
 * with a single writev() call. Fully written objects are then removed from
 * c->reply and a partially written head is tracked through c->sentlen.
 * When the list becomes empty the writable event is deleted. */
1975 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
1977 redisClient
*c
= privdata
;
1978 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
1980 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
1981 int offset
, ion
= 0;
1983 REDIS_NOTUSED(mask
);
1986 while (listLength(c
->reply
)) {
1987 offset
= c
->sentlen
;
1991 /* fill-in the iov[] array */
1992 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
1993 o
= listNodeValue(node
);
1994 objlen
= sdslen(o
->ptr
);
1996 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
1999 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2000 break; /* no more iovecs */
2002 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2003 iov
[ion
].iov_len
= objlen
- offset
;
2004 willwrite
+= objlen
- offset
;
2005 offset
= 0; /* just for the first item */
2012 /* write all collected blocks at once */
2013 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2014 if (errno
!= EAGAIN
) {
2015 redisLog(REDIS_VERBOSE
,
2016 "Error writing to client: %s", strerror(errno
));
2023 totwritten
+= nwritten
;
2024 offset
= c
->sentlen
;
2026 /* remove written robjs from c->reply */
2027 while (nwritten
&& listLength(c
->reply
)) {
2028 o
= listNodeValue(listFirst(c
->reply
));
2029 objlen
= sdslen(o
->ptr
);
/* The head object was written up to its end: drop it and account for
 * the bytes it consumed. */
2031 if(nwritten
>= objlen
- offset
) {
2032 listDelNode(c
->reply
, listFirst(c
->reply
));
2033 nwritten
-= objlen
- offset
;
2037 c
->sentlen
+= nwritten
;
2045 c
->lastinteraction
= time(NULL
);
2047 if (listLength(c
->reply
) == 0) {
2049 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2053 static struct redisCommand
*lookupCommand(char *name
) {
2055 while(cmdTable
[j
].name
!= NULL
) {
2056 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2062 /* resetClient prepares the client to process the next command */
2063 static void resetClient(redisClient
*c
) {
2069 /* Call() is the core of Redis execution of a command */
2070 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2073 dirty
= server
.dirty
;
2075 if (server
.appendonly
&& server
.dirty
-dirty
)
2076 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2077 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2078 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2079 if (listLength(server
.monitors
))
2080 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2081 server
.stat_numcommands
++;
2084 /* If this function gets called we already read a whole
2085 * command, arguments are in the client argv/argc fields.
2086 * processCommand() executes the command or prepares the
2087 * server for a bulk read from the client.
2089 * If 1 is returned the client is still alive and valid
2090 * and other operations can be performed by the caller. Otherwise
2091 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2092 static int processCommand(redisClient
*c
) {
2093 struct redisCommand
*cmd
;
2095 /* Free some memory if needed (maxmemory setting) */
2096 if (server
.maxmemory
) freeMemoryIfNeeded();
2098 /* Handle the multi bulk command type. This is an alternative protocol
2099 * supported by Redis in order to receive commands that are composed of
2100 * multiple binary-safe "bulk" arguments. The latency of processing is
2101 * a bit higher but this allows things like multi-sets, so if this
2102 * protocol is used only for MSET and similar commands this is a big win. */
2103 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
/* The text after '*' is the number of bulk arguments that follow. */
2104 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2105 if (c
->multibulk
<= 0) {
2109 decrRefCount(c
->argv
[c
->argc
-1]);
2113 } else if (c
->multibulk
) {
2114 if (c
->bulklen
== -1) {
/* Inside a multi bulk request every argument must be announced by a
 * "$<len>" line. */
2115 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2116 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2120 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2121 decrRefCount(c
->argv
[0]);
2122 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2124 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2129 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2133 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2134 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2138 if (c
->multibulk
== 0) {
2142 /* Here we need to swap the multi-bulk argc/argv with the
2143 * normal argc/argv of the client structure. */
2145 c
->argv
= c
->mbargv
;
2146 c
->mbargv
= auxargv
;
2149 c
->argc
= c
->mbargc
;
2150 c
->mbargc
= auxargc
;
2152 /* We need to set bulklen to something different than -1
2153 * in order for the code below to process the command without
2154 * trying to read the last argument of a bulk command as
2155 * a special argument. */
2157 /* continue below and process the command */
2164 /* -- end of multi bulk commands processing -- */
2166 /* The QUIT command is handled as a special case. Normal command
2167 * procs are unable to close the client connection safely */
2168 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2173 /* Now lookup the command and check ASAP about trivial error conditions
2174 * such as wrong arity, bad command name and so forth. */
2175 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2178 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2179 (char*)c
->argv
[0]->ptr
));
/* A positive arity must match argc exactly; a negative arity -N means
 * argc must be at least N. */
2182 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2183 (c
->argc
< -cmd
->arity
)) {
2185 sdscatprintf(sdsempty(),
2186 "-ERR wrong number of arguments for '%s' command\r\n",
2190 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2191 /* This is a bulk command, we have to read the last argument yet. */
2192 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2194 decrRefCount(c
->argv
[c
->argc
-1]);
2195 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2197 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2202 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2203 /* It is possible that the bulk read is already in the
2204 * buffer. Check this condition and handle it accordingly.
2205 * This is just a fast path, alternative to call processInputBuffer().
2206 * It's a good idea since the code is small and this condition
2207 * happens most of the times. */
2208 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2209 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2211 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2213 /* Otherwise return... there is to read the last argument
2214 * from the socket. */
2218 /* Let's try to share objects on the command arguments vector */
2219 if (server
.shareobjects
) {
2221 for(j
= 1; j
< c
->argc
; j
++)
2222 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2224 /* Let's try to encode the bulk object to save space. */
2225 if (cmd
->flags
& REDIS_CMD_BULK
)
2226 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2228 /* Check if the user is authenticated */
2229 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2230 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2235 /* Handle the maxmemory directive */
2236 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2237 zmalloc_used_memory() > server
.maxmemory
)
2239 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2244 /* Exec the command */
/* Inside a MULTI context every command except EXEC and DISCARD is queued
 * and acknowledged with shared.queued. */
2245 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2246 queueMultiCommand(c
,cmd
);
2247 addReply(c
,shared
.queued
);
2249 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2250 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2254 /* Prepare the client for the next command */
2259 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2264 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2265 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2266 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2267 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2270 if (argc
<= REDIS_STATIC_ARGS
) {
2273 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2276 lenobj
= createObject(REDIS_STRING
,
2277 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2278 lenobj
->refcount
= 0;
2279 outv
[outc
++] = lenobj
;
2280 for (j
= 0; j
< argc
; j
++) {
2281 lenobj
= createObject(REDIS_STRING
,
2282 sdscatprintf(sdsempty(),"$%lu\r\n",
2283 (unsigned long) stringObjectLen(argv
[j
])));
2284 lenobj
->refcount
= 0;
2285 outv
[outc
++] = lenobj
;
2286 outv
[outc
++] = argv
[j
];
2287 outv
[outc
++] = shared
.crlf
;
2290 /* Increment all the refcounts at start and decrement at end in order to
2291 * be sure to free objects if there is no slave in a replication state
2292 * able to be feed with commands */
2293 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2294 listRewind(slaves
,&li
);
2295 while((ln
= listNext(&li
))) {
2296 redisClient
*slave
= ln
->value
;
2298 /* Don't feed slaves that are still waiting for BGSAVE to start */
2299 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2301 /* Feed all the other slaves, MONITORs and so on */
2302 if (slave
->slaveseldb
!= dictid
) {
2306 case 0: selectcmd
= shared
.select0
; break;
2307 case 1: selectcmd
= shared
.select1
; break;
2308 case 2: selectcmd
= shared
.select2
; break;
2309 case 3: selectcmd
= shared
.select3
; break;
2310 case 4: selectcmd
= shared
.select4
; break;
2311 case 5: selectcmd
= shared
.select5
; break;
2312 case 6: selectcmd
= shared
.select6
; break;
2313 case 7: selectcmd
= shared
.select7
; break;
2314 case 8: selectcmd
= shared
.select8
; break;
2315 case 9: selectcmd
= shared
.select9
; break;
2317 selectcmd
= createObject(REDIS_STRING
,
2318 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2319 selectcmd
->refcount
= 0;
2322 addReply(slave
,selectcmd
);
2323 slave
->slaveseldb
= dictid
;
2325 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2327 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2328 if (outv
!= static_outv
) zfree(outv
);
2331 static void processInputBuffer(redisClient
*c
) {
2333 /* Before to process the input buffer, make sure the client is not
2334 * waitig for a blocking operation such as BLPOP. Note that the first
2335 * iteration the client is never blocked, otherwise the processInputBuffer
2336 * would not be called at all, but after the execution of the first commands
2337 * in the input buffer the client may be blocked, and the "goto again"
2338 * will try to reiterate. The following line will make it return asap. */
2339 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2340 if (c
->bulklen
== -1) {
2341 /* Read the first line of the query */
2342 char *p
= strchr(c
->querybuf
,'\n');
2349 query
= c
->querybuf
;
2350 c
->querybuf
= sdsempty();
2351 querylen
= 1+(p
-(query
));
2352 if (sdslen(query
) > querylen
) {
2353 /* leave data after the first line of the query in the buffer */
2354 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2356 *p
= '\0'; /* remove "\n" */
2357 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2358 sdsupdatelen(query
);
2360 /* Now we can split the query in arguments */
2361 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2364 if (c
->argv
) zfree(c
->argv
);
2365 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2367 for (j
= 0; j
< argc
; j
++) {
2368 if (sdslen(argv
[j
])) {
2369 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2377 /* Execute the command. If the client is still valid
2378 * after processCommand() return and there is something
2379 * on the query buffer try to process the next command. */
2380 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2382 /* Nothing to process, argc == 0. Just process the query
2383 * buffer if it's not empty or return to the caller */
2384 if (sdslen(c
->querybuf
)) goto again
;
2387 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2388 redisLog(REDIS_VERBOSE
, "Client protocol error");
2393 /* Bulk read handling. Note that if we are at this point
2394 the client already sent a command terminated with a newline,
2395 we are reading the bulk data that is actually the last
2396 argument of the command. */
2397 int qbl
= sdslen(c
->querybuf
);
2399 if (c
->bulklen
<= qbl
) {
2400 /* Copy everything but the final CRLF as final argument */
2401 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2403 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2404 /* Process the command. If the client is still valid after
2405 * the processing and there is more data in the buffer
2406 * try to parse it. */
2407 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2413 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2414 redisClient
*c
= (redisClient
*) privdata
;
2415 char buf
[REDIS_IOBUF_LEN
];
2418 REDIS_NOTUSED(mask
);
2420 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2422 if (errno
== EAGAIN
) {
2425 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2429 } else if (nread
== 0) {
2430 redisLog(REDIS_VERBOSE
, "Client closed connection");
2435 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2436 c
->lastinteraction
= time(NULL
);
2440 if (!(c
->flags
& REDIS_BLOCKED
))
2441 processInputBuffer(c
);
2444 static int selectDb(redisClient
*c
, int id
) {
2445 if (id
< 0 || id
>= server
.dbnum
)
2447 c
->db
= &server
.db
[id
];
2451 static void *dupClientReplyValue(void *o
) {
2452 incrRefCount((robj
*)o
);
/* Allocate a client structure for the connected socket 'fd'.
 *
 * The socket is put in non blocking mode with TCP_NODELAY set, the visible
 * fields are initialised (empty query buffer, reply and io_keys lists
 * whose values are released with decrRefCount), a readable event bound to
 * readQueryFromClient is registered, and the client is appended to
 * server.clients before its MULTI state is initialised. NULL is returned
 * when the allocation fails. */
2456 static redisClient
*createClient(int fd
) {
2457 redisClient
*c
= zmalloc(sizeof(*c
));
2459 anetNonBlock(NULL
,fd
);
2460 anetTcpNoDelay(NULL
,fd
);
2461 if (!c
) return NULL
;
2464 c
->querybuf
= sdsempty();
2473 c
->lastinteraction
= time(NULL
);
2474 c
->authenticated
= 0;
2475 c
->replstate
= REDIS_REPL_NONE
;
2476 c
->reply
= listCreate();
2477 listSetFreeMethod(c
->reply
,decrRefCount
);
2478 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2479 c
->blockingkeys
= NULL
;
2480 c
->blockingkeysnum
= 0;
2481 c
->io_keys
= listCreate();
2482 listSetFreeMethod(c
->io_keys
,decrRefCount
);
/* NOTE(review): the AE_ERR branch body is not visible here; it runs before
 * the client is added to server.clients below - confirm its cleanup does
 * not expect the client to be in that list. */
2483 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2484 readQueryFromClient
, c
) == AE_ERR
) {
2488 listAddNodeTail(server
.clients
,c
);
2489 initClientMultiState(c
);
2493 static void addReply(redisClient
*c
, robj
*obj
) {
2494 if (listLength(c
->reply
) == 0 &&
2495 (c
->replstate
== REDIS_REPL_NONE
||
2496 c
->replstate
== REDIS_REPL_ONLINE
) &&
2497 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2498 sendReplyToClient
, c
) == AE_ERR
) return;
2500 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2501 obj
= dupStringObject(obj
);
2502 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2504 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2507 static void addReplySds(redisClient
*c
, sds s
) {
2508 robj
*o
= createObject(REDIS_STRING
,s
);
2513 static void addReplyDouble(redisClient
*c
, double d
) {
2516 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2517 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2518 (unsigned long) strlen(buf
),buf
));
2521 static void addReplyLong(redisClient
*c
, long l
) {
2526 addReply(c
,shared
.czero
);
2528 } else if (l
== 1) {
2529 addReply(c
,shared
.cone
);
2532 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2533 addReplySds(c
,sdsnewlen(buf
,len
));
2536 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2541 addReply(c
,shared
.czero
);
2543 } else if (ul
== 1) {
2544 addReply(c
,shared
.cone
);
2547 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2548 addReplySds(c
,sdsnewlen(buf
,len
));
2551 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2554 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2555 len
= sdslen(obj
->ptr
);
2557 long n
= (long)obj
->ptr
;
2559 /* Compute how many bytes will take this integer as a radix 10 string */
2565 while((n
= n
/10) != 0) {
2569 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2572 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2573 addReplyBulkLen(c
,obj
);
2575 addReply(c
,shared
.crlf
);
2578 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2579 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2581 addReply(c
,shared
.nullbulk
);
2583 robj
*o
= createStringObject(s
,strlen(s
));
2589 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2594 REDIS_NOTUSED(mask
);
2595 REDIS_NOTUSED(privdata
);
2597 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2598 if (cfd
== AE_ERR
) {
2599 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2602 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2603 if ((c
= createClient(cfd
)) == NULL
) {
2604 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2605 close(cfd
); /* May be already closed, just ingore errors */
2608 /* If maxclient directive is set and this is one client more... close the
2609 * connection. Note that we create the client instead to check before
2610 * for this condition, since now the socket is already set in nonblocking
2611 * mode and we can send an error for free using the Kernel I/O */
2612 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2613 char *err
= "-ERR max number of clients reached\r\n";
2615 /* That's a best effort error message, don't check write errors */
2616 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2617 /* Nothing to do, Just to avoid the warning... */
2622 server
.stat_numconnections
++;
2625 /* ======================= Redis objects implementation ===================== */
2627 static robj
*createObject(int type
, void *ptr
) {
2630 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2631 if (listLength(server
.objfreelist
)) {
2632 listNode
*head
= listFirst(server
.objfreelist
);
2633 o
= listNodeValue(head
);
2634 listDelNode(server
.objfreelist
,head
);
2635 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2637 if (server
.vm_enabled
) {
2638 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2639 o
= zmalloc(sizeof(*o
));
2641 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2645 o
->encoding
= REDIS_ENCODING_RAW
;
2648 if (server
.vm_enabled
) {
2649 /* Note that this code may run in the context of an I/O thread
2650 * and accessing to server.unixtime in theory is an error
2651 * (no locks). But in practice this is safe, and even if we read
2652 * garbage Redis will not fail, as it's just a statistical info */
2653 o
->vm
.atime
= server
.unixtime
;
2654 o
->storage
= REDIS_VM_MEMORY
;
2659 static robj
*createStringObject(char *ptr
, size_t len
) {
2660 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2663 static robj
*dupStringObject(robj
*o
) {
2664 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2665 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2668 static robj
*createListObject(void) {
2669 list
*l
= listCreate();
2671 listSetFreeMethod(l
,decrRefCount
);
2672 return createObject(REDIS_LIST
,l
);
2675 static robj
*createSetObject(void) {
2676 dict
*d
= dictCreate(&setDictType
,NULL
);
2677 return createObject(REDIS_SET
,d
);
2680 static robj
*createHashObject(void) {
2681 /* All the Hashes start as zipmaps. Will be automatically converted
2682 * into hash tables if there are enough elements or big elements
2684 unsigned char *zm
= zipmapNew();
2685 robj
*o
= createObject(REDIS_HASH
,zm
);
2686 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2690 static robj
*createZsetObject(void) {
2691 zset
*zs
= zmalloc(sizeof(*zs
));
2693 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2694 zs
->zsl
= zslCreate();
2695 return createObject(REDIS_ZSET
,zs
);
2698 static void freeStringObject(robj
*o
) {
2699 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2704 static void freeListObject(robj
*o
) {
2705 listRelease((list
*) o
->ptr
);
2708 static void freeSetObject(robj
*o
) {
2709 dictRelease((dict
*) o
->ptr
);
2712 static void freeZsetObject(robj
*o
) {
2715 dictRelease(zs
->dict
);
2720 static void freeHashObject(robj
*o
) {
2721 switch (o
->encoding
) {
2722 case REDIS_ENCODING_HT
:
2723 dictRelease((dict
*) o
->ptr
);
2725 case REDIS_ENCODING_ZIPMAP
:
2734 static void incrRefCount(robj
*o
) {
2735 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2739 static void decrRefCount(void *obj
) {
2742 /* Object is a key of a swapped out value, or in the process of being
2744 if (server
.vm_enabled
&&
2745 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2747 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2748 redisAssert(o
->refcount
== 1);
2750 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2751 redisAssert(o
->type
== REDIS_STRING
);
2752 freeStringObject(o
);
2753 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2754 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2755 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2756 !listAddNodeHead(server
.objfreelist
,o
))
2758 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2759 server
.vm_stats_swapped_objects
--;
2762 /* Object is in memory, or in the process of being swapped out. */
2763 if (--(o
->refcount
) == 0) {
2764 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2765 vmCancelThreadedIOJob(obj
);
2767 case REDIS_STRING
: freeStringObject(o
); break;
2768 case REDIS_LIST
: freeListObject(o
); break;
2769 case REDIS_SET
: freeSetObject(o
); break;
2770 case REDIS_ZSET
: freeZsetObject(o
); break;
2771 case REDIS_HASH
: freeHashObject(o
); break;
2772 default: redisAssert(0); break;
2774 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2775 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2776 !listAddNodeHead(server
.objfreelist
,o
))
2778 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2782 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2783 dictEntry
*de
= dictFind(db
->dict
,key
);
2785 robj
*key
= dictGetEntryKey(de
);
2786 robj
*val
= dictGetEntryVal(de
);
2788 if (server
.vm_enabled
) {
2789 if (key
->storage
== REDIS_VM_MEMORY
||
2790 key
->storage
== REDIS_VM_SWAPPING
)
2792 /* If we were swapping the object out, stop it, this key
2794 if (key
->storage
== REDIS_VM_SWAPPING
)
2795 vmCancelThreadedIOJob(key
);
2796 /* Update the access time of the key for the aging algorithm. */
2797 key
->vm
.atime
= server
.unixtime
;
2799 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2801 /* Our value was swapped on disk. Bring it at home. */
2802 redisAssert(val
== NULL
);
2803 val
= vmLoadObject(key
);
2804 dictGetEntryVal(de
) = val
;
2806 /* Clients blocked by the VM subsystem may be waiting for
2808 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2817 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2818 expireIfNeeded(db
,key
);
2819 return lookupKey(db
,key
);
2822 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2823 deleteIfVolatile(db
,key
);
2824 return lookupKey(db
,key
);
2827 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2828 robj
*o
= lookupKeyRead(c
->db
, key
);
2829 if (!o
) addReply(c
,reply
);
2833 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2834 robj
*o
= lookupKeyWrite(c
->db
, key
);
2835 if (!o
) addReply(c
,reply
);
2839 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2840 if (o
->type
!= type
) {
2841 addReply(c
,shared
.wrongtypeerr
);
2847 static int deleteKey(redisDb
*db
, robj
*key
) {
2850 /* We need to protect key from destruction: after the first dictDelete()
2851 * it may happen that 'key' is no longer valid if we don't increment
2852 * it's count. This may happen when we get the object reference directly
2853 * from the hash table with dictRandomKey() or dict iterators */
2855 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2856 retval
= dictDelete(db
->dict
,key
);
2859 return retval
== DICT_OK
;
2862 /* Try to share an object against the shared objects pool */
2863 static robj
*tryObjectSharing(robj
*o
) {
2864 struct dictEntry
*de
;
2867 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2869 redisAssert(o
->type
== REDIS_STRING
);
2870 de
= dictFind(server
.sharingpool
,o
);
2872 robj
*shared
= dictGetEntryKey(de
);
2874 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2875 dictGetEntryVal(de
) = (void*) c
;
2876 incrRefCount(shared
);
2880 /* Here we are using a stream algorihtm: Every time an object is
2881 * shared we increment its count, everytime there is a miss we
2882 * recrement the counter of a random object. If this object reaches
2883 * zero we remove the object and put the current object instead. */
2884 if (dictSize(server
.sharingpool
) >=
2885 server
.sharingpoolsize
) {
2886 de
= dictGetRandomKey(server
.sharingpool
);
2887 redisAssert(de
!= NULL
);
2888 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2889 dictGetEntryVal(de
) = (void*) c
;
2891 dictDelete(server
.sharingpool
,de
->key
);
2894 c
= 0; /* If the pool is empty we want to add this object */
2899 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2900 redisAssert(retval
== DICT_OK
);
2907 /* Check if the nul-terminated string 's' can be represented by a long
2908 * (that is, is a number that fits into long without any other space or
2909 * character before or after the digits).
2911 * If so, the function returns REDIS_OK and *longval is set to the value
2912 * of the number. Otherwise REDIS_ERR is returned */
2913 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2914 char buf
[32], *endptr
;
2918 value
= strtol(s
, &endptr
, 10);
2919 if (endptr
[0] != '\0') return REDIS_ERR
;
2920 slen
= snprintf(buf
,32,"%ld",value
);
2922 /* If the number converted back into a string is not identical
2923 * then it's not possible to encode the string as integer */
2924 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2925 if (longval
) *longval
= value
;
2929 /* Try to encode a string object in order to save space */
2930 static int tryObjectEncoding(robj
*o
) {
2934 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2935 return REDIS_ERR
; /* Already encoded */
2937 /* It's not save to encode shared objects: shared objects can be shared
2938 * everywhere in the "object space" of Redis. Encoded objects can only
2939 * appear as "values" (and not, for instance, as keys) */
2940 if (o
->refcount
> 1) return REDIS_ERR
;
2942 /* Currently we try to encode only strings */
2943 redisAssert(o
->type
== REDIS_STRING
);
2945 /* Check if we can represent this string as a long integer */
2946 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2948 /* Ok, this object can be encoded */
2949 o
->encoding
= REDIS_ENCODING_INT
;
2951 o
->ptr
= (void*) value
;
2955 /* Get a decoded version of an encoded object (returned as a new object).
2956 * If the object is already raw-encoded just increment the ref count. */
/* For an integer-encoded string the long stored in o->ptr is printed with
 * "%ld" and a new string object is created from that text. The trailing
 * redisAssert(1 != 1) can never hold, so reaching it always trips the
 * assertion.
 * NOTE(review): the body of the raw-encoding branch and the return
 * statements are not visible in this excerpt. */
2957 static robj
*getDecodedObject(robj
*o
) {
2960 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2964 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2967 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2968 dec
= createStringObject(buf
,strlen(buf
));
2971 redisAssert(1 != 1);
2975 /* Compare two string objects via strcmp() or alike.
2976 * Note that the objects may be integer-encoded. In such a case we
2977 * use snprintf() to get a string representation of the numbers on the stack
2978 * and compare the strings, it's much faster than calling getDecodedObject().
2980 * Important note: if objects are not integer encoded, but binary-safe strings,
2981 * sdscmp() from sds.c will apply memcmp() so this function can be considered
 * binary safe. */
/* Both arguments must be REDIS_STRING objects (asserted). Returns 0 at once
 * when 'a' and 'b' are the same object. An operand whose encoding is not raw
 * is printed with "%ld" into its own 128 byte stack buffer. The result comes
 * from sdscmp() when 'bothsds' is set and from strcmp() otherwise.
 * NOTE(review): the lines that assign 'astr', 'bstr' and 'bothsds' are not
 * visible in this excerpt. */
2983 static int compareStringObjects(robj
*a
, robj
*b
) {
2984 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
2985 char bufa
[128], bufb
[128], *astr
, *bstr
;
2988 if (a
== b
) return 0;
2989 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
2990 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
2996 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
2997 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3003 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
/* Return the length of a string object (asserted to be REDIS_STRING).
 * Raw strings report sdslen(). In the other visible path the long stored in
 * o->ptr is printed with "%ld" and the character count returned by
 * snprintf() is the length. */
3006 static size_t stringObjectLen(robj
*o
) {
3007 redisAssert(o
->type
== REDIS_STRING
);
3008 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3009 return sdslen(o
->ptr
);
3013 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3017 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' to 'fp'. Returns -1 when fwrite() writes
 * nothing (the success return is not visible in this excerpt). */
3019 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3020 if (fwrite(&type
,1,1,fp
) == 0) return -1;
/* Write 't' to 'fp' as a 4 byte value. The time is first narrowed to
 * int32_t and the bytes are written as they sit in memory. Returns -1 when
 * fwrite() writes nothing (the success return is not visible here). */
3024 static int rdbSaveTime(FILE *fp
, time_t t
) {
3025 int32_t t32
= (int32_t) t
;
3026 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3030 /* check rdbLoadLen() comments for more info */
/* Write 'len' using the shortest of three forms. The top two bits of the
 * first byte carry the form tag (REDIS_RDB_6BITLEN, REDIS_RDB_14BITLEN or
 * REDIS_RDB_32BITLEN shifted left by 6):
 *   - one byte: tag plus the length itself;
 *   - two bytes: tag plus the high bits of the length, then a second byte;
 *   - five bytes: a tag-only byte followed by a 4 byte length.
 * Every failed fwrite() returns -1.
 * NOTE(review): the first range test, the store into buf[1], the statement
 * just before the 4 byte write and the success return are not visible in
 * this excerpt. */
3031 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3032 unsigned char buf
[2];
3035 /* Save a 6 bit len */
3036 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3037 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3038 } else if (len
< (1<<14)) {
3039 /* Save a 14 bit len */
3040 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3042 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3044 /* Save a 32 bit len */
3045 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3046 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3048 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3053 /* String objects in the form "2391" "-100" without any space and with a
3054 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3055 * encoded as integers to save space */
3056 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3058 char *endptr
, buf
[32];
3060 /* Check if it's possible to encode this value as a number */
3061 value
= strtoll(s
, &endptr
, 10);
3062 if (endptr
[0] != '\0') return 0;
3063 snprintf(buf
,32,"%lld",value
);
3065 /* If the number converted back into a string is not identical
3066 * then it's not possible to encode the string as integer */
3067 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3069 /* Finally check if it fits in our ranges */
3070 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3071 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3072 enc
[1] = value
&0xFF;
3074 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3075 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3076 enc
[1] = value
&0xFF;
3077 enc
[2] = (value
>>8)&0xFF;
3079 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3080 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3081 enc
[1] = value
&0xFF;
3082 enc
[2] = (value
>>8)&0xFF;
3083 enc
[3] = (value
>>16)&0xFF;
3084 enc
[4] = (value
>>24)&0xFF;
/* Try to write 's' (of 'len' bytes) in LZF-compressed form.
 * Returns 0 without writing when the input is 4 bytes or shorter or when
 * the output buffer can't be allocated. When data is written the layout is:
 * one byte (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF, the compressed length,
 * the original length, then the compressed bytes. Any failed write jumps to
 * the 'writeerr' label.
 * NOTE(review): the computation of 'outlen', the body of the
 * "comprlen == 0" branch, the success return and the 'writeerr' handler are
 * not visible in this excerpt. */
3091 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3092 size_t comprlen
, outlen
;
3096 /* We require at least four bytes compression for this to be worth it */
3097 if (len
<= 4) return 0;
3099 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3100 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3101 if (comprlen
== 0) {
3105 /* Data compressed! Let's save it on disk */
3106 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3107 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3108 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3109 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3110 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3119 /* Save a string object as [len][data] on disk. If the object is a string
3120 * representation of an integer value we try to save it in a special form */
/* The visible lines try three forms in order:
 *   1. integer encoding, written from a 5 byte buffer when
 *      rdbTryIntegerEncoding() returns a positive length;
 *   2. LZF compression, only when server.rdbcompression is set and the
 *      string is longer than 20 bytes;
 *   3. verbatim: the length followed by the bytes (the fwrite() is skipped
 *      for an empty string).
 * Write failures return -1.
 * NOTE(review): the guard around the integer attempt and the success
 * returns are not visible in this excerpt. */
3121 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3124 /* Try integer encoding */
3126 unsigned char buf
[5];
3127 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3128 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3133 /* Try LZF compression - under 20 bytes it's unable to compress even
3134 * aaaaaaaaaaaaaaaaaa so skip it */
3135 if (server
.rdbcompression
&& len
> 20) {
3138 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3139 if (retval
== -1) return -1;
/* A positive value means the compressed form was written: we are done. */
3140 if (retval
> 0) return 0;
3141 /* retval == 0 means data can't be compressed, save the old way */
3144 /* Store verbatim */
3145 if (rdbSaveLen(fp
,len
) == -1) return -1;
3146 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3150 /* Like rdbSaveRawString() but handle encoded objects */
/* Objects that are not raw-encoded are first turned into a decoded object
 * and that object's string is saved. Raw objects are saved directly from
 * obj->ptr without going through getDecodedObject().
 * NOTE(review): the release of the decoded object and the final return are
 * not visible in this excerpt. */
3151 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3154 /* Avoid incr/decr ref count business when possible.
3155 * This plays well with copy-on-write given that we are probably
3156 * in a child process (BGSAVE). Also this makes sure key objects
3157 * of swapped objects are not incRefCount-ed (an assert does not allow
3158 * this in order to avoid bugs) */
3159 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3160 obj
= getDecodedObject(obj
);
3161 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3164 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3169 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3170 * 8 bit integer specifying the length of the representation.
3171 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 */
/* Non-finite values are written as the single marker byte 255 (negative) or
 * 254 (otherwise). Finite values are printed with "%.17g" starting at
 * buf[1], and buf[0] receives the length of that text. Returns -1 when
 * fwrite() writes nothing.
 * NOTE(review): the first branch of the if chain, the assignments to 'len'
 * and the success return are not visible in this excerpt. */
3177 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3178 unsigned char buf
[128];
3184 } else if (!isfinite(val
)) {
3186 buf
[0] = (val
< 0) ? 255 : 254;
3188 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3189 buf
[0] = strlen((char*)buf
+1);
3192 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3196 /* Save a Redis object. */
3197 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3198 if (o
->type
== REDIS_STRING
) {
3199 /* Save a string value */
3200 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3201 } else if (o
->type
== REDIS_LIST
) {
3202 /* Save a list value */
3203 list
*list
= o
->ptr
;
3207 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3208 listRewind(list
,&li
);
3209 while((ln
= listNext(&li
))) {
3210 robj
*eleobj
= listNodeValue(ln
);
3212 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3214 } else if (o
->type
== REDIS_SET
) {
3215 /* Save a set value */
3217 dictIterator
*di
= dictGetIterator(set
);
3220 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3221 while((de
= dictNext(di
)) != NULL
) {
3222 robj
*eleobj
= dictGetEntryKey(de
);
3224 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3226 dictReleaseIterator(di
);
3227 } else if (o
->type
== REDIS_ZSET
) {
3228 /* Save a set value */
3230 dictIterator
*di
= dictGetIterator(zs
->dict
);
3233 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3234 while((de
= dictNext(di
)) != NULL
) {
3235 robj
*eleobj
= dictGetEntryKey(de
);
3236 double *score
= dictGetEntryVal(de
);
3238 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3239 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3241 dictReleaseIterator(di
);
3242 } else if (o
->type
== REDIS_HASH
) {
3243 /* Save a hash value */
3244 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3245 unsigned char *p
= zipmapRewind(o
->ptr
);
3246 unsigned int count
= zipmapLen(o
->ptr
);
3247 unsigned char *key
, *val
;
3248 unsigned int klen
, vlen
;
3250 if (rdbSaveLen(fp
,count
) == -1) return -1;
3251 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3252 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3253 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3256 dictIterator
*di
= dictGetIterator(o
->ptr
);
3259 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3260 while((de
= dictNext(di
)) != NULL
) {
3261 robj
*key
= dictGetEntryKey(de
);
3262 robj
*val
= dictGetEntryVal(de
);
3264 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3265 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3267 dictReleaseIterator(di
);
3275 /* Return the length the object will have on disk if saved with
3276 * the rdbSaveObject() function. Currently we use a trick to get
3277 * this length with very little changes to the code. In the future
3278 * we could switch to a faster solution. */
3279 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3280 if (fp
== NULL
) fp
= server
.devnull
;
3282 assert(rdbSaveObject(fp
,o
) != 1);
3286 /* Return the number of pages required to save this object in the swap file */
/* The byte length from rdbSavedObjectLen() is rounded up to whole pages:
 * adding (server.vm_page_size - 1) before the integer division makes a
 * partially used page count as a full one. */
3287 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3288 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3290 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3293 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3294 static int rdbSave(char *filename
) {
3295 dictIterator
*di
= NULL
;
3300 time_t now
= time(NULL
);
3302 /* Wait for I/O therads to terminate, just in case this is a
3303 * foreground-saving, to avoid seeking the swap file descriptor at the
3305 if (server
.vm_enabled
)
3306 waitEmptyIOJobsQueue();
3308 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3309 fp
= fopen(tmpfile
,"w");
3311 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3314 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3315 for (j
= 0; j
< server
.dbnum
; j
++) {
3316 redisDb
*db
= server
.db
+j
;
3318 if (dictSize(d
) == 0) continue;
3319 di
= dictGetIterator(d
);
3325 /* Write the SELECT DB opcode */
3326 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3327 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3329 /* Iterate this DB writing every entry */
3330 while((de
= dictNext(di
)) != NULL
) {
3331 robj
*key
= dictGetEntryKey(de
);
3332 robj
*o
= dictGetEntryVal(de
);
3333 time_t expiretime
= getExpire(db
,key
);
3335 /* Save the expire time */
3336 if (expiretime
!= -1) {
3337 /* If this key is already expired skip it */
3338 if (expiretime
< now
) continue;
3339 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3340 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3342 /* Save the key and associated value. This requires special
3343 * handling if the value is swapped out. */
3344 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3345 key
->storage
== REDIS_VM_SWAPPING
) {
3346 /* Save type, key, value */
3347 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3348 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3349 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3351 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3353 /* Get a preview of the object in memory */
3354 po
= vmPreviewObject(key
);
3355 /* Save type, key, value */
3356 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3357 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3358 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3359 /* Remove the loaded object from memory */
3363 dictReleaseIterator(di
);
3366 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3368 /* Make sure data will not remain on the OS's output buffers */
3373 /* Use RENAME to make sure the DB file is changed atomically only
3374 * if the generate DB file is ok. */
3375 if (rename(tmpfile
,filename
) == -1) {
3376 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3380 redisLog(REDIS_NOTICE
,"DB saved on disk");
3382 server
.lastsave
= time(NULL
);
3388 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3389 if (di
) dictReleaseIterator(di
);
/* Start a save of 'filename' in a forked child process.
 * Returns REDIS_ERR at once when server.bgsavechildpid is not -1. When VM
 * is enabled the I/O jobs queue is drained before fork(), and the child
 * reopens the swap file before calling rdbSave(). In the parent a fork()
 * failure is logged; otherwise the child pid is logged and stored in
 * server.bgsavechildpid.
 * NOTE(review): what the child does with the rdbSave() result and the
 * return taken after a failed fork() are not visible in this excerpt. */
3393 static int rdbSaveBackground(char *filename
) {
3396 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3397 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3398 if ((childpid
= fork()) == 0) {
3400 if (server
.vm_enabled
) vmReopenSwapFile();
3402 if (rdbSave(filename
) == REDIS_OK
) {
3409 if (childpid
== -1) {
3410 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3414 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3415 server
.bgsavechildpid
= childpid
;
3418 return REDIS_OK
; /* unreached */
/* Builds the name "temp-<childpid>.rdb", the same pattern rdbSave() uses
 * for its temporary file.
 * NOTE(review): the statement that uses the name is not visible in this
 * excerpt; presumably the file is removed, as the function name says. */
3421 static void rdbRemoveTempFile(pid_t childpid
) {
3424 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read one byte from 'fp' into 'type'. Returns -1 when nothing could be
 * read (the success return is not visible in this excerpt). */
3428 static int rdbLoadType(FILE *fp
) {
3430 if (fread(&type
,1,1,fp
) == 0) return -1;
/* Read a 4 byte value from 'fp' and return it as a time_t. Returns -1 when
 * nothing could be read, so a stored value of -1 can't be told apart from a
 * read error. */
3434 static time_t rdbLoadTime(FILE *fp
) {
3436 if (fread(&t32
,4,1,fp
) == 0) return -1;
3437 return (time_t) t32
;
3440 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3441 * of this file for a description of how these are stored on disk.
3443 * isencoded is set to 1 if the value read is not actually a length but
3444 * an "encoding type", check the above comments for more info */
/* The two most significant bits of the first byte select the form. For the
 * 14 bit form the low six bits of the first byte are the high part and the
 * next byte is the low part. '*isencoded', when the pointer is not NULL, is
 * cleared on entry and set to 1 only for the REDIS_RDB_ENCVAL form. Every
 * failed read returns REDIS_RDB_LENERR.
 * NOTE(review): the return statements of the 6 bit, encoded and 32 bit
 * branches are not visible in this excerpt. */
3445 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3446 unsigned char buf
[2];
3450 if (isencoded
) *isencoded
= 0;
3451 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3452 type
= (buf
[0]&0xC0)>>6;
3453 if (type
== REDIS_RDB_6BITLEN
) {
3454 /* Read a 6 bit len */
3456 } else if (type
== REDIS_RDB_ENCVAL
) {
3457 /* Read a 6 bit len encoding type */
3458 if (isencoded
) *isencoded
= 1;
3460 } else if (type
== REDIS_RDB_14BITLEN
) {
3461 /* Read a 14 bit len */
3462 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3463 return ((buf
[0]&0x3F)<<8)|buf
[1];
3465 /* Read a 32 bit len */
3466 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3471 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3472 unsigned char enc
[4];
3475 if (enctype
== REDIS_RDB_ENC_INT8
) {
3476 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3477 val
= (signed char)enc
[0];
3478 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3480 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3481 v
= enc
[0]|(enc
[1]<<8);
3483 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3485 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3486 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3489 val
= 0; /* anti-warning */
3492 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
/* Load a string saved in LZF-compressed form: the compressed length, the
 * uncompressed length, then the compressed bytes. The bytes are read into
 * a scratch buffer and decompressed into a string of the uncompressed
 * length, which is wrapped in a REDIS_STRING object. A failed length read
 * returns NULL; allocation, read and decompression failures jump to 'err'.
 * NOTE(review): the 'err' handler and the release of the scratch buffer on
 * success are not visible in this excerpt. */
3495 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3496 unsigned int len
, clen
;
3497 unsigned char *c
= NULL
;
3500 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3501 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3502 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3503 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3504 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3505 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3507 return createObject(REDIS_STRING
,val
);
/* Load a string object from 'fp'. The leading length field may instead be
 * an encoding type: the three integer encodings go to
 * rdbLoadIntegerObject() and the LZF encoding to rdbLoadLzfStringObject().
 * Otherwise 'len' bytes are read verbatim into a new string (the read is
 * skipped for an empty one). Every result is passed through
 * tryObjectSharing(). REDIS_RDB_LENERR as length returns NULL.
 * NOTE(review): both loaders can return NULL on a short read and that value
 * is handed straight to tryObjectSharing(); whether it accepts NULL cannot
 * be seen from here. The switch header, its default case and the body of
 * the failed-read branch are not visible in this excerpt. */
3514 static robj
*rdbLoadStringObject(FILE*fp
) {
3519 len
= rdbLoadLen(fp
,&isencoded
);
3522 case REDIS_RDB_ENC_INT8
:
3523 case REDIS_RDB_ENC_INT16
:
3524 case REDIS_RDB_ENC_INT32
:
3525 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3526 case REDIS_RDB_ENC_LZF
:
3527 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3533 if (len
== REDIS_RDB_LENERR
) return NULL
;
3534 val
= sdsnewlen(NULL
,len
);
3535 if (len
&& fread(val
,len
,1,fp
) == 0) {
3539 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3542 /* For information about double serialization check rdbSaveDoubleValue() */
3543 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3547 if (fread(&len
,1,1,fp
) == 0) return -1;
3549 case 255: *val
= R_NegInf
; return 0;
3550 case 254: *val
= R_PosInf
; return 0;
3551 case 253: *val
= R_Nan
; return 0;
3553 if (fread(buf
,len
,1,fp
) == 0) return -1;
3555 sscanf(buf
, "%lg", val
);
3560 /* Load a Redis object of the specified type from the specified file.
3561 * On success a newly allocated object is returned, otherwise NULL. */
/* Per-type loading as far as the visible lines show:
 *   string:     one string object, then tryObjectEncoding();
 *   list/set:   an element count, then that many strings appended to the
 *               list tail or added as dict keys with a NULL value (a set
 *               dict is expanded up front when the count is larger than
 *               DICT_HT_INITIAL_SIZE);
 *   sorted set: an element count, then element/score pairs; every score is
 *               heap allocated and the element is added to both the dict
 *               and the skiplist (hence the extra reference);
 *   hash:       a field count, then field/value pairs stored in a zipmap,
 *               unless the count or any field/value length exceeds the
 *               configured limits, in which case a real hash table is used.
 * NOTE(review): the early "return NULL" paths don't release the partially
 * built object or the allocated score in the lines visible here. The loop
 * headers, the closing part of the zipmap branch and the final return are
 * not visible in this excerpt. */
3562 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3565 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3566 if (type
== REDIS_STRING
) {
3567 /* Read string value */
3568 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3569 tryObjectEncoding(o
);
3570 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3571 /* Read list/set value */
3574 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3575 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3576 /* It's faster to expand the dict to the right size asap in order
3577 * to avoid rehashing */
3578 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3579 dictExpand(o
->ptr
,listlen
);
3580 /* Load every single element of the list/set */
3584 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3585 tryObjectEncoding(ele
);
3586 if (type
== REDIS_LIST
) {
3587 listAddNodeTail((list
*)o
->ptr
,ele
);
3589 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3592 } else if (type
== REDIS_ZSET
) {
3593 /* Read sorted set value */
3597 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3598 o
= createZsetObject();
3600 /* Load every single element of the sorted set */
3603 double *score
= zmalloc(sizeof(double));
3605 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3606 tryObjectEncoding(ele
);
3607 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3608 dictAdd(zs
->dict
,ele
,score
);
3609 zslInsert(zs
->zsl
,*score
,ele
);
3610 incrRefCount(ele
); /* added to skiplist */
3612 } else if (type
== REDIS_HASH
) {
3615 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3616 o
= createHashObject();
3617 /* Too many entries? Use an hash table. */
3618 if (hashlen
> server
.hash_max_zipmap_entries
)
3619 convertToRealHash(o
);
3620 /* Load every key/value, then set it into the zipmap or hash
3621 * table, as needed. */
3625 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3626 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3627 /* If we are using a zipmap and there are too big values
3628 * the object is converted to real hash table encoding. */
3629 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3630 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3631 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3633 convertToRealHash(o
);
3636 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3637 unsigned char *zm
= o
->ptr
;
3639 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3640 val
->ptr
,sdslen(val
->ptr
),NULL
);
3645 tryObjectEncoding(key
);
3646 tryObjectEncoding(val
);
3647 dictAdd((dict
*)o
->ptr
,key
,val
);
/* Load the dump 'filename' into memory.
 * Returns REDIS_ERR when the file can't be opened. The first 9 bytes must
 * start with "REDIS", followed by the format version parsed with atoi().
 * The file is then a sequence of records, each starting with a type byte:
 *   REDIS_EXPIRETIME: an expire time for the next key, followed by the
 *                     type byte of that key;
 *   REDIS_EOF:        stop loading;
 *   REDIS_SELECTDB:   a DB index, checked against server.dbnum;
 *   anything else:    a key string and a value of that type, added to the
 *                     current dict.
 * A pending expire is set on the key, which is then deleted if the expire
 * is already in the past. With VM enabled, every 5000 loaded keys objects
 * are swapped out while used memory exceeds server.vm_max_memory. Short
 * reads jump to 'eoferr', which logs an unrecoverable error.
 * NOTE(review): the loop header, the statements that follow the logged
 * fatal errors, the update of 'd' after a SELECT DB and the success return
 * are not visible in this excerpt. */
3656 static int rdbLoad(char *filename
) {
3658 robj
*keyobj
= NULL
;
3660 int type
, retval
, rdbver
;
3661 dict
*d
= server
.db
[0].dict
;
3662 redisDb
*db
= server
.db
+0;
3664 time_t expiretime
= -1, now
= time(NULL
);
3665 long long loadedkeys
= 0;
3667 fp
= fopen(filename
,"r");
3668 if (!fp
) return REDIS_ERR
;
3669 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3671 if (memcmp(buf
,"REDIS",5) != 0) {
3673 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3676 rdbver
= atoi(buf
+5);
3679 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3686 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3687 if (type
== REDIS_EXPIRETIME
) {
3688 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3689 /* We read the time so we need to read the object type again */
3690 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3692 if (type
== REDIS_EOF
) break;
3693 /* Handle SELECT DB opcode as a special case */
3694 if (type
== REDIS_SELECTDB
) {
3695 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3697 if (dbid
>= (unsigned)server
.dbnum
) {
3698 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3701 db
= server
.db
+dbid
;
3706 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3708 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3709 /* Add the new object in the hash table */
3710 retval
= dictAdd(d
,keyobj
,o
);
3711 if (retval
== DICT_ERR
) {
3712 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3715 /* Set the expire time if needed */
3716 if (expiretime
!= -1) {
3717 setExpire(db
,keyobj
,expiretime
);
3718 /* Delete this key if already expired */
3719 if (expiretime
< now
) deleteKey(db
,keyobj
);
3723 /* Handle swapping while loading big datasets when VM is on */
3725 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3726 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3727 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3734 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3735 if (keyobj
) decrRefCount(keyobj
);
3736 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3738 return REDIS_ERR
; /* Just to avoid warning */
3741 /*================================== Commands =============================== */
/* Authenticate the client. The attempt succeeds when no password is
 * configured (server.requirepass is NULL) or when the first argument equals
 * it: the client is flagged as authenticated and gets the shared "ok"
 * reply. Otherwise the flag is cleared and an error reply is sent. */
3743 static void authCommand(redisClient
*c
) {
3744 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3745 c
->authenticated
= 1;
3746 addReply(c
,shared
.ok
);
3748 c
->authenticated
= 0;
3749 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
/* Reply with the shared "pong" object. */
3753 static void pingCommand(redisClient
*c
) {
3754 addReply(c
,shared
.pong
);
/* Send the first argument back to the client as a bulk reply. */
3757 static void echoCommand(redisClient
*c
) {
3758 addReplyBulk(c
,c
->argv
[1]);
3761 /*=================================== Strings =============================== */
/* Shared implementation of the set commands: argv[1] is the key and argv[2]
 * the value. With 'nx' set, a volatile key is first passed to
 * deleteIfVolatile(). The pair is added to the DB dict; when the key
 * already exists (DICT_ERR) the visible lines either replace the value or
 * reply with the shared "czero" object. Objects stored in the dict get
 * their reference count incremented, any expire on the key is removed, and
 * the final reply is "cone" when 'nx' is set and "ok" otherwise.
 * NOTE(review): the branch structure around the replace/"czero" lines is
 * not visible in this excerpt. */
3763 static void setGenericCommand(redisClient
*c
, int nx
) {
3766 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3767 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3768 if (retval
== DICT_ERR
) {
3770 /* If the key is about a swapped value, we want a new key object
3771 * to overwrite the old. So we delete the old key in the database.
3772 * This will also make sure that swap pages about the old object
3773 * will be marked as free. */
3774 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3775 incrRefCount(c
->argv
[1]);
3776 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3777 incrRefCount(c
->argv
[2]);
3779 addReply(c
,shared
.czero
);
3783 incrRefCount(c
->argv
[1]);
3784 incrRefCount(c
->argv
[2]);
3787 removeExpire(c
->db
,c
->argv
[1]);
3788 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* Thin wrapper: setGenericCommand() with nx = 0. */
3791 static void setCommand(redisClient
*c
) {
3792 setGenericCommand(c
,0);
/* Thin wrapper: setGenericCommand() with nx = 1. */
3795 static void setnxCommand(redisClient
*c
) {
3796 setGenericCommand(c
,1);
/* Look up argv[1] for reading. A missing key is answered with the shared
 * null bulk reply by lookupKeyReadOrReply(). A value that is not a string
 * gets the shared wrong-type error.
 * NOTE(review): the return values and the reply for a string value are not
 * visible in this excerpt; getsetCommand() below compares the result with
 * REDIS_ERR. */
3799 static int getGenericCommand(redisClient
*c
) {
3802 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3805 if (o
->type
!= REDIS_STRING
) {
3806 addReply(c
,shared
.wrongtypeerr
);
/* Thin wrapper: calls getGenericCommand() and ignores its result. */
3814 static void getCommand(redisClient
*c
) {
3815 getGenericCommand(c
);
/* Reply with the current value through getGenericCommand(), stopping when
 * it reports REDIS_ERR, then store argv[2] under argv[1]: the pair is
 * added, or the value replaced when the key already exists. The stored
 * objects get their reference count incremented and any expire on the key
 * is removed.
 * NOTE(review): the else structure around the incrRefCount() of the key is
 * not visible in this excerpt. */
3818 static void getsetCommand(redisClient
*c
) {
3819 if (getGenericCommand(c
) == REDIS_ERR
) return;
3820 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3821 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3823 incrRefCount(c
->argv
[1]);
3825 incrRefCount(c
->argv
[2]);
3827 removeExpire(c
->db
,c
->argv
[1]);
/* Reply with a multi bulk of argc-1 items, one per requested key in
 * argument order. Values that are not strings are answered with the shared
 * null bulk reply instead of an error.
 * NOTE(review): the test guarding the first null bulk reply and the reply
 * for string values are not visible in this excerpt. */
3830 static void mgetCommand(redisClient
*c
) {
3833 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3834 for (j
= 1; j
< c
->argc
; j
++) {
3835 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3837 addReply(c
,shared
.nullbulk
);
3839 if (o
->type
!= REDIS_STRING
) {
3840 addReply(c
,shared
.nullbulk
);
/* Shared implementation of the multi-set commands. Arguments are key/value
 * pairs starting at argv[1], so an even argc is a syntax error. A first
 * pass looks every key up for writing. The second pass stores each pair,
 * replacing existing values, after trying to encode the value. Stored
 * objects get their reference count incremented and expires on the keys are
 * removed. server.dirty grows by the number of pairs and the final reply is
 * "cone" when 'nx' is set and "ok" otherwise.
 * NOTE(review): the guard that limits the first pass to the NX case, the
 * use of 'busykeys' and the returns after the error/"czero" replies are not
 * visible in this excerpt. */
3848 static void msetGenericCommand(redisClient
*c
, int nx
) {
3849 int j
, busykeys
= 0;
3851 if ((c
->argc
% 2) == 0) {
3852 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3855 /* Handle the NX flag. The MSETNX semantic is to return zero and set
3856 * nothing at all if at least one key already exists. */
3858 for (j
= 1; j
< c
->argc
; j
+= 2) {
3859 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3865 addReply(c
, shared
.czero
);
3869 for (j
= 1; j
< c
->argc
; j
+= 2) {
3872 tryObjectEncoding(c
->argv
[j
+1]);
3873 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3874 if (retval
== DICT_ERR
) {
3875 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3876 incrRefCount(c
->argv
[j
+1]);
3878 incrRefCount(c
->argv
[j
]);
3879 incrRefCount(c
->argv
[j
+1]);
3881 removeExpire(c
->db
,c
->argv
[j
]);
3883 server
.dirty
+= (c
->argc
-1)/2;
3884 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* Thin wrapper: msetGenericCommand() with nx = 0. */
3887 static void msetCommand(redisClient
*c
) {
3888 msetGenericCommand(c
,0);
/* Thin wrapper: msetGenericCommand() with nx = 1. */
3891 static void msetnxCommand(redisClient
*c
) {
3892 msetGenericCommand(c
,1);
/* Shared implementation of the increment/decrement commands on argv[1].
 * The current value is read with strtoll() from a raw string or taken
 * directly from o->ptr for an integer-encoded one; any other encoding trips
 * the assertion. The new value is stored as a fresh string object (encoded
 * when possible). When the key already existed the value is replaced and
 * its expire removed. The reply is the shared colon prefix followed by the
 * shared CRLF, with the number in between.
 * NOTE(review): the handling of a missing key, the body of the wrong-type
 * branch, the addition of 'incr' and the reply line carrying the number are
 * not visible in this excerpt. No check of 'eptr' after strtoll() is
 * visible either. */
3895 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3900 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3904 if (o
->type
!= REDIS_STRING
) {
3909 if (o
->encoding
== REDIS_ENCODING_RAW
)
3910 value
= strtoll(o
->ptr
, &eptr
, 10);
3911 else if (o
->encoding
== REDIS_ENCODING_INT
)
3912 value
= (long)o
->ptr
;
3914 redisAssert(1 != 1);
3919 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3920 tryObjectEncoding(o
);
3921 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3922 if (retval
== DICT_ERR
) {
3923 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3924 removeExpire(c
->db
,c
->argv
[1]);
3926 incrRefCount(c
->argv
[1]);
3929 addReply(c
,shared
.colon
);
3931 addReply(c
,shared
.crlf
);
/* Thin wrapper: incrDecrCommand() with an increment of 1. */
3934 static void incrCommand(redisClient
*c
) {
3935 incrDecrCommand(c
,1);
/* Thin wrapper: incrDecrCommand() with an increment of -1. */
3938 static void decrCommand(redisClient
*c
) {
3939 incrDecrCommand(c
,-1);
/* Increment by the amount given in argv[2]. The amount is parsed with
 * strtoll() and a NULL end pointer, so an argument that is not a number is
 * not rejected here: it is used as whatever strtoll() returns for it. */
3942 static void incrbyCommand(redisClient
*c
) {
3943 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3944 incrDecrCommand(c
,incr
);
/* Decrement by the amount given in argv[2]: the amount is parsed with
 * strtoll() (NULL end pointer, so malformed input is not rejected here) and
 * its negation is passed to incrDecrCommand(). */
3947 static void decrbyCommand(redisClient
*c
) {
3948 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3949 incrDecrCommand(c
,-incr
);
/* Append argv[2] to the string stored at argv[1] and reply with the
 * resulting length as an integer reply.
 * When the key is created here, argv[2] itself becomes the value and the
 * length is stringObjectLen(argv[2]). Otherwise the existing value must be
 * a string (wrong-type error if not). A value that is shared or not
 * raw-encoded is first replaced in the dict by a private raw copy, then the
 * argument is appended: its bytes for a raw argument, or the "%ld"
 * rendering of the integer held in its ptr field for an encoded one.
 * NOTE(review): the "%ld" conversion is fed a value cast to unsigned long.
 * The comment that starts at original line 3975 is cut off in this copy
 * (its closing line is missing), and the tests selecting the create/append
 * branches are not visible in this excerpt. */
3952 static void appendCommand(redisClient
*c
) {
3957 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3959 /* Create the key */
3960 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3961 incrRefCount(c
->argv
[1]);
3962 incrRefCount(c
->argv
[2]);
3963 totlen
= stringObjectLen(c
->argv
[2]);
3967 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3970 o
= dictGetEntryVal(de
);
3971 if (o
->type
!= REDIS_STRING
) {
3972 addReply(c
,shared
.wrongtypeerr
);
3975 /* If the object is specially encoded or shared we have to make
3977 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
3978 robj
*decoded
= getDecodedObject(o
);
3980 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
3981 decrRefCount(decoded
);
3982 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3985 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
3986 o
->ptr
= sdscatlen(o
->ptr
,
3987 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
3989 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
3990 (unsigned long) c
->argv
[2]->ptr
);
3992 totlen
= sdslen(o
->ptr
);
3995 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
/* Reply with the bytes of the string at argv[1] between the indexes argv[2]
 * and argv[3], both inclusive and both parsed with atoi(). Negative indexes
 * count from the end of the string and are clamped to 0 when they are still
 * negative after the conversion. An 'end' past the last byte is clamped to
 * the last byte. A missing key, start > end or a start beyond the string
 * gives the shared null bulk reply. The reply is built by hand: a "$<len>"
 * header, the range bytes and the shared CRLF.
 * Note that the local variable 'strlen' shadows the C library function of
 * the same name inside this function.
 * NOTE(review): the release of the decoded object obtained from
 * getDecodedObject() is not visible in this excerpt. */
3998 static void substrCommand(redisClient
*c
) {
4000 long start
= atoi(c
->argv
[2]->ptr
);
4001 long end
= atoi(c
->argv
[3]->ptr
);
4002 size_t rangelen
, strlen
;
4005 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4006 checkType(c
,o
,REDIS_STRING
)) return;
4008 o
= getDecodedObject(o
);
4009 strlen
= sdslen(o
->ptr
);
4011 /* convert negative indexes */
4012 if (start
< 0) start
= strlen
+start
;
4013 if (end
< 0) end
= strlen
+end
;
4014 if (start
< 0) start
= 0;
4015 if (end
< 0) end
= 0;
4017 /* indexes sanity checks */
4018 if (start
> end
|| (size_t)start
>= strlen
) {
4019 /* Out of range start or start > end result in null reply */
4020 addReply(c
,shared
.nullbulk
);
4024 if ((size_t)end
>= strlen
) end
= strlen
-1;
4025 rangelen
= (end
-start
)+1;
4027 /* Return the result */
4028 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4029 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4030 addReplySds(c
,range
);
4031 addReply(c
,shared
.crlf
);
4035 /* ========================= Type agnostic commands ========================= */
/* Call deleteKey() for every key given as argument and reply with the
 * 'deleted' counter as an integer reply.
 * NOTE(review): the body of the if, where the counter is presumably
 * incremented, is not visible in this excerpt. */
4037 static void delCommand(redisClient
*c
) {
4040 for (j
= 1; j
< c
->argc
; j
++) {
4041 if (deleteKey(c
->db
,c
->argv
[j
])) {
4046 addReplyLong(c
,deleted
);
/* Reply with the shared "cone" object when argv[1] is found by
 * lookupKeyRead(), with "czero" otherwise. */
4049 static void existsCommand(redisClient
*c
) {
4050 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
/* Switch the client to the DB whose index is given in argv[1]. The index is
 * parsed with atoi(), so an argument that is not a number is not detected
 * by the parsing itself. An index refused by selectDb() gets an error
 * reply, otherwise the shared "ok" reply is sent. */
4053 static void selectCommand(redisClient
*c
) {
4054 int id
= atoi(c
->argv
[1]->ptr
);
4056 if (selectDb(c
,id
) == REDIS_ERR
) {
4057 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4059 addReply(c
,shared
.ok
);
/* Reply with a random key of the current DB. Entries are drawn with
 * dictGetRandomKey() until the dict has none to give or the drawn key is
 * one for which expireIfNeeded() returns 0. Two replies are visible: the
 * shared "plus" prefix followed directly by CRLF, and the prefix, the key
 * and CRLF.
 * NOTE(review): the loop header and the test choosing between the two
 * replies are not visible in this excerpt. */
4063 static void randomkeyCommand(redisClient
*c
) {
4067 de
= dictGetRandomKey(c
->db
->dict
);
4068 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4071 addReply(c
,shared
.plus
);
4072 addReply(c
,shared
.crlf
);
4074 addReply(c
,shared
.plus
);
4075 addReply(c
,dictGetEntryKey(de
));
4076 addReply(c
,shared
.crlf
);
/* Reply with every key of the current DB matching the glob-style pattern in
 * argv[1]. The pattern "*" is special-cased so that stringmatchlen() is not
 * called at all. A matching key is sent only when expireIfNeeded() returns
 * 0 for it. The number of keys is not known before the scan: 'lenobj' is
 * created with a NULL ptr and receives the "*<numkeys>" header only after
 * the loop.
 * NOTE(review): the line that queues 'lenobj' as a reply and the increment
 * of 'numkeys' are not visible in this excerpt. */
4080 static void keysCommand(redisClient
*c
) {
4083 sds pattern
= c
->argv
[1]->ptr
;
4084 int plen
= sdslen(pattern
);
4085 unsigned long numkeys
= 0;
4086 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4088 di
= dictGetIterator(c
->db
->dict
);
4090 decrRefCount(lenobj
);
4091 while((de
= dictNext(di
)) != NULL
) {
4092 robj
*keyobj
= dictGetEntryKey(de
);
4094 sds key
= keyobj
->ptr
;
4095 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4096 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4097 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4098 addReplyBulk(c
,keyobj
);
4103 dictReleaseIterator(di
);
4104 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
/* Builds an integer reply with the number of entries in the current DB
 * dict (the call that sends it is not visible in this excerpt). */
4107 static void dbsizeCommand(redisClient
*c
) {
4109 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
/* Builds an integer reply with server.lastsave, the time rdbSave() records
 * after a successful save (the call that sends it is not visible in this
 * excerpt). */
4112 static void lastsaveCommand(redisClient
*c
) {
4114 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
/* Reply with the type of the value stored at argv[1] as a status reply:
 * "+string", "+list", "+set", "+zset" or "+hash", and "+unknown" for any
 * other type value, followed by the shared CRLF.
 * NOTE(review): the handling of a missing key and the switch header are not
 * visible in this excerpt. */
4117 static void typeCommand(redisClient
*c
) {
4121 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4126 case REDIS_STRING
: type
= "+string"; break;
4127 case REDIS_LIST
: type
= "+list"; break;
4128 case REDIS_SET
: type
= "+set"; break;
4129 case REDIS_ZSET
: type
= "+zset"; break;
4130 case REDIS_HASH
: type
= "+hash"; break;
4131 default: type
= "+unknown"; break;
4134 addReplySds(c
,sdsnew(type
));
4135 addReply(c
,shared
.crlf
);
/* Save the dataset in the foreground with rdbSave() on server.dbfilename.
 * An error is sent when a background save is in progress
 * (server.bgsavechildpid is not -1). The final reply is the shared "ok" or
 * "err" object according to the rdbSave() result.
 * NOTE(review): the return after the "in progress" error is not visible in
 * this excerpt. */
4138 static void saveCommand(redisClient
*c
) {
4139 if (server
.bgsavechildpid
!= -1) {
4140 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4143 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4144 addReply(c
,shared
.ok
);
4146 addReply(c
,shared
.err
);
/* Start a background save with rdbSaveBackground() on server.dbfilename.
 * An error is sent when one is already running (server.bgsavechildpid is
 * not -1). The reply is a "+Background saving started" status on success
 * and the shared "err" object otherwise.
 * NOTE(review): the return after the "already in progress" error is not
 * visible in this excerpt. */
4150 static void bgsaveCommand(redisClient
*c
) {
4151 if (server
.bgsavechildpid
!= -1) {
4152 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4155 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4156 char *status
= "+Background saving started\r\n";
4157 addReplySds(c
,sdsnew(status
));
4159 addReply(c
,shared
.err
);
/* Persist the dataset before the server stops.
 * A running background save child is killed with SIGKILL and its temporary
 * file handed to rdbRemoveTempFile(). With the append only file enabled the
 * AOF descriptor is fsync()ed. Otherwise a foreground rdbSave() is done; on
 * success the pid file is unlinked when the server runs as a daemon and the
 * memory usage is logged. In both modes the VM swap file is unlinked when
 * VM is enabled. A failed save is logged and an error is built for the
 * client.
 * NOTE(review): the calls that terminate the process and the call that
 * sends the error reply are not visible in this excerpt. */
4163 static void shutdownCommand(redisClient
*c
) {
4164 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4165 /* Kill the saving child if there is a background saving in progress.
4166 We want to avoid race conditions, for instance our saving child may
4167 overwrite the synchronous saving done by SHUTDOWN. */
4168 if (server
.bgsavechildpid
!= -1) {
4169 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4170 kill(server
.bgsavechildpid
,SIGKILL
);
4171 rdbRemoveTempFile(server
.bgsavechildpid
);
4173 if (server
.appendonly
) {
4174 /* Append only file: fsync() the AOF and exit */
4175 fsync(server
.appendfd
);
4176 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4179 /* Snapshotting. Perform a SYNC SAVE and exit */
4180 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4181 if (server
.daemonize
)
4182 unlink(server
.pidfile
);
4183 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4184 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4185 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4188 /* Ooops.. error saving! The best we can do is to continue
4189 * operating. Note that if there was a background saving process,
4190 * in the next cron() Redis will be notified that the background
4191 * saving aborted, handling special stuff like slaves pending for
4192 * synchronization... */
4193 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4195 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
/* Shared implementation of RENAME (nx == 0) and RENAMENX (nx == 1).
 * argv[1] is the source key, argv[2] the destination key.
 *
 * Replies:
 *  - shared.sameobjecterr when both names compare equal with sdscmp();
 *  - shared.nokeyerr (sent by lookupKeyWriteOrReply) when the source key
 *    does not exist;
 *  - shared.czero on one of the paths taken when dictAdd() reports that the
 *    destination already exists; the other visible path overwrites it with
 *    dictReplace() (presumably selected by 'nx'; the test is not part of
 *    this excerpt);
 *  - shared.cone (nx) or shared.ok (plain rename) once the source key has
 *    been deleted.
 *
 * The destination name's refcount is incremented because the DB dict keeps
 * a reference to c->argv[2] as the key. */
4200 static void renameGenericCommand(redisClient
*c
, int nx
) {
4203 /* To use the same key as src and dst is probably an error */
4204 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4205 addReply(c
,shared
.sameobjecterr
);
4209 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4213 deleteIfVolatile(c
->db
,c
->argv
[2]);
4214 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4217 addReply(c
,shared
.czero
);
4220 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4222 incrRefCount(c
->argv
[2]);
4224 deleteKey(c
->db
,c
->argv
[1]);
4226 addReply(c
,nx
? shared
.cone
: shared
.ok
);
/* RENAME: delegate to renameGenericCommand() with nx = 0. */
4229 static void renameCommand(redisClient
*c
) {
4230 renameGenericCommand(c
,0);
/* RENAMENX: delegate to renameGenericCommand() with nx = 1. */
4233 static void renamenxCommand(redisClient
*c
) {
4234 renameGenericCommand(c
,1);
/* MOVE: move key argv[1] from the client's current DB to the DB whose
 * index is atoi(argv[2]).
 *
 * selectDb() is used to validate the target index (shared.outofrangeerr on
 * REDIS_ERR), then the client is switched back to the source DB.
 *
 * Visible replies:
 *  - shared.sameobjecterr when source and target DB are the same (per the
 *    comment below; the comparison line is not part of this excerpt);
 *  - shared.czero after the lookup of argv[1] (presumably when the key is
 *    missing) and when dictAdd() into the target dict returns DICT_ERR;
 *  - shared.cone after the value has been added to 'dst' and the key has
 *    been deleted from 'src'.
 *
 * The key's refcount is incremented because the target dict keeps a
 * reference to c->argv[1]. */
4237 static void moveCommand(redisClient
*c
) {
4242 /* Obtain source and target DB pointers */
4245 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4246 addReply(c
,shared
.outofrangeerr
);
4250 selectDb(c
,srcid
); /* Back to the source DB */
4252 /* If the user is moving using as target the same
4253 * DB as the source DB it is probably an error. */
4255 addReply(c
,shared
.sameobjecterr
);
4259 /* Check if the element exists and get a reference */
4260 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4262 addReply(c
,shared
.czero
);
4266 /* Try to add the element to the target DB */
4267 deleteIfVolatile(dst
,c
->argv
[1]);
4268 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4269 addReply(c
,shared
.czero
);
4272 incrRefCount(c
->argv
[1]);
4275 /* OK! key moved, free the entry in the source DB */
4276 deleteKey(src
,c
->argv
[1]);
4278 addReply(c
,shared
.cone
);
4281 /* =================================== Lists ================================ */
/* Shared implementation of LPUSH and RPUSH. 'where' is REDIS_HEAD or
 * REDIS_TAIL; argv[1] is the list key and argv[2] the element.
 *
 * Two paths are visible:
 *  - a new list is created with createListObject(), the element is linked
 *    in and the list is registered in the DB dict under argv[1]
 *    (presumably when the key does not exist);
 *  - an existing value is rejected with shared.wrongtypeerr when its type
 *    is not REDIS_LIST, otherwise the element is linked in.
 *
 * On both paths handleClientsWaitingListPush() is tried first; when it
 * returns non-zero the reply is shared.cone. Otherwise the reply is the
 * resulting list length as an integer.
 *
 * Refcounts of argv[1]/argv[2] are incremented whenever the dict or the
 * list keeps a reference to them. */
4282 static void pushGenericCommand(redisClient
*c
, int where
) {
4286 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4288 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4289 addReply(c
,shared
.cone
);
4292 lobj
= createListObject();
4294 if (where
== REDIS_HEAD
) {
4295 listAddNodeHead(list
,c
->argv
[2]);
4297 listAddNodeTail(list
,c
->argv
[2]);
4299 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4300 incrRefCount(c
->argv
[1]);
4301 incrRefCount(c
->argv
[2]);
4303 if (lobj
->type
!= REDIS_LIST
) {
4304 addReply(c
,shared
.wrongtypeerr
);
4307 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4308 addReply(c
,shared
.cone
);
4312 if (where
== REDIS_HEAD
) {
4313 listAddNodeHead(list
,c
->argv
[2]);
4315 listAddNodeTail(list
,c
->argv
[2]);
4317 incrRefCount(c
->argv
[2]);
4320 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
/* LPUSH: delegate to pushGenericCommand() inserting at REDIS_HEAD. */
4323 static void lpushCommand(redisClient
*c
) {
4324 pushGenericCommand(c
,REDIS_HEAD
);
/* RPUSH: delegate to pushGenericCommand() inserting at REDIS_TAIL. */
4327 static void rpushCommand(redisClient
*c
) {
4328 pushGenericCommand(c
,REDIS_TAIL
);
/* LLEN: reply with the length of the list stored at argv[1].
 * lookupKeyReadOrReply() sends shared.czero when the key is missing, and
 * the function returns early when checkType() reports that the value is
 * not a REDIS_LIST. */
4331 static void llenCommand(redisClient
*c
) {
4335 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4336 checkType(c
,o
,REDIS_LIST
)) return;
4339 addReplyUlong(c
,listLength(l
));
/* LINDEX: reply with the element at position atoi(argv[2]) of the list
 * stored at argv[1].
 * shared.nullbulk is sent when the key is missing (via
 * lookupKeyReadOrReply) and is also the visible reply after listIndex()
 * (presumably when no node exists at that index). Returns early when
 * checkType() reports a non-list value. Otherwise the node value is sent
 * as a bulk reply. */
4342 static void lindexCommand(redisClient
*c
) {
4344 int index
= atoi(c
->argv
[2]->ptr
);
4348 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4349 checkType(c
,o
,REDIS_LIST
)) return;
4352 ln
= listIndex(list
, index
);
4354 addReply(c
,shared
.nullbulk
);
4356 robj
*ele
= listNodeValue(ln
);
4357 addReplyBulk(c
,ele
);
/* LSET: store argv[3] at position atoi(argv[2]) of the list at argv[1].
 * shared.nokeyerr is sent when the key is missing (via
 * lookupKeyWriteOrReply); the function returns early when checkType()
 * reports a non-list value. shared.outofrangeerr is the visible reply after
 * listIndex() (presumably when no node exists at that index). Otherwise the
 * node value is replaced by argv[3], whose refcount is incremented because
 * the list now references it, and shared.ok is sent.
 * NOTE(review): what happens to the previous value 'ele' is not visible in
 * this excerpt. */
4361 static void lsetCommand(redisClient
*c
) {
4363 int index
= atoi(c
->argv
[2]->ptr
);
4367 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4368 checkType(c
,o
,REDIS_LIST
)) return;
4371 ln
= listIndex(list
, index
);
4373 addReply(c
,shared
.outofrangeerr
);
4375 robj
*ele
= listNodeValue(ln
);
4378 listNodeValue(ln
) = c
->argv
[3];
4379 incrRefCount(c
->argv
[3]);
4380 addReply(c
,shared
.ok
);
/* Shared implementation of LPOP and RPOP. 'where' selects the end of the
 * list at argv[1]: REDIS_HEAD takes listFirst(), the other visible branch
 * takes listLast().
 * shared.nullbulk is sent when the key is missing (via
 * lookupKeyWriteOrReply) and is also the visible reply after the node
 * selection (presumably when the list has no node). Otherwise the element
 * is sent as a bulk reply, its node is removed, and the key itself is
 * deleted once the list becomes empty. */
4385 static void popGenericCommand(redisClient
*c
, int where
) {
4390 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4391 checkType(c
,o
,REDIS_LIST
)) return;
4394 if (where
== REDIS_HEAD
)
4395 ln
= listFirst(list
);
4397 ln
= listLast(list
);
4400 addReply(c
,shared
.nullbulk
);
4402 robj
*ele
= listNodeValue(ln
);
4403 addReplyBulk(c
,ele
);
4404 listDelNode(list
,ln
);
4405 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* LPOP: delegate to popGenericCommand() popping from REDIS_HEAD. */
4410 static void lpopCommand(redisClient
*c
) {
4411 popGenericCommand(c
,REDIS_HEAD
);
/* RPOP: delegate to popGenericCommand() popping from REDIS_TAIL. */
4414 static void rpopCommand(redisClient
*c
) {
4415 popGenericCommand(c
,REDIS_TAIL
);
/* LRANGE: reply with the elements of the list at argv[1] between indexes
 * atoi(argv[2]) and atoi(argv[3]), both inclusive.
 *
 * Negative indexes count from the end of the list (llen is added to them)
 * and are clamped to 0 if still negative. When start > end or
 * start >= llen the reply is shared.emptymultibulk. 'end' is clamped to
 * llen-1, so the reply holds (end-start)+1 elements sent as a multi-bulk.
 *
 * shared.nullmultibulk is sent when the key is missing (via
 * lookupKeyReadOrReply); the function returns early when checkType()
 * reports a non-list value. */
4418 static void lrangeCommand(redisClient
*c
) {
4420 int start
= atoi(c
->argv
[2]->ptr
);
4421 int end
= atoi(c
->argv
[3]->ptr
);
4428 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4429 checkType(c
,o
,REDIS_LIST
)) return;
4431 llen
= listLength(list
);
4433 /* convert negative indexes */
4434 if (start
< 0) start
= llen
+start
;
4435 if (end
< 0) end
= llen
+end
;
4436 if (start
< 0) start
= 0;
4437 if (end
< 0) end
= 0;
4439 /* indexes sanity checks */
4440 if (start
> end
|| start
>= llen
) {
4441 /* Out of range start or start > end result in empty list */
4442 addReply(c
,shared
.emptymultibulk
);
4445 if (end
>= llen
) end
= llen
-1;
4446 rangelen
= (end
-start
)+1;
4448 /* Return the result in form of a multi-bulk reply */
4449 ln
= listIndex(list
, start
);
4450 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4451 for (j
= 0; j
< rangelen
; j
++) {
4452 ele
= listNodeValue(ln
);
4453 addReplyBulk(c
,ele
);
/* LTRIM: trim the list at argv[1] so that only the elements between
 * indexes atoi(argv[2]) and atoi(argv[3]) remain.
 *
 * Negative indexes count from the end of the list (llen is added to them)
 * and are clamped to 0 if still negative; 'end' is clamped to llen-1.
 * 'ltrim' nodes are then removed from the head and 'rtrim' nodes from the
 * tail, the key is deleted if the list became empty, and shared.ok is sent.
 *
 * shared.ok is also what lookupKeyWriteOrReply() sends when the key does
 * not exist; the function returns early when checkType() reports a
 * non-list value.
 * NOTE(review): the statements computing 'ltrim' and 'rtrim' are not part
 * of this excerpt. */
4458 static void ltrimCommand(redisClient
*c
) {
4460 int start
= atoi(c
->argv
[2]->ptr
);
4461 int end
= atoi(c
->argv
[3]->ptr
);
4463 int j
, ltrim
, rtrim
;
4467 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4468 checkType(c
,o
,REDIS_LIST
)) return;
4470 llen
= listLength(list
);
4472 /* convert negative indexes */
4473 if (start
< 0) start
= llen
+start
;
4474 if (end
< 0) end
= llen
+end
;
4475 if (start
< 0) start
= 0;
4476 if (end
< 0) end
= 0;
4478 /* indexes sanity checks */
4479 if (start
> end
|| start
>= llen
) {
4480 /* Out of range start or start > end result in empty list */
4484 if (end
>= llen
) end
= llen
-1;
4489 /* Remove list elements to perform the trim */
4490 for (j
= 0; j
< ltrim
; j
++) {
4491 ln
= listFirst(list
);
4492 listDelNode(list
,ln
);
4494 for (j
= 0; j
< rtrim
; j
++) {
4495 ln
= listLast(list
);
4496 listDelNode(list
,ln
);
4498 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4500 addReply(c
,shared
.ok
);
/* LREM: remove elements equal to argv[3] from the list at argv[1].
 * The count is atoi(argv[2]); the visible code negates 'toremove' on one
 * path and walks from the tail when 'fromtail' is set, from the head
 * otherwise (presumably a negative count selects the tail walk; the test
 * is not part of this excerpt).
 *
 * The next node is saved before a matching node is deleted so the walk can
 * continue. The walk stops early once 'removed' equals a non-zero
 * 'toremove'; a count of 0 therefore never stops early. The key is deleted
 * if the list becomes empty and the number of removed elements is sent as
 * an integer reply.
 *
 * shared.czero is sent when the key is missing (via
 * lookupKeyWriteOrReply); the function returns early when checkType()
 * reports a non-list value. */
4503 static void lremCommand(redisClient
*c
) {
4506 listNode
*ln
, *next
;
4507 int toremove
= atoi(c
->argv
[2]->ptr
);
4511 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4512 checkType(c
,o
,REDIS_LIST
)) return;
4516 toremove
= -toremove
;
4519 ln
= fromtail
? list
->tail
: list
->head
;
4521 robj
*ele
= listNodeValue(ln
);
4523 next
= fromtail
? ln
->prev
: ln
->next
;
4524 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4525 listDelNode(list
,ln
);
4528 if (toremove
&& removed
== toremove
) break;
4532 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4533 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4536 /* This is the semantic of this command:
4537 * RPOPLPUSH srclist dstlist:
4538 * IF LLEN(srclist) > 0
4539 * element = RPOP srclist
4540 * LPUSH dstlist element
 * RETURN element
 * ELSE
 * RETURN nil
 * END
 *
4547 * The idea is to be able to get an element from a list in a reliable way
4548 * since the element is not just returned but pushed against another list
4549 * as well. This command was originally proposed by Ezra Zygmuntowicz.
 */
/* Handler taking the last element of the list at argv[1] and pushing it on
 * the head of the list at argv[2].
 *
 * Visible behaviour:
 *  - shared.nullbulk when the source key is missing (via
 *    lookupKeyWriteOrReply) and after listLast() (presumably when the
 *    source list has no node); early return when checkType() reports a
 *    non-list source;
 *  - shared.wrongtypeerr when the destination exists and is not a list;
 *  - the element is first offered to handleClientsWaitingListPush(); only
 *    when that returns zero is it linked at the head of the destination
 *    list, which is created and registered under argv[2] on one of the
 *    visible paths (per the comment, when the key does not exist);
 *  - the element is sent to the caller as a bulk reply, its node is
 *    removed from the source list, and the source key is deleted once the
 *    list is empty. */
4551 static void rpoplpushcommand(redisClient
*c
) {
4556 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4557 checkType(c
,sobj
,REDIS_LIST
)) return;
4558 srclist
= sobj
->ptr
;
4559 ln
= listLast(srclist
);
4562 addReply(c
,shared
.nullbulk
);
4564 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4565 robj
*ele
= listNodeValue(ln
);
4568 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4569 addReply(c
,shared
.wrongtypeerr
);
4573 /* Add the element to the target list (unless it's directly
4574 * passed to some BLPOP-ing client */
4575 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4577 /* Create the list if the key does not exist */
4578 dobj
= createListObject();
4579 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4580 incrRefCount(c
->argv
[2]);
4582 dstlist
= dobj
->ptr
;
4583 listAddNodeHead(dstlist
,ele
);
4587 /* Send the element to the client as reply as well */
4588 addReplyBulk(c
,ele
);
4590 /* Finally remove the element from the source list */
4591 listDelNode(srclist
,ln
);
4592 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4597 /* ==================================== Sets ================================ */
/* SADD: add member argv[2] to the set stored at argv[1].
 * One visible path creates a set with createSetObject() and registers it
 * in the DB dict under argv[1] (presumably when the key does not exist);
 * the other rejects a value whose type is not REDIS_SET with
 * shared.wrongtypeerr.
 * The member is added as a dict key with a NULL value: shared.cone is sent
 * and the member's refcount incremented when dictAdd() returns DICT_OK;
 * shared.czero is the other visible reply (presumably when the member was
 * already present). */
4599 static void saddCommand(redisClient
*c
) {
4602 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4604 set
= createSetObject();
4605 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4606 incrRefCount(c
->argv
[1]);
4608 if (set
->type
!= REDIS_SET
) {
4609 addReply(c
,shared
.wrongtypeerr
);
4613 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4614 incrRefCount(c
->argv
[2]);
4616 addReply(c
,shared
.cone
);
4618 addReply(c
,shared
.czero
);
/* SREM: remove member argv[2] from the set stored at argv[1].
 * shared.czero is sent when the key is missing (via
 * lookupKeyWriteOrReply); the function returns early when checkType()
 * reports a non-set value.
 * When dictDelete() returns DICT_OK the hash table is resized if
 * htNeedsResize() says so, the key is deleted once the set is empty, and
 * shared.cone is sent. shared.czero is the other visible reply (presumably
 * when the member was not in the set). */
4622 static void sremCommand(redisClient
*c
) {
4625 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4626 checkType(c
,set
,REDIS_SET
)) return;
4628 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4630 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4631 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4632 addReply(c
,shared
.cone
);
4634 addReply(c
,shared
.czero
);
/* SMOVE: move member argv[3] from the set at argv[1] to the set at argv[2].
 *
 * Visible behaviour:
 *  - a missing source key yields shared.czero, a source of the wrong type
 *    shared.wrongtypeerr;
 *  - an existing destination that is not a set yields shared.wrongtypeerr;
 *  - shared.czero when dictDelete() does not find the member in the source;
 *  - the source key is deleted when it became empty, unless source and
 *    destination are the same object;
 *  - a destination set is created and registered under argv[2] on one
 *    visible path (presumably when the key does not exist);
 *  - the member's refcount is incremented only when dictAdd() actually
 *    inserted it into the destination, and shared.cone is sent. */
4638 static void smoveCommand(redisClient
*c
) {
4639 robj
*srcset
, *dstset
;
4641 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4642 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4644 /* If the source key does not exist return 0, if it's of the wrong type
4646 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4647 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4650 /* Error if the destination key is not a set as well */
4651 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4652 addReply(c
,shared
.wrongtypeerr
);
4655 /* Remove the element from the source set */
4656 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4657 /* Key not found in the src set! return zero */
4658 addReply(c
,shared
.czero
);
4661 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4662 deleteKey(c
->db
,c
->argv
[1]);
4664 /* Add the element to the destination set */
4666 dstset
= createSetObject();
4667 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4668 incrRefCount(c
->argv
[2]);
4670 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4671 incrRefCount(c
->argv
[3]);
4672 addReply(c
,shared
.cone
);
/* SISMEMBER: reply shared.cone when dictFind() locates argv[2] in the set
 * stored at argv[1]; shared.czero is the other visible reply (presumably
 * when it is not found).
 * shared.czero is also sent when the key is missing (via
 * lookupKeyReadOrReply); the function returns early when checkType()
 * reports a non-set value. */
4675 static void sismemberCommand(redisClient
*c
) {
4678 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4679 checkType(c
,set
,REDIS_SET
)) return;
4681 if (dictFind(set
->ptr
,c
->argv
[2]))
4682 addReply(c
,shared
.cone
);
4684 addReply(c
,shared
.czero
);
/* SCARD: reply with the number of members (dictSize) of the set stored at
 * argv[1]. shared.czero is sent when the key is missing (via
 * lookupKeyReadOrReply); the function returns early when checkType()
 * reports a non-set value. */
4687 static void scardCommand(redisClient
*c
) {
4691 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4692 checkType(c
,o
,REDIS_SET
)) return;
4695 addReplyUlong(c
,dictSize(s
));
/* SPOP: remove and return a random member of the set stored at argv[1].
 * The member is picked with dictGetRandomKey(), sent as a bulk reply and
 * then deleted from the set; the hash table is resized if htNeedsResize()
 * says so and the key is deleted once the set is empty.
 * shared.nullbulk is sent when the key is missing (via
 * lookupKeyWriteOrReply) and is also the visible reply after the random
 * pick (presumably when no entry was returned). The function returns early
 * when checkType() reports a non-set value. */
4698 static void spopCommand(redisClient
*c
) {
4702 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4703 checkType(c
,set
,REDIS_SET
)) return;
4705 de
= dictGetRandomKey(set
->ptr
);
4707 addReply(c
,shared
.nullbulk
);
4709 robj
*ele
= dictGetEntryKey(de
);
4711 addReplyBulk(c
,ele
);
4712 dictDelete(set
->ptr
,ele
);
4713 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4714 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* SRANDMEMBER: return a random member of the set stored at argv[1] without
 * removing it. The member is picked with dictGetRandomKey() and sent as a
 * bulk reply.
 * shared.nullbulk is sent when the key is missing (via
 * lookupKeyReadOrReply) and is also the visible reply after the random
 * pick (presumably when no entry was returned). The function returns early
 * when checkType() reports a non-set value. */
4719 static void srandmemberCommand(redisClient
*c
) {
4723 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4724 checkType(c
,set
,REDIS_SET
)) return;
4726 de
= dictGetRandomKey(set
->ptr
);
4728 addReply(c
,shared
.nullbulk
);
4730 robj
*ele
= dictGetEntryKey(de
);
4732 addReplyBulk(c
,ele
);
4736 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4737 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4739 return dictSize(*d1
)-dictSize(*d2
);
/* Shared implementation of SINTER (dstkey == NULL) and SINTERSTORE
 * (dstkey != NULL), as called by sinterCommand() and sinterstoreCommand().
 * 'setskeys' holds 'setsnum' key names.
 *
 * Visible steps:
 *  - resolve every key into dv[], using lookupKeyWrite() on one branch and
 *    lookupKeyRead() on the other (presumably selected by dstkey);
 *  - a key of the wrong type yields shared.wrongtypeerr; the visible
 *    replies for the other early exit are shared.czero (next to a
 *    deleteKey() of dstkey) and shared.nullmultibulk;
 *  - sort dv[] by cardinality so the smallest set is iterated;
 *  - for every member of dv[0], look it up in all the other sets and skip
 *    it as soon as one set does not contain it; surviving members are
 *    either sent as bulk replies or added to 'dstset';
 *  - in store mode the old destination is deleted and 'dstset' is stored
 *    under dstkey when non-empty (reply: its size), otherwise it is
 *    released and shared.czero is sent;
 *  - the placeholder 'lenobj' finally receives the "*<cardinality>" header.
 *
 * NOTE(review): several branch and cleanup lines are not part of this
 * excerpt. */
4742 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4743 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4746 robj
*lenobj
= NULL
, *dstset
= NULL
;
4747 unsigned long j
, cardinality
= 0;
4749 for (j
= 0; j
< setsnum
; j
++) {
4753 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4754 lookupKeyRead(c
->db
,setskeys
[j
]);
4758 if (deleteKey(c
->db
,dstkey
))
4760 addReply(c
,shared
.czero
);
4762 addReply(c
,shared
.nullmultibulk
);
4766 if (setobj
->type
!= REDIS_SET
) {
4768 addReply(c
,shared
.wrongtypeerr
);
4771 dv
[j
] = setobj
->ptr
;
4773 /* Sort sets from the smallest to largest, this will improve our
4774 * algorithm's performance */
4775 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4777 /* The first thing we should output is the total number of elements...
4778 * since this is a multi-bulk write, but at this stage we don't know
4779 * the intersection set size, so we use a trick, append an empty object
4780 * to the output list and save the pointer to later modify it with the
4783 lenobj
= createObject(REDIS_STRING
,NULL
);
4785 decrRefCount(lenobj
);
4787 /* If we have a target key where to store the resulting set
4788 * create this key with an empty set inside */
4789 dstset
= createSetObject();
4792 /* Iterate all the elements of the first (smallest) set, and test
4793 * the element against all the other sets, if at least one set does
4794 * not include the element it is discarded */
4795 di
= dictGetIterator(dv
[0]);
4797 while((de
= dictNext(di
)) != NULL
) {
4800 for (j
= 1; j
< setsnum
; j
++)
4801 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4803 continue; /* at least one set does not contain the member */
4804 ele
= dictGetEntryKey(de
);
4806 addReplyBulk(c
,ele
);
4809 dictAdd(dstset
->ptr
,ele
,NULL
);
4813 dictReleaseIterator(di
);
4816 /* Store the resulting set into the target, if the intersection
4817 * is not an empty set. */
4818 deleteKey(c
->db
,dstkey
);
4819 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4820 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4821 incrRefCount(dstkey
);
4822 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4824 decrRefCount(dstset
);
4825 addReply(c
,shared
.czero
);
4829 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
/* SINTER: intersect the sets named by argv[1..argc-1]; no destination key
 * (dstkey is NULL), so the result is sent to the client. */
4834 static void sinterCommand(redisClient
*c
) {
4835 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
/* SINTERSTORE: intersect the sets named by argv[2..argc-1], passing
 * argv[1] as the destination key. */
4838 static void sinterstoreCommand(redisClient
*c
) {
4839 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Operation selectors passed as the 'op' argument of
 * sunionDiffGenericCommand(). */
4842 #define REDIS_OP_UNION 0
4843 #define REDIS_OP_DIFF 1
4844 #define REDIS_OP_INTER 2
/* Shared implementation of SUNION, SUNIONSTORE, SDIFF and SDIFFSTORE.
 * 'setskeys' holds 'setsnum' key names, 'dstkey' is the destination key or
 * NULL, and 'op' is REDIS_OP_UNION or REDIS_OP_DIFF.
 *
 * Visible steps:
 *  - resolve every key into dv[], using lookupKeyWrite() on one branch and
 *    lookupKeyRead() on the other (presumably selected by dstkey); a value
 *    of the wrong type yields shared.wrongtypeerr; a NULL dv[] entry is
 *    treated as an empty set;
 *  - build the result in the temporary set 'dstset':
 *      union: add every member of every set;
 *      diff:  add every member of the first set, then delete the members
 *             found in the following sets, stopping as soon as the first
 *             set is missing or the running cardinality reaches 0;
 *  - without a destination, send "*<cardinality>" followed by the members
 *    as bulk replies and release 'dstset';
 *  - with a destination, delete the old key and store 'dstset' under dstkey
 *    when non-empty (reply: its size), otherwise release it and send
 *    shared.czero.
 *
 * NOTE(review): the lines updating 'cardinality' and several branch and
 * cleanup lines are not part of this excerpt. */
4846 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4847 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4850 robj
*dstset
= NULL
;
4851 int j
, cardinality
= 0;
4853 for (j
= 0; j
< setsnum
; j
++) {
4857 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4858 lookupKeyRead(c
->db
,setskeys
[j
]);
4863 if (setobj
->type
!= REDIS_SET
) {
4865 addReply(c
,shared
.wrongtypeerr
);
4868 dv
[j
] = setobj
->ptr
;
4871 /* We need a temp set object to store our union. If the dstkey
4872 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4873 * this set object will be the resulting object to set into the target key*/
4874 dstset
= createSetObject();
4876 /* Iterate all the elements of all the sets, add every element a single
4877 * time to the result set */
4878 for (j
= 0; j
< setsnum
; j
++) {
4879 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4880 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4882 di
= dictGetIterator(dv
[j
]);
4884 while((de
= dictNext(di
)) != NULL
) {
4887 /* dictAdd will not add the same element multiple times */
4888 ele
= dictGetEntryKey(de
);
4889 if (op
== REDIS_OP_UNION
|| j
== 0) {
4890 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4894 } else if (op
== REDIS_OP_DIFF
) {
4895 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4900 dictReleaseIterator(di
);
4902 /* result set is empty? Exit asap. */
4903 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4906 /* Output the content of the resulting set, if not in STORE mode */
4908 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4909 di
= dictGetIterator(dstset
->ptr
);
4910 while((de
= dictNext(di
)) != NULL
) {
4913 ele
= dictGetEntryKey(de
);
4914 addReplyBulk(c
,ele
);
4916 dictReleaseIterator(di
);
4917 decrRefCount(dstset
);
4919 /* If we have a target key where to store the resulting set
4920 * create this key with the result set inside */
4921 deleteKey(c
->db
,dstkey
);
4922 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4923 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4924 incrRefCount(dstkey
);
4925 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4927 decrRefCount(dstset
);
4928 addReply(c
,shared
.czero
);
/* SUNION: union of the sets named by argv[1..argc-1]; no destination key,
 * so the result is sent to the client. */
4935 static void sunionCommand(redisClient
*c
) {
4936 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
/* SUNIONSTORE: union of the sets named by argv[2..argc-1], passing argv[1]
 * as the destination key. */
4939 static void sunionstoreCommand(redisClient
*c
) {
4940 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
/* SDIFF: difference between the set at argv[1] and the sets named by the
 * following arguments; no destination key, so the result is sent to the
 * client. */
4943 static void sdiffCommand(redisClient
*c
) {
4944 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
/* SDIFFSTORE: difference between the set at argv[2] and the sets named by
 * the following arguments, passing argv[1] as the destination key. */
4947 static void sdiffstoreCommand(redisClient
*c
) {
4948 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4951 /* ==================================== ZSets =============================== */
4953 /* ZSETs are ordered sets using two data structures to hold the same elements
4954 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
4957 * The elements are added to a hash table mapping Redis objects to scores.
4958 * At the same time the elements are added to a skip list mapping scores
4959 * to Redis objects (so objects are sorted by scores in this "view"). */
4961 /* This skiplist implementation is almost a C translation of the original
4962 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4963 * Alternative to Balanced Trees", modified in three ways:
4964 * a) this implementation allows for repeated values.
4965 * b) the comparison is not just by key (our 'score') but by satellite data.
4966 * c) there is a back pointer, so it's a doubly linked list with the back
4967 * pointers being only at "level 1". This allows to traverse the list
4968 * from tail to head, useful for ZREVRANGE. */
/* Allocate a skiplist node for the given level, score and object.
 * The visible code allocates the node itself, a 'forward' array of 'level'
 * node pointers, and a 'span' array of (level - 1) unsigned ints: level 0
 * has no span slot, the span of level i is stored at span[i-1].
 * NOTE(review): the stores of 'score'/'obj', any guard around the span
 * allocation, and the return statement are not part of this excerpt. */
4970 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4971 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4973 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4975 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
/* Allocate an empty skiplist.
 * The header node is created with ZSKIPLIST_MAXLEVEL levels, score 0 and a
 * NULL object; all its forward pointers are set to NULL, its spans to 0
 * and its backward pointer to NULL.
 * NOTE(review): the initialisation of the other zskiplist fields and the
 * return statement are not part of this excerpt. */
4981 static zskiplist
*zslCreate(void) {
4985 zsl
= zmalloc(sizeof(*zsl
));
4988 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
4989 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
4990 zsl
->header
->forward
[j
] = NULL
;
4992 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
4993 if (j
< ZSKIPLIST_MAXLEVEL
-1)
4994 zsl
->header
->span
[j
] = 0;
4996 zsl
->header
->backward
= NULL
;
/* Release a skiplist node: drop the reference held on node->obj and free
 * the 'forward' array.
 * NOTE(review): the release of the span array and of the node itself is
 * not part of this excerpt. */
5001 static void zslFreeNode(zskiplistNode
*node
) {
5002 decrRefCount(node
->obj
);
5003 zfree(node
->forward
);
/* Release a whole skiplist.
 * The first real node is read from the header's level-0 forward pointer
 * before the header's 'forward' and 'span' arrays are freed; the level-0
 * chain is then walked, saving each node's successor in 'next'.
 * NOTE(review): the loop header and the calls freeing each node, the
 * header and the list are not part of this excerpt. */
5008 static void zslFree(zskiplist
*zsl
) {
5009 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5011 zfree(zsl
->header
->forward
);
5012 zfree(zsl
->header
->span
);
5015 next
= node
->forward
[0];
/* Pick a random level for a new skiplist node.
 * The loop keeps going while the low 16 bits of random() are below
 * ZSKIPLIST_P * 0xFFFF, i.e. with probability of roughly ZSKIPLIST_P per
 * iteration.
 * NOTE(review): the loop body and the return statement are not part of
 * this excerpt. */
5022 static int zslRandomLevel(void) {
5024 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
/* Insert a new node holding 'obj' with the given 'score'.
 *
 * Ordering: nodes are sorted by score and, for equal scores, by
 * compareStringObjects() on the objects.
 *
 * Bookkeeping visible here:
 *  - rank[i] accumulates the number of nodes crossed at level i (and
 *    above) to reach the insert position; levels above 0 advance by
 *    span[i-1], level 0 by 1;
 *  - when the random level exceeds the current list level, the new levels
 *    start at the header with a span equal to the list length;
 *  - the new node is spliced after update[i] at every level below 'level',
 *    and the spans of update[i] and of the new node are derived from
 *    rank[0] - rank[i];
 *  - levels at or above 'level' only get their span incremented;
 *  - the backward pointer is NULL when the predecessor is the header.
 *
 * NOTE(review): the lines assigning update[i], the list level/length/tail
 * updates and several guards are not part of this excerpt. */
5029 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5030 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5031 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5035 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5036 /* store rank that is crossed to reach the insert position */
5037 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5039 while (x
->forward
[i
] &&
5040 (x
->forward
[i
]->score
< score
||
5041 (x
->forward
[i
]->score
== score
&&
5042 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5043 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5048 /* we assume the key is not already inside, since we allow duplicated
5049 * scores, and the re-insertion of score and redis object should never
5050 * happen since the caller of zslInsert() should test in the hash table
5051 * if the element is already inside or not. */
5052 level
= zslRandomLevel();
5053 if (level
> zsl
->level
) {
5054 for (i
= zsl
->level
; i
< level
; i
++) {
5056 update
[i
] = zsl
->header
;
5057 update
[i
]->span
[i
-1] = zsl
->length
;
5061 x
= zslCreateNode(level
,score
,obj
);
5062 for (i
= 0; i
< level
; i
++) {
5063 x
->forward
[i
] = update
[i
]->forward
[i
];
5064 update
[i
]->forward
[i
] = x
;
5066 /* update span covered by update[i] as x is inserted here */
5068 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5069 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5073 /* increment span for untouched levels */
5074 for (i
= level
; i
< zsl
->level
; i
++) {
5075 update
[i
]->span
[i
-1]++;
5078 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5080 x
->forward
[0]->backward
= x
;
5086 /* Internal function used by zslDelete, zslDeleteRangeByScore and zslDeleteRangeByRank */
/* Unlink node 'x' from the skiplist. 'update' must hold, for every level,
 * the node preceding 'x' at that level.
 *
 * For each level where update[i] points directly to 'x', the span of
 * update[i] absorbs the span of 'x' minus one and the forward pointer
 * skips 'x'; the other visible path only decrements the span by one.
 * The backward pointer of the successor (or the list tail when 'x' was the
 * last node) is set to x->backward, and the loop at the end runs while the
 * top level of the header is empty and the list level is above 1.
 *
 * The node itself is not freed by the visible code.
 * NOTE(review): some branch lines, the body of the final loop and any
 * length update are not part of this excerpt. */
5087 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5089 for (i
= 0; i
< zsl
->level
; i
++) {
5090 if (update
[i
]->forward
[i
] == x
) {
5092 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5094 update
[i
]->forward
[i
] = x
->forward
[i
];
5096 /* invariant: i > 0, because update[0]->forward[0]
5097 * is always equal to x */
5098 update
[i
]->span
[i
-1] -= 1;
5101 if (x
->forward
[0]) {
5102 x
->forward
[0]->backward
= x
->backward
;
5104 zsl
->tail
= x
->backward
;
5106 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5111 /* Delete an element with matching score/object from the skiplist. */
/* Search from the top level down, advancing while the next node sorts
 * before (score, obj): lower score, or equal score and an object that
 * compares lower. A candidate is deleted through zslDeleteNode() only when
 * both its score and its object match. Returns 0 when no matching node is
 * found.
 * NOTE(review): the lines recording update[i], selecting the candidate and
 * the success return value are not part of this excerpt. */
5112 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5113 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5117 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5118 while (x
->forward
[i
] &&
5119 (x
->forward
[i
]->score
< score
||
5120 (x
->forward
[i
]->score
== score
&&
5121 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5125 /* We may have multiple elements with the same score, what we need
5126 * is to find the element with both the right score and object. */
5128 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5129 zslDeleteNode(zsl
, x
, update
);
5133 return 0; /* not found */
5135 return 0; /* not found */
5138 /* Delete all the elements with score between min and max from the skiplist.
5139 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5140 * Note that this function takes the reference to the hash table view of the
5141 * sorted set, in order to remove the elements from the hash table too. */
/* The search skips, at every level, the nodes whose score is below 'min'.
 * Nodes are then removed one by one while their score is <= max: the
 * successor is saved first, the node is unlinked with zslDeleteNode() and
 * its object is deleted from 'dict'. Returns the number of removed nodes.
 * NOTE(review): the lines recording update[i], freeing the node, counting
 * and advancing to 'next' are not part of this excerpt. */
5142 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5143 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5144 unsigned long removed
= 0;
5148 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5149 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5153 /* We may have multiple elements with the same score, what we need
5154 * is to find the element with both the right score and object. */
5156 while (x
&& x
->score
<= max
) {
5157 zskiplistNode
*next
= x
->forward
[0];
5158 zslDeleteNode(zsl
, x
, update
);
5159 dictDelete(dict
,x
->obj
);
5164 return removed
; /* not found */
5167 /* Delete all the elements with rank between start and end from the skiplist.
5168 * Start and end are inclusive. Note that start and end need to be 1-based */
/* The search advances at every level while doing so keeps the number of
 * traversed nodes below 'start' (levels above 0 advance by span[i-1],
 * level 0 by 1). Nodes are then removed while 'traversed' is <= end: the
 * successor is saved first, the node is unlinked with zslDeleteNode() and
 * its object is deleted from 'dict'.
 * NOTE(review): the lines recording update[i], freeing the node, updating
 * the counters and the return statement are not part of this excerpt. */
5169 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5170 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5171 unsigned long traversed
= 0, removed
= 0;
5175 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5176 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5177 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5185 while (x
&& traversed
<= end
) {
5186 zskiplistNode
*next
= x
->forward
[0];
5187 zslDeleteNode(zsl
, x
, update
);
5188 dictDelete(dict
,x
->obj
);
5197 /* Find the first node having a score equal or greater than the specified one.
5198 * Returns NULL if there is no match. */
/* The search skips, at every level, the nodes whose score is below
 * 'score'; the result is the level-0 successor of the last skipped node,
 * which is NULL when every node has a lower score.
 * NOTE(review): the statements initialising and advancing 'x' are not part
 * of this excerpt. */
5199 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5204 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5205 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5208 /* We may have multiple elements with the same score, what we need
5209 * is to find the element with both the right score and object. */
5210 return x
->forward
[0];
5213 /* Find the rank for an element by both score and key.
5214 * Returns 0 when the element cannot be found, rank otherwise.
5215 * Note that the rank is 1-based due to the span of zsl->header to the
 * first element. */
/* The search advances while the next node sorts at or before (score, o):
 * note the "<= 0" object comparison, which lets the walk land on the
 * matching node itself instead of stopping before it. 'rank' accumulates
 * the nodes crossed: span[i-1] on levels above 0, 1 on level 0.
 * NOTE(review): the statements advancing 'x' and the return statements are
 * not part of this excerpt. */
5217 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5219 unsigned long rank
= 0;
5223 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5224 while (x
->forward
[i
] &&
5225 (x
->forward
[i
]->score
< score
||
5226 (x
->forward
[i
]->score
== score
&&
5227 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5228 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5232 /* x might be equal to zsl->header, so test if obj is non-NULL */
5233 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5240 /* Finds an element by its rank. The rank argument needs to be 1-based. */
/* The search advances at every level while doing so does not overshoot
 * 'rank' (levels above 0 advance by span[i-1], level 0 by 1), and checks
 * whether 'traversed' has reached exactly 'rank'.
 * NOTE(review): the statements advancing 'x' and the return statements are
 * not part of this excerpt. */
5241 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5243 unsigned long traversed
= 0;
5247 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5248 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5250 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5253 if (traversed
== rank
) {
5260 /* The actual Z-commands implementations */
5262 /* This generic command implements both ZADD and ZINCRBY.
5263 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5264 * the increment if the operation is a ZINCRBY (doincrement == 1). */
/* Parameters: 'key' is the sorted set key, 'ele' the member, 'scoreval'
 * the score or increment, 'doincrement' selects ZINCRBY behaviour.
 *
 * Visible steps:
 *  - look the key up; create and register a new sorted set when it is
 *    missing, reply shared.wrongtypeerr when the value is not a REDIS_ZSET;
 *  - allocate the new score; on the increment path it is the old score of
 *    the member plus 'scoreval';
 *  - case 1, dictAdd() succeeds (new member): the member is referenced
 *    twice, once by the hash table and once by the skiplist, hence the two
 *    incrRefCount() calls; replies are the new score or shared.cone;
 *  - case 2, the member exists: when the score changed the node is removed
 *    from the skiplist and re-inserted with the new score, and the hash
 *    table value is replaced; replies are the new score or shared.czero.
 *
 * NOTE(review): the branches choosing between the two reply forms and
 * several refcount/free lines are not part of this excerpt. */
5265 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5270 zsetobj
= lookupKeyWrite(c
->db
,key
);
5271 if (zsetobj
== NULL
) {
5272 zsetobj
= createZsetObject();
5273 dictAdd(c
->db
->dict
,key
,zsetobj
);
5276 if (zsetobj
->type
!= REDIS_ZSET
) {
5277 addReply(c
,shared
.wrongtypeerr
);
5283 /* Ok now since we implement both ZADD and ZINCRBY here the code
5284 * needs to handle the two different conditions. It's all about setting
5285 * '*score', that is, the new score to set, to the right value. */
5286 score
= zmalloc(sizeof(double));
5290 /* Read the old score. If the element was not present starts from 0 */
5291 de
= dictFind(zs
->dict
,ele
);
5293 double *oldscore
= dictGetEntryVal(de
);
5294 *score
= *oldscore
+ scoreval
;
5302 /* What follows is a simple remove and re-insert operation that is common
5303 * to both ZADD and ZINCRBY... */
5304 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5305 /* case 1: New element */
5306 incrRefCount(ele
); /* added to hash */
5307 zslInsert(zs
->zsl
,*score
,ele
);
5308 incrRefCount(ele
); /* added to skiplist */
5311 addReplyDouble(c
,*score
);
5313 addReply(c
,shared
.cone
);
5318 /* case 2: Score update operation */
5319 de
= dictFind(zs
->dict
,ele
);
5320 redisAssert(de
!= NULL
);
5321 oldscore
= dictGetEntryVal(de
);
5322 if (*score
!= *oldscore
) {
5325 /* Remove and insert the element in the skip list with new score */
5326 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5327 redisAssert(deleted
!= 0);
5328 zslInsert(zs
->zsl
,*score
,ele
);
5330 /* Update the score in the hash table */
5331 dictReplace(zs
->dict
,ele
,score
);
5337 addReplyDouble(c
,*score
);
5339 addReply(c
,shared
.czero
);
/* ZADD key score member: parse argv[2] with strtod() and delegate to
 * zaddGenericCommand() with doincrement = 0.
 * NOTE(review): strtod() is called with a NULL end pointer, so malformed
 * scores are not detected here. */
5343 static void zaddCommand(redisClient
*c
) {
5346 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5347 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
/* ZINCRBY key increment member: parse argv[2] with strtod() and delegate
 * to zaddGenericCommand() with doincrement = 1.
 * NOTE(review): strtod() is called with a NULL end pointer, so malformed
 * increments are not detected here. */
5350 static void zincrbyCommand(redisClient
*c
) {
5353 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5354 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* ZREM: remove member argv[2] from the sorted set stored at argv[1].
 * shared.czero is sent when the key is missing (via
 * lookupKeyWriteOrReply) and is also the visible reply after dictFind()
 * (presumably when the member is not in the set). The function returns
 * early when checkType() reports a non-zset value.
 * The member is removed from the skiplist first, because its score is read
 * from the hash table entry, and then from the hash table. The table is
 * resized if htNeedsResize() says so, the key is deleted once the set is
 * empty, and shared.cone is sent. */
5357 static void zremCommand(redisClient
*c
) {
5364 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5365 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5368 de
= dictFind(zs
->dict
,c
->argv
[2]);
5370 addReply(c
,shared
.czero
);
5373 /* Delete from the skiplist */
5374 oldscore
= dictGetEntryVal(de
);
5375 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5376 redisAssert(deleted
!= 0);
5378 /* Delete from the hash table */
5379 dictDelete(zs
->dict
,c
->argv
[2]);
5380 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5381 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5383 addReply(c
,shared
.cone
);
/* ZREMRANGEBYSCORE: remove from the sorted set at argv[1] the members
 * whose score lies between strtod(argv[2]) and strtod(argv[3]).
 * shared.czero is sent when the key is missing (via
 * lookupKeyWriteOrReply); the function returns early when checkType()
 * reports a non-zset value.
 * After zslDeleteRangeByScore() the hash table is resized if
 * htNeedsResize() says so, the key is deleted once the set is empty,
 * server.dirty grows by the number of removed members and that number is
 * sent as the reply. */
5386 static void zremrangebyscoreCommand(redisClient
*c
) {
5387 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5388 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5393 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5394 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5397 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5398 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5399 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5400 server
.dirty
+= deleted
;
5401 addReplyLong(c
,deleted
);
/* ZREMRANGEBYRANK: remove from the sorted set at argv[1] the members whose
 * 0-based rank lies between atoi(argv[2]) and atoi(argv[3]), inclusive.
 *
 * Negative indexes count from the end (the skiplist length is added to
 * them) and are clamped to 0 if still negative. When start > end or
 * start >= length the reply is shared.czero; 'end' is clamped to length-1.
 * Both bounds are incremented before calling zslDeleteRangeByRank()
 * because the skiplist rank functions are 1-based.
 *
 * Afterwards the hash table is resized if htNeedsResize() says so, the key
 * is deleted once the set is empty, server.dirty grows by the number of
 * removed members and that number is sent as the reply.
 *
 * shared.czero is also sent when the key is missing (via
 * lookupKeyWriteOrReply); the function returns early when checkType()
 * reports a non-zset value. */
5404 static void zremrangebyrankCommand(redisClient
*c
) {
5405 int start
= atoi(c
->argv
[2]->ptr
);
5406 int end
= atoi(c
->argv
[3]->ptr
);
5412 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5413 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5415 llen
= zs
->zsl
->length
;
5417 /* convert negative indexes */
5418 if (start
< 0) start
= llen
+start
;
5419 if (end
< 0) end
= llen
+end
;
5420 if (start
< 0) start
= 0;
5421 if (end
< 0) end
= 0;
5423 /* indexes sanity checks */
5424 if (start
> end
|| start
>= llen
) {
5425 addReply(c
,shared
.czero
);
5428 if (end
>= llen
) end
= llen
-1;
5430 /* increment start and end because zsl*Rank functions
5431 * use 1-based rank */
5432 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5433 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5434 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5435 server
.dirty
+= deleted
;
5436 addReplyLong(c
, deleted
);
5444 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5445 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5446 unsigned long size1
, size2
;
5447 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5448 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5449 return size1
- size2
;
/* Aggregation modes understood by zunionInterAggregate(). */
5452 #define REDIS_AGGR_SUM 1
5453 #define REDIS_AGGR_MIN 2
5454 #define REDIS_AGGR_MAX 3
5456 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5457 if (aggregate
== REDIS_AGGR_SUM
) {
5458 *target
= *target
+ val
;
5459 } else if (aggregate
== REDIS_AGGR_MIN
) {
5460 *target
= val
< *target
? val
: *target
;
5461 } else if (aggregate
== REDIS_AGGR_MAX
) {
5462 *target
= val
> *target
? val
: *target
;
5465 redisAssert(0 != 0);
5469 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5471 int aggregate
= REDIS_AGGR_SUM
;
5478 /* expect zsetnum input keys to be given */
5479 zsetnum
= atoi(c
->argv
[2]->ptr
);
5481 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5485 /* test if the expected number of keys would overflow */
5486 if (3+zsetnum
> c
->argc
) {
5487 addReply(c
,shared
.syntaxerr
);
5491 /* read keys to be used for input */
5492 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5493 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5494 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5498 if (zsetobj
->type
!= REDIS_ZSET
) {
5500 addReply(c
,shared
.wrongtypeerr
);
5503 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5506 /* default all weights to 1 */
5507 src
[i
].weight
= 1.0;
5510 /* parse optional extra arguments */
5512 int remaining
= c
->argc
- j
;
5515 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5517 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5518 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5520 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5522 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5523 aggregate
= REDIS_AGGR_SUM
;
5524 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5525 aggregate
= REDIS_AGGR_MIN
;
5526 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5527 aggregate
= REDIS_AGGR_MAX
;
5530 addReply(c
,shared
.syntaxerr
);
5536 addReply(c
,shared
.syntaxerr
);
5542 /* sort sets from the smallest to largest, this will improve our
5543 * algorithm's performance */
5544 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5546 dstobj
= createZsetObject();
5547 dstzset
= dstobj
->ptr
;
5549 if (op
== REDIS_OP_INTER
) {
5550 /* skip going over all entries if the smallest zset is NULL or empty */
5551 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5552 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5553 * from small to large, all src[i > 0].dict are non-empty too */
5554 di
= dictGetIterator(src
[0].dict
);
5555 while((de
= dictNext(di
)) != NULL
) {
5556 double *score
= zmalloc(sizeof(double)), value
;
5557 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5559 for (j
= 1; j
< zsetnum
; j
++) {
5560 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5562 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5563 zunionInterAggregate(score
, value
, aggregate
);
5569 /* skip entry when not present in every source dict */
5573 robj
*o
= dictGetEntryKey(de
);
5574 dictAdd(dstzset
->dict
,o
,score
);
5575 incrRefCount(o
); /* added to dictionary */
5576 zslInsert(dstzset
->zsl
,*score
,o
);
5577 incrRefCount(o
); /* added to skiplist */
5580 dictReleaseIterator(di
);
5582 } else if (op
== REDIS_OP_UNION
) {
5583 for (i
= 0; i
< zsetnum
; i
++) {
5584 if (!src
[i
].dict
) continue;
5586 di
= dictGetIterator(src
[i
].dict
);
5587 while((de
= dictNext(di
)) != NULL
) {
5588 /* skip key when already processed */
5589 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5591 double *score
= zmalloc(sizeof(double)), value
;
5592 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5594 /* because the zsets are sorted by size, its only possible
5595 * for sets at larger indices to hold this entry */
5596 for (j
= (i
+1); j
< zsetnum
; j
++) {
5597 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5599 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5600 zunionInterAggregate(score
, value
, aggregate
);
5604 robj
*o
= dictGetEntryKey(de
);
5605 dictAdd(dstzset
->dict
,o
,score
);
5606 incrRefCount(o
); /* added to dictionary */
5607 zslInsert(dstzset
->zsl
,*score
,o
);
5608 incrRefCount(o
); /* added to skiplist */
5610 dictReleaseIterator(di
);
5613 /* unknown operator */
5614 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5617 deleteKey(c
->db
,dstkey
);
5618 if (dstzset
->zsl
->length
) {
5619 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5620 incrRefCount(dstkey
);
5621 addReplyLong(c
, dstzset
->zsl
->length
);
5624 decrRefCount(dstzset
);
5625 addReply(c
, shared
.czero
);
5630 static void zunionCommand(redisClient
*c
) {
5631 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5634 static void zinterCommand(redisClient
*c
) {
5635 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5638 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5640 int start
= atoi(c
->argv
[2]->ptr
);
5641 int end
= atoi(c
->argv
[3]->ptr
);
5650 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5652 } else if (c
->argc
>= 5) {
5653 addReply(c
,shared
.syntaxerr
);
5657 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5658 checkType(c
,o
,REDIS_ZSET
)) return;
5663 /* convert negative indexes */
5664 if (start
< 0) start
= llen
+start
;
5665 if (end
< 0) end
= llen
+end
;
5666 if (start
< 0) start
= 0;
5667 if (end
< 0) end
= 0;
5669 /* indexes sanity checks */
5670 if (start
> end
|| start
>= llen
) {
5671 /* Out of range start or start > end result in empty list */
5672 addReply(c
,shared
.emptymultibulk
);
5675 if (end
>= llen
) end
= llen
-1;
5676 rangelen
= (end
-start
)+1;
5678 /* check if starting point is trivial, before searching
5679 * the element in log(N) time */
5681 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5684 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5687 /* Return the result in form of a multi-bulk reply */
5688 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5689 withscores
? (rangelen
*2) : rangelen
));
5690 for (j
= 0; j
< rangelen
; j
++) {
5692 addReplyBulk(c
,ele
);
5694 addReplyDouble(c
,ln
->score
);
5695 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5699 static void zrangeCommand(redisClient
*c
) {
5700 zrangeGenericCommand(c
,0);
5703 static void zrevrangeCommand(redisClient
*c
) {
5704 zrangeGenericCommand(c
,1);
5707 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5708 * If justcount is non-zero, just the count is returned. */
5709 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5712 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5713 int offset
= 0, limit
= -1;
5717 /* Parse the min-max interval. If one of the values is prefixed
5718 * by the "(" character, it's considered "open". For instance
5719 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5720 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5721 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5722 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5725 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5727 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5728 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5731 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5734 /* Parse "WITHSCORES": note that if the command was called with
5735 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5736 * enter the following paths to parse WITHSCORES and LIMIT. */
5737 if (c
->argc
== 5 || c
->argc
== 8) {
5738 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5743 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5747 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5752 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5753 addReply(c
,shared
.syntaxerr
);
5755 } else if (c
->argc
== (7 + withscores
)) {
5756 offset
= atoi(c
->argv
[5]->ptr
);
5757 limit
= atoi(c
->argv
[6]->ptr
);
5758 if (offset
< 0) offset
= 0;
5761 /* Ok, lookup the key and get the range */
5762 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5764 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5766 if (o
->type
!= REDIS_ZSET
) {
5767 addReply(c
,shared
.wrongtypeerr
);
5769 zset
*zsetobj
= o
->ptr
;
5770 zskiplist
*zsl
= zsetobj
->zsl
;
5772 robj
*ele
, *lenobj
= NULL
;
5773 unsigned long rangelen
= 0;
5775 /* Get the first node with the score >= min, or with
5776 * score > min if 'minex' is true. */
5777 ln
= zslFirstWithScore(zsl
,min
);
5778 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5781 /* No element matching the speciifed interval */
5782 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5786 /* We don't know in advance how many matching elements there
5787 * are in the list, so we push this object that will represent
5788 * the multi-bulk length in the output buffer, and will "fix"
5791 lenobj
= createObject(REDIS_STRING
,NULL
);
5793 decrRefCount(lenobj
);
5796 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5799 ln
= ln
->forward
[0];
5802 if (limit
== 0) break;
5805 addReplyBulk(c
,ele
);
5807 addReplyDouble(c
,ln
->score
);
5809 ln
= ln
->forward
[0];
5811 if (limit
> 0) limit
--;
5814 addReplyLong(c
,(long)rangelen
);
5816 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5817 withscores
? (rangelen
*2) : rangelen
);
5823 static void zrangebyscoreCommand(redisClient
*c
) {
5824 genericZrangebyscoreCommand(c
,0);
5827 static void zcountCommand(redisClient
*c
) {
5828 genericZrangebyscoreCommand(c
,1);
5831 static void zcardCommand(redisClient
*c
) {
5835 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5836 checkType(c
,o
,REDIS_ZSET
)) return;
5839 addReplyUlong(c
,zs
->zsl
->length
);
5842 static void zscoreCommand(redisClient
*c
) {
5847 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5848 checkType(c
,o
,REDIS_ZSET
)) return;
5851 de
= dictFind(zs
->dict
,c
->argv
[2]);
5853 addReply(c
,shared
.nullbulk
);
5855 double *score
= dictGetEntryVal(de
);
5857 addReplyDouble(c
,*score
);
5861 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5869 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5870 checkType(c
,o
,REDIS_ZSET
)) return;
5874 de
= dictFind(zs
->dict
,c
->argv
[2]);
5876 addReply(c
,shared
.nullbulk
);
5880 score
= dictGetEntryVal(de
);
5881 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5884 addReplyLong(c
, zsl
->length
- rank
);
5886 addReplyLong(c
, rank
-1);
5889 addReply(c
,shared
.nullbulk
);
5893 static void zrankCommand(redisClient
*c
) {
5894 zrankGenericCommand(c
, 0);
5897 static void zrevrankCommand(redisClient
*c
) {
5898 zrankGenericCommand(c
, 1);
5901 /* =================================== Hashes =============================== */
5902 static void hsetCommand(redisClient
*c
) {
5904 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5907 o
= createHashObject();
5908 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5909 incrRefCount(c
->argv
[1]);
5911 if (o
->type
!= REDIS_HASH
) {
5912 addReply(c
,shared
.wrongtypeerr
);
5916 /* We want to convert the zipmap into an hash table right now if the
5917 * entry to be added is too big. Note that we check if the object
5918 * is integer encoded before to try fetching the length in the test below.
5919 * This is because integers are small, but currently stringObjectLen()
5920 * performs a slow conversion: not worth it. */
5921 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5922 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5923 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5924 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5925 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5927 convertToRealHash(o
);
5930 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5931 unsigned char *zm
= o
->ptr
;
5932 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5934 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5935 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5936 decrRefCount(valobj
);
5939 /* And here there is the second check for hash conversion...
5940 * we want to do it only if the operation was not just an update as
5941 * zipmapLen() is O(N). */
5942 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5943 convertToRealHash(o
);
5945 tryObjectEncoding(c
->argv
[2]);
5946 /* note that c->argv[3] is already encoded, as the latest arg
5947 * of a bulk command is always integer encoded if possible. */
5948 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5949 incrRefCount(c
->argv
[2]);
5953 incrRefCount(c
->argv
[3]);
5956 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5959 static void hgetCommand(redisClient
*c
) {
5962 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5963 checkType(c
,o
,REDIS_HASH
)) return;
5965 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5966 unsigned char *zm
= o
->ptr
;
5971 field
= getDecodedObject(c
->argv
[2]);
5972 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
5973 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
5974 addReplySds(c
,sdsnewlen(val
,vlen
));
5975 addReply(c
,shared
.crlf
);
5976 decrRefCount(field
);
5979 addReply(c
,shared
.nullbulk
);
5980 decrRefCount(field
);
5984 struct dictEntry
*de
;
5986 de
= dictFind(o
->ptr
,c
->argv
[2]);
5988 addReply(c
,shared
.nullbulk
);
5990 robj
*e
= dictGetEntryVal(de
);
5997 static void hdelCommand(redisClient
*c
) {
6001 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6002 checkType(c
,o
,REDIS_HASH
)) return;
6004 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6005 robj
*field
= getDecodedObject(c
->argv
[2]);
6007 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6008 (unsigned char*) field
->ptr
,
6009 sdslen(field
->ptr
), &deleted
);
6010 decrRefCount(field
);
6011 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6012 deleteKey(c
->db
,c
->argv
[1]);
6014 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6015 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6016 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6018 if (deleted
) server
.dirty
++;
6019 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6022 static void hlenCommand(redisClient
*c
) {
6026 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6027 checkType(c
,o
,REDIS_HASH
)) return;
6029 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6030 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6031 addReplyUlong(c
,len
);
/* Bit flags tested by genericHgetallCommand() to decide whether field
 * names, values, or both are emitted. */
6034 #define REDIS_GETALL_KEYS 1
6035 #define REDIS_GETALL_VALS 2
6036 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6038 unsigned long count
= 0;
6040 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6041 || checkType(c
,o
,REDIS_HASH
)) return;
6043 lenobj
= createObject(REDIS_STRING
,NULL
);
6045 decrRefCount(lenobj
);
6047 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6048 unsigned char *p
= zipmapRewind(o
->ptr
);
6049 unsigned char *field
, *val
;
6050 unsigned int flen
, vlen
;
6052 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6055 if (flags
& REDIS_GETALL_KEYS
) {
6056 aux
= createStringObject((char*)field
,flen
);
6057 addReplyBulk(c
,aux
);
6061 if (flags
& REDIS_GETALL_VALS
) {
6062 aux
= createStringObject((char*)val
,vlen
);
6063 addReplyBulk(c
,aux
);
6069 dictIterator
*di
= dictGetIterator(o
->ptr
);
6072 while((de
= dictNext(di
)) != NULL
) {
6073 robj
*fieldobj
= dictGetEntryKey(de
);
6074 robj
*valobj
= dictGetEntryVal(de
);
6076 if (flags
& REDIS_GETALL_KEYS
) {
6077 addReplyBulk(c
,fieldobj
);
6080 if (flags
& REDIS_GETALL_VALS
) {
6081 addReplyBulk(c
,valobj
);
6085 dictReleaseIterator(di
);
6087 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6090 static void hkeysCommand(redisClient
*c
) {
6091 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6094 static void hvalsCommand(redisClient
*c
) {
6095 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6098 static void hgetallCommand(redisClient
*c
) {
6099 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6102 static void hexistsCommand(redisClient
*c
) {
6106 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6107 checkType(c
,o
,REDIS_HASH
)) return;
6109 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6111 unsigned char *zm
= o
->ptr
;
6113 field
= getDecodedObject(c
->argv
[2]);
6114 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6115 decrRefCount(field
);
6117 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6119 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6122 static void convertToRealHash(robj
*o
) {
6123 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6124 unsigned int klen
, vlen
;
6125 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6127 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6128 p
= zipmapRewind(zm
);
6129 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6130 robj
*keyobj
, *valobj
;
6132 keyobj
= createStringObject((char*)key
,klen
);
6133 valobj
= createStringObject((char*)val
,vlen
);
6134 tryObjectEncoding(keyobj
);
6135 tryObjectEncoding(valobj
);
6136 dictAdd(dict
,keyobj
,valobj
);
6138 o
->encoding
= REDIS_ENCODING_HT
;
6143 /* ========================= Non type-specific commands ==================== */
6145 static void flushdbCommand(redisClient
*c
) {
6146 server
.dirty
+= dictSize(c
->db
->dict
);
6147 dictEmpty(c
->db
->dict
);
6148 dictEmpty(c
->db
->expires
);
6149 addReply(c
,shared
.ok
);
6152 static void flushallCommand(redisClient
*c
) {
6153 server
.dirty
+= emptyDb();
6154 addReply(c
,shared
.ok
);
6155 if (server
.bgsavechildpid
!= -1) {
6156 kill(server
.bgsavechildpid
,SIGKILL
);
6157 rdbRemoveTempFile(server
.bgsavechildpid
);
6159 rdbSave(server
.dbfilename
);
6163 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6164 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6166 so
->pattern
= pattern
;
6170 /* Return the value associated to the key with a name obtained
6171 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6172 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6176 int prefixlen
, sublen
, postfixlen
;
6177 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6181 char buf
[REDIS_SORTKEY_MAX
+1];
6184 /* If the pattern is "#" return the substitution object itself in order
6185 * to implement the "SORT ... GET #" feature. */
6186 spat
= pattern
->ptr
;
6187 if (spat
[0] == '#' && spat
[1] == '\0') {
6191 /* The substitution object may be specially encoded. If so we create
6192 * a decoded object on the fly. Otherwise getDecodedObject will just
6193 * increment the ref count, that we'll decrement later. */
6194 subst
= getDecodedObject(subst
);
6197 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6198 p
= strchr(spat
,'*');
6200 decrRefCount(subst
);
6205 sublen
= sdslen(ssub
);
6206 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6207 memcpy(keyname
.buf
,spat
,prefixlen
);
6208 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6209 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6210 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6211 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6213 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6214 decrRefCount(subst
);
6216 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6217 return lookupKeyRead(db
,&keyobj
);
6220 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6221 * the additional parameter is not standard but a BSD-specific we have to
6222 * pass sorting parameters via the global 'server' structure */
6223 static int sortCompare(const void *s1
, const void *s2
) {
6224 const redisSortObject
*so1
= s1
, *so2
= s2
;
6227 if (!server
.sort_alpha
) {
6228 /* Numeric sorting. Here it's trivial as we precomputed scores */
6229 if (so1
->u
.score
> so2
->u
.score
) {
6231 } else if (so1
->u
.score
< so2
->u
.score
) {
6237 /* Alphanumeric sorting */
6238 if (server
.sort_bypattern
) {
6239 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6240 /* At least one compare object is NULL */
6241 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6243 else if (so1
->u
.cmpobj
== NULL
)
6248 /* We have both the objects, use strcoll */
6249 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6252 /* Compare elements directly */
6255 dec1
= getDecodedObject(so1
->obj
);
6256 dec2
= getDecodedObject(so2
->obj
);
6257 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6262 return server
.sort_desc
? -cmp
: cmp
;
6265 /* The SORT command is the most complex command in Redis. Warning: this code
6266 * is optimized for speed and a bit less for readability */
/* Summary of what is visible below: the key at c->argv[1] must hold a
 * REDIS_SET, REDIS_LIST or REDIS_ZSET. The options recognized by the parser
 * are ASC, DESC, ALPHA, LIMIT <start> <count>, STORE <key>, BY <pattern>
 * and GET <pattern>; anything else produces shared.syntaxerr. Without STORE
 * the result is sent as a multi-bulk reply, with STORE it is saved as a
 * list under the given key and the number of stored items is replied. */
6267 static void sortCommand(redisClient
*c
) {
6270 int desc
= 0, alpha
= 0;
6271 int limit_start
= 0, limit_count
= -1, start
, end
;
6272 int j
, dontsort
= 0, vectorlen
;
6273 int getop
= 0; /* GET operation counter */
6274 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6275 redisSortObject
*vector
; /* Resulting vector to sort */
6277 /* Lookup the key to sort. It must be of the right types */
6278 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6279 if (sortval
== NULL
) {
6280 addReply(c
,shared
.nullmultibulk
);
6283 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6284 sortval
->type
!= REDIS_ZSET
)
6286 addReply(c
,shared
.wrongtypeerr
);
6290 /* Create a list of operations to perform for every sorted element.
6291 * Operations can be GET/DEL/INCR/DECR */
/* Only REDIS_SORT_GET operations are created by the parser below. */
6292 operations
= listCreate();
6293 listSetFreeMethod(operations
,zfree
);
6296 /* Now we need to protect sortval incrementing its count, in the future
6297 * SORT may have options able to overwrite/delete keys during the sorting
6298 * and the sorted key itself may get destroied */
6299 incrRefCount(sortval
);
6301 /* The SORT command has an SQL-alike syntax, parse it */
6302 while(j
< c
->argc
) {
6303 int leftargs
= c
->argc
-j
-1;
6304 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6306 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6308 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6310 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6311 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6312 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6314 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6315 storekey
= c
->argv
[j
+1];
6317 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6318 sortby
= c
->argv
[j
+1];
6319 /* If the BY pattern does not contain '*', i.e. it is constant,
6320 * we don't need to sort nor to lookup the weight keys. */
6321 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6323 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6324 listAddNodeTail(operations
,createSortOperation(
6325 REDIS_SORT_GET
,c
->argv
[j
+1]));
/* Unknown option: undo the reference taken on sortval, free the operation
 * list and report a syntax error. */
6329 decrRefCount(sortval
);
6330 listRelease(operations
);
6331 addReply(c
,shared
.syntaxerr
);
6337 /* Load the sorting vector with all the objects to sort */
6338 switch(sortval
->type
) {
6339 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6340 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6341 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6342 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6344 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6347 if (sortval
->type
== REDIS_LIST
) {
6348 list
*list
= sortval
->ptr
;
6352 listRewind(list
,&li
);
6353 while((ln
= listNext(&li
))) {
6354 robj
*ele
= ln
->value
;
6355 vector
[j
].obj
= ele
;
6356 vector
[j
].u
.score
= 0;
6357 vector
[j
].u
.cmpobj
= NULL
;
6365 if (sortval
->type
== REDIS_SET
) {
6368 zset
*zs
= sortval
->ptr
;
6372 di
= dictGetIterator(set
);
6373 while((setele
= dictNext(di
)) != NULL
) {
6374 vector
[j
].obj
= dictGetEntryKey(setele
);
6375 vector
[j
].u
.score
= 0;
6376 vector
[j
].u
.cmpobj
= NULL
;
6379 dictReleaseIterator(di
);
6381 redisAssert(j
== vectorlen
);
6383 /* Now it's time to load the right scores in the sorting vector */
6384 if (dontsort
== 0) {
6385 for (j
= 0; j
< vectorlen
; j
++) {
/* Elements whose weight key is missing or is not a string are skipped and
 * keep the zero score / NULL cmpobj they were initialized with. */
6389 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6390 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6392 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6394 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6395 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6397 /* Don't need to decode the object if it's
6398 * integer-encoded (the only encoding supported) so
6399 * far. We can just cast it */
6400 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6401 vector
[j
].u
.score
= (long)byval
->ptr
;
6403 redisAssert(1 != 1);
6408 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6409 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6411 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6412 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6414 redisAssert(1 != 1);
6421 /* We are ready to sort the vector... perform a bit of sanity check
6422 * on the LIMIT option too. We'll use a partial version of quicksort. */
6423 start
= (limit_start
< 0) ? 0 : limit_start
;
6424 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6425 if (start
>= vectorlen
) {
6426 start
= vectorlen
-1;
6429 if (end
>= vectorlen
) end
= vectorlen
-1;
6431 if (dontsort
== 0) {
/* sortCompare() reads these three globals instead of taking parameters. */
6432 server
.sort_desc
= desc
;
6433 server
.sort_alpha
= alpha
;
6434 server
.sort_bypattern
= sortby
? 1 : 0;
6435 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6436 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6438 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6441 /* Send command output to the output buffer, performing the specified
6442 * GET/DEL/INCR/DECR operations if any. */
6443 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6444 if (storekey
== NULL
) {
6445 /* STORE option not specified, sent the sorting result to client */
6446 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6447 for (j
= start
; j
<= end
; j
++) {
6451 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6452 listRewind(operations
,&li
);
6453 while((ln
= listNext(&li
))) {
6454 redisSortOperation
*sop
= ln
->value
;
6455 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6458 if (sop
->type
== REDIS_SORT_GET
) {
6459 if (!val
|| val
->type
!= REDIS_STRING
) {
6460 addReply(c
,shared
.nullbulk
);
6462 addReplyBulk(c
,val
);
6465 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6470 robj
*listObject
= createListObject();
6471 list
*listPtr
= (list
*) listObject
->ptr
;
6473 /* STORE option specified, set the sorting result as a List object */
6474 for (j
= start
; j
<= end
; j
++) {
6479 listAddNodeTail(listPtr
,vector
[j
].obj
);
6480 incrRefCount(vector
[j
].obj
);
6482 listRewind(operations
,&li
);
6483 while((ln
= listNext(&li
))) {
6484 redisSortOperation
*sop
= ln
->value
;
6485 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6488 if (sop
->type
== REDIS_SORT_GET
) {
6489 if (!val
|| val
->type
!= REDIS_STRING
) {
6490 listAddNodeTail(listPtr
,createStringObject("",0));
6492 listAddNodeTail(listPtr
,val
);
6496 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6500 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6501 incrRefCount(storekey
);
6503 /* Note: we add 1 because the DB is dirty anyway since even if the
6504 * SORT result is empty a new key is set and maybe the old content
6506 server
.dirty
+= 1+outputlen
;
6507 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
/* Cleanup: drop the reference taken on sortval, free the operation list
 * and release the compare objects created for BY + ALPHA sorting. */
6511 decrRefCount(sortval
);
6512 listRelease(operations
);
6513 for (j
= 0; j
< vectorlen
; j
++) {
6514 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6515 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' receives the NUL terminated result and must be large enough for it
 * (the longest output is a 20 digit byte count followed by 'B').
 * Values below 1024 are printed as an integer number of bytes; larger
 * values are scaled by powers of 1024 and printed with two decimals using
 * the K, M, G, T and P suffixes. Anything from 1024^6 upward falls back to
 * a plain byte count, so 's' is always written. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024ULL*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024ULL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024ULL*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024ULL*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024ULL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024ULL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too large for the suffixes above: print the raw byte count
         * rather than leaving the buffer untouched. */
        sprintf(s,"%lluB",n);
    }
}
6541 /* Create the string returned by the INFO command. This is decoupled
6542 * by the INFO command itself as we need to report the same information
6543 * on memory corruption problems. */
/* The report is built with sdscatprintf() as "name:value\r\n" lines: a
 * general section first, then replication details when server.masterhost
 * is set, virtual memory statistics when server.vm_enabled is set, and
 * finally one line per database. Format strings and argument lists below
 * must be kept in the same order. */
6544 static sds
genRedisInfoString(void) {
6546 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6550 bytesToHuman(hmem
,zmalloc_used_memory());
6551 info
= sdscatprintf(sdsempty(),
6552 "redis_version:%s\r\n"
6554 "multiplexing_api:%s\r\n"
6555 "process_id:%ld\r\n"
6556 "uptime_in_seconds:%ld\r\n"
6557 "uptime_in_days:%ld\r\n"
6558 "connected_clients:%d\r\n"
6559 "connected_slaves:%d\r\n"
6560 "blocked_clients:%d\r\n"
6561 "used_memory:%zu\r\n"
6562 "used_memory_human:%s\r\n"
6563 "changes_since_last_save:%lld\r\n"
6564 "bgsave_in_progress:%d\r\n"
6565 "last_save_time:%ld\r\n"
6566 "bgrewriteaof_in_progress:%d\r\n"
6567 "total_connections_received:%lld\r\n"
6568 "total_commands_processed:%lld\r\n"
6569 "expired_keys:%lld\r\n"
6570 "hash_max_zipmap_entries:%ld\r\n"
6571 "hash_max_zipmap_value:%ld\r\n"
6575 (sizeof(long) == 8) ? "64" : "32",
6580 listLength(server
.clients
)-listLength(server
.slaves
),
6581 listLength(server
.slaves
),
6582 server
.blpop_blocked_clients
,
6583 zmalloc_used_memory(),
6586 server
.bgsavechildpid
!= -1,
6588 server
.bgrewritechildpid
!= -1,
6589 server
.stat_numconnections
,
6590 server
.stat_numcommands
,
6591 server
.stat_expiredkeys
,
6592 server
.hash_max_zipmap_entries
,
6593 server
.hash_max_zipmap_value
,
6594 server
.vm_enabled
!= 0,
6595 server
.masterhost
== NULL
? "master" : "slave"
/* Replication details, appended only when a master host is configured. */
6597 if (server
.masterhost
) {
6598 info
= sdscatprintf(info
,
6599 "master_host:%s\r\n"
6600 "master_port:%d\r\n"
6601 "master_link_status:%s\r\n"
6602 "master_last_io_seconds_ago:%d\r\n"
6605 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6607 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
/* Virtual memory statistics; every value is cast to match its format. */
6610 if (server
.vm_enabled
) {
6612 info
= sdscatprintf(info
,
6613 "vm_conf_max_memory:%llu\r\n"
6614 "vm_conf_page_size:%llu\r\n"
6615 "vm_conf_pages:%llu\r\n"
6616 "vm_stats_used_pages:%llu\r\n"
6617 "vm_stats_swapped_objects:%llu\r\n"
6618 "vm_stats_swappin_count:%llu\r\n"
6619 "vm_stats_swappout_count:%llu\r\n"
6620 "vm_stats_io_newjobs_len:%lu\r\n"
6621 "vm_stats_io_processing_len:%lu\r\n"
6622 "vm_stats_io_processed_len:%lu\r\n"
6623 "vm_stats_io_active_threads:%lu\r\n"
6624 "vm_stats_blocked_clients:%lu\r\n"
6625 ,(unsigned long long) server
.vm_max_memory
,
6626 (unsigned long long) server
.vm_page_size
,
6627 (unsigned long long) server
.vm_pages
,
6628 (unsigned long long) server
.vm_stats_used_pages
,
6629 (unsigned long long) server
.vm_stats_swapped_objects
,
6630 (unsigned long long) server
.vm_stats_swapins
,
6631 (unsigned long long) server
.vm_stats_swapouts
,
6632 (unsigned long) listLength(server
.io_newjobs
),
6633 (unsigned long) listLength(server
.io_processing
),
6634 (unsigned long) listLength(server
.io_processed
),
6635 (unsigned long) server
.io_active_threads
,
6636 (unsigned long) server
.vm_blocked_clients
/* One line for every database that has at least one key or one expire. */
6640 for (j
= 0; j
< server
.dbnum
; j
++) {
6641 long long keys
, vkeys
;
6643 keys
= dictSize(server
.db
[j
].dict
);
6644 vkeys
= dictSize(server
.db
[j
].expires
);
6645 if (keys
|| vkeys
) {
6646 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6653 static void infoCommand(redisClient
*c
) {
6654 sds info
= genRedisInfoString();
6655 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6656 (unsigned long)sdslen(info
)));
6657 addReplySds(c
,info
);
6658 addReply(c
,shared
.crlf
);
/* MONITOR command handler.
 *
 * A client that already carries the REDIS_SLAVE flag is ignored: the
 * function returns before any reply is queued. Otherwise the client is
 * flagged as both REDIS_SLAVE and REDIS_MONITOR, appended to
 * server.monitors and acknowledged with shared.ok. */
6661 static void monitorCommand(redisClient
*c
) {
6662 /* ignore MONITOR if already slave or in monitor mode */
6663 if (c
->flags
& REDIS_SLAVE
) return;
6665 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6667 listAddNodeTail(server
.monitors
,c
);
6668 addReply(c
,shared
.ok
);
6671 /* ================================= Expire ================================= */
/* Drop the expire associated with 'key' by deleting its entry from
 * db->expires. The result depends on whether dictDelete() reports DICT_OK.
 * NOTE(review): the statements of both branches are not visible in this
 * listing; presumably a non-zero return means an expire was removed --
 * confirm against the full source. */
6672 static int removeExpire(redisDb
*db
, robj
*key
) {
6673 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Register 'when' as the expire time of 'key' by adding an entry to
 * db->expires. The time_t value is not allocated separately: it is stored
 * directly in the dictionary value pointer through a cast to void*.
 * A DICT_ERR result from dictAdd() is handled as a separate case.
 * NOTE(review): the return statements are not visible in this listing;
 * expireGenericCommand() replies shared.cone when the return value is
 * non-zero, so non-zero presumably means "expire set" -- confirm. */
6680 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6681 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6689 /* Return the expire time of the specified key, or -1 if no expire
6690 * is associated with this key (i.e. the key is non volatile).
 * The expire time is read straight from the dictionary value pointer,
 * which is cast back to time_t. */
6691 static time_t getExpire(redisDb
*db
, robj
*key
) {
6694 /* No expire? return ASAP */
6695 if (dictSize(db
->expires
) == 0 ||
6696 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6698 return (time_t) dictGetEntryVal(de
);
/* Delete 'key' from 'db' if it has an expire time that is already in the
 * past.
 *
 * Returns 0 when the key has no entry in db->expires or when the current
 * time() is not yet greater than the stored expire time. Otherwise the
 * entry is removed from db->expires, server.stat_expiredkeys is
 * incremented, and the return value tells whether deleting the key from
 * db->dict reported DICT_OK. */
6701 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6705 /* No expire? return ASAP */
6706 if (dictSize(db
->expires
) == 0 ||
6707 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6709 /* Lookup the expire */
6710 when
= (time_t) dictGetEntryVal(de
);
6711 if (time(NULL
) <= when
) return 0;
6713 /* Delete the key */
6714 dictDelete(db
->expires
,key
);
6715 server
.stat_expiredkeys
++;
6716 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' from 'db' if it has an expire set, without comparing the
 * expire time against the clock (contrast with expireIfNeeded()).
 *
 * Returns 0 when the key has no entry in db->expires. Otherwise
 * server.stat_expiredkeys is incremented, the entry is removed from
 * db->expires, and the return value tells whether deleting the key from
 * db->dict reported DICT_OK. */
6719 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6722 /* No expire? return ASAP */
6723 if (dictSize(db
->expires
) == 0 ||
6724 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6726 /* Delete the key */
6728 server
.stat_expiredkeys
++;
6729 dictDelete(db
->expires
,key
);
6730 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared implementation of EXPIRE and EXPIREAT; 'seconds' is a timeout
 * relative to the current time.
 *
 * The key is looked up in c->db->dict. The visible paths are:
 * - a reply of shared.czero;
 * - deleting the key with deleteKey() (counting the change in
 *   server.dirty when it succeeds) and replying shared.cone;
 * - computing the absolute time as time(NULL)+seconds and calling
 *   setExpire(), replying shared.cone if it returns non-zero and
 *   shared.czero otherwise.
 * NOTE(review): the conditions selecting between these paths are not
 * visible in this listing -- confirm against the full source. */
6733 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6736 de
= dictFind(c
->db
->dict
,key
);
6738 addReply(c
,shared
.czero
);
6742 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6743 addReply(c
, shared
.cone
);
6746 time_t when
= time(NULL
)+seconds
;
6747 if (setExpire(c
->db
,key
,when
)) {
6748 addReply(c
,shared
.cone
);
6751 addReply(c
,shared
.czero
);
6757 static void expireCommand(redisClient
*c
) {
6758 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6761 static void expireatCommand(redisClient
*c
) {
6762 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
/* TTL command: reply with an integer (":<n>\r\n") derived from the expire
 * time that getExpire() reports for the key in argv[1].
 *
 * The remaining time is computed as expire minus time(NULL) and truncated
 * to int; any negative result is reported as -1.
 * NOTE(review): the declarations and the conditions guarding the
 * computation are not visible in this listing. */
6765 static void ttlCommand(redisClient
*c
) {
6769 expire
= getExpire(c
->db
,c
->argv
[1]);
6771 ttl
= (int) (expire
-time(NULL
));
6772 if (ttl
< 0) ttl
= -1;
6774 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6777 /* ================================ MULTI/EXEC ============================== */
6779 /* Client state initialization for MULTI/EXEC */
6780 static void initClientMultiState(redisClient
*c
) {
6781 c
->mstate
.commands
= NULL
;
6782 c
->mstate
.count
= 0;
6785 /* Release all the resources associated with MULTI/EXEC state.
 *
 * For every queued command a reference is dropped on each of its
 * arguments, then the array of queued commands itself is freed. The
 * fields are not reset here: callers in this file follow up with
 * initClientMultiState(). */
6786 static void freeClientMultiState(redisClient
*c
) {
6789 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6791 multiCmd
*mc
= c
->mstate
.commands
+j
;
6793 for (i
= 0; i
< mc
->argc
; i
++)
6794 decrRefCount(mc
->argv
[i
]);
6797 zfree(c
->mstate
.commands
);
6800 /* Add a new command into the MULTI commands queue.
 *
 * The queue array is grown by one slot with zrealloc(); the new slot gets
 * its own copy of the client's current argument vector (the pointers are
 * copied with memcpy and every argument gets an extra reference), so the
 * queued command stays valid after the client's argv is released. */
6801 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6805 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6806 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6807 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6810 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6811 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6812 for (j
= 0; j
< c
->argc
; j
++)
6813 incrRefCount(mc
->argv
[j
]);
6817 static void multiCommand(redisClient
*c
) {
6818 c
->flags
|= REDIS_MULTI
;
6819 addReply(c
,shared
.ok
);
/* DISCARD command.
 *
 * If the client is not flagged REDIS_MULTI an error reply is queued.
 * Otherwise the queued commands are released, the MULTI state is
 * re-initialized, the REDIS_MULTI flag is cleared and shared.ok is sent. */
6822 static void discardCommand(redisClient
*c
) {
6823 if (!(c
->flags
& REDIS_MULTI
)) {
6824 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6828 freeClientMultiState(c
);
6829 initClientMultiState(c
);
6830 c
->flags
&= (~REDIS_MULTI
);
6831 addReply(c
,shared
.ok
);
/* EXEC command.
 *
 * If the client is not flagged REDIS_MULTI an error reply is queued.
 * Otherwise a multi bulk header carrying the number of queued commands is
 * sent and every queued command is run through call(). While a queued
 * command runs, the client's argc/argv point at that command's own
 * argument vector; the original values are saved beforehand and restored
 * after the loop. Finally the queue is released, the MULTI state is
 * re-initialized and the REDIS_MULTI flag is cleared. */
6834 static void execCommand(redisClient
*c
) {
6839 if (!(c
->flags
& REDIS_MULTI
)) {
6840 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6844 orig_argv
= c
->argv
;
6845 orig_argc
= c
->argc
;
6846 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6847 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6848 c
->argc
= c
->mstate
.commands
[j
].argc
;
6849 c
->argv
= c
->mstate
.commands
[j
].argv
;
6850 call(c
,c
->mstate
.commands
[j
].cmd
);
6852 c
->argv
= orig_argv
;
6853 c
->argc
= orig_argc
;
6854 freeClientMultiState(c
);
6855 initClientMultiState(c
);
6856 c
->flags
&= (~REDIS_MULTI
);
6859 /* =========================== Blocking Operations ========================= */
6861 /* Currently Redis blocking operations support is limited to list POP ops,
6862 * so the current implementation is not fully generic, but it is also not
6863 * completely specific so it will not require a rewrite to support new
6864 * kind of blocking operations in the future.
6866 * Still it's important to note that list blocking operations can be already
6867 * used as a notification mechanism in order to implement other blocking
6868 * operations at application level, so there must be a very strong evidence
6869 * of usefulness and generality before new blocking operations are implemented.
6871 * This is how the current blocking POP works, we use BLPOP as example:
6872 * - If the user calls BLPOP and the key exists and contains a non empty list
6873 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6874 * if there is no need to block.
6875 * - If instead BLPOP is called and the key does not exist or the list is
6876 * empty we need to block. In order to do so we remove the notification for
6877 * new data to read in the client socket (so that we'll not serve new
6878 * requests if the blocking request is not served). Also we put the client
6879 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6880 * blocking for these keys.
6881 * - If a PUSH operation against a key with blocked clients waiting is
6882 * performed, we serve the first in the list: basically instead of pushing
6883 * the new element inside the list we return it to the (first / oldest)
6884 * blocking client, unblock the client, and remove it from the list.
6886 * The above comment and the source code should be enough in order to understand
6887 * the implementation and modify / fix it later.
6890 /* Set a client in blocking mode for the specified keys, with the specified
 * timeout. */
/* Register client 'c' as blocked on the 'numkeys' keys in 'keys'.
 *
 * Two mappings are maintained:
 * - client -> keys: c->blockingkeys holds the keys (each with an extra
 *   reference), c->blockingkeysnum their number;
 * - key -> clients: c->db->blockingkeys maps every key to a list of
 *   clients, and 'c' is appended at the tail of each list. When a key is
 *   added to the dictionary it takes one more reference.
 * 'timeout' is stored verbatim in c->blockingto. Finally the client is
 * flagged REDIS_BLOCKED and server.blpop_blocked_clients is incremented. */
6892 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6897 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6898 c
->blockingkeysnum
= numkeys
;
6899 c
->blockingto
= timeout
;
6900 for (j
= 0; j
< numkeys
; j
++) {
6901 /* Add the key in the client structure, to map clients -> keys */
6902 c
->blockingkeys
[j
] = keys
[j
];
6903 incrRefCount(keys
[j
]);
6905 /* And in the other "side", to map keys -> clients */
6906 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
6910 /* For every key we take a list of clients blocked for it */
6912 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
6913 incrRefCount(keys
[j
]);
6914 assert(retval
== DICT_OK
);
6916 l
= dictGetEntryVal(de
);
6918 listAddNodeTail(l
,c
);
6920 /* Mark the client as a blocked client */
6921 c
->flags
|= REDIS_BLOCKED
;
6922 server
.blpop_blocked_clients
++;
6925 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 *
 * The client is removed from the waiting list of every key it was blocked
 * on (lists that become empty are deleted from c->db->blockingkeys), the
 * references held on the keys are dropped and c->blockingkeys is freed
 * and set to NULL. The REDIS_BLOCKED flag is cleared and
 * server.blpop_blocked_clients is decremented. If the client has pending
 * data in its query buffer it is processed right away. */
6926 static void unblockClientWaitingData(redisClient
*c
) {
6931 assert(c
->blockingkeys
!= NULL
);
6932 /* The client may wait for multiple keys, so unblock it for every key. */
6933 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
6934 /* Remove this client from the list of clients waiting for this key. */
6935 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6937 l
= dictGetEntryVal(de
);
6938 listDelNode(l
,listSearchKey(l
,c
));
6939 /* If the list is empty we need to remove it to avoid wasting memory */
6940 if (listLength(l
) == 0)
6941 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
6942 decrRefCount(c
->blockingkeys
[j
]);
6944 /* Cleanup the client structure */
6945 zfree(c
->blockingkeys
);
6946 c
->blockingkeys
= NULL
;
6947 c
->flags
&= (~REDIS_BLOCKED
);
6948 server
.blpop_blocked_clients
--;
6949 /* We want to process data if there is some command waiting
6950 * in the input buffer. Note that this is safe even if
6951 * unblockClientWaitingData() gets called from freeClient() because
6952 * freeClient() will be smart enough to call this function
6953 * *after* c->querybuf was set to NULL. */
6954 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
6957 /* This should be called from any function PUSHing into lists.
6958 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
6959 * 'ele' is the element pushed.
6961 * If the function returns 0 there was no client waiting for a list push
 * against this key (no entry for 'key' in c->db->blockingkeys).
6964 * If the function returns 1 there was a client waiting for a list push
6965 * against this key, the element was passed to this client thus it's not
6966 * needed to actually add it to the list and the caller should return asap.
 * The receiver gets a two-element multi bulk reply (key name, element)
 * and is then unblocked with unblockClientWaitingData(). */
6967 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
6968 struct dictEntry
*de
;
6969 redisClient
*receiver
;
6973 de
= dictFind(c
->db
->blockingkeys
,key
);
6974 if (de
== NULL
) return 0;
6975 l
= dictGetEntryVal(de
);
6978 receiver
= ln
->value
;
6980 addReplySds(receiver
,sdsnew("*2\r\n"));
6981 addReplyBulk(receiver
,key
);
6982 addReplyBulk(receiver
,ele
);
6983 unblockClientWaitingData(receiver
);
6987 /* Blocking RPOP/LPOP.
 *
 * The keys are argv[1] .. argv[argc-2]; the last argument is the timeout.
 * Every key is looked up in turn: a value that is not a list produces a
 * wrong-type error reply, while a non empty list is served immediately
 * through popGenericCommand() using a temporary argument vector.
 * If no key can be served the timeout is parsed: a value greater than
 * zero is turned into an absolute time by adding time(NULL), then the
 * client is blocked on all the keys with blockForKeys().
 * 'where' is passed through to popGenericCommand() (callers in this file
 * use REDIS_HEAD / REDIS_TAIL). */
6988 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
6993 for (j
= 1; j
< c
->argc
-1; j
++) {
6994 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6996 if (o
->type
!= REDIS_LIST
) {
6997 addReply(c
,shared
.wrongtypeerr
);
7000 list
*list
= o
->ptr
;
7001 if (listLength(list
) != 0) {
7002 /* If the list contains elements fall back to the usual
7003 * non-blocking POP operation */
7004 robj
*argv
[2], **orig_argv
;
7007 /* We need to alter the command arguments before calling
7008 * popGenericCommand() as the command takes a single key. */
7009 orig_argv
= c
->argv
;
7010 orig_argc
= c
->argc
;
7011 argv
[1] = c
->argv
[j
];
7015 /* Also the return value is different, we need to output
7016 * the multi bulk reply header and the key name. The
7017 * "real" command will add the last element (the value)
7018 * for us. If this sounds like a hack to you it's just
7019 * because it is... */
7020 addReplySds(c
,sdsnew("*2\r\n"));
7021 addReplyBulk(c
,argv
[1]);
7022 popGenericCommand(c
,where
);
7024 /* Fix the client structure with the original stuff */
7025 c
->argv
= orig_argv
;
7026 c
->argc
= orig_argc
;
7032 /* If the list is empty or the key does not exist we must block */
7033 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7034 if (timeout
> 0) timeout
+= time(NULL
);
7035 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7038 static void blpopCommand(redisClient
*c
) {
7039 blockingPopGenericCommand(c
,REDIS_HEAD
);
7042 static void brpopCommand(redisClient
*c
) {
7043 blockingPopGenericCommand(c
,REDIS_TAIL
);
7046 /* =============================== Replication ============================= */
/* Synchronous write of 'size' bytes from 'ptr' to 'fd' with a timeout.
 *
 * The descriptor is polled for writability with aeWait() before each
 * write(); a write() error returns -1 immediately. The elapsed time is
 * measured with time() and compared against 'timeout', so the timeout is
 * expressed in seconds.
 * NOTE(review): the loop structure and the final return are not visible
 * in this listing; 'ret' is initialized to 'size', presumably the value
 * returned on success -- confirm. */
7048 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7049 ssize_t nwritten
, ret
= size
;
7050 time_t start
= time(NULL
);
7054 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7055 nwritten
= write(fd
,ptr
,size
);
7056 if (nwritten
== -1) return -1;
7060 if ((time(NULL
)-start
) > timeout
) {
/* Synchronous read from 'fd' into 'ptr' with a timeout, the counterpart
 * of syncWrite().
 *
 * The descriptor is polled for readability with aeWait() before each
 * read(); a read() error returns -1 immediately. The elapsed time is
 * measured with time() and compared against 'timeout' (seconds).
 * NOTE(review): the loop structure and the final return are not visible
 * in this listing; 'totread' presumably accumulates the bytes read --
 * confirm. */
7068 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7069 ssize_t nread
, totread
= 0;
7070 time_t start
= time(NULL
);
7074 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7075 nread
= read(fd
,ptr
,size
);
7076 if (nread
== -1) return -1;
7081 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr' one byte at a time using syncRead().
 *
 * Returns -1 as soon as syncRead() fails. When at least one byte was
 * stored and the byte just before the current position is '\r', it is
 * overwritten with a NUL terminator.
 * NOTE(review): the line-termination test and the other return paths are
 * not visible in this listing. */
7089 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7096 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7099 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* SYNC command: turn the calling client into a slave.
 *
 * A client already flagged REDIS_SLAVE is ignored, and a client with
 * pending replies gets an error. Otherwise:
 * - if a background save is running, the slaves list is scanned for one
 *   in REDIS_REPL_WAIT_BGSAVE_END state: when one is found its reply list
 *   is duplicated into this client, which also enters
 *   REDIS_REPL_WAIT_BGSAVE_END; otherwise the client waits for the next
 *   save (REDIS_REPL_WAIT_BGSAVE_START);
 * - if no background save is running one is started with
 *   rdbSaveBackground(); on failure an error reply is sent.
 * Finally the client is flagged REDIS_SLAVE and appended to
 * server.slaves.
 * NOTE(review): the background-save error reply contains the typo
 * "Unalbe"; it is runtime text and is left untouched here. */
7110 static void syncCommand(redisClient
*c
) {
7111 /* ignore SYNC if already slave or in monitor mode */
7112 if (c
->flags
& REDIS_SLAVE
) return;
7114 /* SYNC can't be issued when the server has pending data to send to
7115 * the client about already issued commands. We need a fresh reply
7116 * buffer registering the differences between the BGSAVE and the current
7117 * dataset, so that we can copy to other slaves if needed. */
7118 if (listLength(c
->reply
) != 0) {
7119 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7123 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7124 /* Here we need to check if there is a background saving operation
7125 * in progress, or if it is required to start one */
7126 if (server
.bgsavechildpid
!= -1) {
7127 /* Ok a background save is in progress. Let's check if it is a good
7128 * one for replication, i.e. if there is another slave that is
7129 * registering differences since the server forked to save */
7134 listRewind(server
.slaves
,&li
);
7135 while((ln
= listNext(&li
))) {
7137 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7140 /* Perfect, the server is already registering differences for
7141 * another slave. Set the right state, and copy the buffer. */
7142 listRelease(c
->reply
);
7143 c
->reply
= listDup(slave
->reply
);
7144 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7145 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7147 /* No way, we need to wait for the next BGSAVE in order to
7148 * register differences */
7149 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7150 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7153 /* Ok we don't have a BGSAVE in progress, let's start one */
7154 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7155 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7156 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7157 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7160 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7163 c
->flags
|= REDIS_SLAVE
;
7165 listAddNodeTail(server
.slaves
,c
);
/* Writable-event handler that streams the RDB file to a slave; 'privdata'
 * is the slave client.
 *
 * On the first call (slave->repldboff == 0) a "$<count>\r\n" bulk header
 * is written directly to the socket. Every call then seeks to
 * slave->repldboff in slave->repldbfd, reads up to REDIS_IOBUF_LEN bytes
 * and writes them to 'fd'; the offset advances by the number of bytes
 * actually written, so a short write is retried from the right position
 * on the next event. When the offset reaches slave->repldbsize the file
 * is closed, this handler is removed, the slave becomes
 * REDIS_REPL_ONLINE and sendReplyToClient is installed as the writable
 * handler.
 * NOTE(review): the header is formatted with "%lld" while the argument is
 * cast to unsigned long long -- signedness mismatch worth confirming. */
7169 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7170 redisClient
*slave
= privdata
;
7172 REDIS_NOTUSED(mask
);
7173 char buf
[REDIS_IOBUF_LEN
];
7174 ssize_t nwritten
, buflen
;
7176 if (slave
->repldboff
== 0) {
7177 /* Write the bulk write count before transferring the DB. In theory here
7178 * we don't know how much room there is in the output buffer of the
7179 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7180 * operations) will never be smaller than the few bytes we need. */
7183 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7185 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7193 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7194 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7196 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7197 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7201 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7202 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7207 slave
->repldboff
+= nwritten
;
7208 if (slave
->repldboff
== slave
->repldbsize
) {
7209 close(slave
->repldbfd
);
7210 slave
->repldbfd
= -1;
7211 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7212 slave
->replstate
= REDIS_REPL_ONLINE
;
7213 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7214 sendReplyToClient
, slave
) == AE_ERR
) {
7218 addReplySds(slave
,sdsempty());
7219 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7223 /* This function is called at the end of every background saving.
7224 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7225 * otherwise REDIS_ERR is passed to the function.
7227 * The goal of this function is to handle slaves waiting for a successful
7228 * background saving in order to perform non-blocking synchronization.
 *
 * Slaves in REDIS_REPL_WAIT_BGSAVE_START state are moved to
 * REDIS_REPL_WAIT_BGSAVE_END. For slaves already in
 * REDIS_REPL_WAIT_BGSAVE_END state the dump file is opened and stat-ed,
 * the transfer offset/size are initialized, the state becomes
 * REDIS_REPL_SEND_BULK and sendBulkToSlave is installed as the writable
 * handler. A new background save may then be started with
 * rdbSaveBackground(); if that fails the slaves list is walked again to
 * handle the ones still in REDIS_REPL_WAIT_BGSAVE_START state. */
7229 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7231 int startbgsave
= 0;
7234 listRewind(server
.slaves
,&li
);
7235 while((ln
= listNext(&li
))) {
7236 redisClient
*slave
= ln
->value
;
7238 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7240 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7241 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7242 struct redis_stat buf
;
7244 if (bgsaveerr
!= REDIS_OK
) {
7246 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7249 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7250 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7252 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7255 slave
->repldboff
= 0;
7256 slave
->repldbsize
= buf
.st_size
;
7257 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7258 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7259 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7266 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7269 listRewind(server
.slaves
,&li
);
7270 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7271 while((ln
= listNext(&li
))) {
7272 redisClient
*slave
= ln
->value
;
7274 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
/* Perform a full, blocking synchronization with the configured master.
 *
 * Steps visible in this function:
 * 1. connect to server.masterhost:server.masterport;
 * 2. if server.masterauth is set, send "AUTH <password>" and require a
 *    reply starting with '+';
 * 3. send SYNC and read the "$<count>" bulk header;
 * 4. create a uniquely named temp file (O_EXCL, name built from time()
 *    and getpid()) and copy the payload into it in chunks of at most
 *    1024 bytes;
 * 5. rename the temp file to server.dbfilename and load it with rdbLoad();
 * 6. create the master client on the connected socket, flag it
 *    REDIS_MASTER / authenticated and set server.replstate to
 *    REDIS_REPL_CONNECTED.
 * Every failure is logged with redisLog().
 * NOTE(review): the AUTH command is formatted into a 1024 byte buffer
 * with snprintf(), but the length passed to syncWrite() is computed from
 * strlen(server.masterauth)+7; a password long enough to be truncated
 * would make the write read past the formatted string -- confirm. */
7281 static int syncWithMaster(void) {
7282 char buf
[1024], tmpfile
[256], authcmd
[1024];
7284 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7285 int dfd
, maxtries
= 5;
7288 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7293 /* AUTH with the master if required. */
7294 if(server
.masterauth
) {
7295 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7296 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7298 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7302 /* Read the AUTH result. */
7303 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7305 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7309 if (buf
[0] != '+') {
7311 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7316 /* Issue the SYNC command */
7317 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7319 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7323 /* Read the bulk write count */
7324 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7326 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7330 if (buf
[0] != '$') {
7332 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7335 dumpsize
= strtol(buf
+1,NULL
,10);
7336 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7337 /* Read the bulk write data on a temp file */
7339 snprintf(tmpfile
,256,
7340 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7341 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7342 if (dfd
!= -1) break;
7347 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7351 int nread
, nwritten
;
7353 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7355 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7361 nwritten
= write(dfd
,buf
,nread
);
7362 if (nwritten
== -1) {
7363 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7371 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7372 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7378 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7379 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7383 server
.master
= createClient(fd
);
7384 server
.master
->flags
|= REDIS_MASTER
;
7385 server
.master
->authenticated
= 1;
7386 server
.replstate
= REDIS_REPL_CONNECTED
;
/* SLAVEOF command.
 *
 * "SLAVEOF no one" (case insensitive): if a master is configured its host
 * name is freed, the master client (if any) is released and the
 * replication state becomes REDIS_REPL_NONE.
 * Any other pair of arguments is taken as host and port: the previous
 * host name is freed, the new host/port are stored, the current master
 * client (if any) is released and the replication state becomes
 * REDIS_REPL_CONNECT. In both cases shared.ok is sent.
 * NOTE(review): the port is parsed with atoi() and no validation is
 * visible in this listing. */
7390 static void slaveofCommand(redisClient
*c
) {
7391 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7392 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7393 if (server
.masterhost
) {
7394 sdsfree(server
.masterhost
);
7395 server
.masterhost
= NULL
;
7396 if (server
.master
) freeClient(server
.master
);
7397 server
.replstate
= REDIS_REPL_NONE
;
7398 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7401 sdsfree(server
.masterhost
);
7402 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7403 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7404 if (server
.master
) freeClient(server
.master
);
7405 server
.replstate
= REDIS_REPL_CONNECT
;
7406 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7407 server
.masterhost
, server
.masterport
);
7409 addReply(c
,shared
.ok
);
7412 /* ============================ Maxmemory directive ======================== */
7414 /* Try to free one object from the pre-allocated objects free list.
7415 * This is useful under low mem conditions as by default we take 1 million
7416 * free objects allocated. REDIS_OK is returned when an object was freed. */
/* Remove the first node, if any, from server.objfreelist.
 *
 * When server.vm_enabled is set the free list is accessed under
 * server.obj_freelist_mutex; the mutex is released on both visible exit
 * paths. freeMemoryIfNeeded() compares the result against REDIS_OK.
 * NOTE(review): what happens to the removed object and the return
 * statements are not visible in this listing. */
7418 static int tryFreeOneObjectFromFreelist(void) {
7421 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7422 if (listLength(server
.objfreelist
)) {
7423 listNode
*head
= listFirst(server
.objfreelist
);
7424 o
= listNodeValue(head
);
7425 listDelNode(server
.objfreelist
,head
);
7426 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7430 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7435 /* This function gets called when 'maxmemory' is set on the config file to limit
7436 * the max memory used by the server, and we are out of memory.
7437 * This function will try to, in order:
7439 * - Free objects from the free list
7440 * - Try to remove keys with an EXPIRE set
7442 * If it is not possible to free enough memory to reach used-memory < maxmemory
7443 * the server will start refusing commands that would further enlarge the
 * memory usage. */
/* Release memory while server.maxmemory is set and zmalloc_used_memory()
 * exceeds it.
 *
 * Each iteration first tries tryFreeOneObjectFromFreelist(). If that does
 * not return REDIS_OK, every database with a non empty expires dictionary
 * is visited: three random entries are sampled and the key with the
 * smallest expire time is deleted with deleteKey(). The function returns
 * when an iteration frees nothing. */
7446 static void freeMemoryIfNeeded(void) {
7447 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7448 int j
, k
, freed
= 0;
7450 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7451 for (j
= 0; j
< server
.dbnum
; j
++) {
7453 robj
*minkey
= NULL
;
7454 struct dictEntry
*de
;
7456 if (dictSize(server
.db
[j
].expires
)) {
7458 /* From a sample of three keys drop the one nearest to
7459 * the natural expire */
7460 for (k
= 0; k
< 3; k
++) {
7463 de
= dictGetRandomKey(server
.db
[j
].expires
);
7464 t
= (time_t) dictGetEntryVal(de
);
7465 if (minttl
== -1 || t
< minttl
) {
7466 minkey
= dictGetEntryKey(de
);
7470 deleteKey(server
.db
+j
,minkey
);
7473 if (!freed
) return; /* nothing to free... */
7477 /* ============================== Append Only file ========================== */
/* Append a command to the append only file.
 *
 * 'dictid' is the id of the database the command ran against; when it
 * differs from server.appendseldb a SELECT command is emitted first.
 * EXPIRE commands are rewritten as EXPIREAT with an absolute time
 * (time(NULL) plus the parsed timeout). The command is serialized in the
 * multi bulk format and written to server.appendfd with a single write().
 * A failed or short write is logged. While a background rewrite child
 * exists (server.bgrewritechildpid != -1) the same bytes are also
 * appended to server.bgrewritebuf. fsync() is issued when the policy is
 * APPENDFSYNC_ALWAYS, or APPENDFSYNC_EVERYSEC and more than one second
 * has passed since server.lastfsync. */
7479 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7480 sds buf
= sdsempty();
7486 /* The DB this command was targeting is not the same as the last command
7487 * we appended. Issuing a SELECT command is needed. */
7488 if (dictid
!= server
.appendseldb
) {
7491 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7492 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7493 (unsigned long)strlen(seldb
),seldb
);
7494 server
.appendseldb
= dictid
;
7497 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7498 * EXPIREs into EXPIREATs calls */
7499 if (cmd
->proc
== expireCommand
) {
7502 tmpargv
[0] = createStringObject("EXPIREAT",8);
7503 tmpargv
[1] = argv
[1];
7504 incrRefCount(argv
[1]);
7505 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7506 tmpargv
[2] = createObject(REDIS_STRING
,
7507 sdscatprintf(sdsempty(),"%ld",when
));
7511 /* Append the actual command */
7512 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7513 for (j
= 0; j
< argc
; j
++) {
7516 o
= getDecodedObject(o
);
7517 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7518 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7519 buf
= sdscatlen(buf
,"\r\n",2);
7523 /* Free the objects from the modified argv for EXPIREAT */
7524 if (cmd
->proc
== expireCommand
) {
7525 for (j
= 0; j
< 3; j
++)
7526 decrRefCount(argv
[j
]);
7529 /* We want to perform a single write. This should be guaranteed atomic
7530 * at least if the filesystem we are writing is a real physical one.
7531 * While this will save us against the server being killed I don't think
7532 * there is much to do about the whole server stopping for power problems
7534 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7535 if (nwritten
!= (signed)sdslen(buf
)) {
7536 /* Ooops, we are in troubles. The best thing to do for now is
7537 * to simply exit instead to give the illusion that everything is
7538 * working as expected. */
7539 if (nwritten
== -1) {
7540 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7542 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7546 /* If a background append only file rewriting is in progress we want to
7547 * accumulate the differences between the child DB and the current one
7548 * in a buffer, so that when the child process will do its work we
7549 * can append the differences to the new append only file. */
7550 if (server
.bgrewritechildpid
!= -1)
7551 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7555 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7556 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7557 now
-server
.lastfsync
> 1))
7559 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7560 server
.lastfsync
= now
;
7564 /* In Redis commands are always executed in the context of a client, so in
7565 * order to load the append only file we need to create a fake client.
 *
 * The client is allocated with zmalloc() and gets an empty query buffer
 * and an empty reply list whose free/dup methods are decrRefCount and
 * dupClientReplyValue.
 * NOTE(review): the remaining field initializations are not visible in
 * this listing. */
7566 static struct redisClient
*createFakeClient(void) {
7567 struct redisClient
*c
= zmalloc(sizeof(*c
));
7571 c
->querybuf
= sdsempty();
7575 /* We set the fake client as a slave waiting for the synchronization
7576 * so that Redis will not try to send replies to this client. */
7577 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7578 c
->reply
= listCreate();
7579 listSetFreeMethod(c
->reply
,decrRefCount
);
7580 listSetDupMethod(c
->reply
,dupClientReplyValue
);
/* Release a client created by createFakeClient(): its query buffer and
 * its reply list are freed.
 * NOTE(review): the release of the client structure itself is not visible
 * in this listing. */
7584 static void freeFakeClient(struct redisClient
*c
) {
7585 sdsfree(c
->querybuf
);
7586 listRelease(c
->reply
);
7590 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7591 * error (the append only file is zero-length) REDIS_ERR is returned. On
7592 * fatal error an error message is logged and the program exits.
 *
 * Every command is read in the multi bulk format ("*<argc>" followed by
 * "$<len>" / payload pairs), looked up with lookupCommand() and executed
 * in the context of a fake client, whose reply list is emptied after each
 * command. When VM is enabled, every 5000 loaded keys objects are swapped
 * out while the used memory exceeds server.vm_max_memory.
 * NOTE(review): in the visible lines fileno(fp) is evaluated before the
 * "can't open" diagnostic; if no NULL check on 'fp' precedes it, a failed
 * fopen() reaches fileno(NULL) -- confirm against the full source. */
7593 int loadAppendOnlyFile(char *filename
) {
7594 struct redisClient
*fakeClient
;
7595 FILE *fp
= fopen(filename
,"r");
7596 struct redis_stat sb
;
7597 unsigned long long loadedkeys
= 0;
7599 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7603 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7607 fakeClient
= createFakeClient();
7614 struct redisCommand
*cmd
;
7616 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7622 if (buf
[0] != '*') goto fmterr
;
7624 argv
= zmalloc(sizeof(robj
*)*argc
);
7625 for (j
= 0; j
< argc
; j
++) {
7626 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7627 if (buf
[0] != '$') goto fmterr
;
7628 len
= strtol(buf
+1,NULL
,10);
7629 argsds
= sdsnewlen(NULL
,len
);
7630 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7631 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7632 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7635 /* Command lookup */
7636 cmd
= lookupCommand(argv
[0]->ptr
);
7638 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7641 /* Try object sharing and encoding */
7642 if (server
.shareobjects
) {
7644 for(j
= 1; j
< argc
; j
++)
7645 argv
[j
] = tryObjectSharing(argv
[j
]);
7647 if (cmd
->flags
& REDIS_CMD_BULK
)
7648 tryObjectEncoding(argv
[argc
-1]);
7649 /* Run the command in the context of a fake client */
7650 fakeClient
->argc
= argc
;
7651 fakeClient
->argv
= argv
;
7652 cmd
->proc(fakeClient
);
7653 /* Discard the reply objects list from the fake client */
7654 while(listLength(fakeClient
->reply
))
7655 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7656 /* Clean up, ready for the next command */
7657 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7659 /* Handle swapping while loading big datasets when VM is on */
7661 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7662 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7663 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7668 freeFakeClient(fakeClient
);
7673 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7675 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7679 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7683 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n
 *
 * An object whose encoding is not REDIS_ENCODING_RAW is first converted
 * with getDecodedObject(); the 'decrrc' flag controls whether a reference
 * is dropped on both the success and the error path. Callers in this file
 * treat a return value of 0 as failure. */
7684 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7688 /* Avoid the incr/decr ref count business if possible to help
7689 * copy-on-write (we are often in a child process when this function
7691 * Also makes sure that key objects don't get incrRefCount-ed when VM
7693 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7694 obj
= getDecodedObject(obj
);
7697 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7698 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7699 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7701 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7702 if (decrrc
) decrRefCount(obj
);
7705 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * Exactly 'len' bytes starting at 's' are written as payload, so the data
 * may contain NUL bytes. When 'len' is zero only the header and the
 * trailing CRLF are emitted. Returns 1 on success, 0 as soon as one of
 * the fwrite() calls fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be printed with %lu, otherwise the
     * conversion does not match the argument type and very large values
     * would be rendered as negative counts. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the "%.17g" rendering of 'd'. Returns 1 on success and 0
 * if one of the writes fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t payloadlen;

    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    payloadlen = strlen(payload);
    /* The announced count must not include the CRLF already appended to
     * the payload buffer. */
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)payloadlen-2);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,payloadlen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the decimal rendering of 'l'. Returns 1 on success and 0
 * if one of the writes fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t payloadlen;

    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    payloadlen = strlen(payload);
    /* The announced count must not include the CRLF already appended to
     * the payload buffer. */
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)payloadlen-2);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,payloadlen,1,fp) == 0) return 0;
    return 1;
}
7743 /* Write a sequence of commands able to fully rebuild the dataset into
7744 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7745 static int rewriteAppendOnlyFile(char *filename
) {
7746 dictIterator
*di
= NULL
;
7751 time_t now
= time(NULL
);
7753 /* Note that we have to use a different temp name here compared to the
7754 * one used by rewriteAppendOnlyFileBackground() function. */
7755 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7756 fp
= fopen(tmpfile
,"w");
7758 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7761 for (j
= 0; j
< server
.dbnum
; j
++) {
7762 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7763 redisDb
*db
= server
.db
+j
;
7765 if (dictSize(d
) == 0) continue;
7766 di
= dictGetIterator(d
);
7772 /* SELECT the new DB */
7773 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7774 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7776 /* Iterate this DB writing every entry */
7777 while((de
= dictNext(di
)) != NULL
) {
7782 key
= dictGetEntryKey(de
);
7783 /* If the value for this key is swapped, load a preview in memory.
7784 * We use a "swapped" flag to remember if we need to free the
7785 * value object instead to just increment the ref count anyway
7786 * in order to avoid copy-on-write of pages if we are forked() */
7787 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7788 key
->storage
== REDIS_VM_SWAPPING
) {
7789 o
= dictGetEntryVal(de
);
7792 o
= vmPreviewObject(key
);
7795 expiretime
= getExpire(db
,key
);
7797 /* Save the key and associated value */
7798 if (o
->type
== REDIS_STRING
) {
7799 /* Emit a SET command */
7800 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7801 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7803 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7804 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7805 } else if (o
->type
== REDIS_LIST
) {
7806 /* Emit the RPUSHes needed to rebuild the list */
7807 list
*list
= o
->ptr
;
7811 listRewind(list
,&li
);
7812 while((ln
= listNext(&li
))) {
7813 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7814 robj
*eleobj
= listNodeValue(ln
);
7816 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7817 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7818 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7820 } else if (o
->type
== REDIS_SET
) {
7821 /* Emit the SADDs needed to rebuild the set */
7823 dictIterator
*di
= dictGetIterator(set
);
7826 while((de
= dictNext(di
)) != NULL
) {
7827 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7828 robj
*eleobj
= dictGetEntryKey(de
);
7830 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7831 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7832 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7834 dictReleaseIterator(di
);
7835 } else if (o
->type
== REDIS_ZSET
) {
7836 /* Emit the ZADDs needed to rebuild the sorted set */
7838 dictIterator
*di
= dictGetIterator(zs
->dict
);
7841 while((de
= dictNext(di
)) != NULL
) {
7842 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7843 robj
*eleobj
= dictGetEntryKey(de
);
7844 double *score
= dictGetEntryVal(de
);
7846 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7847 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7848 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7849 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7851 dictReleaseIterator(di
);
7852 } else if (o
->type
== REDIS_HASH
) {
7853 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7855 /* Emit the HSETs needed to rebuild the hash */
7856 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7857 unsigned char *p
= zipmapRewind(o
->ptr
);
7858 unsigned char *field
, *val
;
7859 unsigned int flen
, vlen
;
7861 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7862 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7863 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7864 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7866 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7870 dictIterator
*di
= dictGetIterator(o
->ptr
);
7873 while((de
= dictNext(di
)) != NULL
) {
7874 robj
*field
= dictGetEntryKey(de
);
7875 robj
*val
= dictGetEntryVal(de
);
7877 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7878 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7879 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7880 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7882 dictReleaseIterator(di
);
7887 /* Save the expire time */
7888 if (expiretime
!= -1) {
7889 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7890 /* If this key is already expired skip it */
7891 if (expiretime
< now
) continue;
7892 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7893 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7894 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7896 if (swapped
) decrRefCount(o
);
7898 dictReleaseIterator(di
);
7901 /* Make sure data will not remain on the OS's output buffers */
7906 /* Use RENAME to make sure the DB file is changed atomically only
7907 * if the generate DB file is ok. */
7908 if (rename(tmpfile
,filename
) == -1) {
7909 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
7913 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
7919 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
7920 if (di
) dictReleaseIterator(di
);
7924 /* This is how rewriting of the append only file in background works:
7926 * 1) The user calls BGREWRITEAOF
7927 * 2) Redis calls this function, that forks():
7928 * 2a) the child rewrite the append only file in a temp file.
7929 * 2b) the parent accumulates differences in server.bgrewritebuf.
7930 * 3) When the child has finished '2a' it exits.
7931 * 4) The parent will trap the exit code, if it's OK, will append the
7932 * data accumulated into server.bgrewritebuf into the temp file, and
7933 * finally will rename(2) the temp file in the actual file name.
7934 * Then the new file is reopened as the new append only file. Profit!
7936 static int rewriteAppendOnlyFileBackground(void) {
7939 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
7940 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
7941 if ((childpid
= fork()) == 0) {
7945 if (server
.vm_enabled
) vmReopenSwapFile();
7947 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
7948 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
7955 if (childpid
== -1) {
7956 redisLog(REDIS_WARNING
,
7957 "Can't rewrite append only file in background: fork: %s",
7961 redisLog(REDIS_NOTICE
,
7962 "Background append only file rewriting started by pid %d",childpid
);
7963 server
.bgrewritechildpid
= childpid
;
7964 /* We set appendseldb to -1 in order to force the next call to the
7965 * feedAppendOnlyFile() to issue a SELECT command, so the differences
7966 * accumulated by the parent into server.bgrewritebuf will start
7967 * with a SELECT statement and it will be safe to merge. */
7968 server
.appendseldb
= -1;
7971 return REDIS_OK
; /* unreached */
7974 static void bgrewriteaofCommand(redisClient
*c
) {
7975 if (server
.bgrewritechildpid
!= -1) {
7976 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
7979 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
7980 char *status
= "+Background append only file rewriting started\r\n";
7981 addReplySds(c
,sdsnew(status
));
7983 addReply(c
,shared
.err
);
7987 static void aofRemoveTempFile(pid_t childpid
) {
7990 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
7994 /* Virtual Memory is composed mainly of two subsystems:
7995 * - Blocking Virtual Memory
7996 * - Threaded Virtual Memory I/O
7997 * The two parts are not fully decoupled, but functions are split among two
7998 * different sections of the source code (delimited by comments) in order to
7999 * make more clear what functionality is about the blocking VM and what about
8000 * the threaded (not blocking) VM.
8004 * Redis VM is a blocking VM (one that blocks reading swapped values from
8005 * disk into memory when a value swapped out is needed in memory) that is made
8006 * unblocking by trying to examine the command argument vector in order to
8007 * load in background values that will likely be needed in order to exec
8008 * the command. The command is executed only once all the relevant keys
8009 * are loaded into memory.
8011 * This is basically almost as simple as a blocking VM, but almost as parallel
8012 * as a fully non-blocking VM.
8015 /* =================== Virtual Memory - Blocking Side ====================== */
8017 /* substitute the first occurrence of '%p' with the process pid in the
8018 * swap file name. */
8019 static void expandVmSwapFilename(void) {
8020 char *p
= strstr(server
.vm_swap_file
,"%p");
8026 new = sdscat(new,server
.vm_swap_file
);
8027 new = sdscatprintf(new,"%ld",(long) getpid());
8028 new = sdscat(new,p
+2);
8029 zfree(server
.vm_swap_file
);
8030 server
.vm_swap_file
= new;
8033 static void vmInit(void) {
8038 if (server
.vm_max_threads
!= 0)
8039 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8041 expandVmSwapFilename();
8042 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8043 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8044 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8046 if (server
.vm_fp
== NULL
) {
8047 redisLog(REDIS_WARNING
,
8048 "Impossible to open the swap file: %s. Exiting.",
8052 server
.vm_fd
= fileno(server
.vm_fp
);
8053 server
.vm_next_page
= 0;
8054 server
.vm_near_pages
= 0;
8055 server
.vm_stats_used_pages
= 0;
8056 server
.vm_stats_swapped_objects
= 0;
8057 server
.vm_stats_swapouts
= 0;
8058 server
.vm_stats_swapins
= 0;
8059 totsize
= server
.vm_pages
*server
.vm_page_size
;
8060 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8061 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8062 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8066 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8068 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8069 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8070 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8071 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8073 /* Initialize threaded I/O (used by Virtual Memory) */
8074 server
.io_newjobs
= listCreate();
8075 server
.io_processing
= listCreate();
8076 server
.io_processed
= listCreate();
8077 server
.io_ready_clients
= listCreate();
8078 pthread_mutex_init(&server
.io_mutex
,NULL
);
8079 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8080 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8081 server
.io_active_threads
= 0;
8082 if (pipe(pipefds
) == -1) {
8083 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8087 server
.io_ready_pipe_read
= pipefds
[0];
8088 server
.io_ready_pipe_write
= pipefds
[1];
8089 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8090 /* LZF requires a lot of stack */
8091 pthread_attr_init(&server
.io_threads_attr
);
8092 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8093 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8094 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8095 /* Listen for events in the threaded I/O pipe */
8096 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8097 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8098 oom("creating file event");
8101 /* Mark the page as used */
8102 static void vmMarkPageUsed(off_t page
) {
8103 off_t byte
= page
/8;
8105 redisAssert(vmFreePage(page
) == 1);
8106 server
.vm_bitmap
[byte
] |= 1<<bit
;
8109 /* Mark N contiguous pages as used, with 'page' being the first. */
8110 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8113 for (j
= 0; j
< count
; j
++)
8114 vmMarkPageUsed(page
+j
);
8115 server
.vm_stats_used_pages
+= count
;
8116 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8117 (long long)count
, (long long)page
);
8120 /* Mark the page as free */
8121 static void vmMarkPageFree(off_t page
) {
8122 off_t byte
= page
/8;
8124 redisAssert(vmFreePage(page
) == 0);
8125 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8128 /* Mark N contiguous pages as free, with 'page' being the first. */
8129 static void vmMarkPagesFree(off_t page
, off_t count
) {
8132 for (j
= 0; j
< count
; j
++)
8133 vmMarkPageFree(page
+j
);
8134 server
.vm_stats_used_pages
-= count
;
8135 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8136 (long long)count
, (long long)page
);
8139 /* Test if the page is free */
8140 static int vmFreePage(off_t page
) {
8141 off_t byte
= page
/8;
8143 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8146 /* Find N contiguous free pages storing the first page of the cluster in *first.
8147 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8148 * REDIS_ERR is returned.
8150 * This function uses a simple algorithm: we try to allocate
8151 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8152 * again from the start of the swap file searching for free spaces.
8154 * If it looks pretty clear that there are no free pages near our offset
8155 * we try to find less populated places doing a forward jump of
8156 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8157 * without hurry, and then we jump again and so forth...
8159 * This function can be improved using a free list to avoid to guess
8160 * too much, since we could collect data about freed pages.
8162 * note: I implemented this function just after watching an episode of
8163 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8165 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8166 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8168 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8169 server
.vm_near_pages
= 0;
8170 server
.vm_next_page
= 0;
8172 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8173 base
= server
.vm_next_page
;
8175 while(offset
< server
.vm_pages
) {
8176 off_t
this = base
+offset
;
8178 /* If we overflow, restart from page zero */
8179 if (this >= server
.vm_pages
) {
8180 this -= server
.vm_pages
;
8182 /* Just overflowed, what we found on tail is no longer
8183 * interesting, as it's no longer contiguous. */
8187 if (vmFreePage(this)) {
8188 /* This is a free page */
8190 /* Already got N free pages? Return to the caller, with success */
8192 *first
= this-(n
-1);
8193 server
.vm_next_page
= this+1;
8194 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8198 /* The current one is not a free page */
8202 /* Fast-forward if the current page is not free and we already
8203 * searched enough near this place. */
8205 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8206 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8208 /* Note that even if we rewind after the jump, we don't need
8209 * to make sure numfree is set to zero as we only jump *if* it
8210 * is set to zero. */
8212 /* Otherwise just check the next page */
8219 /* Write the specified object at the specified page of the swap file */
8220 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8221 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8222 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8223 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8224 redisLog(REDIS_WARNING
,
8225 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8229 rdbSaveObject(server
.vm_fp
,o
);
8230 fflush(server
.vm_fp
);
8231 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8235 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8236 * needed to later retrieve the object into the key object.
8237 * If we can't find enough contiguous empty pages to swap the object on disk
8238 * REDIS_ERR is returned. */
8239 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8240 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8243 assert(key
->storage
== REDIS_VM_MEMORY
);
8244 assert(key
->refcount
== 1);
8245 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8246 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8247 key
->vm
.page
= page
;
8248 key
->vm
.usedpages
= pages
;
8249 key
->storage
= REDIS_VM_SWAPPED
;
8250 key
->vtype
= val
->type
;
8251 decrRefCount(val
); /* Deallocate the object from memory. */
8252 vmMarkPagesUsed(page
,pages
);
8253 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8254 (unsigned char*) key
->ptr
,
8255 (unsigned long long) page
, (unsigned long long) pages
);
8256 server
.vm_stats_swapped_objects
++;
8257 server
.vm_stats_swapouts
++;
8261 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8264 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8265 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8266 redisLog(REDIS_WARNING
,
8267 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8271 o
= rdbLoadObject(type
,server
.vm_fp
);
8273 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8276 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8280 /* Load the value object relative to the 'key' object from swap to memory.
8281 * The newly allocated object is returned.
8283 * If preview is true the unserialized object is returned to the caller but
8284 * no changes are made to the key object, nor the pages are marked as freed */
8285 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8288 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8289 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8291 key
->storage
= REDIS_VM_MEMORY
;
8292 key
->vm
.atime
= server
.unixtime
;
8293 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8294 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8295 (unsigned char*) key
->ptr
);
8296 server
.vm_stats_swapped_objects
--;
8298 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8299 (unsigned char*) key
->ptr
);
8301 server
.vm_stats_swapins
++;
8305 /* Plain object loading, from swap to memory */
8306 static robj
*vmLoadObject(robj
*key
) {
8307 /* If we are loading the object in background, stop it, we
8308 * need to load this object synchronously ASAP. */
8309 if (key
->storage
== REDIS_VM_LOADING
)
8310 vmCancelThreadedIOJob(key
);
8311 return vmGenericLoadObject(key
,0);
8314 /* Just load the value on disk, without to modify the key.
8315 * This is useful when we want to perform some operation on the value
8316 * without to really bring it from swap to memory, like while saving the
8317 * dataset or rewriting the append only log. */
8318 static robj
*vmPreviewObject(robj
*key
) {
8319 return vmGenericLoadObject(key
,1);
8322 /* How a good candidate is this object for swapping?
8323 * The better candidate it is, the greater the returned value.
8325 * Currently we try to perform a fast estimation of the object size in
8326 * memory, and combine it with aging information.
8328 * Basically swappability = idle-time * log(estimated size)
8330 * Bigger objects are preferred over smaller objects, but not
8331 * proportionally, this is why we use the logarithm. This algorithm is
8332 * just a first try and will probably be tuned later. */
8333 static double computeObjectSwappability(robj
*o
) {
8334 time_t age
= server
.unixtime
- o
->vm
.atime
;
8338 struct dictEntry
*de
;
8341 if (age
<= 0) return 0;
8344 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8347 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8352 listNode
*ln
= listFirst(l
);
8354 asize
= sizeof(list
);
8356 robj
*ele
= ln
->value
;
8359 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8360 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8362 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8367 z
= (o
->type
== REDIS_ZSET
);
8368 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8370 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8371 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8376 de
= dictGetRandomKey(d
);
8377 ele
= dictGetEntryKey(de
);
8378 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8379 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8381 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8382 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8386 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8387 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8388 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8389 unsigned int klen
, vlen
;
8390 unsigned char *key
, *val
;
8392 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8396 asize
= len
*(klen
+vlen
+3);
8397 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8399 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8404 de
= dictGetRandomKey(d
);
8405 ele
= dictGetEntryKey(de
);
8406 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8407 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8409 ele
= dictGetEntryVal(de
);
8410 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8411 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8413 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8418 return (double)age
*log(1+asize
);
8421 /* Try to swap an object that's a good candidate for swapping.
8422 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8423 * to swap any object at all.
8425 * If 'usethreads' is true, Redis will try to swap the object in background
8426 * using I/O threads. */
8427 static int vmSwapOneObject(int usethreads
) {
8429 struct dictEntry
*best
= NULL
;
8430 double best_swappability
= 0;
8431 redisDb
*best_db
= NULL
;
8434 for (j
= 0; j
< server
.dbnum
; j
++) {
8435 redisDb
*db
= server
.db
+j
;
8436 /* Why maxtries is set to 100?
8437 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8438 * are swappable objects */
8441 if (dictSize(db
->dict
) == 0) continue;
8442 for (i
= 0; i
< 5; i
++) {
8444 double swappability
;
8446 if (maxtries
) maxtries
--;
8447 de
= dictGetRandomKey(db
->dict
);
8448 key
= dictGetEntryKey(de
);
8449 val
= dictGetEntryVal(de
);
8450 /* Only swap objects that are currently in memory.
8452 * Also don't swap shared objects if threaded VM is on, as we
8453 * try to ensure that the main thread does not touch the
8454 * object while the I/O thread is using it, but we can't
8455 * control other keys without adding additional mutex. */
8456 if (key
->storage
!= REDIS_VM_MEMORY
||
8457 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8458 if (maxtries
) i
--; /* don't count this try */
8461 swappability
= computeObjectSwappability(val
);
8462 if (!best
|| swappability
> best_swappability
) {
8464 best_swappability
= swappability
;
8469 if (best
== NULL
) return REDIS_ERR
;
8470 key
= dictGetEntryKey(best
);
8471 val
= dictGetEntryVal(best
);
8473 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8474 key
->ptr
, best_swappability
);
8476 /* Unshare the key if needed */
8477 if (key
->refcount
> 1) {
8478 robj
*newkey
= dupStringObject(key
);
8480 key
= dictGetEntryKey(best
) = newkey
;
8484 vmSwapObjectThreaded(key
,val
,best_db
);
8487 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8488 dictGetEntryVal(best
) = NULL
;
/* Swap one object out using the blocking code path: calls
 * vmSwapOneObject() with 'usethreads' set to 0 and returns its result. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the threaded code path: calls
 * vmSwapOneObject() with 'usethreads' set to 1 and returns its result. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8504 /* Return true if it's safe to swap out objects in a given moment.
8505 * Basically we don't want to swap objects out while there is a BGSAVE
8506 * or a BGAEOREWRITE running in backgroud. */
8507 static int vmCanSwapOut(void) {
8508 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8511 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8512 * and was deleted. Otherwise 0 is returned. */
8513 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8517 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8518 foundkey
= dictGetEntryKey(de
);
8519 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8524 /* =================== Virtual Memory - Threaded I/O ======================= */
8526 static void freeIOJob(iojob
*j
) {
8527 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8528 j
->type
== REDIS_IOJOB_DO_SWAP
||
8529 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8530 decrRefCount(j
->val
);
8531 decrRefCount(j
->key
);
8535 /* Every time a thread finished a Job, it writes a byte into the write side
8536 * of an unix pipe in order to "awake" the main thread, and this function
8538 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8542 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8544 REDIS_NOTUSED(mask
);
8545 REDIS_NOTUSED(privdata
);
8547 /* For every byte we read in the read side of the pipe, there is one
8548 * I/O job completed to process. */
8549 while((retval
= read(fd
,buf
,1)) == 1) {
8553 struct dictEntry
*de
;
8555 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8557 /* Get the processed element (the oldest one) */
8559 assert(listLength(server
.io_processed
) != 0);
8560 if (toprocess
== -1) {
8561 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8562 if (toprocess
<= 0) toprocess
= 1;
8564 ln
= listFirst(server
.io_processed
);
8566 listDelNode(server
.io_processed
,ln
);
8568 /* If this job is marked as canceled, just ignore it */
8573 /* Post process it in the main thread, as there are things we
8574 * can do just here to avoid race conditions and/or invasive locks */
8575 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8576 de
= dictFind(j
->db
->dict
,j
->key
);
8578 key
= dictGetEntryKey(de
);
8579 if (j
->type
== REDIS_IOJOB_LOAD
) {
8582 /* Key loaded, bring it at home */
8583 key
->storage
= REDIS_VM_MEMORY
;
8584 key
->vm
.atime
= server
.unixtime
;
8585 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8586 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8587 (unsigned char*) key
->ptr
);
8588 server
.vm_stats_swapped_objects
--;
8589 server
.vm_stats_swapins
++;
8590 dictGetEntryVal(de
) = j
->val
;
8591 incrRefCount(j
->val
);
8594 /* Handle clients waiting for this key to be loaded. */
8595 handleClientsBlockedOnSwappedKey(db
,key
);
8596 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8597 /* Now we know the amount of pages required to swap this object.
8598 * Let's find some space for it, and queue this task again
8599 * rebranded as REDIS_IOJOB_DO_SWAP. */
8600 if (!vmCanSwapOut() ||
8601 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8603 /* Ooops... no space or we can't swap as there is
8604 * a fork()ed Redis trying to save stuff on disk. */
8606 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8608 /* Note that we need to mark this pages as used now,
8609 * if the job will be canceled, we'll mark them as freed
8611 vmMarkPagesUsed(j
->page
,j
->pages
);
8612 j
->type
= REDIS_IOJOB_DO_SWAP
;
8617 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8620 /* Key swapped. We can finally free some memory. */
8621 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8622 printf("key->storage: %d\n",key
->storage
);
8623 printf("key->name: %s\n",(char*)key
->ptr
);
8624 printf("key->refcount: %d\n",key
->refcount
);
8625 printf("val: %p\n",(void*)j
->val
);
8626 printf("val->type: %d\n",j
->val
->type
);
8627 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8629 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8630 val
= dictGetEntryVal(de
);
8631 key
->vm
.page
= j
->page
;
8632 key
->vm
.usedpages
= j
->pages
;
8633 key
->storage
= REDIS_VM_SWAPPED
;
8634 key
->vtype
= j
->val
->type
;
8635 decrRefCount(val
); /* Deallocate the object from memory. */
8636 dictGetEntryVal(de
) = NULL
;
8637 redisLog(REDIS_DEBUG
,
8638 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8639 (unsigned char*) key
->ptr
,
8640 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8641 server
.vm_stats_swapped_objects
++;
8642 server
.vm_stats_swapouts
++;
8644 /* Put a few more swap requests in queue if we are still
8646 if (trytoswap
&& vmCanSwapOut() &&
8647 zmalloc_used_memory() > server
.vm_max_memory
)
8652 more
= listLength(server
.io_newjobs
) <
8653 (unsigned) server
.vm_max_threads
;
8655 /* Don't waste CPU time if swappable objects are rare. */
8656 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8664 if (processed
== toprocess
) return;
8666 if (retval
< 0 && errno
!= EAGAIN
) {
8667 redisLog(REDIS_WARNING
,
8668 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8673 static void lockThreadedIO(void) {
8674 pthread_mutex_lock(&server
.io_mutex
);
8677 static void unlockThreadedIO(void) {
8678 pthread_mutex_unlock(&server
.io_mutex
);
8681 /* Remove the specified object from the threaded I/O queue if still not
8682 * processed, otherwise make sure to flag it as canceled. */
8683 static void vmCancelThreadedIOJob(robj
*o
) {
8685 server
.io_newjobs
, /* 0 */
8686 server
.io_processing
, /* 1 */
8687 server
.io_processed
/* 2 */
8691 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8694 /* Search for a matching key in one of the queues */
8695 for (i
= 0; i
< 3; i
++) {
8699 listRewind(lists
[i
],&li
);
8700 while ((ln
= listNext(&li
)) != NULL
) {
8701 iojob
*job
= ln
->value
;
8703 if (job
->canceled
) continue; /* Skip this, already canceled. */
8704 if (compareStringObjects(job
->key
,o
) == 0) {
8705 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8706 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8707 /* Mark the pages as free since the swap didn't happen
8708 * or happened but is now discarded. */
8709 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8710 vmMarkPagesFree(job
->page
,job
->pages
);
8711 /* Cancel the job. It depends on the list the job is
8714 case 0: /* io_newjobs */
8715 /* If the job was yet not processed the best thing to do
8716 * is to remove it from the queue at all */
8718 listDelNode(lists
[i
],ln
);
8720 case 1: /* io_processing */
8721 /* Oh Shi- the thread is messing with the Job:
8723 * Probably it's accessing the object if this is a
8724 * PREPARE_SWAP or DO_SWAP job.
8725 * If it's a LOAD job it may be reading from disk and
8726 * if we don't wait for the job to terminate before to
8727 * cancel it, maybe in a few microseconds data can be
8728 * corrupted in this pages. So the short story is:
8730 * Better to wait for the job to move into the
8731 * next queue (processed)... */
8733 /* We try again and again until the job is completed. */
8735 /* But let's wait some time for the I/O thread
8736 * to finish with this job. After all this condition
8737 * should be very rare. */
8740 case 2: /* io_processed */
8741 /* The job was already processed, that's easy...
8742 * just mark it as canceled so that we'll ignore it
8743 * when processing completed jobs. */
8747 /* Finally we have to adjust the storage type of the object
8748 * in order to "UNDO" the operation. */
8749 if (o
->storage
== REDIS_VM_LOADING
)
8750 o
->storage
= REDIS_VM_SWAPPED
;
8751 else if (o
->storage
== REDIS_VM_SWAPPING
)
8752 o
->storage
= REDIS_VM_MEMORY
;
8759 assert(1 != 1); /* We should never reach this */
8762 static void *IOThreadEntryPoint(void *arg
) {
8767 pthread_detach(pthread_self());
8769 /* Get a new job to process */
8771 if (listLength(server
.io_newjobs
) == 0) {
8772 /* No new jobs in queue, exit. */
8773 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8774 (long) pthread_self());
8775 server
.io_active_threads
--;
8779 ln
= listFirst(server
.io_newjobs
);
8781 listDelNode(server
.io_newjobs
,ln
);
8782 /* Add the job in the processing queue */
8783 j
->thread
= pthread_self();
8784 listAddNodeTail(server
.io_processing
,j
);
8785 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8787 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8788 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8790 /* Process the Job */
8791 if (j
->type
== REDIS_IOJOB_LOAD
) {
8792 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8793 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8794 FILE *fp
= fopen("/dev/null","w+");
8795 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8797 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8798 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8802 /* Done: insert the job into the processed queue */
8803 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8804 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8806 listDelNode(server
.io_processing
,ln
);
8807 listAddNodeTail(server
.io_processed
,j
);
8810 /* Signal the main thread there is new stuff to process */
8811 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8813 return NULL
; /* never reached */
8816 static void spawnIOThread(void) {
8818 sigset_t mask
, omask
;
8822 sigaddset(&mask
,SIGCHLD
);
8823 sigaddset(&mask
,SIGHUP
);
8824 sigaddset(&mask
,SIGPIPE
);
8825 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8826 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8827 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8831 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8832 server
.io_active_threads
++;
8835 /* We need to wait for the last thread to exit before we are able to
8836 * fork() in order to BGSAVE or BGREWRITEAOF. */
8837 static void waitEmptyIOJobsQueue(void) {
8839 int io_processed_len
;
8842 if (listLength(server
.io_newjobs
) == 0 &&
8843 listLength(server
.io_processing
) == 0 &&
8844 server
.io_active_threads
== 0)
8849 /* While waiting for empty jobs queue condition we post-process some
8850 * finished jobs, as I/O threads may be hanging trying to write against
8851 * the io_ready_pipe_write FD but there are so many pending jobs that
8853 io_processed_len
= listLength(server
.io_processed
);
8855 if (io_processed_len
) {
8856 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8857 usleep(1000); /* 1 millisecond */
8859 usleep(10000); /* 10 milliseconds */
8864 static void vmReopenSwapFile(void) {
8865 /* Note: we don't close the old one as we are in the child process
8866 * and don't want to mess at all with the original file object. */
8867 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8868 if (server
.vm_fp
== NULL
) {
8869 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8870 server
.vm_swap_file
);
8873 server
.vm_fd
= fileno(server
.vm_fp
);
8876 /* This function must be called with threaded IO locked */
8877 static void queueIOJob(iojob
*j
) {
8878 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8879 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8880 listAddNodeTail(server
.io_newjobs
,j
);
8881 if (server
.io_active_threads
< server
.vm_max_threads
)
8885 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8888 assert(key
->storage
== REDIS_VM_MEMORY
);
8889 assert(key
->refcount
== 1);
8891 j
= zmalloc(sizeof(*j
));
8892 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8894 j
->key
= dupStringObject(key
);
8898 j
->thread
= (pthread_t
) -1;
8899 key
->storage
= REDIS_VM_SWAPPING
;
8907 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
8909 /* This function makes the client 'c' wait for the key 'key' to be loaded.
8910 * If there is not already a job loading the key, it is created.
8911 * The key is added to the io_keys list in the client structure, and also
8912 * in the hash table mapping swapped keys to waiting clients, that is,
8913 * server.io_waited_keys. */
8914 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
8915 struct dictEntry
*de
;
8919 /* If the key does not exist or is already in RAM we don't need to
8920 * block the client at all. */
8921 de
= dictFind(c
->db
->dict
,key
);
8922 if (de
== NULL
) return 0;
8923 o
= dictGetEntryKey(de
);
8924 if (o
->storage
== REDIS_VM_MEMORY
) {
8926 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
8927 /* We were swapping the key, undo it! */
8928 vmCancelThreadedIOJob(o
);
8932 /* OK: the key is either swapped, or being loaded just now. */
8934 /* Add the key to the list of keys this client is waiting for.
8935 * This maps clients to keys they are waiting for. */
8936 listAddNodeTail(c
->io_keys
,key
);
8939 /* Add the client to the swapped keys => clients waiting map. */
8940 de
= dictFind(c
->db
->io_keys
,key
);
8944 /* For every key we take a list of clients blocked for it */
8946 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
8948 assert(retval
== DICT_OK
);
8950 l
= dictGetEntryVal(de
);
8952 listAddNodeTail(l
,c
);
8954 /* Are we already loading the key from disk? If not create a job */
8955 if (o
->storage
== REDIS_VM_SWAPPED
) {
8958 o
->storage
= REDIS_VM_LOADING
;
8959 j
= zmalloc(sizeof(*j
));
8960 j
->type
= REDIS_IOJOB_LOAD
;
8962 j
->key
= dupStringObject(key
);
8963 j
->key
->vtype
= o
->vtype
;
8964 j
->page
= o
->vm
.page
;
8967 j
->thread
= (pthread_t
) -1;
8975 /* Preload keys needed for the ZUNION and ZINTER commands. */
8976 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
8978 num
= atoi(c
->argv
[2]->ptr
);
8979 for (i
= 0; i
< num
; i
++) {
8980 waitForSwappedKey(c
,c
->argv
[3+i
]);
8984 /* Is this client attempting to run a command against swapped keys?
8985 * If so, block it ASAP, load the keys in background, then resume it.
8987 * The important idea about this function is that it can fail! If keys will
8988 * still be swapped when the client is resumed, these key lookups will
8989 * just block loading keys from disk. In practical terms this should only
8990 * happen with SORT BY command or if there is a bug in this function.
8992 * Return 1 if the client is marked as blocked, 0 if the client can
8993 * continue as the keys it is going to access appear to be in memory. */
8994 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
8997 if (cmd
->vm_preload_proc
!= NULL
) {
8998 cmd
->vm_preload_proc(c
);
9000 if (cmd
->vm_firstkey
== 0) return 0;
9001 last
= cmd
->vm_lastkey
;
9002 if (last
< 0) last
= c
->argc
+last
;
9003 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9004 waitForSwappedKey(c
,c
->argv
[j
]);
9007 /* If the client was blocked for at least one key, mark it as blocked. */
9008 if (listLength(c
->io_keys
)) {
9009 c
->flags
|= REDIS_IO_WAIT
;
9010 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9011 server
.vm_blocked_clients
++;
9018 /* Remove the 'key' from the list of blocked keys for a given client.
9020 * The function returns 1 when there are no longer blocking keys after
9021 * the current one was removed (and the client can be unblocked). */
9022 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9026 struct dictEntry
*de
;
9028 /* Remove the key from the list of keys this client is waiting for. */
9029 listRewind(c
->io_keys
,&li
);
9030 while ((ln
= listNext(&li
)) != NULL
) {
9031 if (compareStringObjects(ln
->value
,key
) == 0) {
9032 listDelNode(c
->io_keys
,ln
);
9038 /* Remove the client from the key => waiting clients map. */
9039 de
= dictFind(c
->db
->io_keys
,key
);
9041 l
= dictGetEntryVal(de
);
9042 ln
= listSearchKey(l
,c
);
9045 if (listLength(l
) == 0)
9046 dictDelete(c
->db
->io_keys
,key
);
9048 return listLength(c
->io_keys
) == 0;
9051 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9052 struct dictEntry
*de
;
9057 de
= dictFind(db
->io_keys
,key
);
9060 l
= dictGetEntryVal(de
);
9061 len
= listLength(l
);
9062 /* Note: we can't use something like while(listLength(l)) as the list
9063 * can be freed by the calling function when we remove the last element. */
9066 redisClient
*c
= ln
->value
;
9068 if (dontWaitForSwappedKey(c
,key
)) {
9069 /* Put the client in the list of clients ready to go as we
9070 * loaded all the keys about it. */
9071 listAddNodeTail(server
.io_ready_clients
,c
);
9076 /* =========================== Remote Configuration ========================= */
9078 static void configSetCommand(redisClient
*c
) {
9079 robj
*o
= getDecodedObject(c
->argv
[3]);
9080 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9081 zfree(server
.dbfilename
);
9082 server
.dbfilename
= zstrdup(o
->ptr
);
9083 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9084 zfree(server
.requirepass
);
9085 server
.requirepass
= zstrdup(o
->ptr
);
9086 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9087 zfree(server
.masterauth
);
9088 server
.masterauth
= zstrdup(o
->ptr
);
9089 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9090 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9092 addReplySds(c
,sdscatprintf(sdsempty(),
9093 "-ERR not supported CONFIG parameter %s\r\n",
9094 (char*)c
->argv
[2]->ptr
));
9099 addReply(c
,shared
.ok
);
9102 static void configGetCommand(redisClient
*c
) {
9103 robj
*o
= getDecodedObject(c
->argv
[2]);
9104 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9105 char *pattern
= o
->ptr
;
9109 decrRefCount(lenobj
);
9111 if (stringmatch(pattern
,"dbfilename",0)) {
9112 addReplyBulkCString(c
,"dbfilename");
9113 addReplyBulkCString(c
,server
.dbfilename
);
9116 if (stringmatch(pattern
,"requirepass",0)) {
9117 addReplyBulkCString(c
,"requirepass");
9118 addReplyBulkCString(c
,server
.requirepass
);
9121 if (stringmatch(pattern
,"masterauth",0)) {
9122 addReplyBulkCString(c
,"masterauth");
9123 addReplyBulkCString(c
,server
.masterauth
);
9126 if (stringmatch(pattern
,"maxmemory",0)) {
9129 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9130 addReplyBulkCString(c
,"maxmemory");
9131 addReplyBulkCString(c
,buf
);
9135 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9138 static void configCommand(redisClient
*c
) {
9139 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9140 if (c
->argc
!= 4) goto badarity
;
9141 configSetCommand(c
);
9142 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9143 if (c
->argc
!= 3) goto badarity
;
9144 configGetCommand(c
);
9145 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9146 if (c
->argc
!= 2) goto badarity
;
9147 server
.stat_numcommands
= 0;
9148 server
.stat_numconnections
= 0;
9149 server
.stat_expiredkeys
= 0;
9150 server
.stat_starttime
= time(NULL
);
9151 addReply(c
,shared
.ok
);
9153 addReplySds(c
,sdscatprintf(sdsempty(),
9154 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9159 addReplySds(c
,sdscatprintf(sdsempty(),
9160 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9161 (char*) c
->argv
[1]->ptr
));
9164 /* ================================= Debugging ============================== */
9166 static void debugCommand(redisClient
*c
) {
9167 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9169 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9170 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9171 addReply(c
,shared
.err
);
9175 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9176 addReply(c
,shared
.err
);
9179 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9180 addReply(c
,shared
.ok
);
9181 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9183 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9184 addReply(c
,shared
.err
);
9187 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9188 addReply(c
,shared
.ok
);
9189 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9190 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9194 addReply(c
,shared
.nokeyerr
);
9197 key
= dictGetEntryKey(de
);
9198 val
= dictGetEntryVal(de
);
9199 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9200 key
->storage
== REDIS_VM_SWAPPING
)) {
9204 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9205 strenc
= strencoding
[val
->encoding
];
9207 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9210 addReplySds(c
,sdscatprintf(sdsempty(),
9211 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9212 "encoding:%s serializedlength:%lld\r\n",
9213 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9214 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9216 addReplySds(c
,sdscatprintf(sdsempty(),
9217 "+Key at:%p refcount:%d, value swapped at: page %llu "
9218 "using %llu pages\r\n",
9219 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9220 (unsigned long long) key
->vm
.usedpages
));
9222 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9223 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9226 if (!server
.vm_enabled
) {
9227 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9231 addReply(c
,shared
.nokeyerr
);
9234 key
= dictGetEntryKey(de
);
9235 val
= dictGetEntryVal(de
);
9236 /* If the key is shared we want to create a copy */
9237 if (key
->refcount
> 1) {
9238 robj
*newkey
= dupStringObject(key
);
9240 key
= dictGetEntryKey(de
) = newkey
;
9243 if (key
->storage
!= REDIS_VM_MEMORY
) {
9244 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9245 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9246 dictGetEntryVal(de
) = NULL
;
9247 addReply(c
,shared
.ok
);
9249 addReply(c
,shared
.err
);
9252 addReplySds(c
,sdsnew(
9253 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
9257 static void _redisAssert(char *estr
, char *file
, int line
) {
9258 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9259 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9260 #ifdef HAVE_BACKTRACE
9261 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9266 /* =================================== Main! ================================ */
9269 int linuxOvercommitMemoryValue(void) {
9270 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9274 if (fgets(buf
,64,fp
) == NULL
) {
9283 void linuxOvercommitMemoryWarning(void) {
9284 if (linuxOvercommitMemoryValue() == 0) {
9285 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9288 #endif /* __linux__ */
9290 static void daemonize(void) {
9294 if (fork() != 0) exit(0); /* parent exits */
9295 setsid(); /* create a new session */
9297 /* Every output goes to /dev/null. If Redis is daemonized but
9298 * the 'logfile' is set to 'stdout' in the configuration file
9299 * it will not log at all. */
9300 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9301 dup2(fd
, STDIN_FILENO
);
9302 dup2(fd
, STDOUT_FILENO
);
9303 dup2(fd
, STDERR_FILENO
);
9304 if (fd
> STDERR_FILENO
) close(fd
);
9306 /* Try to write the pid file */
9307 fp
= fopen(server
.pidfile
,"w");
9309 fprintf(fp
,"%d\n",getpid());
9314 static void version() {
9315 printf("Redis server version %s\n", REDIS_VERSION
);
9319 static void usage() {
9320 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9321 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9325 int main(int argc
, char **argv
) {
9330 if (strcmp(argv
[1], "-v") == 0 ||
9331 strcmp(argv
[1], "--version") == 0) version();
9332 if (strcmp(argv
[1], "--help") == 0) usage();
9333 resetServerSaveParams();
9334 loadServerConfig(argv
[1]);
9335 } else if ((argc
> 2)) {
9338 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9340 if (server
.daemonize
) daemonize();
9342 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9344 linuxOvercommitMemoryWarning();
9347 if (server
.appendonly
) {
9348 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9349 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9351 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9352 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9354 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9355 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9357 aeDeleteEventLoop(server
.el
);
9361 /* ============================= Backtrace support ========================= */
9363 #ifdef HAVE_BACKTRACE
9364 static char *findFuncName(void *pointer
, unsigned long *offset
);
9366 static void *getMcontextEip(ucontext_t
*uc
) {
9367 #if defined(__FreeBSD__)
9368 return (void*) uc
->uc_mcontext
.mc_eip
;
9369 #elif defined(__dietlibc__)
9370 return (void*) uc
->uc_mcontext
.eip
;
9371 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9373 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9375 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9377 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9378 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9379 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9381 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9383 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9384 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9385 #elif defined(__ia64__) /* Linux IA64 */
9386 return (void*) uc
->uc_mcontext
.sc_ip
;
9392 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9394 char **messages
= NULL
;
9395 int i
, trace_size
= 0;
9396 unsigned long offset
=0;
9397 ucontext_t
*uc
= (ucontext_t
*) secret
;
9399 REDIS_NOTUSED(info
);
9401 redisLog(REDIS_WARNING
,
9402 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9403 infostring
= genRedisInfoString();
9404 redisLog(REDIS_WARNING
, "%s",infostring
);
9405 /* It's not safe to sdsfree() the returned string under memory
9406 * corruption conditions. Let it leak as we are going to abort */
9408 trace_size
= backtrace(trace
, 100);
9409 /* overwrite sigaction with caller's address */
9410 if (getMcontextEip(uc
) != NULL
) {
9411 trace
[1] = getMcontextEip(uc
);
9413 messages
= backtrace_symbols(trace
, trace_size
);
9415 for (i
=1; i
<trace_size
; ++i
) {
9416 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9418 p
= strchr(messages
[i
],'+');
9419 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9420 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9422 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9425 /* free(messages); Don't call free() with possibly corrupted memory. */
9429 static void setupSigSegvAction(void) {
9430 struct sigaction act
;
9432 sigemptyset (&act
.sa_mask
);
9433 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9434 * is used. Otherwise, sa_handler is used */
9435 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9436 act
.sa_sigaction
= segvHandler
;
9437 sigaction (SIGSEGV
, &act
, NULL
);
9438 sigaction (SIGBUS
, &act
, NULL
);
9439 sigaction (SIGFPE
, &act
, NULL
);
9440 sigaction (SIGILL
, &act
, NULL
);
9441 sigaction (SIGBUS
, &act
, NULL
);
9445 #include "staticsymbols.h"
9446 /* This function tries to convert a pointer into a function name. It's used in
9447 * order to provide a backtrace under segmentation fault that's able to
9448 * display functions declared as static (otherwise the backtrace is useless). */
9449 static char *findFuncName(void *pointer
, unsigned long *offset
){
9451 unsigned long off
, minoff
= 0;
9453 /* Try to match against the Symbol with the smallest offset */
9454 for (i
=0; symsTable
[i
].pointer
; i
++) {
9455 unsigned long lp
= (unsigned long) pointer
;
9457 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9458 off
=lp
-symsTable
[i
].pointer
;
9459 if (ret
< 0 || off
< minoff
) {
9465 if (ret
== -1) return NULL
;
9467 return symsTable
[ret
].name
;
9469 #else /* HAVE_BACKTRACE */
9470 static void setupSigSegvAction(void) {
9472 #endif /* HAVE_BACKTRACE */