2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.7"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. To store 32 bits lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specify the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte, most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0
153 #define REDIS_RDB_14BITLEN 1
154 #define REDIS_RDB_32BITLEN 2
155 #define REDIS_RDB_ENCVAL 3
156 #define REDIS_RDB_LENERR UINT_MAX
158 /* When a length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * according to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
166 /* Virtual memory object->where field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * otherwise unused parameter or variable does not trigger a compiler warning.
 * The argument is parenthesized so that any expression (e.g. `a + b`) can be
 * passed; without the parentheses the cast would bind only to the first
 * operand and the expansion would not compile. */
#define REDIS_NOTUSED(V) ((void)(V))
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way:
 * when the expression _e is true the macro does nothing; when it is false,
 * _redisAssert() is called with the stringified expression, the source file
 * name and the line number, and the process is then terminated with
 * _exit(1). */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Failure hook invoked by redisAssert(): receives the failed expression as
 * text, plus the file name and line number of the assertion. */
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure: the on-disk location of an object and the time
 * it was last accessed. redisObject carries one of these as its 'vm' field. */
246 struct redisObjectVM
{
247 off_t page
; /* the page at which the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
; /* One of the REDIS_ENCODING_* values */
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
337 /* Global server state structure */
342 dict
*sharingpool
; /* Pool used for object sharing */
343 unsigned int sharingpoolsize
;
344 long long dirty
; /* changes to DB from the last save */
346 list
*slaves
, *monitors
;
347 char neterr
[ANET_ERR_LEN
];
349 int cronloops
; /* number of times the cron function run */
350 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
351 time_t lastsave
; /* Unix time of last successful save */
352 /* Fields used only for stats */
353 time_t stat_starttime
; /* server start time */
354 long long stat_numcommands
; /* number of processed commands */
355 long long stat_numconnections
; /* number of connections received */
356 long long stat_expiredkeys
; /* number of expired keys */
369 pid_t bgsavechildpid
;
370 pid_t bgrewritechildpid
;
371 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
372 struct saveparam
*saveparams
;
377 char *appendfilename
;
381 /* Replication related */
386 redisClient
*master
; /* client that is master for this slave */
388 unsigned int maxclients
;
389 unsigned long long maxmemory
;
390 unsigned int blpop_blocked_clients
;
391 unsigned int vm_blocked_clients
;
392 /* Sort parameters - qsort_r() is only available under BSD so we
393 * have to take this state global, in order to pass it to sortCompare() */
397 /* Virtual memory configuration */
402 unsigned long long vm_max_memory
;
404 size_t hash_max_zipmap_entries
;
405 size_t hash_max_zipmap_value
;
406 /* Virtual memory state */
409 off_t vm_next_page
; /* Next probably empty page */
410 off_t vm_near_pages
; /* Number of pages allocated sequentially */
411 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
412 time_t unixtime
; /* Unix time sampled every second. */
413 /* Virtual memory I/O threads stuff */
414 /* An I/O thread processes an element taken from the io_newjobs queue and
415 * puts the result of the operation in the io_processed list. While the
416 * job is being processed, it's put on io_processing queue. */
417 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
418 list
*io_processing
; /* List of VM I/O jobs being processed */
419 list
*io_processed
; /* List of VM I/O jobs already processed */
420 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
421 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
422 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
423 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
424 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
425 int io_active_threads
; /* Number of running I/O threads */
426 int vm_max_threads
; /* Max number of I/O threads running at the same time */
427 /* Our main thread is blocked on the event loop, looking for sockets ready
428 * to be read or written, so when a threaded I/O operation is ready to be
429 * processed by the main thread, the I/O thread will use a unix pipe to
430 * awake the main thread. The following are the two pipe FDs. */
431 int io_ready_pipe_read
;
432 int io_ready_pipe_write
;
433 /* Virtual memory stats */
434 unsigned long long vm_stats_used_pages
;
435 unsigned long long vm_stats_swapped_objects
;
436 unsigned long long vm_stats_swapouts
;
437 unsigned long long vm_stats_swapins
;
441 typedef void redisCommandProc(redisClient
*c
);
/* Description of a single command; cmdTable is an array of these. */
442 struct redisCommand
{
444 redisCommandProc
*proc
; /* Function called with the client to run the command */
447 /* Use a function to determine which keys need to be loaded
448 * in the background prior to executing this command. Takes precedence
449 * over vm_firstkey and others, ignored when NULL */
450 redisCommandProc
*vm_preload_proc
;
451 /* What keys should be loaded in background when calling this command? */
452 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
453 int vm_lastkey
; /* The last argument that's a key */
454 int vm_keystep
; /* The step between first and last key */
457 struct redisFunctionSym
{
459 unsigned long pointer
;
462 typedef struct _redisSortObject
{
470 typedef struct _redisSortOperation
{
473 } redisSortOperation
;
475 /* ZSETs use a specialized version of Skiplists */
477 typedef struct zskiplistNode
{
478 struct zskiplistNode
**forward
;
479 struct zskiplistNode
*backward
;
485 typedef struct zskiplist
{
486 struct zskiplistNode
*header
, *tail
;
487 unsigned long length
;
491 typedef struct zset
{
496 /* Our shared "common" objects */
498 struct sharedObjectsStruct
{
499 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
500 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
501 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
502 *outofrangeerr
, *plus
,
503 *select0
, *select1
, *select2
, *select3
, *select4
,
504 *select5
, *select6
, *select7
, *select8
, *select9
;
507 /* Global vars that are actually used as constants. The following double
508 * values are used for double on-disk serialization, and are initialized
509 * at runtime to avoid strange compiler optimizations. */
511 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
513 /* VM threaded I/O request message. The REDIS_IOJOB_* constants below are
 * the possible values of the 'type' field of an iojob. */
514 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
515 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
516 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
517 typedef struct iojob
{
518 int type
; /* Request type, REDIS_IOJOB_* */
519 redisDb
*db
;/* Redis database */
520 robj
*key
; /* This I/O request is about swapping this key */
521 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
522 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
523 off_t page
; /* Swap page where to read/write the object */
524 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
525 int canceled
; /* True if this command was canceled by blocking side of VM */
526 pthread_t thread
; /* ID of the thread processing this entry */
529 /*================================ Prototypes =============================== */
531 static void freeStringObject(robj
*o
);
532 static void freeListObject(robj
*o
);
533 static void freeSetObject(robj
*o
);
534 static void decrRefCount(void *o
);
535 static robj
*createObject(int type
, void *ptr
);
536 static void freeClient(redisClient
*c
);
537 static int rdbLoad(char *filename
);
538 static void addReply(redisClient
*c
, robj
*obj
);
539 static void addReplySds(redisClient
*c
, sds s
);
540 static void incrRefCount(robj
*o
);
541 static int rdbSaveBackground(char *filename
);
542 static robj
*createStringObject(char *ptr
, size_t len
);
543 static robj
*dupStringObject(robj
*o
);
544 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
545 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
546 static int syncWithMaster(void);
547 static robj
*tryObjectSharing(robj
*o
);
548 static int tryObjectEncoding(robj
*o
);
549 static robj
*getDecodedObject(robj
*o
);
550 static int removeExpire(redisDb
*db
, robj
*key
);
551 static int expireIfNeeded(redisDb
*db
, robj
*key
);
552 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
553 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
554 static int deleteKey(redisDb
*db
, robj
*key
);
555 static time_t getExpire(redisDb
*db
, robj
*key
);
556 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
557 static void updateSlavesWaitingBgsave(int bgsaveerr
);
558 static void freeMemoryIfNeeded(void);
559 static int processCommand(redisClient
*c
);
560 static void setupSigSegvAction(void);
561 static void rdbRemoveTempFile(pid_t childpid
);
562 static void aofRemoveTempFile(pid_t childpid
);
563 static size_t stringObjectLen(robj
*o
);
564 static void processInputBuffer(redisClient
*c
);
565 static zskiplist
*zslCreate(void);
566 static void zslFree(zskiplist
*zsl
);
567 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
568 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
569 static void initClientMultiState(redisClient
*c
);
570 static void freeClientMultiState(redisClient
*c
);
571 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
572 static void unblockClientWaitingData(redisClient
*c
);
573 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
574 static void vmInit(void);
575 static void vmMarkPagesFree(off_t page
, off_t count
);
576 static robj
*vmLoadObject(robj
*key
);
577 static robj
*vmPreviewObject(robj
*key
);
578 static int vmSwapOneObjectBlocking(void);
579 static int vmSwapOneObjectThreaded(void);
580 static int vmCanSwapOut(void);
581 static int tryFreeOneObjectFromFreelist(void);
582 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
583 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
584 static void vmCancelThreadedIOJob(robj
*o
);
585 static void lockThreadedIO(void);
586 static void unlockThreadedIO(void);
587 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
588 static void freeIOJob(iojob
*j
);
589 static void queueIOJob(iojob
*j
);
590 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
591 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
592 static void waitEmptyIOJobsQueue(void);
593 static void vmReopenSwapFile(void);
594 static int vmFreePage(off_t page
);
595 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
596 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
597 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
598 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
599 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static struct redisCommand
*lookupCommand(char *name
);
601 static void call(redisClient
*c
, struct redisCommand
*cmd
);
602 static void resetClient(redisClient
*c
);
603 static void convertToRealHash(robj
*o
);
605 static void authCommand(redisClient
*c
);
606 static void pingCommand(redisClient
*c
);
607 static void echoCommand(redisClient
*c
);
608 static void setCommand(redisClient
*c
);
609 static void setnxCommand(redisClient
*c
);
610 static void getCommand(redisClient
*c
);
611 static void delCommand(redisClient
*c
);
612 static void existsCommand(redisClient
*c
);
613 static void incrCommand(redisClient
*c
);
614 static void decrCommand(redisClient
*c
);
615 static void incrbyCommand(redisClient
*c
);
616 static void decrbyCommand(redisClient
*c
);
617 static void selectCommand(redisClient
*c
);
618 static void randomkeyCommand(redisClient
*c
);
619 static void keysCommand(redisClient
*c
);
620 static void dbsizeCommand(redisClient
*c
);
621 static void lastsaveCommand(redisClient
*c
);
622 static void saveCommand(redisClient
*c
);
623 static void bgsaveCommand(redisClient
*c
);
624 static void bgrewriteaofCommand(redisClient
*c
);
625 static void shutdownCommand(redisClient
*c
);
626 static void moveCommand(redisClient
*c
);
627 static void renameCommand(redisClient
*c
);
628 static void renamenxCommand(redisClient
*c
);
629 static void lpushCommand(redisClient
*c
);
630 static void rpushCommand(redisClient
*c
);
631 static void lpopCommand(redisClient
*c
);
632 static void rpopCommand(redisClient
*c
);
633 static void llenCommand(redisClient
*c
);
634 static void lindexCommand(redisClient
*c
);
635 static void lrangeCommand(redisClient
*c
);
636 static void ltrimCommand(redisClient
*c
);
637 static void typeCommand(redisClient
*c
);
638 static void lsetCommand(redisClient
*c
);
639 static void saddCommand(redisClient
*c
);
640 static void sremCommand(redisClient
*c
);
641 static void smoveCommand(redisClient
*c
);
642 static void sismemberCommand(redisClient
*c
);
643 static void scardCommand(redisClient
*c
);
644 static void spopCommand(redisClient
*c
);
645 static void srandmemberCommand(redisClient
*c
);
646 static void sinterCommand(redisClient
*c
);
647 static void sinterstoreCommand(redisClient
*c
);
648 static void sunionCommand(redisClient
*c
);
649 static void sunionstoreCommand(redisClient
*c
);
650 static void sdiffCommand(redisClient
*c
);
651 static void sdiffstoreCommand(redisClient
*c
);
652 static void syncCommand(redisClient
*c
);
653 static void flushdbCommand(redisClient
*c
);
654 static void flushallCommand(redisClient
*c
);
655 static void sortCommand(redisClient
*c
);
656 static void lremCommand(redisClient
*c
);
657 static void rpoplpushcommand(redisClient
*c
);
658 static void infoCommand(redisClient
*c
);
659 static void mgetCommand(redisClient
*c
);
660 static void monitorCommand(redisClient
*c
);
661 static void expireCommand(redisClient
*c
);
662 static void expireatCommand(redisClient
*c
);
663 static void getsetCommand(redisClient
*c
);
664 static void ttlCommand(redisClient
*c
);
665 static void slaveofCommand(redisClient
*c
);
666 static void debugCommand(redisClient
*c
);
667 static void msetCommand(redisClient
*c
);
668 static void msetnxCommand(redisClient
*c
);
669 static void zaddCommand(redisClient
*c
);
670 static void zincrbyCommand(redisClient
*c
);
671 static void zrangeCommand(redisClient
*c
);
672 static void zrangebyscoreCommand(redisClient
*c
);
673 static void zcountCommand(redisClient
*c
);
674 static void zrevrangeCommand(redisClient
*c
);
675 static void zcardCommand(redisClient
*c
);
676 static void zremCommand(redisClient
*c
);
677 static void zscoreCommand(redisClient
*c
);
678 static void zremrangebyscoreCommand(redisClient
*c
);
679 static void multiCommand(redisClient
*c
);
680 static void execCommand(redisClient
*c
);
681 static void discardCommand(redisClient
*c
);
682 static void blpopCommand(redisClient
*c
);
683 static void brpopCommand(redisClient
*c
);
684 static void appendCommand(redisClient
*c
);
685 static void substrCommand(redisClient
*c
);
686 static void zrankCommand(redisClient
*c
);
687 static void zrevrankCommand(redisClient
*c
);
688 static void hsetCommand(redisClient
*c
);
689 static void hgetCommand(redisClient
*c
);
690 static void hdelCommand(redisClient
*c
);
691 static void hlenCommand(redisClient
*c
);
692 static void zremrangebyrankCommand(redisClient
*c
);
693 static void zunionCommand(redisClient
*c
);
694 static void zinterCommand(redisClient
*c
);
695 static void hkeysCommand(redisClient
*c
);
696 static void hvalsCommand(redisClient
*c
);
697 static void hgetallCommand(redisClient
*c
);
698 static void hexistsCommand(redisClient
*c
);
699 static void configCommand(redisClient
*c
);
700 static void hincrbyCommand(redisClient
*c
);
702 /*================================= Globals ================================= */
705 static struct redisServer server
; /* server global state */
706 static struct redisCommand cmdTable
[] = {
707 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
708 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
709 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
710 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
711 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
712 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
713 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
714 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
715 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
716 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
717 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
718 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
719 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
720 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
721 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
723 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
725 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
726 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
727 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
728 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
729 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
730 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
731 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
732 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
733 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
734 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
738 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
739 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
740 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
741 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
742 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
743 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
745 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
746 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
747 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
750 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
751 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
757 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
758 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
759 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
760 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
761 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
762 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
763 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
767 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
768 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
769 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
772 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
773 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
774 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
775 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
779 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
780 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
781 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
782 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
783 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
785 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
786 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
787 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
788 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
789 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
795 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
796 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
799 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
804 {NULL
,NULL
,0,0,NULL
,0,0,0}
809 /*============================ Utility functions ============================ */
/* Lower-case helper for the glob matcher. The <ctype.h> functions are only
 * defined for EOF and values representable as unsigned char, so a negative
 * value (a high-bit byte read through a signed char) is returned unchanged
 * instead of being handed to tolower(). */
static int globLower(int ch) {
    return (ch < 0) ? ch : tolower(ch);
}

/* Glob-style pattern matching.
 *
 * Matches the first stringLen bytes of 'string' against the first
 * patternLen bytes of 'pattern'. Supported syntax:
 *   *        any sequence of bytes (including none)
 *   ?        exactly one byte
 *   [abc]    one byte out of a set; [^abc] negates, [a-z] is a range,
 *            a backslash escapes the next byte inside the set
 *   \x       the byte x taken literally
 * When 'nocase' is non-zero, literal bytes and set members are compared
 * case-insensitively.
 *
 * Returns 1 on match, 0 otherwise.
 *
 * Neither buffer needs to be NUL terminated: every read is bounded by the
 * explicit lengths, so the function never looks at pattern[patternLen] or
 * string[stringLen]. */
static int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while (patternLen > 0) {
        switch (pattern[0]) {
        case '*':
            /* Collapse a run of stars without peeking past the pattern. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try the rest of the pattern at every remaining offset. */
            while (stringLen > 0) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int negate, match = 0;

            /* A set always consumes one byte of input. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            negate = patternLen > 0 && pattern[0] == '^';
            if (negate) {
                pattern++;
                patternLen--;
            }
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated set: step back one byte so that the
                     * shared pattern++ after the switch ends at length 0. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];

                    /* Accept reversed ranges such as [z-a]. */
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = globLower(start);
                        end = globLower(end);
                        c = globLower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (globLower(pattern[0]) == globLower(string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (negate)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            /* Escape: compare the following byte literally. A trailing
             * backslash stands for itself. */
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (globLower(pattern[0]) != globLower(string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* Input exhausted: only trailing stars may remain. */
            while (patternLen > 0 && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
/* Convenience front end for stringmatchlen() for NUL terminated inputs:
 * both lengths are measured with strlen() and the result of the
 * length-bounded matcher is returned unchanged. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern, patternLen, string, stringLen, nocase);
}
/* Write one printf-style message to the server log.
 *
 * level    - severity of the message; it is compared with server.verbosity
 *            and also used as an index into the marker table c[].
 * fmt, ... - printf-style format and arguments, forwarded to vfprintf().
 *
 * Each emitted line is prefixed with the process id, a "%d %b %H:%M:%S"
 * local timestamp and a one-character marker.
 *
 * NOTE(review): the declarations of fp, ap, buf, now and c, the va_start /
 * va_end calls and the closing braces are on lines not visible in this
 * view; the comments below describe only what can be seen. */
938 static void redisLog(int level
, const char *fmt
, ...) {
/* Destination: stdout when no logfile is configured, otherwise the logfile
 * is opened in append mode for this call. Note this happens before the
 * verbosity check below. */
942 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
/* Only messages at or above the configured verbosity are written. */
946 if (level
>= server
.verbosity
) {
952 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
/* NOTE(review): c[level] is indexed without a visible range check. */
953 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
954 vfprintf(fp
, fmt
, ap
);
/* A logfile opened above is closed again; stdout is left open. */
960 if (server
.logfile
) fclose(fp
);
963 /*====================== Hash table type implementation ==================== */
965 /* This is a hash table type that uses the SDS dynamic strings library as
966 * keys and Redis objects as values (objects can hold SDS strings, among other value types). */
/* Dictionary value destructor callback. It is installed as the val
 * destructor of zsetDictType, where the values are described as
 * malloc(sizeof(double)) allocations. privdata is unused.
 * NOTE(review): the statement that actually releases 'val' is on a line
 * not visible in this view. */
969 static void dictVanillaFree(void *privdata
, void *val
)
971 DICT_NOTUSED(privdata
);
975 static void dictListDestructor(void *privdata
, void *val
)
977 DICT_NOTUSED(privdata
);
978 listRelease((list
*)val
);
/* Key comparison callback for keys that are sds strings.
 * Two keys are equal when their sds lengths are the same and memcmp() over
 * that length finds no difference. Returns 1 when equal, 0 otherwise.
 * privdata is unused.
 * NOTE(review): the second key parameter (key2) and the declarations of
 * l1 and l2 are on lines not visible in this view. */
981 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
985 DICT_NOTUSED(privdata
);
987 l1
= sdslen((sds
)key1
);
988 l2
= sdslen((sds
)key2
);
/* Keys of different length can never match, so skip the byte compare. */
989 if (l1
!= l2
) return 0;
990 return memcmp(key1
, key2
, l1
) == 0;
/* Dictionary destructor callback used for both keys and values in the
 * dictType tables of this file. A NULL value is accepted and ignored.
 * privdata is unused.
 * NOTE(review): the statement that releases a non-NULL 'val' is on a line
 * not visible in this view. */
993 static void dictRedisObjectDestructor(void *privdata
, void *val
)
995 DICT_NOTUSED(privdata
);
997 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
1001 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1004 const robj
*o1
= key1
, *o2
= key2
;
1005 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1008 static unsigned int dictObjHash(const void *key
) {
1009 const robj
*o
= key
;
1010 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
/* Key comparison callback for tables whose keys may be encoded objects
 * (the key compare entry of setDictType, zsetDictType and hashDictType).
 * NOTE(review): the second key parameter, the declaration of cmp and
 * everything after the comparison (including how cmp is returned and what
 * happens to the decoded objects) are on lines not visible in this view. */
1013 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1016 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
/* Fast path: two integer-encoded objects holding the same ptr value are
 * equal, no decoding needed. */
1019 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1020 o2
->encoding
== REDIS_ENCODING_INT
&&
1021 o1
->ptr
== o2
->ptr
) return 1;
/* Otherwise both sides go through getDecodedObject() and are compared
 * with sdsDictKeyCompare(). */
1023 o1
= getDecodedObject(o1
);
1024 o2
= getDecodedObject(o2
);
1025 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for tables whose keys may be encoded objects (the hash
 * function entry of setDictType, zsetDictType and hashDictType).
 * NOTE(review): the declarations of buf, len and hash, the closing braces
 * and the tail of the function are on lines not visible in this view. */
1031 static unsigned int dictEncObjHash(const void *key
) {
1032 robj
*o
= (robj
*) key
;
/* Raw strings: hash the sds bytes directly. */
1034 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1035 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
/* Integer encoding: the value stored in ptr is printed as decimal text
 * into buf and that text is hashed (presumably so it hashes like the
 * equivalent string - confirm). */
1037 if (o
->encoding
== REDIS_ENCODING_INT
) {
1041 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1042 return dictGenHashFunction((unsigned char*)buf
, len
);
/* Remaining path: decode the object first, then hash its sds bytes. */
1046 o
= getDecodedObject(o
);
1047 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1054 /* Sets type and expires */
/* Keys are hashed and compared with the encoding-aware callbacks and
 * released with dictRedisObjectDestructor; values have no destructor.
 * NOTE(review): the two initializer lines between the hash function and
 * the key compare entry are not visible in this view. */
1055 static dictType setDictType
= {
1056 dictEncObjHash
, /* hash function */
1059 dictEncObjKeyCompare
, /* key compare */
1060 dictRedisObjectDestructor
, /* key destructor */
1061 NULL
/* val destructor */
1064 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
/* Same key callbacks as setDictType; values are released with
 * dictVanillaFree.
 * NOTE(review): the two initializer lines between the hash function and
 * the key compare entry are not visible in this view. */
1065 static dictType zsetDictType
= {
1066 dictEncObjHash
, /* hash function */
1069 dictEncObjKeyCompare
, /* key compare */
1070 dictRedisObjectDestructor
, /* key destructor */
1071 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
/* Dictionary type used by initServer() for each database's main dict.
 * Keys use the plain object hash/compare callbacks; both keys and values
 * are released with dictRedisObjectDestructor.
 * NOTE(review): the two initializer lines between the hash function and
 * the key compare entry are not visible in this view. */
1075 static dictType dbDictType
= {
1076 dictObjHash
, /* hash function */
1079 dictObjKeyCompare
, /* key compare */
1080 dictRedisObjectDestructor
, /* key destructor */
1081 dictRedisObjectDestructor
/* val destructor */
/* Dictionary type used by initServer() for each database's expires dict.
 * Keys use the plain object hash/compare callbacks and are released with
 * dictRedisObjectDestructor; values have no destructor.
 * NOTE(review): the two initializer lines between the hash function and
 * the key compare entry are not visible in this view. */
1085 static dictType keyptrDictType
= {
1086 dictObjHash
, /* hash function */
1089 dictObjKeyCompare
, /* key compare */
1090 dictRedisObjectDestructor
, /* key destructor */
1091 NULL
/* val destructor */
1094 /* Hash type hash table (note that small hashes are represented with zipmaps) */
/* Keys use the encoding-aware hash/compare callbacks; both keys and values
 * are released with dictRedisObjectDestructor.
 * NOTE(review): the two initializer lines between the hash function and
 * the key compare entry are not visible in this view. */
1095 static dictType hashDictType
= {
1096 dictEncObjHash
, /* hash function */
1099 dictEncObjKeyCompare
, /* key compare */
1100 dictRedisObjectDestructor
, /* key destructor */
1101 dictRedisObjectDestructor
/* val destructor */
1104 /* Keylist hash table type has unencoded redis objects as keys and
1105 * lists as values. It's used for blocking operations (BLPOP) and to
1106 * map swapped keys to a list of clients waiting for these keys to be loaded. */
/* Values are released with dictListDestructor, i.e. the whole list is
 * freed together with the entry.
 * NOTE(review): the two initializer lines between the hash function and
 * the key compare entry are not visible in this view. */
1107 static dictType keylistDictType
= {
1108 dictObjHash
, /* hash function */
1111 dictObjKeyCompare
, /* key compare */
1112 dictRedisObjectDestructor
, /* key destructor */
1113 dictListDestructor
/* val destructor */
1116 static void version();
1118 /* ========================= Random utility functions ======================= */
1120 /* Redis generally does not try to recover from out of memory conditions
1121 * when allocating objects or strings, it is not clear if it will be possible
1122 * to report this condition to the client since the networking layer itself
1123 * is based on heap allocation for send buffers, so we simply abort.
1124 * At least the code will be simpler to read... */
/* msg names the operation that failed; it is logged at REDIS_WARNING level
 * as "<msg>: Out of memory".
 * NOTE(review): the statements that follow the log call (the abort the
 * comment above refers to) are on lines not visible in this view. */
1125 static void oom(const char *msg
) {
1126 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1131 /* ====================== Redis server networking stuff ===================== */
/* Walk server.clients and deal with two kinds of timeouts:
 *  - idle clients: when server.maxidletime is set, a client that is neither
 *    a slave nor a master and whose last interaction is older than
 *    maxidletime seconds is logged as "Closing idle client";
 *  - blocked clients: a client flagged REDIS_BLOCKED whose blockingto
 *    deadline is set and already passed receives a null multi bulk reply
 *    and is unblocked.
 * NOTE(review): the declarations of c, ln and li, the statement that
 * actually closes the idle client and the closing braces are on lines not
 * visible in this view. */
1132 static void closeTimedoutClients(void) {
1135 time_t now
= time(NULL
);
1138 listRewind(server
.clients
,&li
);
1139 while ((ln
= listNext(&li
)) != NULL
) {
1140 c
= listNodeValue(ln
);
1141 if (server
.maxidletime
&&
1142 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1143 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1144 (now
- c
->lastinteraction
> server
.maxidletime
))
1146 redisLog(REDIS_VERBOSE
,"Closing idle client");
/* Blocked clients are only examined when the idle test above failed. */
1148 } else if (c
->flags
& REDIS_BLOCKED
) {
/* blockingto == 0 is treated as "no deadline". */
1149 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1150 addReply(c
,shared
.nullmultibulk
);
1151 unblockClientWaitingData(c
);
1157 static int htNeedsResize(dict
*dict
) {
1158 long long size
, used
;
1160 size
= dictSlots(dict
);
1161 used
= dictSize(dict
);
1162 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1163 (used
*100/size
< REDIS_HT_MINFILL
));
1166 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1167 * we resize the hash table to save memory */
1168 static void tryResizeHashTables(void) {
1171 for (j
= 0; j
< server
.dbnum
; j
++) {
1172 if (htNeedsResize(server
.db
[j
].dict
)) {
1173 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1174 dictResize(server
.db
[j
].dict
);
1175 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1177 if (htNeedsResize(server
.db
[j
].expires
))
1178 dictResize(server
.db
[j
].expires
);
1182 /* A background saving child (BGSAVE) terminated its work. Handle this. */
/* statloc is the wait status of the child. Three outcomes are logged:
 * clean exit with code 0 (success, server.lastsave is refreshed), clean
 * exit with a non-zero code (error), and the remaining case, logged as
 * termination by signal, where the temp file of the child is removed.
 * In every case server.bgsavechildpid is reset to -1 and slaves waiting
 * for the BGSAVE are notified.
 * NOTE(review): one statement of the success branch and the "else" that
 * introduces the signal branch are on lines not visible in this view.
 * NOTE(review): exitcode is taken with WEXITSTATUS() even when the child
 * was killed by a signal, and the final notification tests exitcode alone;
 * if WEXITSTATUS() yields 0 for a signalled child, slaves would be told
 * REDIS_OK - confirm, and consider also testing !bysignal there. */
1183 void backgroundSaveDoneHandler(int statloc
) {
1184 int exitcode
= WEXITSTATUS(statloc
);
1185 int bysignal
= WIFSIGNALED(statloc
);
1187 if (!bysignal
&& exitcode
== 0) {
1188 redisLog(REDIS_NOTICE
,
1189 "Background saving terminated with success");
1191 server
.lastsave
= time(NULL
);
1192 } else if (!bysignal
&& exitcode
!= 0) {
1193 redisLog(REDIS_WARNING
, "Background saving error");
1195 redisLog(REDIS_WARNING
,
1196 "Background saving terminated by signal");
1197 rdbRemoveTempFile(server
.bgsavechildpid
);
1199 server
.bgsavechildpid
= -1;
1200 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1201 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1202 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1205 /* A background append only file rewriting (BGREWRITEAOF) terminated its work. */
/* Handle the termination of the background append only file rewrite child.
 * statloc is the wait status of the child.
 *
 * On a clean exit with code 0 the visible steps are:
 *   1. open the child's temp file "temp-rewriteaof-bg-<pid>.aof" for append;
 *   2. write server.bgrewritebuf (the changes the parent accumulated while
 *      the child was working) to it;
 *   3. rename the temp file over server.appendfilename;
 *   4. if the append only file is currently open (appendfd != -1), close
 *      the old descriptor, switch to the new one and reset appendseldb so
 *      that a SELECT is issued again.
 * The other two outcomes (non-zero exit code, termination by signal) are
 * only logged. In all cases the rewrite buffer is emptied, the temp file is
 * removed through aofRemoveTempFile() and bgrewritechildpid is reset.
 *
 * NOTE(review): the declarations of fd and tmpfile, the failure checks and
 * exits of each step, the cleanup label and several braces are on lines not
 * visible in this view. */
1207 void backgroundRewriteDoneHandler(int statloc
) {
1208 int exitcode
= WEXITSTATUS(statloc
);
1209 int bysignal
= WIFSIGNALED(statloc
);
1211 if (!bysignal
&& exitcode
== 0) {
1215 redisLog(REDIS_NOTICE
,
1216 "Background append only file rewriting terminated with success");
1217 /* Now it's time to flush the differences accumulated by the parent */
1218 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1219 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1221 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1224 /* Flush our data... */
/* A short write is treated like an error: the result must equal the full
 * buffer length. */
1225 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1226 (signed) sdslen(server
.bgrewritebuf
)) {
1227 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
/* NOTE(review): "%lu" is paired with the result of sdslen() - confirm the
 * type matches on all targets or add a cast. */
1231 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1232 /* Now our work is to rename the temp file into the stable file. And
1233 * switch the file descriptor used by the server for append only. */
1234 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1235 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1239 /* Mission completed... almost */
1240 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1241 if (server
.appendfd
!= -1) {
1242 /* If append only is actually enabled... */
1243 close(server
.appendfd
);
1244 server
.appendfd
= fd
;
1246 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1247 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1249 /* If append only is disabled we just generate a dump in this
1250 * format. Why not? */
1253 } else if (!bysignal
&& exitcode
!= 0) {
1254 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1256 redisLog(REDIS_WARNING
,
1257 "Background append only file rewriting terminated by signal");
/* Common epilogue: drop the accumulated diff, start a fresh empty buffer,
 * remove the child's temp file and mark that no rewrite is running. */
1260 sdsfree(server
.bgrewritebuf
);
1261 server
.bgrewritebuf
= sdsempty();
1262 aofRemoveTempFile(server
.bgrewritechildpid
);
1263 server
.bgrewritechildpid
= -1;
/* Periodic housekeeping callback, registered by initServer() as a time
 * event. 'loops' is the value of server.cronloops before it is incremented
 * and is used to run the individual tasks at different rates:
 *   - refresh the cached server.unixtime (every call);
 *   - log per-database key statistics (every 50 calls);
 *   - shrink sparse hash tables (every 10 calls, only when no background
 *     save is running);
 *   - log client / memory statistics (every 50 calls);
 *   - close timed out clients (every 100 calls when maxidletime is set, or
 *     on every call while clients are blocked in a blocking pop);
 *   - reap a finished BGSAVE / BGREWRITEAOF child, or otherwise start a
 *     background save when one of the save points is reached;
 *   - expire a sample of volatile keys in each database;
 *   - swap objects out while virtual memory is over vm_max_memory;
 *   - connect to the master when replication is in the CONNECT state
 *     (every 10 calls).
 * eventLoop and clientData are unused.
 *
 * NOTE(review): many original lines of this function (local declarations,
 * "else"/"do" keywords, some statements, closing braces and the return
 * statement) are not visible in this view; comments below describe only
 * the visible statements. */
1266 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1267 int j
, loops
= server
.cronloops
++;
1268 REDIS_NOTUSED(eventLoop
);
1270 REDIS_NOTUSED(clientData
);
1272 /* We take a cached value of the unix time in the global state because
1273 * with virtual memory and aging there is to store the current time
1274 * in objects at every object access, and accuracy is not needed.
1275 * To access a global var is faster than calling time(NULL) */
1276 server
.unixtime
= time(NULL
);
1278 /* Show some info about non-empty databases */
1279 for (j
= 0; j
< server
.dbnum
; j
++) {
1280 long long size
, used
, vkeys
;
1282 size
= dictSlots(server
.db
[j
].dict
);
1283 used
= dictSize(server
.db
[j
].dict
);
1284 vkeys
= dictSize(server
.db
[j
].expires
);
1285 if (!(loops
% 50) && (used
|| vkeys
)) {
1286 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1287 /* dictPrintStats(server.dict); */
/* NOTE(review): the closing line of the following comment is not visible
 * in this view, so the comment text below is left exactly as found. */
1291 /* We don't want to resize the hash tables while a bacground saving
1292 * is in progress: the saving child is created using fork() that is
1293 * implemented with a copy-on-write semantic in most modern systems, so
1294 * if we resize the HT while there is the saving child at work actually
1295 * a lot of memory movements in the parent will cause a lot of pages
1297 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1299 /* Show information about connected clients */
1300 if (!(loops
% 50)) {
1301 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1302 listLength(server
.clients
)-listLength(server
.slaves
),
1303 listLength(server
.slaves
),
1304 zmalloc_used_memory(),
1305 dictSize(server
.sharingpool
));
1308 /* Close connections of timedout clients */
1309 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1310 closeTimedoutClients();
1312 /* Check if a background saving or AOF rewrite in progress terminated */
1313 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
/* Non-blocking reap (WNOHANG): 0 means no child has changed state yet.
 * NOTE(review): a -1 error result also satisfies "!= 0" - confirm how it
 * is meant to be handled. */
1317 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1318 if (pid
== server
.bgsavechildpid
) {
1319 backgroundSaveDoneHandler(statloc
);
1321 backgroundRewriteDoneHandler(statloc
);
1325 /* If there is not a background saving in progress check if
1326 * we have to save now */
1327 time_t now
= time(NULL
);
1328 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1329 struct saveparam
*sp
= server
.saveparams
+j
;
/* A save point fires when at least sp->changes modifications happened
 * and more than sp->seconds elapsed since the last save. */
1331 if (server
.dirty
>= sp
->changes
&&
1332 now
-server
.lastsave
> sp
->seconds
) {
1333 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1334 sp
->changes
, sp
->seconds
);
1335 rdbSaveBackground(server
.dbfilename
);
1341 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1342 * will use few CPU cycles if there are few expiring keys, otherwise
1343 * it will get more aggressive to avoid that too much memory is used by
1344 * keys that can be removed from the keyspace. */
1345 for (j
= 0; j
< server
.dbnum
; j
++) {
1347 redisDb
*db
= server
.db
+j
;
1349 /* Continue to expire if at the end of the cycle more than 25%
1350 * of the keys were expired. */
1352 long num
= dictSize(db
->expires
);
1353 time_t now
= time(NULL
);
/* At most REDIS_EXPIRELOOKUPS_PER_CRON random keys are sampled per round. */
1356 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1357 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1362 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
/* The expire time is stored directly in the entry value. */
1363 t
= (time_t) dictGetEntryVal(de
);
1365 deleteKey(db
,dictGetEntryKey(de
));
1367 server
.stat_expiredkeys
++;
1370 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1373 /* Swap a few keys on disk if we are over the memory limit and VM
1374 * is enabled. Try to free objects from the free list first. */
1375 if (vmCanSwapOut()) {
1376 while (server
.vm_enabled
&& zmalloc_used_memory() >
1377 server
.vm_max_memory
)
1381 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
/* With vm_max_threads == 0 the swap is done synchronously, otherwise it
 * is handed to the threaded implementation. */
1382 retval
= (server
.vm_max_threads
== 0) ?
1383 vmSwapOneObjectBlocking() :
1384 vmSwapOneObjectThreaded();
1385 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1386 zmalloc_used_memory() >
1387 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1389 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1391 /* Note that when using threaded I/O we free just one object,
1392 * because anyway when the I/O thread in charge to swap this
1393 * object out will finish, the handler of completed jobs
1394 * will try to swap more objects if we are still out of memory. */
1395 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1399 /* Check if we should connect to a MASTER */
1400 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1401 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1402 if (syncWithMaster() == REDIS_OK
) {
1403 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1409 /* This function gets called every time Redis is entering the
1410 * main loop of the event driven library, that is, before to sleep
1411 * for ready file descriptors. */
/* When virtual memory is enabled and server.io_ready_clients is not empty,
 * every client on that list is resumed: it is removed from the list, its
 * REDIS_IO_WAIT flag is cleared, the blocked-clients counter is
 * decremented, its readable event is installed again and the command it
 * was waiting to run is looked up. Afterwards any data still pending in
 * its query buffer is processed. eventLoop is unused.
 * NOTE(review): the declarations of ln and li, the statement that executes
 * the looked-up command and the closing braces are on lines not visible in
 * this view. */
1412 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1413 REDIS_NOTUSED(eventLoop
);
1415 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1419 listRewind(server
.io_ready_clients
,&li
);
1420 while((ln
= listNext(&li
))) {
1421 redisClient
*c
= ln
->value
;
1422 struct redisCommand
*cmd
;
1424 /* Resume the client. */
1425 listDelNode(server
.io_ready_clients
,ln
);
1426 c
->flags
&= (~REDIS_IO_WAIT
);
1427 server
.vm_blocked_clients
--;
1428 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1429 readQueryFromClient
, c
);
/* The command name is the first argument already parsed for this client. */
1430 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1431 assert(cmd
!= NULL
);
1434 /* There may be more data to process in the input buffer. */
1435 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1436 processInputBuffer(c
);
1441 static void createSharedObjects(void) {
1442 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1443 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1444 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1445 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1446 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1447 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1448 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1449 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1450 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1451 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1452 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1453 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1454 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1455 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1456 "-ERR no such key\r\n"));
1457 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1458 "-ERR syntax error\r\n"));
1459 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1460 "-ERR source and destination objects are the same\r\n"));
1461 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1462 "-ERR index out of range\r\n"));
1463 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1464 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1465 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1466 shared
.select0
= createStringObject("select 0\r\n",10);
1467 shared
.select1
= createStringObject("select 1\r\n",10);
1468 shared
.select2
= createStringObject("select 2\r\n",10);
1469 shared
.select3
= createStringObject("select 3\r\n",10);
1470 shared
.select4
= createStringObject("select 4\r\n",10);
1471 shared
.select5
= createStringObject("select 5\r\n",10);
1472 shared
.select6
= createStringObject("select 6\r\n",10);
1473 shared
.select7
= createStringObject("select 7\r\n",10);
1474 shared
.select8
= createStringObject("select 8\r\n",10);
1475 shared
.select9
= createStringObject("select 9\r\n",10);
1478 static void appendServerSaveParams(time_t seconds
, int changes
) {
1479 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1480 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1481 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1482 server
.saveparamslen
++;
1485 static void resetServerSaveParams() {
1486 zfree(server
.saveparams
);
1487 server
.saveparams
= NULL
;
1488 server
.saveparamslen
= 0;
/* Fill the global `server` structure with the built-in defaults, before
 * any configuration file is read. Besides the plain scalar defaults it
 * installs the three default save points, resets the replication state and
 * computes the double constants R_PosInf, R_NegInf and R_Nan.
 * NOTE(review): a few original lines of this function are not visible in
 * this view (including the line that assigns R_Zero, on which the three
 * double constants depend, and the closing brace). */
1491 static void initServerConfig() {
1492 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1493 server
.port
= REDIS_SERVERPORT
;
1494 server
.verbosity
= REDIS_VERBOSE
;
1495 server
.maxidletime
= REDIS_MAXIDLETIME
;
1496 server
.saveparams
= NULL
;
1497 server
.logfile
= NULL
; /* NULL = log on standard output */
1498 server
.bindaddr
= NULL
;
1499 server
.glueoutputbuf
= 1;
1500 server
.daemonize
= 0;
1501 server
.appendonly
= 0;
1502 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1503 server
.lastfsync
= time(NULL
);
1504 server
.appendfd
= -1;
1505 server
.appendseldb
= -1; /* Make sure the first time will not match */
/* File name defaults are heap copies so that the configuration loader can
 * zfree() and replace them. */
1506 server
.pidfile
= zstrdup("/var/run/redis.pid");
1507 server
.dbfilename
= zstrdup("dump.rdb");
1508 server
.appendfilename
= zstrdup("appendonly.aof");
1509 server
.requirepass
= NULL
;
1510 server
.shareobjects
= 0;
1511 server
.rdbcompression
= 1;
1512 server
.sharingpoolsize
= 1024;
1513 server
.maxclients
= 0;
1514 server
.blpop_blocked_clients
= 0;
1515 server
.maxmemory
= 0;
1516 server
.vm_enabled
= 0;
1517 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1518 server
.vm_page_size
= 256; /* 256 bytes per page */
1519 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1520 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1521 server
.vm_max_threads
= 4;
1522 server
.vm_blocked_clients
= 0;
1523 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1524 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
/* Start from an empty save point list, then add the defaults. */
1526 resetServerSaveParams();
1528 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1529 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1530 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1531 /* Replication related */
1533 server
.masterauth
= NULL
;
1534 server
.masterhost
= NULL
;
1535 server
.masterport
= 6379;
1536 server
.master
= NULL
;
1537 server
.replstate
= REDIS_REPL_NONE
;
1539 /* Double constants initialization */
1541 R_PosInf
= 1.0/R_Zero
;
1542 R_NegInf
= -1.0/R_Zero
;
1543 R_Nan
= R_Zero
/R_Zero
;
/* Bring the server to a runnable state once the configuration is known:
 * signal dispositions, the /dev/null stream, client/slave/monitor lists,
 * shared objects, the event loop, the listening socket, the per-database
 * dictionaries, runtime counters, the serverCron time event, the accept
 * handler, the append only file (when enabled) and virtual memory (when
 * enabled).
 * NOTE(review): the declaration of j, the statements executed after each
 * logged failure and several closing braces are on lines not visible in
 * this view. */
1546 static void initServer() {
/* SIGHUP and SIGPIPE are ignored. */
1549 signal(SIGHUP
, SIG_IGN
);
1550 signal(SIGPIPE
, SIG_IGN
);
1551 setupSigSegvAction();
1553 server
.devnull
= fopen("/dev/null","w");
1554 if (server
.devnull
== NULL
) {
/* NOTE(review): the message prints server.neterr, but no visible line
 * stores the fopen() failure there; strerror(errno) looks like what was
 * intended - confirm. */
1555 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1558 server
.clients
= listCreate();
1559 server
.slaves
= listCreate();
1560 server
.monitors
= listCreate();
1561 server
.objfreelist
= listCreate();
1562 createSharedObjects();
1563 server
.el
= aeCreateEventLoop();
1564 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1565 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
/* anetTcpServer() reports failures through server.neterr. */
1566 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1567 if (server
.fd
== -1) {
1568 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* One main dict, one expires dict and one blocking-keys dict per database;
 * io_keys is only created when virtual memory is enabled. */
1571 for (j
= 0; j
< server
.dbnum
; j
++) {
1572 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1573 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1574 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1575 if (server
.vm_enabled
)
1576 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1577 server
.db
[j
].id
= j
;
1579 server
.cronloops
= 0;
1580 server
.bgsavechildpid
= -1;
1581 server
.bgrewritechildpid
= -1;
1582 server
.bgrewritebuf
= sdsempty();
1583 server
.lastsave
= time(NULL
);
1585 server
.stat_numcommands
= 0;
1586 server
.stat_numconnections
= 0;
1587 server
.stat_expiredkeys
= 0;
1588 server
.stat_starttime
= time(NULL
);
1589 server
.unixtime
= time(NULL
);
/* Register the periodic serverCron callback and the accept handler for
 * the listening socket. */
1590 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1591 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1592 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1594 if (server
.appendonly
) {
/* The append only file is created if missing (mode 0644) and always
 * written in append mode. */
1595 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1596 if (server
.appendfd
== -1) {
1597 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1603 if (server
.vm_enabled
) vmInit();
1606 /* Empty the whole database */
/* For every database the number of keys in the main dict is added to
 * 'removed', then both the main dict and the expires dict are emptied.
 * NOTE(review): the declaration of j and the end of the function
 * (presumably returning 'removed' - confirm) are on lines not visible in
 * this view. */
1607 static long long emptyDb() {
1609 long long removed
= 0;
1611 for (j
= 0; j
< server
.dbnum
; j
++) {
/* Count before emptying, otherwise the size would already be zero. */
1612 removed
+= dictSize(server
.db
[j
].dict
);
1613 dictEmpty(server
.db
[j
].dict
);
1614 dictEmpty(server
.db
[j
].expires
);
/* Translate a configuration boolean to an int.
 * Returns 1 for "yes", 0 for "no" (both compared case-insensitively) and
 * -1 for anything else, which callers treat as a configuration error. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1625 /* I agree, this is a very rudimentary way to load a configuration...
1626 will improve later if the config gets more complex */
/* Read the configuration file 'filename' line by line and apply every
 * directive to the global `server` structure. A file name of "-" is
 * special-cased (the stream is compared with stdin before closing).
 * Each line is trimmed, comment and blank lines are skipped, the rest is
 * split on single spaces and the first word, lower-cased, selects the
 * directive. An invalid value or an unknown directive sets 'err' and jumps
 * to the loaderr label, where the line number, the offending line and the
 * message are printed to stderr.
 *
 * NOTE(review): many original lines of this function (declarations of fp,
 * linenum, line, argv, argc, j and logfp, several "else" branches, the
 * loaderr label itself, exits and closing braces) are not visible in this
 * view; comments below describe only the visible statements.
 * NOTE(review): numeric values are parsed with atoi()/strtol()/strtoll()
 * with no visible check for trailing garbage or overflow. */
1627 static void loadServerConfig(char *filename
) {
1629 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
/* The "can't open" message is formatted up front. The buffer is large
 * enough because the two characters of "%s" are replaced by the file name,
 * leaving room for the terminator.
 * NOTE(review): no visible line releases errorbuf. */
1632 char *errormsg
= "Fatal error, can't open config file '%s'";
1633 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1634 sprintf(errorbuf
, errormsg
, filename
);
1636 if (filename
[0] == '-' && filename
[1] == '\0')
1639 if ((fp
= fopen(filename
,"r")) == NULL
) {
/* NOTE(review): errorbuf contains the file name and is passed as the
 * format string; a '%' in the name would be interpreted by redisLog().
 * Passing it through "%s" would avoid that - confirm and fix. */
1640 redisLog(REDIS_WARNING
, errorbuf
);
1645 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1651 line
= sdstrim(line
," \t\r\n");
1653 /* Skip comments and blank lines*/
1654 if (line
[0] == '#' || line
[0] == '\0') {
1659 /* Split into arguments */
1660 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1661 sdstolower(argv
[0]);
1663 /* Execute config directives */
1664 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1665 server
.maxidletime
= atoi(argv
[1]);
1666 if (server
.maxidletime
< 0) {
1667 err
= "Invalid timeout value"; goto loaderr
;
1669 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1670 server
.port
= atoi(argv
[1]);
1671 if (server
.port
< 1 || server
.port
> 65535) {
1672 err
= "Invalid port"; goto loaderr
;
1674 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1675 server
.bindaddr
= zstrdup(argv
[1]);
/* "save <seconds> <changes>" appends one more save point. */
1676 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1677 int seconds
= atoi(argv
[1]);
1678 int changes
= atoi(argv
[2]);
1679 if (seconds
< 1 || changes
< 0) {
1680 err
= "Invalid save parameters"; goto loaderr
;
1682 appendServerSaveParams(seconds
,changes
);
/* "dir" changes the working directory immediately. */
1683 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1684 if (chdir(argv
[1]) == -1) {
1685 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1686 argv
[1], strerror(errno
));
1689 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1690 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1691 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1692 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1693 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
/* NOTE(review): the message below does not list "verbose", which is
 * accepted above. */
1695 err
= "Invalid log level. Must be one of debug, notice, warning";
1698 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1701 server
.logfile
= zstrdup(argv
[1]);
/* The special name "stdout" means "no logfile". */
1702 if (!strcasecmp(server
.logfile
,"stdout")) {
1703 zfree(server
.logfile
);
1704 server
.logfile
= NULL
;
1706 if (server
.logfile
) {
1707 /* Test if we are able to open the file. The server will not
1708 * be able to abort just for this problem later... */
1709 logfp
= fopen(server
.logfile
,"a");
1710 if (logfp
== NULL
) {
1711 err
= sdscatprintf(sdsempty(),
1712 "Can't open the log file: %s", strerror(errno
));
1717 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1718 server
.dbnum
= atoi(argv
[1]);
1719 if (server
.dbnum
< 1) {
1720 err
= "Invalid number of databases"; goto loaderr
;
/* "include" loads another file recursively. */
1722 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1723 loadServerConfig(argv
[1]);
1724 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1725 server
.maxclients
= atoi(argv
[1]);
1726 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1727 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
/* "slaveof <host> <port>" also arms the replication state machine. */
1728 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1729 server
.masterhost
= sdsnew(argv
[1]);
1730 server
.masterport
= atoi(argv
[2]);
1731 server
.replstate
= REDIS_REPL_CONNECT
;
1732 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1733 server
.masterauth
= zstrdup(argv
[1]);
/* The yes/no directives below store the yesnotoi() result directly and
 * reject the -1 ("neither yes nor no") outcome. */
1734 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1735 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1736 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1738 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1739 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1740 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1742 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1743 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1744 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1746 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1747 server
.sharingpoolsize
= atoi(argv
[1]);
1748 if (server
.sharingpoolsize
< 1) {
1749 err
= "invalid object sharing pool size"; goto loaderr
;
1751 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1752 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1753 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1755 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1756 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1757 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1759 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1760 if (!strcasecmp(argv
[1],"no")) {
1761 server
.appendfsync
= APPENDFSYNC_NO
;
1762 } else if (!strcasecmp(argv
[1],"always")) {
1763 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1764 } else if (!strcasecmp(argv
[1],"everysec")) {
1765 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1767 err
= "argument must be 'no', 'always' or 'everysec'";
1770 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1771 server
.requirepass
= zstrdup(argv
[1]);
/* The defaults of the three file-name settings below are heap copies made
 * in initServerConfig(), so the old value is freed before replacing it. */
1772 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1773 zfree(server
.pidfile
);
1774 server
.pidfile
= zstrdup(argv
[1]);
1775 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1776 zfree(server
.dbfilename
);
1777 server
.dbfilename
= zstrdup(argv
[1]);
1778 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1779 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1780 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1782 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1783 zfree(server
.vm_swap_file
);
1784 server
.vm_swap_file
= zstrdup(argv
[1]);
1785 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1786 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1787 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1788 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1789 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1790 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1791 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1792 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1793 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1794 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1795 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1796 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
/* NOTE(review): this second "vm-max-threads" branch can never be taken
 * because an identical test appears earlier in the chain. */
1797 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1798 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1800 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1802 for (j
= 0; j
< argc
; j
++)
/* Only a stream opened by this function is closed; stdin is left open. */
1807 if (fp
!= stdin
) fclose(fp
);
/* Error report printed for a failed directive. */
1811 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1812 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1813 fprintf(stderr
, ">>> '%s'\n", line
);
1814 fprintf(stderr
, "%s\n", err
);
1818 static void freeClientArgv(redisClient
*c
) {
1821 for (j
= 0; j
< c
->argc
; j
++)
1822 decrRefCount(c
->argv
[j
]);
1823 for (j
= 0; j
< c
->mbargc
; j
++)
1824 decrRefCount(c
->mbargv
[j
]);
1829 static void freeClient(redisClient
*c
) {
1832 /* Note that if the client we are freeing is blocked in a blocking
1833 * call, we have to set querybuf to NULL *before* calling
1834 * unblockClientWaitingData(), so that processInputBuffer() does not get
1835 * called. Also it is important to remove the file events after
1836 * this, because this call adds the READABLE event. */
1837 sdsfree(c
->querybuf
);
1839 if (c
->flags
& REDIS_BLOCKED
)
1840 unblockClientWaitingData(c
);
1842 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1843 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1844 listRelease(c
->reply
);
1847 /* Remove from the list of clients */
1848 ln
= listSearchKey(server
.clients
,c
);
1849 redisAssert(ln
!= NULL
);
1850 listDelNode(server
.clients
,ln
);
1851 /* Remove from the list of clients waiting for swapped keys */
1852 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1853 ln
= listSearchKey(server
.io_ready_clients
,c
);
1855 listDelNode(server
.io_ready_clients
,ln
);
1856 server
.vm_blocked_clients
--;
1859 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1860 ln
= listFirst(c
->io_keys
);
1861 dontWaitForSwappedKey(c
,ln
->value
);
1863 listRelease(c
->io_keys
);
1865 if (c
->flags
& REDIS_SLAVE
) {
1866 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1868 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1869 ln
= listSearchKey(l
,c
);
1870 redisAssert(ln
!= NULL
);
1873 if (c
->flags
& REDIS_MASTER
) {
1874 server
.master
= NULL
;
1875 server
.replstate
= REDIS_REPL_CONNECT
;
1879 freeClientMultiState(c
);
1883 #define GLUEREPLY_UP_TO (1024)
1884 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1886 char buf
[GLUEREPLY_UP_TO
];
1891 listRewind(c
->reply
,&li
);
1892 while((ln
= listNext(&li
))) {
1896 objlen
= sdslen(o
->ptr
);
1897 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1898 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1900 listDelNode(c
->reply
,ln
);
1902 if (copylen
== 0) return;
1906 /* Now the output buffer is empty, add the new single element */
1907 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1908 listAddNodeHead(c
->reply
,o
);
1911 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1912 redisClient
*c
= privdata
;
1913 int nwritten
= 0, totwritten
= 0, objlen
;
1916 REDIS_NOTUSED(mask
);
1918 /* Use writev() if we have enough buffers to send */
1919 if (!server
.glueoutputbuf
&&
1920 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1921 !(c
->flags
& REDIS_MASTER
))
1923 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1927 while(listLength(c
->reply
)) {
1928 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1929 glueReplyBuffersIfNeeded(c
);
1931 o
= listNodeValue(listFirst(c
->reply
));
1932 objlen
= sdslen(o
->ptr
);
1935 listDelNode(c
->reply
,listFirst(c
->reply
));
1939 if (c
->flags
& REDIS_MASTER
) {
1940 /* Don't reply to a master */
1941 nwritten
= objlen
- c
->sentlen
;
1943 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1944 if (nwritten
<= 0) break;
1946 c
->sentlen
+= nwritten
;
1947 totwritten
+= nwritten
;
1948 /* If we fully sent the object on head go to the next one */
1949 if (c
->sentlen
== objlen
) {
1950 listDelNode(c
->reply
,listFirst(c
->reply
));
1953 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
1954 * bytes, in a single threaded server it's a good idea to serve
1955 * other clients as well, even if a very large request comes from a
1956 * super fast link that is always able to accept data (in real world
1957 * scenario think about 'KEYS *' against the loopback interface) */
1958 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1960 if (nwritten
== -1) {
1961 if (errno
== EAGAIN
) {
1964 redisLog(REDIS_VERBOSE
,
1965 "Error writing to client: %s", strerror(errno
));
1970 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1971 if (listLength(c
->reply
) == 0) {
1973 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1977 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
1979 redisClient
*c
= privdata
;
1980 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
1982 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
1983 int offset
, ion
= 0;
1985 REDIS_NOTUSED(mask
);
1988 while (listLength(c
->reply
)) {
1989 offset
= c
->sentlen
;
1993 /* fill-in the iov[] array */
1994 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
1995 o
= listNodeValue(node
);
1996 objlen
= sdslen(o
->ptr
);
1998 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2001 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2002 break; /* no more iovecs */
2004 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2005 iov
[ion
].iov_len
= objlen
- offset
;
2006 willwrite
+= objlen
- offset
;
2007 offset
= 0; /* just for the first item */
2014 /* write all collected blocks at once */
2015 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2016 if (errno
!= EAGAIN
) {
2017 redisLog(REDIS_VERBOSE
,
2018 "Error writing to client: %s", strerror(errno
));
2025 totwritten
+= nwritten
;
2026 offset
= c
->sentlen
;
2028 /* remove written robjs from c->reply */
2029 while (nwritten
&& listLength(c
->reply
)) {
2030 o
= listNodeValue(listFirst(c
->reply
));
2031 objlen
= sdslen(o
->ptr
);
2033 if(nwritten
>= objlen
- offset
) {
2034 listDelNode(c
->reply
, listFirst(c
->reply
));
2035 nwritten
-= objlen
- offset
;
2039 c
->sentlen
+= nwritten
;
2047 c
->lastinteraction
= time(NULL
);
2049 if (listLength(c
->reply
) == 0) {
2051 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2055 static struct redisCommand
*lookupCommand(char *name
) {
2057 while(cmdTable
[j
].name
!= NULL
) {
2058 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2064 /* resetClient prepare the client to process the next command */
2065 static void resetClient(redisClient
*c
) {
2071 /* Call() is the core of Redis execution of a command */
2072 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2075 dirty
= server
.dirty
;
2077 if (server
.appendonly
&& server
.dirty
-dirty
)
2078 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2079 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2080 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2081 if (listLength(server
.monitors
))
2082 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2083 server
.stat_numcommands
++;
2086 /* If this function gets called we already read a whole
2087 * command, arguments are in the client argv/argc fields.
2088 * processCommand() executes the command or prepares the
2089 * server for a bulk read from the client.
2091 * If 1 is returned the client is still alive and valid
2092 * and other operations can be performed by the caller. Otherwise
2093 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2094 static int processCommand(redisClient
*c
) {
2095 struct redisCommand
*cmd
;
2097 /* Free some memory if needed (maxmemory setting) */
2098 if (server
.maxmemory
) freeMemoryIfNeeded();
2100 /* Handle the multi bulk command type. This is an alternative protocol
2101 * supported by Redis in order to receive commands that are composed of
2102 * multiple binary-safe "bulk" arguments. The latency of processing is
2103 * a bit higher but this allows things like multi-sets, so if this
2104 * protocol is used only for MSET and similar commands this is a big win. */
2105 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2106 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2107 if (c
->multibulk
<= 0) {
2111 decrRefCount(c
->argv
[c
->argc
-1]);
2115 } else if (c
->multibulk
) {
2116 if (c
->bulklen
== -1) {
2117 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2118 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2122 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2123 decrRefCount(c
->argv
[0]);
2124 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2126 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2131 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2135 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2136 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2140 if (c
->multibulk
== 0) {
2144 /* Here we need to swap the multi-bulk argc/argv with the
2145 * normal argc/argv of the client structure. */
2147 c
->argv
= c
->mbargv
;
2148 c
->mbargv
= auxargv
;
2151 c
->argc
= c
->mbargc
;
2152 c
->mbargc
= auxargc
;
2154 /* We need to set bulklen to something different than -1
2155 * in order for the code below to process the command without
2156 * trying to read the last argument of a bulk command as
2157 * a special argument. */
2159 /* continue below and process the command */
2166 /* -- end of multi bulk commands processing -- */
2168 /* The QUIT command is handled as a special case. Normal command
2169 * procs are unable to close the client connection safely */
2170 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2175 /* Now lookup the command and check ASAP about trivial error conditions
2176 * such wrong arity, bad command name and so forth. */
2177 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2180 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2181 (char*)c
->argv
[0]->ptr
));
2184 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2185 (c
->argc
< -cmd
->arity
)) {
2187 sdscatprintf(sdsempty(),
2188 "-ERR wrong number of arguments for '%s' command\r\n",
2192 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2193 /* This is a bulk command, we have to read the last argument yet. */
2194 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2196 decrRefCount(c
->argv
[c
->argc
-1]);
2197 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2199 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2204 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2205 /* It is possible that the bulk read is already in the
2206 * buffer. Check this condition and handle it accordingly.
2207 * This is just a fast path, alternative to call processInputBuffer().
2208 * It's a good idea since the code is small and this condition
2209 * happens most of the times. */
2210 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2211 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2213 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2215 /* Otherwise return... there is to read the last argument
2216 * from the socket. */
2220 /* Let's try to share objects on the command arguments vector */
2221 if (server
.shareobjects
) {
2223 for(j
= 1; j
< c
->argc
; j
++)
2224 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2226 /* Let's try to encode the bulk object to save space. */
2227 if (cmd
->flags
& REDIS_CMD_BULK
)
2228 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2230 /* Check if the user is authenticated */
2231 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2232 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2237 /* Handle the maxmemory directive */
2238 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2239 zmalloc_used_memory() > server
.maxmemory
)
2241 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2246 /* Exec the command */
2247 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2248 queueMultiCommand(c
,cmd
);
2249 addReply(c
,shared
.queued
);
2251 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2252 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2256 /* Prepare the client for the next command */
2261 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2266 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2267 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2268 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2269 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2272 if (argc
<= REDIS_STATIC_ARGS
) {
2275 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2278 lenobj
= createObject(REDIS_STRING
,
2279 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2280 lenobj
->refcount
= 0;
2281 outv
[outc
++] = lenobj
;
2282 for (j
= 0; j
< argc
; j
++) {
2283 lenobj
= createObject(REDIS_STRING
,
2284 sdscatprintf(sdsempty(),"$%lu\r\n",
2285 (unsigned long) stringObjectLen(argv
[j
])));
2286 lenobj
->refcount
= 0;
2287 outv
[outc
++] = lenobj
;
2288 outv
[outc
++] = argv
[j
];
2289 outv
[outc
++] = shared
.crlf
;
2292 /* Increment all the refcounts at start and decrement at end in order to
2293 * be sure to free objects if there is no slave in a replication state
2294 * able to be fed with commands */
2295 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2296 listRewind(slaves
,&li
);
2297 while((ln
= listNext(&li
))) {
2298 redisClient
*slave
= ln
->value
;
2300 /* Don't feed slaves that are still waiting for BGSAVE to start */
2301 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2303 /* Feed all the other slaves, MONITORs and so on */
2304 if (slave
->slaveseldb
!= dictid
) {
2308 case 0: selectcmd
= shared
.select0
; break;
2309 case 1: selectcmd
= shared
.select1
; break;
2310 case 2: selectcmd
= shared
.select2
; break;
2311 case 3: selectcmd
= shared
.select3
; break;
2312 case 4: selectcmd
= shared
.select4
; break;
2313 case 5: selectcmd
= shared
.select5
; break;
2314 case 6: selectcmd
= shared
.select6
; break;
2315 case 7: selectcmd
= shared
.select7
; break;
2316 case 8: selectcmd
= shared
.select8
; break;
2317 case 9: selectcmd
= shared
.select9
; break;
2319 selectcmd
= createObject(REDIS_STRING
,
2320 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2321 selectcmd
->refcount
= 0;
2324 addReply(slave
,selectcmd
);
2325 slave
->slaveseldb
= dictid
;
2327 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2329 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2330 if (outv
!= static_outv
) zfree(outv
);
2333 static void processInputBuffer(redisClient
*c
) {
2335 /* Before processing the input buffer, make sure the client is not
2336 * waiting for a blocking operation such as BLPOP. Note that on the first
2337 * iteration the client is never blocked, otherwise processInputBuffer()
2338 * would not be called at all, but after the execution of the first commands
2339 * in the input buffer the client may be blocked, and the "goto again"
2340 * will try to reiterate. The following line will make it return asap. */
2341 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2342 if (c
->bulklen
== -1) {
2343 /* Read the first line of the query */
2344 char *p
= strchr(c
->querybuf
,'\n');
2351 query
= c
->querybuf
;
2352 c
->querybuf
= sdsempty();
2353 querylen
= 1+(p
-(query
));
2354 if (sdslen(query
) > querylen
) {
2355 /* leave data after the first line of the query in the buffer */
2356 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2358 *p
= '\0'; /* remove "\n" */
2359 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2360 sdsupdatelen(query
);
2362 /* Now we can split the query in arguments */
2363 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2366 if (c
->argv
) zfree(c
->argv
);
2367 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2369 for (j
= 0; j
< argc
; j
++) {
2370 if (sdslen(argv
[j
])) {
2371 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2379 /* Execute the command. If the client is still valid
2380 * after processCommand() returns and there is something
2381 * on the query buffer try to process the next command. */
2382 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2384 /* Nothing to process, argc == 0. Just process the query
2385 * buffer if it's not empty or return to the caller */
2386 if (sdslen(c
->querybuf
)) goto again
;
2389 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2390 redisLog(REDIS_VERBOSE
, "Client protocol error");
2395 /* Bulk read handling. Note that if we are at this point
2396 the client already sent a command terminated with a newline,
2397 we are reading the bulk data that is actually the last
2398 argument of the command. */
2399 int qbl
= sdslen(c
->querybuf
);
2401 if (c
->bulklen
<= qbl
) {
2402 /* Copy everything but the final CRLF as final argument */
2403 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2405 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2406 /* Process the command. If the client is still valid after
2407 * the processing and there is more data in the buffer
2408 * try to parse it. */
2409 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2415 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2416 redisClient
*c
= (redisClient
*) privdata
;
2417 char buf
[REDIS_IOBUF_LEN
];
2420 REDIS_NOTUSED(mask
);
2422 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2424 if (errno
== EAGAIN
) {
2427 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2431 } else if (nread
== 0) {
2432 redisLog(REDIS_VERBOSE
, "Client closed connection");
2437 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2438 c
->lastinteraction
= time(NULL
);
2442 processInputBuffer(c
);
2445 static int selectDb(redisClient
*c
, int id
) {
2446 if (id
< 0 || id
>= server
.dbnum
)
2448 c
->db
= &server
.db
[id
];
2452 static void *dupClientReplyValue(void *o
) {
2453 incrRefCount((robj
*)o
);
2457 static redisClient
*createClient(int fd
) {
2458 redisClient
*c
= zmalloc(sizeof(*c
));
2460 anetNonBlock(NULL
,fd
);
2461 anetTcpNoDelay(NULL
,fd
);
2462 if (!c
) return NULL
;
2465 c
->querybuf
= sdsempty();
2474 c
->lastinteraction
= time(NULL
);
2475 c
->authenticated
= 0;
2476 c
->replstate
= REDIS_REPL_NONE
;
2477 c
->reply
= listCreate();
2478 listSetFreeMethod(c
->reply
,decrRefCount
);
2479 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2480 c
->blockingkeys
= NULL
;
2481 c
->blockingkeysnum
= 0;
2482 c
->io_keys
= listCreate();
2483 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2484 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2485 readQueryFromClient
, c
) == AE_ERR
) {
2489 listAddNodeTail(server
.clients
,c
);
2490 initClientMultiState(c
);
2494 static void addReply(redisClient
*c
, robj
*obj
) {
2495 if (listLength(c
->reply
) == 0 &&
2496 (c
->replstate
== REDIS_REPL_NONE
||
2497 c
->replstate
== REDIS_REPL_ONLINE
) &&
2498 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2499 sendReplyToClient
, c
) == AE_ERR
) return;
2501 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2502 obj
= dupStringObject(obj
);
2503 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2505 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2508 static void addReplySds(redisClient
*c
, sds s
) {
2509 robj
*o
= createObject(REDIS_STRING
,s
);
2514 static void addReplyDouble(redisClient
*c
, double d
) {
2517 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2518 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2519 (unsigned long) strlen(buf
),buf
));
2522 static void addReplyLong(redisClient
*c
, long l
) {
2527 addReply(c
,shared
.czero
);
2529 } else if (l
== 1) {
2530 addReply(c
,shared
.cone
);
2533 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2534 addReplySds(c
,sdsnewlen(buf
,len
));
2537 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2542 addReply(c
,shared
.czero
);
2544 } else if (ul
== 1) {
2545 addReply(c
,shared
.cone
);
2548 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2549 addReplySds(c
,sdsnewlen(buf
,len
));
2552 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2555 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2556 len
= sdslen(obj
->ptr
);
2558 long n
= (long)obj
->ptr
;
2560 /* Compute how many bytes will take this integer as a radix 10 string */
2566 while((n
= n
/10) != 0) {
2570 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2573 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2574 addReplyBulkLen(c
,obj
);
2576 addReply(c
,shared
.crlf
);
2579 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2580 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2582 addReply(c
,shared
.nullbulk
);
2584 robj
*o
= createStringObject(s
,strlen(s
));
2590 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2595 REDIS_NOTUSED(mask
);
2596 REDIS_NOTUSED(privdata
);
2598 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2599 if (cfd
== AE_ERR
) {
2600 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2603 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2604 if ((c
= createClient(cfd
)) == NULL
) {
2605 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2606 close(cfd
); /* May be already closed, just ingore errors */
2609 /* If maxclient directive is set and this is one client more... close the
2610 * connection. Note that we create the client first instead of checking
2611 * for this condition beforehand, since now the socket is already set in
2612 * nonblocking mode and we can send an error for free using the Kernel I/O */
2613 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2614 char *err
= "-ERR max number of clients reached\r\n";
2616 /* That's a best effort error message, don't check write errors */
2617 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2618 /* Nothing to do, Just to avoid the warning... */
2623 server
.stat_numconnections
++;
2626 /* ======================= Redis objects implementation ===================== */
2628 static robj
*createObject(int type
, void *ptr
) {
2631 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2632 if (listLength(server
.objfreelist
)) {
2633 listNode
*head
= listFirst(server
.objfreelist
);
2634 o
= listNodeValue(head
);
2635 listDelNode(server
.objfreelist
,head
);
2636 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2638 if (server
.vm_enabled
) {
2639 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2640 o
= zmalloc(sizeof(*o
));
2642 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2646 o
->encoding
= REDIS_ENCODING_RAW
;
2649 if (server
.vm_enabled
) {
2650 /* Note that this code may run in the context of an I/O thread
2651 * and accessing server.unixtime in theory is an error
2652 * (no locks). But in practice this is safe, and even if we read
2653 * garbage Redis will not fail, as it's just a statistical info */
2654 o
->vm
.atime
= server
.unixtime
;
2655 o
->storage
= REDIS_VM_MEMORY
;
2660 static robj
*createStringObject(char *ptr
, size_t len
) {
2661 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2664 static robj
*dupStringObject(robj
*o
) {
2665 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2666 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2669 static robj
*createListObject(void) {
2670 list
*l
= listCreate();
2672 listSetFreeMethod(l
,decrRefCount
);
2673 return createObject(REDIS_LIST
,l
);
2676 static robj
*createSetObject(void) {
2677 dict
*d
= dictCreate(&setDictType
,NULL
);
2678 return createObject(REDIS_SET
,d
);
2681 static robj
*createHashObject(void) {
2682 /* All the Hashes start as zipmaps. Will be automatically converted
2683 * into hash tables if there are enough elements or big elements
2685 unsigned char *zm
= zipmapNew();
2686 robj
*o
= createObject(REDIS_HASH
,zm
);
2687 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2691 static robj
*createZsetObject(void) {
2692 zset
*zs
= zmalloc(sizeof(*zs
));
2694 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2695 zs
->zsl
= zslCreate();
2696 return createObject(REDIS_ZSET
,zs
);
2699 static void freeStringObject(robj
*o
) {
2700 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2705 static void freeListObject(robj
*o
) {
2706 listRelease((list
*) o
->ptr
);
2709 static void freeSetObject(robj
*o
) {
2710 dictRelease((dict
*) o
->ptr
);
2713 static void freeZsetObject(robj
*o
) {
2716 dictRelease(zs
->dict
);
2721 static void freeHashObject(robj
*o
) {
2722 switch (o
->encoding
) {
2723 case REDIS_ENCODING_HT
:
2724 dictRelease((dict
*) o
->ptr
);
2726 case REDIS_ENCODING_ZIPMAP
:
2735 static void incrRefCount(robj
*o
) {
2736 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2740 static void decrRefCount(void *obj
) {
2743 /* Object is a key of a swapped out value, or in the process of being
2745 if (server
.vm_enabled
&&
2746 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2748 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2749 redisAssert(o
->refcount
== 1);
2751 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2752 redisAssert(o
->type
== REDIS_STRING
);
2753 freeStringObject(o
);
2754 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2755 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2756 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2757 !listAddNodeHead(server
.objfreelist
,o
))
2759 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2760 server
.vm_stats_swapped_objects
--;
2763 /* Object is in memory, or in the process of being swapped out. */
2764 if (--(o
->refcount
) == 0) {
2765 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2766 vmCancelThreadedIOJob(obj
);
2768 case REDIS_STRING
: freeStringObject(o
); break;
2769 case REDIS_LIST
: freeListObject(o
); break;
2770 case REDIS_SET
: freeSetObject(o
); break;
2771 case REDIS_ZSET
: freeZsetObject(o
); break;
2772 case REDIS_HASH
: freeHashObject(o
); break;
2773 default: redisAssert(0); break;
2775 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2776 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2777 !listAddNodeHead(server
.objfreelist
,o
))
2779 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2783 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2784 dictEntry
*de
= dictFind(db
->dict
,key
);
2786 robj
*key
= dictGetEntryKey(de
);
2787 robj
*val
= dictGetEntryVal(de
);
2789 if (server
.vm_enabled
) {
2790 if (key
->storage
== REDIS_VM_MEMORY
||
2791 key
->storage
== REDIS_VM_SWAPPING
)
2793 /* If we were swapping the object out, stop it, this key
2795 if (key
->storage
== REDIS_VM_SWAPPING
)
2796 vmCancelThreadedIOJob(key
);
2797 /* Update the access time of the key for the aging algorithm. */
2798 key
->vm
.atime
= server
.unixtime
;
2800 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2802 /* Our value was swapped on disk. Bring it at home. */
2803 redisAssert(val
== NULL
);
2804 val
= vmLoadObject(key
);
2805 dictGetEntryVal(de
) = val
;
2807 /* Clients blocked by the VM subsystem may be waiting for
2809 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2818 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2819 expireIfNeeded(db
,key
);
2820 return lookupKey(db
,key
);
2823 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2824 deleteIfVolatile(db
,key
);
2825 return lookupKey(db
,key
);
2828 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2829 robj
*o
= lookupKeyRead(c
->db
, key
);
2830 if (!o
) addReply(c
,reply
);
2834 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2835 robj
*o
= lookupKeyWrite(c
->db
, key
);
2836 if (!o
) addReply(c
,reply
);
2840 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2841 if (o
->type
!= type
) {
2842 addReply(c
,shared
.wrongtypeerr
);
2848 static int deleteKey(redisDb
*db
, robj
*key
) {
2851 /* We need to protect key from destruction: after the first dictDelete()
2852 * it may happen that 'key' is no longer valid if we don't increment
2853 * its count. This may happen when we get the object reference directly
2854 * from the hash table with dictRandomKey() or dict iterators */
2856 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2857 retval
= dictDelete(db
->dict
,key
);
2860 return retval
== DICT_OK
;
2863 /* Try to share an object against the shared objects pool */
2864 static robj
*tryObjectSharing(robj
*o
) {
2865 struct dictEntry
*de
;
2868 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2870 redisAssert(o
->type
== REDIS_STRING
);
2871 de
= dictFind(server
.sharingpool
,o
);
2873 robj
*shared
= dictGetEntryKey(de
);
2875 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2876 dictGetEntryVal(de
) = (void*) c
;
2877 incrRefCount(shared
);
2881 /* Here we are using a stream algorithm: Every time an object is
2882 * shared we increment its count, every time there is a miss we
2883 * decrement the counter of a random object. If this object reaches
2884 * zero we remove the object and put the current object instead. */
2885 if (dictSize(server
.sharingpool
) >=
2886 server
.sharingpoolsize
) {
2887 de
= dictGetRandomKey(server
.sharingpool
);
2888 redisAssert(de
!= NULL
);
2889 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2890 dictGetEntryVal(de
) = (void*) c
;
2892 dictDelete(server
.sharingpool
,de
->key
);
2895 c
= 0; /* If the pool is empty we want to add this object */
2900 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2901 redisAssert(retval
== DICT_OK
);
2908 /* Check if the nul-terminated string 's' can be represented by a long
2909 * (that is, is a number that fits into long without any other space or
2910 * character before or after the digits).
2912 * If so, the function returns REDIS_OK and *longval is set to the value
2913 * of the number. Otherwise REDIS_ERR is returned */
2914 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2915 char buf
[32], *endptr
;
2919 value
= strtol(s
, &endptr
, 10);
2920 if (endptr
[0] != '\0') return REDIS_ERR
;
2921 slen
= snprintf(buf
,32,"%ld",value
);
2923 /* If the number converted back into a string is not identical
2924 * then it's not possible to encode the string as integer */
2925 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2926 if (longval
) *longval
= value
;
2930 /* Try to encode a string object in order to save space */
2931 static int tryObjectEncoding(robj
*o
) {
2935 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2936 return REDIS_ERR
; /* Already encoded */
2938 /* It's not safe to encode shared objects: shared objects can be shared
2939 * everywhere in the "object space" of Redis. Encoded objects can only
2940 * appear as "values" (and not, for instance, as keys) */
2941 if (o
->refcount
> 1) return REDIS_ERR
;
2943 /* Currently we try to encode only strings */
2944 redisAssert(o
->type
== REDIS_STRING
);
2946 /* Check if we can represent this string as a long integer */
2947 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2949 /* Ok, this object can be encoded */
2950 o
->encoding
= REDIS_ENCODING_INT
;
2952 o
->ptr
= (void*) value
;
2956 /* Get a decoded version of an encoded object (returned as a new object).
2957 * If the object is already raw-encoded just increment the ref count. */
2958 static robj
*getDecodedObject(robj
*o
) {
2961 if (o
->encoding
== REDIS_ENCODING_RAW
) {
/* Integer-encoded string: o->ptr holds the long itself, so it is printed
 * into 'buf' and a new string object is created from that text. */
2965 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2968 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2969 dec
= createStringObject(buf
,strlen(buf
));
/* Always-failing assertion; presumably only reached when neither of the
 * cases above applied (the surrounding branch lines are not visible). */
2972 redisAssert(1 != 1);
2976 /* Compare two string objects via strcmp() or alike.
2977 * Note that the objects may be integer-encoded. In such a case we
2978 * use snprintf() to get a string representation of the numbers on the stack
2979 * and compare the strings, it's much faster than calling getDecodedObject().
2981 * Important note: if objects are not integer encoded, but binary-safe strings,
2982 * sdscmp() from sds.c will apply memcmp() so this function can be considered
 * binary safe. */
/* Compare the string objects 'a' and 'b'; both must be REDIS_STRING
 * (asserted). Returns 0 at once when they are the same object, otherwise
 * the result of sdscmp() when 'bothsds' is set, or of strcmp() when it is
 * not. Non-raw objects are rendered with "%ld" into a stack buffer first. */
2984 static int compareStringObjects(robj
*a
, robj
*b
) {
2985 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
2986 char bufa
[128], bufb
[128], *astr
, *bstr
;
/* Same object: equal by definition, nothing to decode. */
2989 if (a
== b
) return 0;
/* Non-raw 'a': print the long stored in a->ptr into bufa. */
2990 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
2991 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
/* Same treatment for 'b', using bufb. */
2997 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
2998 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3004 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
/* Return the length of the string held by 'o', which must be a
 * REDIS_STRING (asserted). For a raw object this is sdslen(o->ptr); in the
 * other visible path it is the character count snprintf() reports when
 * printing the long stored in o->ptr with "%ld". */
3007 static size_t stringObjectLen(robj
*o
) {
3008 redisAssert(o
->type
== REDIS_STRING
);
3009 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3010 return sdslen(o
->ptr
);
/* snprintf() is used only for its return value here. */
3014 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3018 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns -1 when fwrite() reports that nothing was written. */
3020 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3021 if (fwrite(&type
,1,1,fp
) == 0) return -1;
/* Write the time 't' to 'fp' as four bytes.
 * The value is narrowed to int32_t first and the in-memory bytes of that
 * 32 bit value are written as they are, so times outside the int32_t
 * range are not preserved. Returns -1 when fwrite() writes nothing. */
3025 static int rdbSaveTime(FILE *fp
, time_t t
) {
3026 int32_t t32
= (int32_t) t
;
3027 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3031 /* check rdbLoadLen() comments for more info.
 *
 * Write the length 'len' to 'fp' using one of three layouts; in every
 * layout the two most significant bits of the first byte carry the
 * REDIS_RDB_*LEN tag:
 *  - 6 bit len:  one byte, tag plus the length bits;
 *  - 14 bit len: two bytes, used when len < (1<<14);
 *  - 32 bit len: a tag-only byte followed by four bytes taken from 'len'.
 * Returns -1 as soon as one fwrite() writes nothing. */
3032 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3033 unsigned char buf
[2];
3036 /* Save a 6 bit len */
3037 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3038 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3039 } else if (len
< (1<<14)) {
3040 /* Save a 14 bit len */
3041 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3043 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3045 /* Save a 32 bit len */
3046 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3047 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3049 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3054 /* String objects in the form "2391" "-100" without any space and with a
3055 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3056 * encoded as integers to save space.
 *
 * 's' is the candidate string and 'len' its length. On a successful
 * encoding enc[0] receives the REDIS_RDB_ENCVAL tag combined with the
 * integer width, and the following 1, 2 or 4 bytes receive the value,
 * least significant byte first; 'enc' therefore needs room for 5 bytes.
 * 0 is returned when the string is not the exact "%lld" rendering of a
 * number. */
3057 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3059 char *endptr
, buf
[32];
3061 /* Check if it's possible to encode this value as a number */
3062 value
= strtoll(s
, &endptr
, 10);
3063 if (endptr
[0] != '\0') return 0;
3064 snprintf(buf
,32,"%lld",value
);
3066 /* If the number converted back into a string is not identical
3067 * then it's not possible to encode the string as integer */
3068 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3070 /* Finally check if it fits in our ranges */
3071 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3072 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3073 enc
[1] = value
&0xFF;
3075 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3076 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3077 enc
[1] = value
&0xFF;
3078 enc
[2] = (value
>>8)&0xFF;
/* The 32 bit bounds are computed in long long so that 1<<31 is not
 * evaluated in int. */
3080 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3081 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3082 enc
[1] = value
&0xFF;
3083 enc
[2] = (value
>>8)&0xFF;
3084 enc
[3] = (value
>>16)&0xFF;
3085 enc
[4] = (value
>>24)&0xFF;
/* Try to save the 'len' bytes at 's' to 'fp' in LZF-compressed form.
 * What is written, in order: a byte tagging the value as
 * REDIS_RDB_ENC_LZF, the compressed length, the original length, and the
 * compressed bytes. 0 is returned for inputs of 4 bytes or less and when
 * the output buffer cannot be allocated; write failures jump to the
 * 'writeerr' label (not visible here). */
3092 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3093 size_t comprlen
, outlen
;
3097 /* We require at least four bytes compression for this to be worth it */
3098 if (len
<= 4) return 0;
3100 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3101 comprlen
= lzf_compress(s
, len
, out
, outlen
);
/* A zero result from lzf_compress() is handled separately from success. */
3102 if (comprlen
== 0) {
3106 /* Data compressed! Let's save it on disk */
3107 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3108 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3109 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3110 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3111 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3120 /* Save a string object as [len][data] on disk. If the object is a string
3121 * representation of an integer value we try to save it in a special form.
 *
 * Three forms are tried in order: the integer encoding produced by
 * rdbTryIntegerEncoding(), LZF compression (only when
 * server.rdbcompression is set and len > 20), and finally the verbatim
 * length-prefixed bytes. Returns -1 on a write error. */
3122 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3125 /* Try integer encoding */
3127 unsigned char buf
[5];
/* A positive result is the number of bytes of 'buf' to write. */
3128 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3129 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3134 /* Try LZF compression - under 20 bytes it's unable to compress even
3135 * aaaaaaaaaaaaaaaaaa so skip it */
3136 if (server
.rdbcompression
&& len
> 20) {
3139 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3140 if (retval
== -1) return -1;
3141 if (retval
> 0) return 0;
3142 /* retval == 0 means data can't be compressed, save the old way */
3145 /* Store verbatim */
3146 if (rdbSaveLen(fp
,len
) == -1) return -1;
/* Nothing is written for an empty string: only the length goes out. */
3147 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3151 /* Like rdbSaveRawString() but handle encoded objects: a non-raw object
 * is decoded with getDecodedObject() first, a raw one is passed to
 * rdbSaveRawString() as it is. */
3152 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3155 /* Avoid incr/decr ref count business when possible.
3156 * This plays well with copy-on-write given that we are probably
3157 * in a child process (BGSAVE). Also this makes sure key objects
3158 * of swapped objects are not incRefCount-ed (an assert does not allow
3159 * this in order to avoid bugs) */
3160 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3161 obj
= getDecodedObject(obj
);
3162 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Raw object: save obj->ptr directly, no decoded copy involved. */
3165 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3170 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3171 * 8 bit integer specifying the length of the representation.
3172 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 */
/* Write the double 'val' to 'fp'. buf[0] is the prefix byte and the text
 * form, when there is one, starts at buf[1]. Returns -1 when fwrite()
 * writes nothing. */
3178 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3179 unsigned char buf
[128];
/* Non-finite value handled here: prefix 255 when negative, 254 otherwise. */
3185 } else if (!isfinite(val
)) {
3187 buf
[0] = (val
< 0) ? 255 : 254;
/* Ordinary value: "%.17g" text after the prefix byte, whose value is the
 * length of that text. */
3189 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3190 buf
[0] = strlen((char*)buf
+1);
3193 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3197 /* Save a Redis object.
 *
 * The value 'o' is written to 'fp' according to o->type: a string is
 * saved with rdbSaveStringObject(); lists, sets, sorted sets and hashes
 * are saved as an element count (rdbSaveLen) followed by every element.
 * -1 is returned as soon as one of the writers fails. On those error
 * paths the dict iterator obtained by the set, sorted set and hash table
 * branches is released before returning, so a failed save does not leak
 * it. */
3198 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3199 if (o
->type
== REDIS_STRING
) {
3200 /* Save a string value */
3201 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3202 } else if (o
->type
== REDIS_LIST
) {
3203 /* Save a list value */
3204 list
*list
= o
->ptr
;
3208 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3209 listRewind(list
,&li
);
3210 while((ln
= listNext(&li
))) {
3211 robj
*eleobj
= listNodeValue(ln
);
3213 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3215 } else if (o
->type
== REDIS_SET
) {
3216 /* Save a set value */
3218 dictIterator
*di
= dictGetIterator(set
);
3221 if (rdbSaveLen(fp
,dictSize(set
)) == -1) { dictReleaseIterator(di); return -1; }
3222 while((de
= dictNext(di
)) != NULL
) {
3223 robj
*eleobj
= dictGetEntryKey(de
);
3225 if (rdbSaveStringObject(fp
,eleobj
) == -1) { dictReleaseIterator(di); return -1; }
3227 dictReleaseIterator(di
);
3228 } else if (o
->type
== REDIS_ZSET
) {
3229 /* Save a sorted set value: every element is followed by its score */
3231 dictIterator
*di
= dictGetIterator(zs
->dict
);
3234 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) { dictReleaseIterator(di); return -1; }
3235 while((de
= dictNext(di
)) != NULL
) {
3236 robj
*eleobj
= dictGetEntryKey(de
);
3237 double *score
= dictGetEntryVal(de
);
3239 if (rdbSaveStringObject(fp
,eleobj
) == -1) { dictReleaseIterator(di); return -1; }
3240 if (rdbSaveDoubleValue(fp
,*score
) == -1) { dictReleaseIterator(di); return -1; }
3242 dictReleaseIterator(di
);
3243 } else if (o
->type
== REDIS_HASH
) {
3244 /* Save a hash value */
3245 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
/* Zipmap: keys and values are raw byte ranges, saved with
 * rdbSaveRawString(); no iterator has to be released here. */
3246 unsigned char *p
= zipmapRewind(o
->ptr
);
3247 unsigned int count
= zipmapLen(o
->ptr
);
3248 unsigned char *key
, *val
;
3249 unsigned int klen
, vlen
;
3251 if (rdbSaveLen(fp
,count
) == -1) return -1;
3252 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3253 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3254 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
/* Hash table: keys and values are objects, saved as string objects. */
3257 dictIterator
*di
= dictGetIterator(o
->ptr
);
3260 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) { dictReleaseIterator(di); return -1; }
3261 while((de
= dictNext(di
)) != NULL
) {
3262 robj
*key
= dictGetEntryKey(de
);
3263 robj
*val
= dictGetEntryVal(de
);
3265 if (rdbSaveStringObject(fp
,key
) == -1) { dictReleaseIterator(di); return -1; }
3266 if (rdbSaveStringObject(fp
,val
) == -1) { dictReleaseIterator(di); return -1; }
3268 dictReleaseIterator(di
);
3276 /* Return the length the object will have on disk if saved with
3277 * the rdbSaveObject() function. Currently we use a trick to get
3278 * this length with very little changes to the code. In the future
3279 * we could switch to a faster solution.
 *
 * When 'fp' is NULL the object is written to server.devnull instead. */
3280 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3281 if (fp
== NULL
) fp
= server
.devnull
;
/* The save is performed outside of assert() so that it still happens
 * when assertions are compiled out with NDEBUG, and the result is
 * checked against -1, which is the value rdbSaveObject() returns on
 * error. */
3283 int saveret = rdbSaveObject(fp
,o
);
assert(saveret != -1);
(void) saveret; /* not otherwise used when NDEBUG removes the assert */
3287 /* Return the number of pages required to save this object in the swap file.
 * The byte length comes from rdbSavedObjectLen() and is divided by
 * server.vm_page_size, rounding up. */
3288 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3289 off_t bytes
= rdbSavedObjectLen(o
,fp
);
/* Adding (page size - 1) before dividing rounds the quotient up. */
3291 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3294 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3295 static int rdbSave(char *filename
) {
3296 dictIterator
*di
= NULL
;
3301 time_t now
= time(NULL
);
3303 /* Wait for I/O therads to terminate, just in case this is a
3304 * foreground-saving, to avoid seeking the swap file descriptor at the
3306 if (server
.vm_enabled
)
3307 waitEmptyIOJobsQueue();
3309 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3310 fp
= fopen(tmpfile
,"w");
3312 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3315 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3316 for (j
= 0; j
< server
.dbnum
; j
++) {
3317 redisDb
*db
= server
.db
+j
;
3319 if (dictSize(d
) == 0) continue;
3320 di
= dictGetIterator(d
);
3326 /* Write the SELECT DB opcode */
3327 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3328 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3330 /* Iterate this DB writing every entry */
3331 while((de
= dictNext(di
)) != NULL
) {
3332 robj
*key
= dictGetEntryKey(de
);
3333 robj
*o
= dictGetEntryVal(de
);
3334 time_t expiretime
= getExpire(db
,key
);
3336 /* Save the expire time */
3337 if (expiretime
!= -1) {
3338 /* If this key is already expired skip it */
3339 if (expiretime
< now
) continue;
3340 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3341 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3343 /* Save the key and associated value. This requires special
3344 * handling if the value is swapped out. */
3345 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3346 key
->storage
== REDIS_VM_SWAPPING
) {
3347 /* Save type, key, value */
3348 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3349 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3350 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3352 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3354 /* Get a preview of the object in memory */
3355 po
= vmPreviewObject(key
);
3356 /* Save type, key, value */
3357 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3358 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3359 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3360 /* Remove the loaded object from memory */
3364 dictReleaseIterator(di
);
3367 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3369 /* Make sure data will not remain on the OS's output buffers */
3374 /* Use RENAME to make sure the DB file is changed atomically only
3375 * if the generate DB file is ok. */
3376 if (rename(tmpfile
,filename
) == -1) {
3377 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3381 redisLog(REDIS_NOTICE
,"DB saved on disk");
3383 server
.lastsave
= time(NULL
);
3389 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3390 if (di
) dictReleaseIterator(di
);
/* Save the DB to 'filename' from a forked child process.
 * REDIS_ERR is returned at once when a background save child already
 * exists (server.bgsavechildpid != -1). In the child (fork() == 0)
 * rdbSave() is run; in the parent a failed fork is logged, otherwise the
 * child pid is logged and stored in server.bgsavechildpid. */
3394 static int rdbSaveBackground(char *filename
) {
3397 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3398 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3399 if ((childpid
= fork()) == 0) {
/* Child side. */
3401 if (server
.vm_enabled
) vmReopenSwapFile();
3403 if (rdbSave(filename
) == REDIS_OK
) {
/* Parent side: -1 means fork() itself failed. */
3410 if (childpid
== -1) {
3411 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3415 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3416 server
.bgsavechildpid
= childpid
;
3419 return REDIS_OK
; /* unreached */
/* Build in 'tmpfile' the name "temp-<childpid>.rdb", which is the same
 * pattern rdbSave() uses for its temporary file. What is then done with
 * the name is not visible here. */
3422 static void rdbRemoveTempFile(pid_t childpid
) {
3425 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read one type byte from 'fp'. Returns -1 when fread() reads nothing. */
3429 static int rdbLoadType(FILE *fp
) {
3431 if (fread(&type
,1,1,fp
) == 0) return -1;
/* Read a four byte time value from 'fp' and return it as time_t.
 * -1 is returned when fread() reads nothing, so a failed read cannot be
 * told apart from a stored value of -1. */
3435 static time_t rdbLoadTime(FILE *fp
) {
3437 if (fread(&t32
,4,1,fp
) == 0) return -1;
3438 return (time_t) t32
;
3441 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3442 * of this file for a description of how these are stored on disk.
3444 * isencoded is set to 1 if the value read is not actually a length but
3445 * an "encoding type", check the above comments for more info.
 *
 * 'isencoded' may be NULL. REDIS_RDB_LENERR is returned when a read from
 * 'fp' fails. */
3446 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3447 unsigned char buf
[2];
3451 if (isencoded
) *isencoded
= 0;
3452 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
/* The two most significant bits of the first byte select the layout. */
3453 type
= (buf
[0]&0xC0)>>6;
3454 if (type
== REDIS_RDB_6BITLEN
) {
3455 /* Read a 6 bit len */
3457 } else if (type
== REDIS_RDB_ENCVAL
) {
3458 /* Read a 6 bit len encoding type */
3459 if (isencoded
) *isencoded
= 1;
3461 } else if (type
== REDIS_RDB_14BITLEN
) {
3462 /* Read a 14 bit len */
3463 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
/* Low 6 bits of the first byte are the high part, second byte the low. */
3464 return ((buf
[0]&0x3F)<<8)|buf
[1];
3466 /* Read a 32 bit len */
3467 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
/* Load an integer saved with one of the REDIS_RDB_ENC_INT* encodings and
 * return it as a new string object holding its "%lld" text.
 *
 * 'enctype' selects how many bytes are read from 'fp' (1, 2 or 4); the
 * bytes are combined least significant first. NULL is returned when the
 * read fails. */
3472 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3473 unsigned char enc
[4];
3476 if (enctype
== REDIS_RDB_ENC_INT8
) {
3477 if (fread(enc
,1,1,fp
) == 0) return NULL
;
/* The cast restores the sign of the stored byte. */
3478 val
= (signed char)enc
[0];
3479 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3481 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3482 v
= enc
[0]|(enc
[1]<<8);
3484 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3486 if (fread(enc
,4,1,fp
) == 0) return NULL
;
/* The top byte is widened to uint32_t before shifting: enc[3] would
 * otherwise be promoted to int, and shifting a value >= 0x80 left by 24
 * bits overflows a signed int. */
3487 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|((uint32_t)enc
[3]<<24);
3490 val
= 0; /* anti-warning */
3493 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
/* Load an LZF-compressed string from 'fp' and return it as a new string
 * object. Two lengths are read first, the compressed one (clen) and then
 * the uncompressed one (len); clen bytes are then read and decompressed
 * into an sds of len bytes. NULL is returned when a length cannot be
 * read; later failures jump to the 'err' label (not visible here). */
3496 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3497 unsigned int len
, clen
;
3498 unsigned char *c
= NULL
;
3501 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3502 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
/* NOTE(review): both lengths come straight from the file and are used as
 * allocation sizes without further checks - confirm this is acceptable. */
3503 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3504 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3505 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3506 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3508 return createObject(REDIS_STRING
,val
);
/* Load a string object from 'fp'.
 * rdbLoadLen() yields either a real length or, through 'isencoded', an
 * encoding type. The encoding types listed below are dispatched to the
 * integer and LZF loaders; a real length is followed by that many raw
 * bytes. Every object returned by the visible paths is passed through
 * tryObjectSharing(). */
3515 static robj
*rdbLoadStringObject(FILE*fp
) {
3520 len
= rdbLoadLen(fp
,&isencoded
);
/* In these cases 'len' holds the encoding type, not a length. */
3523 case REDIS_RDB_ENC_INT8
:
3524 case REDIS_RDB_ENC_INT16
:
3525 case REDIS_RDB_ENC_INT32
:
3526 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3527 case REDIS_RDB_ENC_LZF
:
3528 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3534 if (len
== REDIS_RDB_LENERR
) return NULL
;
3535 val
= sdsnewlen(NULL
,len
);
/* Nothing is read for an empty string. */
3536 if (len
&& fread(val
,len
,1,fp
) == 0) {
3540 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3543 /* For information about double serialization check rdbSaveDoubleValue().
 *
 * A one byte prefix is read first: 255, 254 and 253 stand for negative
 * infinity, positive infinity and NaN; any other value is the number of
 * text bytes that follow, which are parsed with sscanf("%lg"). The result
 * is stored in *val. Returns 0 on the special values and -1 when a read
 * fails. */
3544 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3548 if (fread(&len
,1,1,fp
) == 0) return -1;
3550 case 255: *val
= R_NegInf
; return 0;
3551 case 254: *val
= R_PosInf
; return 0;
3552 case 253: *val
= R_Nan
; return 0;
/* NOTE(review): a single byte is read into 'len' above and 253-255 are
 * taken, so up to 252 bytes may be requested here - confirm that 'buf'
 * (declared outside this view) has room for them plus a terminator. */
3554 if (fread(buf
,len
,1,fp
) == 0) return -1;
3556 sscanf(buf
, "%lg", val
);
3561 /* Load a Redis object of the specified type from the specified file.
3562 * On success a newly allocated object is returned, otherwise NULL.
 *
 * Strings are loaded with rdbLoadStringObject(); lists, sets, sorted sets
 * and hashes start with an element count (rdbLoadLen) followed by the
 * elements. Loaded strings are passed to tryObjectEncoding(), except for
 * the ones stored into a zipmap.
 *
 * NOTE(review): the early "return NULL" paths below do not release the
 * partially built object (nor 'score' in the sorted set branch) in the
 * code visible here; rdbLoad() treats a NULL result as a fatal error -
 * confirm that no caller expects to continue after a failure. */
3563 static robj
*rdbLoadObject(int type
, FILE *fp
) {
/* ftell() returns a long, hence the "%ld" conversion for the offset. */
3566 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %ld)\n",type
,ftell(fp
));
3567 if (type
== REDIS_STRING
) {
3568 /* Read string value */
3569 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3570 tryObjectEncoding(o
);
3571 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3572 /* Read list/set value */
3575 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3576 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3577 /* It's faster to expand the dict to the right size asap in order
3578 * to avoid rehashing */
3579 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3580 dictExpand(o
->ptr
,listlen
);
3581 /* Load every single element of the list/set */
3585 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3586 tryObjectEncoding(ele
);
3587 if (type
== REDIS_LIST
) {
3588 listAddNodeTail((list
*)o
->ptr
,ele
);
3590 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3593 } else if (type
== REDIS_ZSET
) {
3594 /* Read sorted set value */
3598 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3599 o
= createZsetObject();
3601 /* Load every single element of the sorted set */
3604 double *score
= zmalloc(sizeof(double));
3606 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3607 tryObjectEncoding(ele
);
3608 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
/* The element is stored both in the dict and in the skiplist. */
3609 dictAdd(zs
->dict
,ele
,score
);
3610 zslInsert(zs
->zsl
,*score
,ele
);
3611 incrRefCount(ele
); /* added to skiplist */
3613 } else if (type
== REDIS_HASH
) {
3616 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3617 o
= createHashObject();
3618 /* Too many entries? Use an hash table. */
3619 if (hashlen
> server
.hash_max_zipmap_entries
)
3620 convertToRealHash(o
);
3621 /* Load every key/value, then set it into the zipmap or hash
3622 * table, as needed. */
3626 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3627 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3628 /* If we are using a zipmap and there are too big values
3629 * the object is converted to real hash table encoding. */
3630 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3631 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3632 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3634 convertToRealHash(o
);
3637 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3638 unsigned char *zm
= o
->ptr
;
/* zipmapSet() returns the zipmap pointer to keep using from here on. */
3640 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3641 val
->ptr
,sdslen(val
->ptr
),NULL
);
3646 tryObjectEncoding(key
);
3647 tryObjectEncoding(val
);
3648 dictAdd((dict
*)o
->ptr
,key
,val
);
/* Load the DB stored in 'filename'.
 * REDIS_ERR is returned when the file cannot be opened. The first nine
 * bytes must start with "REDIS", followed by the format version. The
 * file is then read entry by entry until a REDIS_EOF type is met:
 * REDIS_EXPIRETIME announces the expire time of the entry that follows,
 * REDIS_SELECTDB switches the target database, anything else is a key
 * followed by a value of that type. Short reads jump to 'eoferr'. */
3657 static int rdbLoad(char *filename
) {
3659 robj
*keyobj
= NULL
;
3661 int type
, retval
, rdbver
;
3662 dict
*d
= server
.db
[0].dict
;
3663 redisDb
*db
= server
.db
+0;
3665 time_t expiretime
= -1, now
= time(NULL
);
3666 long long loadedkeys
= 0;
3668 fp
= fopen(filename
,"r");
3669 if (!fp
) return REDIS_ERR
;
3670 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3672 if (memcmp(buf
,"REDIS",5) != 0) {
3674 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
/* The version digits follow the five signature bytes. */
3677 rdbver
= atoi(buf
+5);
3680 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3687 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3688 if (type
== REDIS_EXPIRETIME
) {
3689 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3690 /* We read the time so we need to read the object type again */
3691 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3693 if (type
== REDIS_EOF
) break;
3694 /* Handle SELECT DB opcode as a special case */
3695 if (type
== REDIS_SELECTDB
) {
3696 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3698 if (dbid
>= (unsigned)server
.dbnum
) {
3699 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3702 db
= server
.db
+dbid
;
3707 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3709 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3710 /* Add the new object in the hash table */
3711 retval
= dictAdd(d
,keyobj
,o
);
3712 if (retval
== DICT_ERR
) {
3713 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3716 /* Set the expire time if needed */
3717 if (expiretime
!= -1) {
3718 setExpire(db
,keyobj
,expiretime
);
3719 /* Delete this key if already expired */
3720 if (expiretime
< now
) deleteKey(db
,keyobj
);
3724 /* Handle swapping while loading big datasets when VM is on */
/* The memory check runs only when loadedkeys is a multiple of 5000. */
3726 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3727 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3728 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3735 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3736 if (keyobj
) decrRefCount(keyobj
);
3737 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3739 return REDIS_ERR
; /* Just to avoid warning */
3742 /*================================== Commands =============================== */
/* AUTH: mark the client as authenticated when no password is configured
 * (server.requirepass is NULL) or when argv[1] equals it; reply with
 * shared.ok in that case. Otherwise clear the flag and reply with an
 * "invalid password" error. */
3744 static void authCommand(redisClient
*c
) {
3745 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3746 c
->authenticated
= 1;
3747 addReply(c
,shared
.ok
);
3749 c
->authenticated
= 0;
3750 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
/* PING: reply with the shared "pong" object. */
3754 static void pingCommand(redisClient
*c
) {
3755 addReply(c
,shared
.pong
);
/* ECHO: send argv[1] back to the client as a bulk reply. */
3758 static void echoCommand(redisClient
*c
) {
3759 addReplyBulk(c
,c
->argv
[1]);
3762 /*=================================== Strings =============================== */
/* Shared implementation of SET and SETNX: store argv[2] under the key
 * argv[1] in the client's current database. 'nx' is non-zero for SETNX.
 * dictAdd() is tried first; DICT_ERR means the key already exists, and
 * that branch contains both the overwrite (dictReplace) and the
 * shared.czero reply. Any expire on the key is removed, and the final
 * reply is shared.cone when 'nx' is set, shared.ok otherwise. */
3764 static void setGenericCommand(redisClient
*c
, int nx
) {
3767 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3768 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3769 if (retval
== DICT_ERR
) {
3771 /* If the key is about a swapped value, we want a new key object
3772 * to overwrite the old. So we delete the old key in the database.
3773 * This will also make sure that swap pages about the old object
3774 * will be marked as free. */
3775 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3776 incrRefCount(c
->argv
[1]);
3777 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
/* The dict now references the value object as well. */
3778 incrRefCount(c
->argv
[2]);
3780 addReply(c
,shared
.czero
);
/* New key: the dict references both the key and the value object. */
3784 incrRefCount(c
->argv
[1]);
3785 incrRefCount(c
->argv
[2]);
3788 removeExpire(c
->db
,c
->argv
[1]);
3789 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* SET: setGenericCommand() with nx = 0. */
3792 static void setCommand(redisClient
*c
) {
3793 setGenericCommand(c
,0);
/* SETNX: setGenericCommand() with nx = 1. */
3796 static void setnxCommand(redisClient
*c
) {
3797 setGenericCommand(c
,1);
/* Shared implementation of GET, also used by GETSET: look up argv[1] for
 * reading, with shared.nullbulk as the reply for a missing key, and reply
 * with shared.wrongtypeerr when the value is not a string. getsetCommand()
 * compares the result with REDIS_ERR. */
3800 static int getGenericCommand(redisClient
*c
) {
3803 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3806 if (o
->type
!= REDIS_STRING
) {
3807 addReply(c
,shared
.wrongtypeerr
);
/* GET: run getGenericCommand(), ignoring its return value. */
3815 static void getCommand(redisClient
*c
) {
3816 getGenericCommand(c
);
/* GETSET: reply with the current value through getGenericCommand(), then
 * store argv[2] under argv[1]. Nothing is stored when getGenericCommand()
 * returns REDIS_ERR. Any expire on the key is removed. */
3819 static void getsetCommand(redisClient
*c
) {
3820 if (getGenericCommand(c
) == REDIS_ERR
) return;
/* dictAdd() fails for an existing key, which is then replaced. */
3821 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3822 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3824 incrRefCount(c
->argv
[1]);
3826 incrRefCount(c
->argv
[2]);
3828 removeExpire(c
->db
,c
->argv
[1]);
/* MGET: reply with a multi bulk of argc-1 items, one per requested key.
 * Both visible branches answer with shared.nullbulk: one follows the
 * lookup and one is taken when the value is not a string. */
3831 static void mgetCommand(redisClient
*c
) {
/* Multi bulk header: the number of keys, i.e. all arguments but the name. */
3834 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3835 for (j
= 1; j
< c
->argc
; j
++) {
3836 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3838 addReply(c
,shared
.nullbulk
);
3840 if (o
->type
!= REDIS_STRING
) {
3841 addReply(c
,shared
.nullbulk
);
/* Shared implementation of MSET and MSETNX. The arguments after the
 * command name are key/value pairs, so an even argc is rejected with an
 * error. 'nx' is non-zero for MSETNX. Every pair is stored with dictAdd()
 * or, for an existing key, dictReplace(); expires on the keys are
 * removed. server.dirty grows by the number of pairs and the reply is
 * shared.cone when 'nx' is set, shared.ok otherwise. */
3849 static void msetGenericCommand(redisClient
*c
, int nx
) {
3850 int j
, busykeys
= 0;
3852 if ((c
->argc
% 2) == 0) {
3853 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3856 /* Handle the NX flag. The MSETNX semantic is to return zero and set
3857 * nothing at all if at least one key already exists. */
3859 for (j
= 1; j
< c
->argc
; j
+= 2) {
3860 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3866 addReply(c
, shared
.czero
);
/* Store every pair: argv[j] is the key, argv[j+1] the value. */
3870 for (j
= 1; j
< c
->argc
; j
+= 2) {
3873 tryObjectEncoding(c
->argv
[j
+1]);
3874 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3875 if (retval
== DICT_ERR
) {
3876 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3877 incrRefCount(c
->argv
[j
+1]);
3879 incrRefCount(c
->argv
[j
]);
3880 incrRefCount(c
->argv
[j
+1]);
3882 removeExpire(c
->db
,c
->argv
[j
]);
3884 server
.dirty
+= (c
->argc
-1)/2;
3885 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* MSET: msetGenericCommand() with nx = 0. */
3888 static void msetCommand(redisClient
*c
) {
3889 msetGenericCommand(c
,0);
/* MSETNX: msetGenericCommand() with nx = 1. */
3892 static void msetnxCommand(redisClient
*c
) {
3893 msetGenericCommand(c
,1);
/* Shared implementation of INCR, DECR, INCRBY and DECRBY; 'incr' is the
 * signed amount supplied by the caller. The current value of argv[1] is
 * read either with strtoll() (raw string) or directly from o->ptr
 * (integer encoded); a new string object holding the "%lld" text of
 * 'value' is then created, encoded when possible and stored under the
 * key. The reply is built from shared.colon and shared.crlf. */
3896 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3901 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3905 if (o
->type
!= REDIS_STRING
) {
3910 if (o
->encoding
== REDIS_ENCODING_RAW
)
3911 value
= strtoll(o
->ptr
, &eptr
, 10);
3912 else if (o
->encoding
== REDIS_ENCODING_INT
)
3913 value
= (long)o
->ptr
;
/* Any other encoding is not expected: always-failing assertion. */
3915 redisAssert(1 != 1);
3920 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3921 tryObjectEncoding(o
);
3922 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
/* Existing key: replace the value and drop any expire set on it. */
3923 if (retval
== DICT_ERR
) {
3924 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3925 removeExpire(c
->db
,c
->argv
[1]);
3927 incrRefCount(c
->argv
[1]);
3930 addReply(c
,shared
.colon
);
3932 addReply(c
,shared
.crlf
);
/* INCR: incrDecrCommand() with an increment of 1. */
3935 static void incrCommand(redisClient
*c
) {
3936 incrDecrCommand(c
,1);
/* DECR: incrDecrCommand() with an increment of -1. */
3939 static void decrCommand(redisClient
*c
) {
3940 incrDecrCommand(c
,-1);
/* INCRBY: parse argv[2] with strtoll() in base 10 and pass the result to
 * incrDecrCommand(). The end pointer is not requested, so input that is
 * not a number is not detected here. */
3943 static void incrbyCommand(redisClient
*c
) {
3944 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3945 incrDecrCommand(c
,incr
);
/* DECRBY: parse argv[2] with strtoll() in base 10 and pass the negated
 * result to incrDecrCommand(). The end pointer is not requested, so input
 * that is not a number is not detected here. */
3948 static void decrbyCommand(redisClient
*c
) {
3949 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
/* NOTE(review): negating the smallest long long value overflows - confirm
 * whether that input has to be rejected before this point. */
3950 incrDecrCommand(c
,-incr
);
/* APPEND: append argv[2] to the string stored at argv[1], creating the
 * key when needed, and reply with the resulting length as an integer.
 *
 * For a new key argv[2] itself becomes the value. For an existing key the
 * value must be a string (shared.wrongtypeerr otherwise); when it is
 * shared or not raw encoded it is first replaced with a private raw copy,
 * since the append modifies o->ptr in place. */
3953 static void appendCommand(redisClient
*c
) {
3958 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3960 /* Create the key */
3961 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3962 incrRefCount(c
->argv
[1]);
3963 incrRefCount(c
->argv
[2]);
3964 totlen
= stringObjectLen(c
->argv
[2]);
3968 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3971 o
= dictGetEntryVal(de
);
3972 if (o
->type
!= REDIS_STRING
) {
3973 addReply(c
,shared
.wrongtypeerr
);
3976 /* If the object is specially encoded or shared we have to make
 * a copy */
3978 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
3979 robj
*decoded
= getDecodedObject(o
);
3981 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
3982 decrRefCount(decoded
);
3983 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3986 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
3987 o
->ptr
= sdscatlen(o
->ptr
,
3988 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
/* Integer encoded argument: the pointer field holds a signed long, so it
 * is converted to long to match the "%ld" conversion. */
3990 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
3991 (long) c
->argv
[2]->ptr
);
3993 totlen
= sdslen(o
->ptr
);
3996 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
/* SUBSTR: reply with the bytes of the string at argv[1] between the
 * indexes argv[2] (start) and argv[3] (end), both inclusive. Negative
 * indexes count from the end of the string. A missing key, a start past
 * the end of the string, or start > end gives shared.nullbulk; a value
 * that is not a string is handled by checkType(). */
3999 static void substrCommand(redisClient
*c
) {
/* atoi() is used for both indexes: text that is not a number becomes 0. */
4001 long start
= atoi(c
->argv
[2]->ptr
);
4002 long end
= atoi(c
->argv
[3]->ptr
);
/* Note: the local 'strlen' hides the library function inside this block. */
4003 size_t rangelen
, strlen
;
4006 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4007 checkType(c
,o
,REDIS_STRING
)) return;
/* Work on a decoded object so that o->ptr is an sds in every case. */
4009 o
= getDecodedObject(o
);
4010 strlen
= sdslen(o
->ptr
);
4012 /* convert negative indexes */
4013 if (start
< 0) start
= strlen
+start
;
4014 if (end
< 0) end
= strlen
+end
;
/* Indexes still negative after the conversion are clamped to 0. */
4015 if (start
< 0) start
= 0;
4016 if (end
< 0) end
= 0;
4018 /* indexes sanity checks */
4019 if (start
> end
|| (size_t)start
>= strlen
) {
4020 /* Out of range start or start > end result in null reply */
4021 addReply(c
,shared
.nullbulk
);
/* An end past the last byte is clamped to the last byte. */
4025 if ((size_t)end
>= strlen
) end
= strlen
-1;
4026 rangelen
= (end
-start
)+1;
4028 /* Return the result */
4029 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4030 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4031 addReplySds(c
,range
);
4032 addReply(c
,shared
.crlf
);
4036 /* ========================= Type agnostic commands ========================= */
/* DEL: call deleteKey() on every key given as argument and reply with
 * the integer 'deleted'. */
4038 static void delCommand(redisClient
*c
) {
4041 for (j
= 1; j
< c
->argc
; j
++) {
4042 if (deleteKey(c
->db
,c
->argv
[j
])) {
4047 addReplyLong(c
,deleted
);
/* EXISTS: reply shared.cone when lookupKeyRead() finds argv[1],
 * shared.czero otherwise. */
4050 static void existsCommand(redisClient
*c
) {
4051 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
/* SELECT: switch the client to the database whose index is argv[1].
 * The index is parsed with atoi(), so text that is not a number becomes
 * 0. An error is sent when selectDb() returns REDIS_ERR, shared.ok
 * otherwise. */
4054 static void selectCommand(redisClient
*c
) {
4055 int id
= atoi(c
->argv
[1]->ptr
);
4057 if (selectDb(c
,id
) == REDIS_ERR
) {
4058 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4060 addReply(c
,shared
.ok
);
/* RANDOMKEY: pick a random entry of the current database and reply with
 * its key as a single line reply ("+" key CRLF). The selection stops when
 * the database yields no entry or when the picked key is not removed by
 * expireIfNeeded(). The reply with nothing between "+" and CRLF is
 * presumably the one used when no entry is found (the branch lines are
 * not visible here). */
4064 static void randomkeyCommand(redisClient
*c
) {
4068 de
= dictGetRandomKey(c
->db
->dict
);
4069 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4072 addReply(c
,shared
.plus
);
4073 addReply(c
,shared
.crlf
);
4075 addReply(c
,shared
.plus
);
4076 addReply(c
,dictGetEntryKey(de
));
4077 addReply(c
,shared
.crlf
);
/* KEYS: reply with every key of the current database matching the glob
 * style pattern in argv[1], as a multi bulk reply. Keys removed by
 * expireIfNeeded() are not emitted. */
4081 static void keysCommand(redisClient
*c
) {
4084 sds pattern
= c
->argv
[1]->ptr
;
4085 int plen
= sdslen(pattern
);
4086 unsigned long numkeys
= 0;
/* The multi bulk header object is created with a NULL ptr and receives
 * its text only after the iteration, once 'numkeys' is known. */
4087 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4089 di
= dictGetIterator(c
->db
->dict
);
/* Presumably drops this function's reference after the object was queued
 * for the reply (the queuing call is not visible here). */
4091 decrRefCount(lenobj
);
4092 while((de
= dictNext(di
)) != NULL
) {
4093 robj
*keyobj
= dictGetEntryKey(de
);
4095 sds key
= keyobj
->ptr
;
/* The lone "*" pattern matches everything without calling the matcher. */
4096 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4097 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4098 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4099 addReplyBulk(c
,keyobj
);
4104 dictReleaseIterator(di
);
4105 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
/* DBSIZE: build an integer reply holding dictSize() of the current
 * database. */
4108 static void dbsizeCommand(redisClient
*c
) {
4110 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
/* LASTSAVE: build an integer reply holding server.lastsave, the time
 * recorded by rdbSave() after a successful save. */
4113 static void lastsaveCommand(redisClient
*c
) {
/* server.lastsave is assigned from time(); it is converted explicitly so
 * that the argument matches the "%lu" conversion whatever the width and
 * signedness of the platform's time type. */
4115 sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long) server
.lastsave
));
/* TYPE: reply with the name of the type of the value stored at argv[1]
 * as a single line reply: "+string", "+list", "+set", "+zset", "+hash",
 * or "+unknown" for any other type value. */
4118 static void typeCommand(redisClient
*c
) {
4122 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4127 case REDIS_STRING
: type
= "+string"; break;
4128 case REDIS_LIST
: type
= "+list"; break;
4129 case REDIS_SET
: type
= "+set"; break;
4130 case REDIS_ZSET
: type
= "+zset"; break;
4131 case REDIS_HASH
: type
= "+hash"; break;
4132 default: type
= "+unknown"; break;
4135 addReplySds(c
,sdsnew(type
));
4136 addReply(c
,shared
.crlf
);
/* SAVE: save the DB synchronously to server.dbfilename. An error is sent
 * when a background save child exists (server.bgsavechildpid != -1);
 * otherwise the reply is shared.ok when rdbSave() returns REDIS_OK and
 * shared.err when it does not. */
4139 static void saveCommand(redisClient
*c
) {
4140 if (server
.bgsavechildpid
!= -1) {
4141 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4144 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4145 addReply(c
,shared
.ok
);
4147 addReply(c
,shared
.err
);
/* BGSAVE: start a background save to server.dbfilename. An error is sent
 * when a background save child already exists; otherwise the reply is a
 * status line when rdbSaveBackground() returns REDIS_OK and shared.err
 * when it does not. */
4151 static void bgsaveCommand(redisClient
*c
) {
4152 if (server
.bgsavechildpid
!= -1) {
4153 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4156 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4157 char *status
= "+Background saving started\r\n";
4158 addReplySds(c
,sdsnew(status
));
4160 addReply(c
,shared
.err
);
/* SHUTDOWN: persist the dataset before the server stops.
 * A live background save child is killed first and its temporary file
 * handled by rdbRemoveTempFile(). With the append only file enabled the
 * AOF descriptor is fsync()ed; otherwise a synchronous rdbSave() is run
 * and, when it succeeds, the pid file (if daemonized) and the VM swap
 * file (if VM is enabled) are unlinked. When the save fails an error is
 * logged and sent to the client. */
4164 static void shutdownCommand(redisClient
*c
) {
4165 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4166 /* Kill the saving child if there is a background saving in progress.
4167 We want to avoid race conditions, for instance our saving child may
4168 overwrite the synchronous saving did by SHUTDOWN. */
4169 if (server
.bgsavechildpid
!= -1) {
4170 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4171 kill(server
.bgsavechildpid
,SIGKILL
);
4172 rdbRemoveTempFile(server
.bgsavechildpid
);
4174 if (server
.appendonly
) {
4175 /* Append only file: fsync() the AOF and exit */
4176 fsync(server
.appendfd
);
4177 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4180 /* Snapshotting. Perform a SYNC SAVE and exit */
4181 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4182 if (server
.daemonize
)
4183 unlink(server
.pidfile
);
4184 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4185 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4186 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4189 /* Ooops.. error saving! The best we can do is to continue
4190 * operating. Note that if there was a background saving process,
4191 * in the next cron() Redis will be notified that the background
4192 * saving aborted, handling special stuff like slaves pending for
4193 * synchronization... */
4194 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4196 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
// renameGenericCommand: shared implementation behind renameCommand (nx == 0)
// and renamenxCommand (nx == 1).
//   c  - client; c->argv[1] is the source key, c->argv[2] the destination.
//   nx - selects the final reply: shared.cone when non-zero, shared.ok when 0.
// Visible flow: identical source and destination names yield
// shared.sameobjecterr; a missing source yields shared.nokeyerr; the
// destination is passed to deleteIfVolatile(); the object is inserted with
// dictAdd(); when dictAdd() reports DICT_ERR the visible statements are a
// shared.czero reply and a dictReplace(); the destination key gets an extra
// reference; the source key is deleted.
// NOTE(review): which of the DICT_ERR statements runs for nx and for non-nx
// is decided on lines not visible in this extract; presumably czero is the
// nx case.
4201 static void renameGenericCommand(redisClient
*c
, int nx
) {
4204 /* To use the same key as src and dst is probably an error */
4205 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4206 addReply(c
,shared
.sameobjecterr
);
4210 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4214 deleteIfVolatile(c
->db
,c
->argv
[2]);
4215 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4218 addReply(c
,shared
.czero
);
4221 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
// The destination key object is now referenced by the DB dictionary.
4223 incrRefCount(c
->argv
[2]);
4225 deleteKey(c
->db
,c
->argv
[1]);
4227 addReply(c
,nx
? shared
.cone
: shared
.ok
);
// renameCommand: thin wrapper that calls renameGenericCommand() with nx = 0.
4230 static void renameCommand(redisClient
*c
) {
4231 renameGenericCommand(c
,0);
// renamenxCommand: thin wrapper that calls renameGenericCommand() with nx = 1.
4234 static void renamenxCommand(redisClient
*c
) {
4235 renameGenericCommand(c
,1);
// moveCommand: moves the key c->argv[1] from the client's current DB into the
// DB whose index is atoi(c->argv[2]->ptr).
// Visible flow: selectDb() on the target index validates it (REDIS_ERR gives
// shared.outofrangeerr), then the client is switched back to the source DB;
// a same-DB move gives shared.sameobjecterr; a failed source lookup is
// followed by shared.czero; the key is passed to deleteIfVolatile() on the
// target and inserted with dictAdd() (DICT_ERR gives shared.czero); on
// success the key gains a reference, is deleted from the source DB and
// shared.cone is sent.
// NOTE(review): the declarations of src, dst and srcid and several condition
// lines are not visible in this extract. atoi() cannot report a non-numeric
// index.
4238 static void moveCommand(redisClient
*c
) {
4243 /* Obtain source and target DB pointers */
4246 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4247 addReply(c
,shared
.outofrangeerr
);
4251 selectDb(c
,srcid
); /* Back to the source DB */
4253 /* If the user is moving using as target the same
4254 * DB as the source DB it is probably an error. */
4256 addReply(c
,shared
.sameobjecterr
);
4260 /* Check if the element exists and get a reference */
4261 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4263 addReply(c
,shared
.czero
);
4267 /* Try to add the element to the target DB */
4268 deleteIfVolatile(dst
,c
->argv
[1]);
4269 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4270 addReply(c
,shared
.czero
);
4273 incrRefCount(c
->argv
[1]);
4276 /* OK! key moved, free the entry in the source DB */
4277 deleteKey(src
,c
->argv
[1]);
4279 addReply(c
,shared
.cone
);
4282 /* =================================== Lists ================================ */
// pushGenericCommand: shared implementation behind lpushCommand and
// rpushCommand.
//   c     - client; c->argv[1] is the list key, c->argv[2] the value.
//   where - REDIS_HEAD pushes with listAddNodeHead(); the other visible
//           branch uses listAddNodeTail().
// Two paths are visible. In the first a list object is created with
// createListObject() and added to the DB under c->argv[1]. In the second the
// looked-up object's type is tested and a non-list gives shared.wrongtypeerr.
// On both paths handleClientsWaitingListPush() is consulted first; when it
// returns non-zero, shared.cone is sent. Otherwise the value is linked into
// the list and gains a reference.
// The final reply is ":<listLength>\r\n".
// NOTE(review): the declaration of 'list' and the branch conditions are not
// visible in this extract.
4283 static void pushGenericCommand(redisClient
*c
, int where
) {
4287 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4289 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4290 addReply(c
,shared
.cone
);
4293 lobj
= createListObject();
4295 if (where
== REDIS_HEAD
) {
4296 listAddNodeHead(list
,c
->argv
[2]);
4298 listAddNodeTail(list
,c
->argv
[2]);
4300 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
// Both the key and the pushed value are now referenced by the dataset.
4301 incrRefCount(c
->argv
[1]);
4302 incrRefCount(c
->argv
[2]);
4304 if (lobj
->type
!= REDIS_LIST
) {
4305 addReply(c
,shared
.wrongtypeerr
);
4308 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4309 addReply(c
,shared
.cone
);
4313 if (where
== REDIS_HEAD
) {
4314 listAddNodeHead(list
,c
->argv
[2]);
4316 listAddNodeTail(list
,c
->argv
[2]);
4318 incrRefCount(c
->argv
[2]);
4321 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
// lpushCommand: thin wrapper that calls pushGenericCommand() with REDIS_HEAD.
4324 static void lpushCommand(redisClient
*c
) {
4325 pushGenericCommand(c
,REDIS_HEAD
);
// rpushCommand: thin wrapper that calls pushGenericCommand() with REDIS_TAIL.
4328 static void rpushCommand(redisClient
*c
) {
4329 pushGenericCommand(c
,REDIS_TAIL
);
// llenCommand: replies with listLength() of the list stored at c->argv[1].
// lookupKeyReadOrReply() is given shared.czero as its reply argument and the
// function returns when the lookup yields NULL; it also returns when
// checkType() against REDIS_LIST is non-zero.
// NOTE(review): the assignment of 'l' from the object is not visible in this
// extract.
4332 static void llenCommand(redisClient
*c
) {
4336 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4337 checkType(c
,o
,REDIS_LIST
)) return;
4340 addReplyUlong(c
,listLength(l
));
// lindexCommand: replies with the element at position atoi(c->argv[2]->ptr)
// of the list stored at c->argv[1], located with listIndex().
// lookupKeyReadOrReply() is given shared.nullbulk and the function returns on
// a NULL lookup or a non-zero checkType(); shared.nullbulk is also the
// visible reply on the no-node path. Otherwise the node value is sent as a
// bulk reply.
// NOTE(review): atoi() cannot report a non-numeric index. The NULL test on
// 'ln' is presumably on a line not visible in this extract.
4343 static void lindexCommand(redisClient
*c
) {
4345 int index
= atoi(c
->argv
[2]->ptr
);
4349 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4350 checkType(c
,o
,REDIS_LIST
)) return;
4353 ln
= listIndex(list
, index
);
4355 addReply(c
,shared
.nullbulk
);
4357 robj
*ele
= listNodeValue(ln
);
4358 addReplyBulk(c
,ele
);
// lsetCommand: stores c->argv[3] into the list node at position
// atoi(c->argv[2]->ptr) of the list stored at c->argv[1].
// lookupKeyWriteOrReply() is given shared.nokeyerr and the function returns
// on a NULL lookup or a non-zero checkType(); shared.outofrangeerr is the
// visible reply on the no-node path. On success the node value is overwritten
// with c->argv[3], which gains a reference, and shared.ok is sent.
// NOTE(review): the release of the previous node value ('ele') is presumably
// on a line not visible in this extract.
4362 static void lsetCommand(redisClient
*c
) {
4364 int index
= atoi(c
->argv
[2]->ptr
);
4368 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4369 checkType(c
,o
,REDIS_LIST
)) return;
4372 ln
= listIndex(list
, index
);
4374 addReply(c
,shared
.outofrangeerr
);
4376 robj
*ele
= listNodeValue(ln
);
// listNodeValue() is used as an lvalue here: the node now points at argv[3].
4379 listNodeValue(ln
) = c
->argv
[3];
4380 incrRefCount(c
->argv
[3]);
4381 addReply(c
,shared
.ok
);
// popGenericCommand: shared implementation behind lpopCommand and
// rpopCommand.
//   c     - client; c->argv[1] is the list key.
//   where - REDIS_HEAD selects listFirst(); the other visible branch uses
//           listLast().
// lookupKeyWriteOrReply() is given shared.nullbulk and the function returns
// on a NULL lookup or a non-zero checkType(); shared.nullbulk is also the
// visible reply on the no-node path. Otherwise the node value is sent as a
// bulk reply, the node is removed, and the key is deleted once the list
// length drops to 0.
4386 static void popGenericCommand(redisClient
*c
, int where
) {
4391 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4392 checkType(c
,o
,REDIS_LIST
)) return;
4395 if (where
== REDIS_HEAD
)
4396 ln
= listFirst(list
);
4398 ln
= listLast(list
);
4401 addReply(c
,shared
.nullbulk
);
4403 robj
*ele
= listNodeValue(ln
);
// The reply is queued before the node is unlinked from the list.
4404 addReplyBulk(c
,ele
);
4405 listDelNode(list
,ln
);
4406 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
// lpopCommand: thin wrapper that calls popGenericCommand() with REDIS_HEAD.
4411 static void lpopCommand(redisClient
*c
) {
4412 popGenericCommand(c
,REDIS_HEAD
);
// rpopCommand: thin wrapper that calls popGenericCommand() with REDIS_TAIL.
4415 static void rpopCommand(redisClient
*c
) {
4416 popGenericCommand(c
,REDIS_TAIL
);
// lrangeCommand: replies with the elements of the list at c->argv[1] between
// positions start (argv[2]) and end (argv[3]), both parsed with atoi().
// Index handling, as visible below:
//   - negative indexes are offset by the list length, then clamped to 0;
//   - start > end or start >= llen gives shared.emptymultibulk;
//   - end is clamped to llen-1;
//   - rangelen = end - start + 1 elements are emitted after a "*<rangelen>"
//     multi-bulk header, starting at listIndex(list, start).
// lookupKeyReadOrReply() is given shared.nullmultibulk for the lookup.
// NOTE(review): the statement advancing 'ln' inside the loop is not visible
// in this extract. atoi() cannot report non-numeric indexes.
4419 static void lrangeCommand(redisClient
*c
) {
4421 int start
= atoi(c
->argv
[2]->ptr
);
4422 int end
= atoi(c
->argv
[3]->ptr
);
4429 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4430 checkType(c
,o
,REDIS_LIST
)) return;
4432 llen
= listLength(list
);
4434 /* convert negative indexes */
4435 if (start
< 0) start
= llen
+start
;
4436 if (end
< 0) end
= llen
+end
;
4437 if (start
< 0) start
= 0;
4438 if (end
< 0) end
= 0;
4440 /* indexes sanity checks */
4441 if (start
> end
|| start
>= llen
) {
4442 /* Out of range start or start > end result in empty list */
4443 addReply(c
,shared
.emptymultibulk
);
4446 if (end
>= llen
) end
= llen
-1;
4447 rangelen
= (end
-start
)+1;
4449 /* Return the result in form of a multi-bulk reply */
4450 ln
= listIndex(list
, start
);
4451 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4452 for (j
= 0; j
< rangelen
; j
++) {
4453 ele
= listNodeValue(ln
);
4454 addReplyBulk(c
,ele
);
// ltrimCommand: trims the list at c->argv[1] so that only the elements
// between positions start (argv[2]) and end (argv[3]) remain.
// The index normalisation matches lrangeCommand: negative indexes are offset
// by the list length and clamped to 0, and end is clamped to llen-1.
// 'ltrim' nodes are then removed from the head and 'rtrim' nodes from the
// tail; the key is deleted when the list becomes empty; the reply is
// shared.ok. lookupKeyWriteOrReply() is also given shared.ok for the lookup.
// NOTE(review): the computation of ltrim and rtrim, including the
// out-of-range case, is on lines not visible in this extract.
4459 static void ltrimCommand(redisClient
*c
) {
4461 int start
= atoi(c
->argv
[2]->ptr
);
4462 int end
= atoi(c
->argv
[3]->ptr
);
4464 int j
, ltrim
, rtrim
;
4468 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4469 checkType(c
,o
,REDIS_LIST
)) return;
4471 llen
= listLength(list
);
4473 /* convert negative indexes */
4474 if (start
< 0) start
= llen
+start
;
4475 if (end
< 0) end
= llen
+end
;
4476 if (start
< 0) start
= 0;
4477 if (end
< 0) end
= 0;
4479 /* indexes sanity checks */
4480 if (start
> end
|| start
>= llen
) {
4481 /* Out of range start or start > end result in empty list */
4485 if (end
>= llen
) end
= llen
-1;
4490 /* Remove list elements to perform the trim */
4491 for (j
= 0; j
< ltrim
; j
++) {
4492 ln
= listFirst(list
);
4493 listDelNode(list
,ln
);
4495 for (j
= 0; j
< rtrim
; j
++) {
4496 ln
= listLast(list
);
4497 listDelNode(list
,ln
);
4499 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4501 addReply(c
,shared
.ok
);
// lremCommand: removes elements comparing equal to c->argv[3]
// (compareStringObjects() == 0) from the list at c->argv[1].
// 'toremove' is parsed from argv[2] with atoi(). The walk starts at
// list->tail when 'fromtail' is set and at list->head otherwise, and stops
// early once 'removed' equals a non-zero 'toremove'. The key is deleted when
// the list becomes empty. The reply is ":<removed>\r\n".
// lookupKeyWriteOrReply() is given shared.czero for the lookup.
// NOTE(review): the lines that set 'fromtail' (presumably for a negative
// count, whose sign is flipped below) and that increment 'removed' are not
// visible in this extract.
4504 static void lremCommand(redisClient
*c
) {
4507 listNode
*ln
, *next
;
4508 int toremove
= atoi(c
->argv
[2]->ptr
);
4512 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4513 checkType(c
,o
,REDIS_LIST
)) return;
4517 toremove
= -toremove
;
4520 ln
= fromtail
? list
->tail
: list
->head
;
4522 robj
*ele
= listNodeValue(ln
);
// The successor is captured before a possible listDelNode() on ln.
4524 next
= fromtail
? ln
->prev
: ln
->next
;
4525 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4526 listDelNode(list
,ln
);
4529 if (toremove
&& removed
== toremove
) break;
4533 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4534 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4537 /* These are the semantics of this command:
4538 * RPOPLPUSH srclist dstlist:
4539 * IF LLEN(srclist) > 0
4540 * element = RPOP srclist
4541 * LPUSH dstlist element
4548 * The idea is to be able to get an element from a list in a reliable way
4549 * since the element is not just returned but pushed against another list
4550 * as well. This command was originally proposed by Ezra Zygmuntowicz.
// rpoplpushcommand: takes the last node of the source list (c->argv[1]) and
// pushes its value onto the head of the destination list (c->argv[2]).
// Visible flow: the source lookup is given shared.nullbulk; shared.nullbulk
// is also the visible reply on the no-node path; a destination of another
// type gives shared.wrongtypeerr. Unless handleClientsWaitingListPush()
// consumes the element, the destination list is created if needed and the
// element is added at its head. The element is then sent as a bulk reply,
// the source node is removed, and the source key is deleted once its list is
// empty.
// NOTE(review): the reference-count handling for 'ele' and the NULL tests on
// 'ln' and 'dobj' are on lines not visible in this extract.
4552 static void rpoplpushcommand(redisClient
*c
) {
4557 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4558 checkType(c
,sobj
,REDIS_LIST
)) return;
4559 srclist
= sobj
->ptr
;
4560 ln
= listLast(srclist
);
4563 addReply(c
,shared
.nullbulk
);
4565 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4566 robj
*ele
= listNodeValue(ln
);
4569 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4570 addReply(c
,shared
.wrongtypeerr
);
4574 /* Add the element to the target list (unless it's directly
4575 * passed to some BLPOP-ing client */
4576 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4578 /* Create the list if the key does not exist */
4579 dobj
= createListObject();
4580 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4581 incrRefCount(c
->argv
[2]);
4583 dstlist
= dobj
->ptr
;
4584 listAddNodeHead(dstlist
,ele
);
4588 /* Send the element to the client as reply as well */
4589 addReplyBulk(c
,ele
);
4591 /* Finally remove the element from the source list */
4592 listDelNode(srclist
,ln
);
4593 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4598 /* ==================================== Sets ================================ */
// saddCommand: adds member c->argv[2] to the set stored at c->argv[1].
// Two paths are visible. In the first a set object is created with
// createSetObject() and registered in the DB, and the key gains a reference.
// In the second the looked-up object's type is tested and a non-set gives
// shared.wrongtypeerr.
// dictAdd() into the set dictionary returns DICT_OK for a new member: the
// member gains a reference and shared.cone is sent. shared.czero is the other
// visible reply.
// NOTE(review): the branch conditions and the else keyword are on lines not
// visible in this extract.
4600 static void saddCommand(redisClient
*c
) {
4603 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4605 set
= createSetObject();
4606 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4607 incrRefCount(c
->argv
[1]);
4609 if (set
->type
!= REDIS_SET
) {
4610 addReply(c
,shared
.wrongtypeerr
);
4614 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4615 incrRefCount(c
->argv
[2]);
4617 addReply(c
,shared
.cone
);
4619 addReply(c
,shared
.czero
);
// sremCommand: removes member c->argv[2] from the set stored at c->argv[1].
// lookupKeyWriteOrReply() is given shared.czero and the function returns on a
// NULL lookup or a non-zero checkType(). When dictDelete() returns DICT_OK
// the set dictionary is resized if htNeedsResize() says so, the key is
// deleted once the set is empty, and shared.cone is sent. shared.czero is
// the other visible reply.
4623 static void sremCommand(redisClient
*c
) {
4626 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4627 checkType(c
,set
,REDIS_SET
)) return;
4629 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4631 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4632 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4633 addReply(c
,shared
.cone
);
4635 addReply(c
,shared
.czero
);
// smoveCommand: moves member c->argv[3] from the set at c->argv[1] to the
// set at c->argv[2].
// Replies visible below:
//   - source missing: shared.czero; source of another type:
//     shared.wrongtypeerr;
//   - destination present but not a set: shared.wrongtypeerr;
//   - member absent from the source (dictDelete() == DICT_ERR): shared.czero;
//   - otherwise shared.cone.
// The source key is deleted when its set becomes empty, unless source and
// destination are the same object. A destination set is created with
// createSetObject() and registered in the DB. The member gains a reference
// only when dictAdd() into the destination returns DICT_OK.
// NOTE(review): the condition guarding the destination creation is on a line
// not visible in this extract.
4639 static void smoveCommand(redisClient
*c
) {
4640 robj
*srcset
, *dstset
;
4642 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4643 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4645 /* If the source key does not exist return 0, if it's of the wrong type
4647 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4648 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4651 /* Error if the destination key is not a set as well */
4652 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4653 addReply(c
,shared
.wrongtypeerr
);
4656 /* Remove the element from the source set */
4657 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4658 /* Key not found in the src set! return zero */
4659 addReply(c
,shared
.czero
);
4662 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4663 deleteKey(c
->db
,c
->argv
[1]);
4665 /* Add the element to the destination set */
4667 dstset
= createSetObject();
4668 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4669 incrRefCount(c
->argv
[2]);
4671 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4672 incrRefCount(c
->argv
[3]);
4673 addReply(c
,shared
.cone
);
// sismemberCommand: replies shared.cone when dictFind() locates c->argv[2]
// in the set stored at c->argv[1]; shared.czero is the other visible reply.
// lookupKeyReadOrReply() is given shared.czero and the function returns on a
// NULL lookup or a non-zero checkType().
4676 static void sismemberCommand(redisClient
*c
) {
4679 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4680 checkType(c
,set
,REDIS_SET
)) return;
4682 if (dictFind(set
->ptr
,c
->argv
[2]))
4683 addReply(c
,shared
.cone
);
4685 addReply(c
,shared
.czero
);
// scardCommand: replies with dictSize() of the set stored at c->argv[1].
// lookupKeyReadOrReply() is given shared.czero and the function returns on a
// NULL lookup or a non-zero checkType().
// NOTE(review): the assignment of 's' from the object is not visible in this
// extract.
4688 static void scardCommand(redisClient
*c
) {
4692 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4693 checkType(c
,o
,REDIS_SET
)) return;
4696 addReplyUlong(c
,dictSize(s
));
// spopCommand: removes a member chosen with dictGetRandomKey() from the set
// at c->argv[1] and sends it as a bulk reply.
// lookupKeyWriteOrReply() is given shared.nullbulk and the function returns
// on a NULL lookup or a non-zero checkType(); shared.nullbulk is also the
// visible reply on the no-entry path. After the reply is queued the member
// is deleted, the dictionary is resized if htNeedsResize() says so, and the
// key is deleted once the set is empty.
// NOTE(review): line 4711 is not visible; presumably it takes a reference on
// 'ele' so the object survives the dictDelete() below. Confirm against the
// full source.
4699 static void spopCommand(redisClient
*c
) {
4703 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4704 checkType(c
,set
,REDIS_SET
)) return;
4706 de
= dictGetRandomKey(set
->ptr
);
4708 addReply(c
,shared
.nullbulk
);
4710 robj
*ele
= dictGetEntryKey(de
);
4712 addReplyBulk(c
,ele
);
4713 dictDelete(set
->ptr
,ele
);
4714 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4715 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
// srandmemberCommand: sends a member chosen with dictGetRandomKey() from the
// set at c->argv[1] as a bulk reply; no removal call is visible in this
// function.
// lookupKeyReadOrReply() is given shared.nullbulk and the function returns on
// a NULL lookup or a non-zero checkType(); shared.nullbulk is also the
// visible reply on the no-entry path.
4720 static void srandmemberCommand(redisClient
*c
) {
4724 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4725 checkType(c
,set
,REDIS_SET
)) return;
4727 de
= dictGetRandomKey(set
->ptr
);
4729 addReply(c
,shared
.nullbulk
);
4731 robj
*ele
= dictGetEntryKey(de
);
4733 addReplyBulk(c
,ele
);
// qsortCompareSetsByCardinality: qsort() comparator over an array of dict
// pointers. s1 and s2 each point at an array slot (a dict*). The result is
// the difference of the two dictSize() values, so smaller sets sort first.
// NOTE(review): the size difference is narrowed to the int return type. If
// dictSize() yields a type wider than int, or an unsigned one, very large
// differences may not keep their sign. Confirm the dictSize() type.
4737 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4738 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4740 return dictSize(*d1
)-dictSize(*d2
);
// sinterGenericCommand: shared implementation behind sinterCommand
// (dstkey == NULL) and sinterstoreCommand (dstkey != NULL).
//   c        - client receiving the reply.
//   setskeys - array of key objects naming the input sets.
//   setsnum  - number of entries in setskeys.
//   dstkey   - key to store the result under, or NULL to stream the members.
// Visible flow: every key is looked up with lookupKeyWrite() or
// lookupKeyRead() and its dictionary is collected in dv[]. A non-set input
// gives shared.wrongtypeerr. dv[] is sorted by cardinality so the smallest
// set drives the iteration. Each of its members is checked with dictFind()
// against all the other sets. Matching members are sent as bulk replies or
// added to the result set.
// In store mode the old dstkey is deleted, then the result is registered if
// non-empty (reply: its size); otherwise it is released and shared.czero is
// sent. In streaming mode the placeholder 'lenobj' is filled afterwards with
// the "*<cardinality>" header.
// NOTE(review): the handling of a missing input key, the zfree() of dv and
// the conditions selecting store or streaming mode are on lines not visible
// in this extract.
4743 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4744 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4747 robj
*lenobj
= NULL
, *dstset
= NULL
;
4748 unsigned long j
, cardinality
= 0;
4750 for (j
= 0; j
< setsnum
; j
++) {
4754 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4755 lookupKeyRead(c
->db
,setskeys
[j
]);
4759 if (deleteKey(c
->db
,dstkey
))
4761 addReply(c
,shared
.czero
);
4763 addReply(c
,shared
.nullmultibulk
);
4767 if (setobj
->type
!= REDIS_SET
) {
4769 addReply(c
,shared
.wrongtypeerr
);
4772 dv
[j
] = setobj
->ptr
;
4774 /* Sort sets from the smallest to largest, this will improve our
4775 * algorithm's performance */
4776 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4778 /* The first thing we should output is the total number of elements...
4779 * since this is a multi-bulk write, but at this stage we don't know
4780 * the intersection set size, so we use a trick, append an empty object
4781 * to the output list and save the pointer to later modify it with the
4784 lenobj
= createObject(REDIS_STRING
,NULL
);
4786 decrRefCount(lenobj
);
4788 /* If we have a target key where to store the resulting set
4789 * create this key with an empty set inside */
4790 dstset
= createSetObject();
4793 /* Iterate all the elements of the first (smallest) set, and test
4794 * the element against all the other sets, if at least one set does
4795 * not include the element it is discarded */
4796 di
= dictGetIterator(dv
[0]);
4798 while((de
= dictNext(di
)) != NULL
) {
4801 for (j
= 1; j
< setsnum
; j
++)
4802 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4804 continue; /* at least one set does not contain the member */
4805 ele
= dictGetEntryKey(de
);
4807 addReplyBulk(c
,ele
);
4810 dictAdd(dstset
->ptr
,ele
,NULL
);
4814 dictReleaseIterator(di
);
4817 /* Store the resulting set into the target, if the intersection
4818 * is not an empty set. */
4819 deleteKey(c
->db
,dstkey
);
4820 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4821 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4822 incrRefCount(dstkey
);
4823 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4825 decrRefCount(dstset
);
4826 addReply(c
,shared
.czero
);
4830 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
// sinterCommand: calls sinterGenericCommand() on every argument after the
// command name (argv+1, argc-1 keys) with no destination key.
4835 static void sinterCommand(redisClient
*c
) {
4836 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
// sinterstoreCommand: calls sinterGenericCommand() with argv[1] as the
// destination key and the remaining arguments (argv+2, argc-2 keys) as the
// input sets.
4839 static void sinterstoreCommand(redisClient
*c
) {
4840 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
// Operation selectors. REDIS_OP_UNION and REDIS_OP_DIFF are passed as the
// 'op' argument of sunionDiffGenericCommand() by the SUNION and SDIFF
// wrappers below. No use of REDIS_OP_INTER is visible in this part of the
// file.
4843 #define REDIS_OP_UNION 0
4844 #define REDIS_OP_DIFF 1
4845 #define REDIS_OP_INTER 2
// sunionDiffGenericCommand: shared implementation behind the SUNION, SDIFF
// and their STORE variants.
//   c        - client receiving the reply.
//   setskeys - array of key objects naming the input sets.
//   setsnum  - number of entries in setskeys.
//   dstkey   - key to store the result under, or NULL to stream the members.
//   op       - REDIS_OP_UNION or REDIS_OP_DIFF.
// Visible flow: every key is looked up and its dictionary is collected in
// dv[]; a non-set input gives shared.wrongtypeerr. A temporary set is
// created. For a union, or for the first set of a diff, members are added
// with dictAdd(). For later sets of a diff, members are removed with
// dictDelete(). A diff stops early when the first set is missing or when
// the running cardinality reaches 0.
// Without a destination the result is streamed after a "*<cardinality>"
// header and the temporary set is released. With a destination the old key
// is deleted, then the set is registered if non-empty (reply: its size);
// otherwise it is released and shared.czero is sent.
// NOTE(review): the cardinality bookkeeping, the reference-count handling of
// members and the zfree() of dv are on lines not visible in this extract.
4847 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4848 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4851 robj
*dstset
= NULL
;
4852 int j
, cardinality
= 0;
4854 for (j
= 0; j
< setsnum
; j
++) {
4858 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4859 lookupKeyRead(c
->db
,setskeys
[j
]);
4864 if (setobj
->type
!= REDIS_SET
) {
4866 addReply(c
,shared
.wrongtypeerr
);
4869 dv
[j
] = setobj
->ptr
;
4872 /* We need a temp set object to store our union. If the dstkey
4873 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4874 * this set object will be the resulting object to set into the target key*/
4875 dstset
= createSetObject();
4877 /* Iterate all the elements of all the sets, add every element a single
4878 * time to the result set */
4879 for (j
= 0; j
< setsnum
; j
++) {
4880 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4881 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4883 di
= dictGetIterator(dv
[j
]);
4885 while((de
= dictNext(di
)) != NULL
) {
4888 /* dictAdd will not add the same element multiple times */
4889 ele
= dictGetEntryKey(de
);
4890 if (op
== REDIS_OP_UNION
|| j
== 0) {
4891 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4895 } else if (op
== REDIS_OP_DIFF
) {
4896 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4901 dictReleaseIterator(di
);
4903 /* result set is empty? Exit asap. */
4904 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4907 /* Output the content of the resulting set, if not in STORE mode */
4909 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4910 di
= dictGetIterator(dstset
->ptr
);
4911 while((de
= dictNext(di
)) != NULL
) {
4914 ele
= dictGetEntryKey(de
);
4915 addReplyBulk(c
,ele
);
4917 dictReleaseIterator(di
);
4918 decrRefCount(dstset
);
4920 /* If we have a target key where to store the resulting set
4921 * create this key with the result set inside */
4922 deleteKey(c
->db
,dstkey
);
4923 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4924 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4925 incrRefCount(dstkey
);
4926 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4928 decrRefCount(dstset
);
4929 addReply(c
,shared
.czero
);
// sunionCommand: calls sunionDiffGenericCommand() with REDIS_OP_UNION on the
// keys argv+1 (argc-1 of them) and no destination key.
4936 static void sunionCommand(redisClient
*c
) {
4937 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
// sunionstoreCommand: calls sunionDiffGenericCommand() with REDIS_OP_UNION,
// argv[1] as the destination key and argv+2 (argc-2 keys) as the inputs.
4940 static void sunionstoreCommand(redisClient
*c
) {
4941 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
// sdiffCommand: calls sunionDiffGenericCommand() with REDIS_OP_DIFF on the
// keys argv+1 (argc-1 of them) and no destination key.
4944 static void sdiffCommand(redisClient
*c
) {
4945 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
// sdiffstoreCommand: calls sunionDiffGenericCommand() with REDIS_OP_DIFF,
// argv[1] as the destination key and argv+2 (argc-2 keys) as the inputs.
4948 static void sdiffstoreCommand(redisClient
*c
) {
4949 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4952 /* ==================================== ZSets =============================== */
4954 /* ZSETs are ordered sets using two data structures to hold the same elements
4955 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4958 * The elements are added to a hash table mapping Redis objects to scores.
4959 * At the same time the elements are added to a skip list mapping scores
4960 * to Redis objects (so objects are sorted by scores in this "view"). */
4962 /* This skiplist implementation is almost a C translation of the original
4963 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4964 * Alternative to Balanced Trees", modified in three ways:
4965 * a) this implementation allows for repeated values.
4966 * b) the comparison is not just by key (our 'score') but by satellite data.
4967 * c) there is a back pointer, so it's a doubly linked list with the back
4968 * pointers being only at "level 1". This allows to traverse the list
4969 * from tail to head, useful for ZREVRANGE. */
// zslCreateNode: allocates a skiplist node.
//   level - number of forward pointers to allocate.
//   score, obj - presumably stored in the node on lines not visible here.
// The forward array holds 'level' pointers; the span array holds level-1
// unsigned ints, so the lowest level has no span entry.
// NOTE(review): what guards the span allocation when level is 1 is on a line
// not visible in this extract.
4971 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4972 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4974 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4976 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
// zslCreate: allocates a skiplist and its header node. The header is created
// with ZSKIPLIST_MAXLEVEL levels, score 0 and no object. Every forward
// pointer is set to NULL and every span entry to 0 (the span array has only
// ZSKIPLIST_MAXLEVEL-1 slots, hence the bound check). The header's backward
// pointer is NULL.
// NOTE(review): initialisation of level, length and tail, and the return
// statement, are on lines not visible in this extract.
4982 static zskiplist
*zslCreate(void) {
4986 zsl
= zmalloc(sizeof(*zsl
));
4989 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
4990 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
4991 zsl
->header
->forward
[j
] = NULL
;
4993 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
4994 if (j
< ZSKIPLIST_MAXLEVEL
-1)
4995 zsl
->header
->span
[j
] = 0;
4997 zsl
->header
->backward
= NULL
;
// zslFreeNode: drops the node's reference on its object and frees the
// forward-pointer array.
// NOTE(review): freeing of the span array and of the node itself is
// presumably on lines not visible in this extract.
5002 static void zslFreeNode(zskiplistNode
*node
) {
5003 decrRefCount(node
->obj
);
5004 zfree(node
->forward
);
// zslFree: releases a skiplist. The first real node is read from the header
// before the header's forward and span arrays are freed; the remaining nodes
// are then walked through their level-0 forward pointers.
// NOTE(review): the loop header, the per-node free call and the freeing of
// the header and list structs are on lines not visible in this extract.
5009 static void zslFree(zskiplist
*zsl
) {
5010 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5012 zfree(zsl
->header
->forward
);
5013 zfree(zsl
->header
->span
);
5016 next
= node
->forward
[0];
// zslRandomLevel: picks a level for a new skiplist node. The loop continues
// while the low 16 bits of random() fall below ZSKIPLIST_P * 0xFFFF.
// NOTE(review): the loop body, presumably a level increment, and the return
// statement are on lines not visible in this extract.
5023 static int zslRandomLevel(void) {
5025 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
// zslInsert: inserts a new node with the given score and object.
//   zsl   - skiplist to modify.
//   score - sort key; ties are ordered with compareStringObjects().
//   obj   - object stored in the node. The caller must make sure it is not
//           already present (see the comment in the body).
// Visible flow:
//   1. Walk from the top level down, recording in rank[i] how many nodes are
//      crossed at each level. span[i-1] holds the width of a level-i link;
//      level 0 links count as 1.
//   2. Pick a random level. For levels above the current list level the
//      header becomes the predecessor and its span is set to the list length.
//   3. Create the node and splice it in after update[i] on every level it
//      occupies, splitting the predecessor's span using rank[0] - rank[i].
//   4. Levels above the new node only get their span incremented.
//   5. Fix the backward pointers of the new node and of its successor.
// NOTE(review): the update[i] assignment in step 1, the rank[i]
// initialisation in step 2, the i > 0 guard in step 3 and the tail, length
// and level updates are on lines not visible in this extract.
5030 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5031 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5032 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5036 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5037 /* store rank that is crossed to reach the insert position */
5038 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5040 while (x
->forward
[i
] &&
5041 (x
->forward
[i
]->score
< score
||
5042 (x
->forward
[i
]->score
== score
&&
5043 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5044 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5049 /* we assume the key is not already inside, since we allow duplicated
5050 * scores, and the re-insertion of score and redis object should never
5051 * happen since the caller of zslInsert() should test in the hash table
5052 * if the element is already inside or not. */
5053 level
= zslRandomLevel();
5054 if (level
> zsl
->level
) {
5055 for (i
= zsl
->level
; i
< level
; i
++) {
5057 update
[i
] = zsl
->header
;
5058 update
[i
]->span
[i
-1] = zsl
->length
;
5062 x
= zslCreateNode(level
,score
,obj
);
5063 for (i
= 0; i
< level
; i
++) {
5064 x
->forward
[i
] = update
[i
]->forward
[i
];
5065 update
[i
]->forward
[i
] = x
;
5067 /* update span covered by update[i] as x is inserted here */
5069 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5070 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5074 /* increment span for untouched levels */
5075 for (i
= level
; i
< zsl
->level
; i
++) {
5076 update
[i
]->span
[i
-1]++;
// A node inserted right after the header has no backward neighbour.
5079 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5081 x
->forward
[0]->backward
= x
;
5087 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
// zslDeleteNode: unlinks node x from the skiplist.
//   zsl    - skiplist to modify.
//   x      - node to unlink.
//   update - per-level predecessors, as computed by the callers' search
//            loops.
// For every level where update[i] links directly to x, the predecessor
// absorbs x's span (minus the node itself) and skips over x. On the other
// levels the predecessor's span only shrinks by one. The successor's
// backward pointer, or zsl->tail when x was last, is pointed at x's
// predecessor. The trailing loop runs while the top level of the header is
// empty.
// NOTE(review): unlike its neighbours this function is not declared static.
// The i > 0 guard, the level decrement in the last loop and the length
// update are on lines not visible in this extract. Freeing the node is left
// to the callers; no free call is visible here.
5088 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5090 for (i
= 0; i
< zsl
->level
; i
++) {
5091 if (update
[i
]->forward
[i
] == x
) {
5093 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5095 update
[i
]->forward
[i
] = x
->forward
[i
];
5097 /* invariant: i > 0, because update[0]->forward[0]
5098 * is always equal to x */
5099 update
[i
]->span
[i
-1] -= 1;
5102 if (x
->forward
[0]) {
5103 x
->forward
[0]->backward
= x
->backward
;
5105 zsl
->tail
= x
->backward
;
5107 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5112 /* Delete an element with matching score/object from the skiplist. */
// zslDelete: removes the node matching both score and object.
//   zsl   - skiplist to modify.
//   score - score of the element to remove.
//   obj   - object of the element to remove.
// Returns 0 when no matching node is found. The search uses the same
// ordering as zslInsert(): score first, then compareStringObjects(). The
// candidate is unlinked with zslDeleteNode() only when both its score and
// its object compare equal.
// NOTE(review): the update[] bookkeeping, the step to the candidate node,
// the node free and the success return value are on lines not visible in
// this extract.
5113 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5114 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5118 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5119 while (x
->forward
[i
] &&
5120 (x
->forward
[i
]->score
< score
||
5121 (x
->forward
[i
]->score
== score
&&
5122 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5126 /* We may have multiple elements with the same score, what we need
5127 * is to find the element with both the right score and object. */
5129 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5130 zslDeleteNode(zsl
, x
, update
);
5134 return 0; /* not found */
5136 return 0; /* not found */
5139 /* Delete all the elements with score between min and max from the skiplist.
5140 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5141 * Note that this function takes the reference to the hash table view of the
5142 * sorted set, in order to remove the elements from the hash table too. */
// zslDeleteRangeByScore: removes nodes by score range.
//   zsl  - skiplist to modify.
//   min  - the search skips nodes whose score is below min.
//   max  - deletion continues while the node's score is <= max.
//   dict - hash-table view of the same sorted set; each removed object is
//          also deleted from it.
// Returns the 'removed' counter.
// NOTE(review): the update[] bookkeeping, the node free, the counter
// increment and the advance to 'next' are on lines not visible in this
// extract. The in-body comment about "the right score and object" looks
// copied from zslDelete() and does not describe this function.
5143 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5144 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5145 unsigned long removed
= 0;
5149 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5150 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5154 /* We may have multiple elements with the same score, what we need
5155 * is to find the element with both the right score and object. */
5157 while (x
&& x
->score
<= max
) {
// The successor is saved first: x is unlinked by the call below.
5158 zskiplistNode
*next
= x
->forward
[0];
5159 zslDeleteNode(zsl
, x
, update
);
5160 dictDelete(dict
,x
->obj
);
5165 return removed
; /* value of the 'removed' counter */
5168 /* Delete all the elements with rank between start and end from the skiplist.
5169 * Start and end are inclusive. Note that start and end need to be 1-based */
// zslDeleteRangeByRank: removes nodes by rank range.
//   zsl   - skiplist to modify.
//   start - 1-based rank of the first node to remove.
//   end   - 1-based rank of the last node to remove (inclusive).
//   dict  - hash-table view of the same sorted set; each removed object is
//           also deleted from it.
// The search advances while the accumulated rank stays below 'start', using
// span[i-1] as the width of a level-i link and 1 for level 0. Deletion then
// continues while 'traversed' is <= end.
// NOTE(review): the update[] bookkeeping, the increments of 'traversed' and
// 'removed' inside the deletion loop, the node free and the return statement
// are on lines not visible in this extract.
5170 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5171 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5172 unsigned long traversed
= 0, removed
= 0;
5176 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5177 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5178 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5186 while (x
&& traversed
<= end
) {
5187 zskiplistNode
*next
= x
->forward
[0];
5188 zslDeleteNode(zsl
, x
, update
);
5189 dictDelete(dict
,x
->obj
);
5198 /* Find the first node having a score equal or greater than the specified one.
5199 * Returns NULL if there is no match. */
// zslFirstWithScore: searches from the top level down, advancing while the
// next node's score is below 'score', and returns the level-0 successor of
// the node where the search stopped. That successor is NULL when every node
// has a smaller score.
// NOTE(review): the initialisation of x and the advance statement are on
// lines not visible in this extract.
5200 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5205 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5206 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5209 /* We may have multiple elements with the same score, what we need
5210 * is to find the element with both the right score and object. */
5211 return x
->forward
[0];
5214 /* Find the rank for an element by both score and key.
5215 * Returns 0 when the element cannot be found, rank otherwise.
5216 * Note that the rank is 1-based due to the span of zsl->header to the
// zslGetRank: computes the rank of the element identified by score and
// object.
//   zsl   - skiplist to search.
//   score - score of the element.
//   o     - object of the element.
// The search accumulates link widths in 'rank' (span[i-1] for level i, 1 for
// level 0). It advances over nodes that sort before or equal to the target
// (note the <= 0 object comparison), so it can stop on the element itself,
// which is then recognised by comparing objects.
// NOTE(review): the advance statement and both return statements are on
// lines not visible in this extract.
5218 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5220 unsigned long rank
= 0;
5224 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5225 while (x
->forward
[i
] &&
5226 (x
->forward
[i
]->score
< score
||
5227 (x
->forward
[i
]->score
== score
&&
5228 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5229 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5233 /* x might be equal to zsl->header, so test if obj is non-NULL */
5234 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5241 /* Finds an element by its rank. The rank argument needs to be 1-based. */
// zslGetElementByRank: locates a node by its 1-based rank.
//   zsl  - skiplist to search.
//   rank - 1-based rank of the wanted node.
// The search advances while the accumulated width ('traversed') does not
// exceed 'rank', using span[i-1] for level i and 1 for level 0. It then
// tests whether 'traversed' equals 'rank'.
// NOTE(review): this function is not declared static. The advance statement
// and the return statements are on lines not visible in this extract.
5242 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5244 unsigned long traversed
= 0;
5248 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5249 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5251 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5254 if (traversed
== rank
) {
5261 /* The actual Z-commands implementations */
5263 /* This generic command implements both ZADD and ZINCRBY.
5264 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5265 * the increment if the operation is a ZINCRBY (doincrement == 1). */
// zaddGenericCommand: shared implementation behind zaddCommand
// (doincrement == 0) and zincrbyCommand (doincrement == 1).
//   c           - client receiving the reply.
//   key         - sorted-set key.
//   ele         - member to add or update.
//   scoreval    - the new score, or the increment to apply.
//   doincrement - selects increment semantics.
// Visible flow: the sorted set is looked up and created if missing; a
// non-zset object gives shared.wrongtypeerr. A heap-allocated double holds
// the new score; on the visible increment path it is the old score plus
// scoreval.
//   - New member (dictAdd() == DICT_OK): the element is inserted into the
//     skiplist and gains one reference for the hash table and one for the
//     skiplist.
//   - Existing member: when the score changed, the element is deleted from
//     and re-inserted into the skiplist, and the hash-table value is
//     replaced.
// Visible replies: the new score as a double, shared.cone (new-member path)
// and shared.czero (update path).
// NOTE(review): the incrRefCount on key after creation, the non-increment
// assignment of *score, the release of an unused score allocation and the
// conditions choosing between the replies are on lines not visible in this
// extract.
5266 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5271 zsetobj
= lookupKeyWrite(c
->db
,key
);
5272 if (zsetobj
== NULL
) {
5273 zsetobj
= createZsetObject();
5274 dictAdd(c
->db
->dict
,key
,zsetobj
);
5277 if (zsetobj
->type
!= REDIS_ZSET
) {
5278 addReply(c
,shared
.wrongtypeerr
);
5284 /* Ok now since we implement both ZADD and ZINCRBY here the code
5285 * needs to handle the two different conditions. It's all about setting
5286 * '*score', that is, the new score to set, to the right value. */
5287 score
= zmalloc(sizeof(double));
5291 /* Read the old score. If the element was not present starts from 0 */
5292 de
= dictFind(zs
->dict
,ele
);
5294 double *oldscore
= dictGetEntryVal(de
);
5295 *score
= *oldscore
+ scoreval
;
5303 /* What follows is a simple remove and re-insert operation that is common
5304 * to both ZADD and ZINCRBY... */
5305 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5306 /* case 1: New element */
5307 incrRefCount(ele
); /* added to hash */
5308 zslInsert(zs
->zsl
,*score
,ele
);
5309 incrRefCount(ele
); /* added to skiplist */
5312 addReplyDouble(c
,*score
);
5314 addReply(c
,shared
.cone
);
5319 /* case 2: Score update operation */
5320 de
= dictFind(zs
->dict
,ele
);
5321 redisAssert(de
!= NULL
);
5322 oldscore
= dictGetEntryVal(de
);
5323 if (*score
!= *oldscore
) {
5326 /* Remove and insert the element in the skip list with new score */
5327 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5328 redisAssert(deleted
!= 0);
5329 zslInsert(zs
->zsl
,*score
,ele
);
5331 /* Update the score in the hash table */
5332 dictReplace(zs
->dict
,ele
,score
);
5338 addReplyDouble(c
,*score
);
5340 addReply(c
,shared
.czero
);
// zaddCommand: parses the score from argv[2] with strtod() and calls
// zaddGenericCommand() with key argv[1], member argv[3] and doincrement = 0.
// NOTE(review): strtod() is called with a NULL end pointer, so malformed
// numeric input is not detected here.
5344 static void zaddCommand(redisClient
*c
) {
5347 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5348 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
// zincrbyCommand: parses the increment from argv[2] with strtod() and calls
// zaddGenericCommand() with key argv[1], member argv[3] and doincrement = 1.
// NOTE(review): strtod() is called with a NULL end pointer, so malformed
// numeric input is not detected here.
5351 static void zincrbyCommand(redisClient
*c
) {
5354 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5355 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
// zremCommand: removes member c->argv[2] from the sorted set at c->argv[1].
// lookupKeyWriteOrReply() is given shared.czero and the function returns on a
// NULL lookup or a non-zero checkType(); shared.czero is also the visible
// reply on the member-not-found path. Otherwise the member's score is read
// from the hash table and used to delete it from the skiplist (asserted to
// succeed). The member is then deleted from the hash table, which is resized
// if htNeedsResize() says so. The key is deleted once the set is empty and
// shared.cone is sent.
5358 static void zremCommand(redisClient
*c
) {
5365 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5366 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5369 de
= dictFind(zs
->dict
,c
->argv
[2]);
5371 addReply(c
,shared
.czero
);
5374 /* Delete from the skiplist */
5375 oldscore
= dictGetEntryVal(de
);
5376 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5377 redisAssert(deleted
!= 0);
5379 /* Delete from the hash table */
5380 dictDelete(zs
->dict
,c
->argv
[2]);
5381 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5382 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5384 addReply(c
,shared
.cone
);
// zremrangebyscoreCommand: removes the members of the sorted set at
// c->argv[1] whose score lies in the range given by argv[2] (min) and
// argv[3] (max), both parsed with strtod().
// The removal is delegated to zslDeleteRangeByScore(). Afterwards the hash
// table is resized if htNeedsResize() says so, the key is deleted once the
// set is empty, server.dirty grows by the number of removed members, and
// that number is the reply.
// lookupKeyWriteOrReply() is given shared.czero for the lookup.
5387 static void zremrangebyscoreCommand(redisClient
*c
) {
5388 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5389 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5394 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5395 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5398 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5399 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5400 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5401 server
.dirty
+= deleted
;
5402 addReplyLong(c
,deleted
);
// zremrangebyrankCommand: removes the members of the sorted set at
// c->argv[1] whose 0-based rank lies between argv[2] (start) and argv[3]
// (end), both parsed with atoi().
// Index handling, as visible below: negative indexes are offset by the
// skiplist length and clamped to 0; start > end or start >= llen gives
// shared.czero; end is clamped to llen-1. The range is passed to
// zslDeleteRangeByRank() shifted by one because that function uses 1-based
// ranks. Afterwards the hash table is resized if htNeedsResize() says so,
// the key is deleted once the set is empty, server.dirty grows by the number
// of removed members, and that number is the reply.
// lookupKeyWriteOrReply() is given shared.czero for the lookup.
5405 static void zremrangebyrankCommand(redisClient
*c
) {
5406 int start
= atoi(c
->argv
[2]->ptr
);
5407 int end
= atoi(c
->argv
[3]->ptr
);
5413 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5414 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5416 llen
= zs
->zsl
->length
;
5418 /* convert negative indexes */
5419 if (start
< 0) start
= llen
+start
;
5420 if (end
< 0) end
= llen
+end
;
5421 if (start
< 0) start
= 0;
5422 if (end
< 0) end
= 0;
5424 /* indexes sanity checks */
5425 if (start
> end
|| start
>= llen
) {
5426 addReply(c
,shared
.czero
);
5429 if (end
>= llen
) end
= llen
-1;
5431 /* increment start and end because zsl*Rank functions
5432 * use 1-based rank */
5433 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5434 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5435 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5436 server
.dirty
+= deleted
;
5437 addReplyLong(c
, deleted
);
// qsortCompareZsetopsrcByCardinality: qsort() comparator over zsetopsrc
// entries. An entry whose dict pointer is NULL counts as size 0; otherwise
// its size is dictSize() of that dict. Smaller inputs sort first.
// NOTE(review): size1 and size2 are unsigned long and their difference is
// returned as int. When size1 < size2 the subtraction wraps, and the
// conversion to int is implementation-defined; on platforms where unsigned
// long is wider than int a large difference can lose its sign or become 0.
// An explicit three-way comparison would avoid relying on that conversion.
5445 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5446 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5447 unsigned long size1
, size2
;
5448 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5449 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5450 return size1
- size2
;
5453 #define REDIS_AGGR_SUM 1
5454 #define REDIS_AGGR_MIN 2
5455 #define REDIS_AGGR_MAX 3
5457 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5458 if (aggregate
== REDIS_AGGR_SUM
) {
5459 *target
= *target
+ val
;
5460 } else if (aggregate
== REDIS_AGGR_MIN
) {
5461 *target
= val
< *target
? val
: *target
;
5462 } else if (aggregate
== REDIS_AGGR_MAX
) {
5463 *target
= val
> *target
? val
: *target
;
5466 redisAssert(0 != 0);
5470 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5472 int aggregate
= REDIS_AGGR_SUM
;
5479 /* expect zsetnum input keys to be given */
5480 zsetnum
= atoi(c
->argv
[2]->ptr
);
5482 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5486 /* test if the expected number of keys would overflow */
5487 if (3+zsetnum
> c
->argc
) {
5488 addReply(c
,shared
.syntaxerr
);
5492 /* read keys to be used for input */
5493 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5494 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5495 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5499 if (zsetobj
->type
!= REDIS_ZSET
) {
5501 addReply(c
,shared
.wrongtypeerr
);
5504 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5507 /* default all weights to 1 */
5508 src
[i
].weight
= 1.0;
5511 /* parse optional extra arguments */
5513 int remaining
= c
->argc
- j
;
5516 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5518 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5519 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5521 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5523 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5524 aggregate
= REDIS_AGGR_SUM
;
5525 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5526 aggregate
= REDIS_AGGR_MIN
;
5527 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5528 aggregate
= REDIS_AGGR_MAX
;
5531 addReply(c
,shared
.syntaxerr
);
5537 addReply(c
,shared
.syntaxerr
);
5543 /* sort sets from the smallest to largest, this will improve our
5544 * algorithm's performance */
5545 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5547 dstobj
= createZsetObject();
5548 dstzset
= dstobj
->ptr
;
5550 if (op
== REDIS_OP_INTER
) {
5551 /* skip going over all entries if the smallest zset is NULL or empty */
5552 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5553 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5554 * from small to large, all src[i > 0].dict are non-empty too */
5555 di
= dictGetIterator(src
[0].dict
);
5556 while((de
= dictNext(di
)) != NULL
) {
5557 double *score
= zmalloc(sizeof(double)), value
;
5558 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5560 for (j
= 1; j
< zsetnum
; j
++) {
5561 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5563 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5564 zunionInterAggregate(score
, value
, aggregate
);
5570 /* skip entry when not present in every source dict */
5574 robj
*o
= dictGetEntryKey(de
);
5575 dictAdd(dstzset
->dict
,o
,score
);
5576 incrRefCount(o
); /* added to dictionary */
5577 zslInsert(dstzset
->zsl
,*score
,o
);
5578 incrRefCount(o
); /* added to skiplist */
5581 dictReleaseIterator(di
);
5583 } else if (op
== REDIS_OP_UNION
) {
5584 for (i
= 0; i
< zsetnum
; i
++) {
5585 if (!src
[i
].dict
) continue;
5587 di
= dictGetIterator(src
[i
].dict
);
5588 while((de
= dictNext(di
)) != NULL
) {
5589 /* skip key when already processed */
5590 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5592 double *score
= zmalloc(sizeof(double)), value
;
5593 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5595 /* because the zsets are sorted by size, its only possible
5596 * for sets at larger indices to hold this entry */
5597 for (j
= (i
+1); j
< zsetnum
; j
++) {
5598 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5600 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5601 zunionInterAggregate(score
, value
, aggregate
);
5605 robj
*o
= dictGetEntryKey(de
);
5606 dictAdd(dstzset
->dict
,o
,score
);
5607 incrRefCount(o
); /* added to dictionary */
5608 zslInsert(dstzset
->zsl
,*score
,o
);
5609 incrRefCount(o
); /* added to skiplist */
5611 dictReleaseIterator(di
);
5614 /* unknown operator */
5615 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5618 deleteKey(c
->db
,dstkey
);
5619 if (dstzset
->zsl
->length
) {
5620 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5621 incrRefCount(dstkey
);
5622 addReplyLong(c
, dstzset
->zsl
->length
);
5625 decrRefCount(dstzset
);
5626 addReply(c
, shared
.czero
);
5631 static void zunionCommand(redisClient
*c
) {
5632 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5635 static void zinterCommand(redisClient
*c
) {
5636 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5639 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5641 int start
= atoi(c
->argv
[2]->ptr
);
5642 int end
= atoi(c
->argv
[3]->ptr
);
5651 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5653 } else if (c
->argc
>= 5) {
5654 addReply(c
,shared
.syntaxerr
);
5658 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5659 checkType(c
,o
,REDIS_ZSET
)) return;
5664 /* convert negative indexes */
5665 if (start
< 0) start
= llen
+start
;
5666 if (end
< 0) end
= llen
+end
;
5667 if (start
< 0) start
= 0;
5668 if (end
< 0) end
= 0;
5670 /* indexes sanity checks */
5671 if (start
> end
|| start
>= llen
) {
5672 /* Out of range start or start > end result in empty list */
5673 addReply(c
,shared
.emptymultibulk
);
5676 if (end
>= llen
) end
= llen
-1;
5677 rangelen
= (end
-start
)+1;
5679 /* check if starting point is trivial, before searching
5680 * the element in log(N) time */
5682 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5685 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5688 /* Return the result in form of a multi-bulk reply */
5689 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5690 withscores
? (rangelen
*2) : rangelen
));
5691 for (j
= 0; j
< rangelen
; j
++) {
5693 addReplyBulk(c
,ele
);
5695 addReplyDouble(c
,ln
->score
);
5696 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5700 static void zrangeCommand(redisClient
*c
) {
5701 zrangeGenericCommand(c
,0);
5704 static void zrevrangeCommand(redisClient
*c
) {
5705 zrangeGenericCommand(c
,1);
5708 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5709 * If justcount is non-zero, just the count is returned. */
5710 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5713 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5714 int offset
= 0, limit
= -1;
5718 /* Parse the min-max interval. If one of the values is prefixed
5719 * by the "(" character, it's considered "open". For instance
5720 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5721 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5722 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5723 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5726 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5728 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5729 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5732 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5735 /* Parse "WITHSCORES": note that if the command was called with
5736 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5737 * enter the following paths to parse WITHSCORES and LIMIT. */
5738 if (c
->argc
== 5 || c
->argc
== 8) {
5739 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5744 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5748 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5753 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5754 addReply(c
,shared
.syntaxerr
);
5756 } else if (c
->argc
== (7 + withscores
)) {
5757 offset
= atoi(c
->argv
[5]->ptr
);
5758 limit
= atoi(c
->argv
[6]->ptr
);
5759 if (offset
< 0) offset
= 0;
5762 /* Ok, lookup the key and get the range */
5763 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5765 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5767 if (o
->type
!= REDIS_ZSET
) {
5768 addReply(c
,shared
.wrongtypeerr
);
5770 zset
*zsetobj
= o
->ptr
;
5771 zskiplist
*zsl
= zsetobj
->zsl
;
5773 robj
*ele
, *lenobj
= NULL
;
5774 unsigned long rangelen
= 0;
5776 /* Get the first node with the score >= min, or with
5777 * score > min if 'minex' is true. */
5778 ln
= zslFirstWithScore(zsl
,min
);
5779 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5782 /* No element matching the specified interval */
5783 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5787 /* We don't know in advance how many matching elements there
5788 * are in the list, so we push this object that will represent
5789 * the multi-bulk length in the output buffer, and will "fix"
5792 lenobj
= createObject(REDIS_STRING
,NULL
);
5794 decrRefCount(lenobj
);
5797 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5800 ln
= ln
->forward
[0];
5803 if (limit
== 0) break;
5806 addReplyBulk(c
,ele
);
5808 addReplyDouble(c
,ln
->score
);
5810 ln
= ln
->forward
[0];
5812 if (limit
> 0) limit
--;
5815 addReplyLong(c
,(long)rangelen
);
5817 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5818 withscores
? (rangelen
*2) : rangelen
);
5824 static void zrangebyscoreCommand(redisClient
*c
) {
5825 genericZrangebyscoreCommand(c
,0);
5828 static void zcountCommand(redisClient
*c
) {
5829 genericZrangebyscoreCommand(c
,1);
5832 static void zcardCommand(redisClient
*c
) {
5836 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5837 checkType(c
,o
,REDIS_ZSET
)) return;
5840 addReplyUlong(c
,zs
->zsl
->length
);
5843 static void zscoreCommand(redisClient
*c
) {
5848 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5849 checkType(c
,o
,REDIS_ZSET
)) return;
5852 de
= dictFind(zs
->dict
,c
->argv
[2]);
5854 addReply(c
,shared
.nullbulk
);
5856 double *score
= dictGetEntryVal(de
);
5858 addReplyDouble(c
,*score
);
5862 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5870 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5871 checkType(c
,o
,REDIS_ZSET
)) return;
5875 de
= dictFind(zs
->dict
,c
->argv
[2]);
5877 addReply(c
,shared
.nullbulk
);
5881 score
= dictGetEntryVal(de
);
5882 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5885 addReplyLong(c
, zsl
->length
- rank
);
5887 addReplyLong(c
, rank
-1);
5890 addReply(c
,shared
.nullbulk
);
5894 static void zrankCommand(redisClient
*c
) {
5895 zrankGenericCommand(c
, 0);
5898 static void zrevrankCommand(redisClient
*c
) {
5899 zrankGenericCommand(c
, 1);
5902 /* =================================== Hashes =============================== */
5903 static void hsetCommand(redisClient
*c
) {
5905 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5908 o
= createHashObject();
5909 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5910 incrRefCount(c
->argv
[1]);
5912 if (o
->type
!= REDIS_HASH
) {
5913 addReply(c
,shared
.wrongtypeerr
);
5917 /* We want to convert the zipmap into an hash table right now if the
5918 * entry to be added is too big. Note that we check if the object
5919 * is integer encoded before to try fetching the length in the test below.
5920 * This is because integers are small, but currently stringObjectLen()
5921 * performs a slow conversion: not worth it. */
5922 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5923 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5924 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5925 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5926 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5928 convertToRealHash(o
);
5931 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5932 unsigned char *zm
= o
->ptr
;
5933 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5935 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5936 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5937 decrRefCount(valobj
);
5940 /* And here there is the second check for hash conversion...
5941 * we want to do it only if the operation was not just an update as
5942 * zipmapLen() is O(N). */
5943 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5944 convertToRealHash(o
);
5946 tryObjectEncoding(c
->argv
[2]);
5947 /* note that c->argv[3] is already encoded, as the latest arg
5948 * of a bulk command is always integer encoded if possible. */
5949 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5950 incrRefCount(c
->argv
[2]);
5954 incrRefCount(c
->argv
[3]);
5957 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5960 static void hincrbyCommand(redisClient
*c
) {
5962 long long value
= 0, incr
= 0;
5963 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5966 o
= createHashObject();
5967 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5968 incrRefCount(c
->argv
[1]);
5970 if (o
->type
!= REDIS_HASH
) {
5971 addReply(c
,shared
.wrongtypeerr
);
5976 robj
*o_incr
= getDecodedObject(c
->argv
[3]);
5977 incr
= strtoll(o_incr
->ptr
, NULL
, 10);
5978 decrRefCount(o_incr
);
5980 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5981 unsigned char *zm
= o
->ptr
;
5982 unsigned char *zval
;
5985 /* Find value if already present in hash */
5986 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5988 /* strtoll needs the char* to have a trailing \0, but
5989 * the zipmap doesn't include them. */
5990 sds szval
= sdsnewlen(zval
, zvlen
);
5991 value
= strtoll(szval
,NULL
,10);
5996 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
5997 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5998 (unsigned char*)svalue
,sdslen(svalue
),&update
);
6002 /* Check if the zipmap needs to be converted
6003 * if this was not an update. */
6004 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6005 convertToRealHash(o
);
6010 /* Find value if already present in hash */
6011 de
= dictFind(o
->ptr
,c
->argv
[2]);
6013 hval
= dictGetEntryVal(de
);
6014 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6015 value
= strtoll(hval
->ptr
,NULL
,10);
6016 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6017 value
= (long)hval
->ptr
;
6019 redisAssert(1 != 1);
6023 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6024 tryObjectEncoding(hval
);
6025 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6026 incrRefCount(c
->argv
[2]);
6031 addReplyLong(c
, value
);
6034 static void hgetCommand(redisClient
*c
) {
6037 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6038 checkType(c
,o
,REDIS_HASH
)) return;
6040 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6041 unsigned char *zm
= o
->ptr
;
6046 field
= getDecodedObject(c
->argv
[2]);
6047 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6048 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6049 addReplySds(c
,sdsnewlen(val
,vlen
));
6050 addReply(c
,shared
.crlf
);
6051 decrRefCount(field
);
6054 addReply(c
,shared
.nullbulk
);
6055 decrRefCount(field
);
6059 struct dictEntry
*de
;
6061 de
= dictFind(o
->ptr
,c
->argv
[2]);
6063 addReply(c
,shared
.nullbulk
);
6065 robj
*e
= dictGetEntryVal(de
);
6072 static void hdelCommand(redisClient
*c
) {
6076 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6077 checkType(c
,o
,REDIS_HASH
)) return;
6079 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6080 robj
*field
= getDecodedObject(c
->argv
[2]);
6082 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6083 (unsigned char*) field
->ptr
,
6084 sdslen(field
->ptr
), &deleted
);
6085 decrRefCount(field
);
6086 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6087 deleteKey(c
->db
,c
->argv
[1]);
6089 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6090 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6091 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6093 if (deleted
) server
.dirty
++;
6094 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6097 static void hlenCommand(redisClient
*c
) {
6101 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6102 checkType(c
,o
,REDIS_HASH
)) return;
6104 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6105 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6106 addReplyUlong(c
,len
);
6109 #define REDIS_GETALL_KEYS 1
6110 #define REDIS_GETALL_VALS 2
6111 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6113 unsigned long count
= 0;
6115 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6116 || checkType(c
,o
,REDIS_HASH
)) return;
6118 lenobj
= createObject(REDIS_STRING
,NULL
);
6120 decrRefCount(lenobj
);
6122 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6123 unsigned char *p
= zipmapRewind(o
->ptr
);
6124 unsigned char *field
, *val
;
6125 unsigned int flen
, vlen
;
6127 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6130 if (flags
& REDIS_GETALL_KEYS
) {
6131 aux
= createStringObject((char*)field
,flen
);
6132 addReplyBulk(c
,aux
);
6136 if (flags
& REDIS_GETALL_VALS
) {
6137 aux
= createStringObject((char*)val
,vlen
);
6138 addReplyBulk(c
,aux
);
6144 dictIterator
*di
= dictGetIterator(o
->ptr
);
6147 while((de
= dictNext(di
)) != NULL
) {
6148 robj
*fieldobj
= dictGetEntryKey(de
);
6149 robj
*valobj
= dictGetEntryVal(de
);
6151 if (flags
& REDIS_GETALL_KEYS
) {
6152 addReplyBulk(c
,fieldobj
);
6155 if (flags
& REDIS_GETALL_VALS
) {
6156 addReplyBulk(c
,valobj
);
6160 dictReleaseIterator(di
);
6162 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6165 static void hkeysCommand(redisClient
*c
) {
6166 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6169 static void hvalsCommand(redisClient
*c
) {
6170 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6173 static void hgetallCommand(redisClient
*c
) {
6174 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6177 static void hexistsCommand(redisClient
*c
) {
6181 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6182 checkType(c
,o
,REDIS_HASH
)) return;
6184 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6186 unsigned char *zm
= o
->ptr
;
6188 field
= getDecodedObject(c
->argv
[2]);
6189 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6190 decrRefCount(field
);
6192 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6194 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6197 static void convertToRealHash(robj
*o
) {
6198 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6199 unsigned int klen
, vlen
;
6200 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6202 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6203 p
= zipmapRewind(zm
);
6204 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6205 robj
*keyobj
, *valobj
;
6207 keyobj
= createStringObject((char*)key
,klen
);
6208 valobj
= createStringObject((char*)val
,vlen
);
6209 tryObjectEncoding(keyobj
);
6210 tryObjectEncoding(valobj
);
6211 dictAdd(dict
,keyobj
,valobj
);
6213 o
->encoding
= REDIS_ENCODING_HT
;
6218 /* ========================= Non type-specific commands ==================== */
6220 static void flushdbCommand(redisClient
*c
) {
6221 server
.dirty
+= dictSize(c
->db
->dict
);
6222 dictEmpty(c
->db
->dict
);
6223 dictEmpty(c
->db
->expires
);
6224 addReply(c
,shared
.ok
);
6227 static void flushallCommand(redisClient
*c
) {
6228 server
.dirty
+= emptyDb();
6229 addReply(c
,shared
.ok
);
6230 if (server
.bgsavechildpid
!= -1) {
6231 kill(server
.bgsavechildpid
,SIGKILL
);
6232 rdbRemoveTempFile(server
.bgsavechildpid
);
6234 rdbSave(server
.dbfilename
);
6238 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6239 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6241 so
->pattern
= pattern
;
6245 /* Return the value associated to the key with a name obtained
6246 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
6247 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6251 int prefixlen
, sublen
, postfixlen
;
6252 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6256 char buf
[REDIS_SORTKEY_MAX
+1];
6259 /* If the pattern is "#" return the substitution object itself in order
6260 * to implement the "SORT ... GET #" feature. */
6261 spat
= pattern
->ptr
;
6262 if (spat
[0] == '#' && spat
[1] == '\0') {
6266 /* The substitution object may be specially encoded. If so we create
6267 * a decoded object on the fly. Otherwise getDecodedObject will just
6268 * increment the ref count, that we'll decrement later. */
6269 subst
= getDecodedObject(subst
);
6272 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6273 p
= strchr(spat
,'*');
6275 decrRefCount(subst
);
6280 sublen
= sdslen(ssub
);
6281 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6282 memcpy(keyname
.buf
,spat
,prefixlen
);
6283 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6284 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6285 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6286 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6288 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6289 decrRefCount(subst
);
6291 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6292 return lookupKeyRead(db
,&keyobj
);
6295 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6296 * the additional parameter is not standard but BSD-specific, we have to
6297 * pass sorting parameters via the global 'server' structure */
6298 static int sortCompare(const void *s1
, const void *s2
) {
6299 const redisSortObject
*so1
= s1
, *so2
= s2
;
6302 if (!server
.sort_alpha
) {
6303 /* Numeric sorting. Here it's trivial as we precomputed scores */
6304 if (so1
->u
.score
> so2
->u
.score
) {
6306 } else if (so1
->u
.score
< so2
->u
.score
) {
6312 /* Alphanumeric sorting */
6313 if (server
.sort_bypattern
) {
6314 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6315 /* At least one compare object is NULL */
6316 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6318 else if (so1
->u
.cmpobj
== NULL
)
6323 /* We have both the objects, use strcoll */
6324 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6327 /* Compare elements directly */
6330 dec1
= getDecodedObject(so1
->obj
);
6331 dec2
= getDecodedObject(so2
->obj
);
6332 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6337 return server
.sort_desc
? -cmp
: cmp
;
6340 /* The SORT command is the most complex command in Redis. Warning: this code
6341 * is optimized for speed and a bit less for readability */
6342 static void sortCommand(redisClient
*c
) {
6345 int desc
= 0, alpha
= 0;
6346 int limit_start
= 0, limit_count
= -1, start
, end
;
6347 int j
, dontsort
= 0, vectorlen
;
6348 int getop
= 0; /* GET operation counter */
6349 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6350 redisSortObject
*vector
; /* Resulting vector to sort */
6352 /* Lookup the key to sort. It must be of the right types */
6353 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6354 if (sortval
== NULL
) {
6355 addReply(c
,shared
.nullmultibulk
);
6358 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6359 sortval
->type
!= REDIS_ZSET
)
6361 addReply(c
,shared
.wrongtypeerr
);
6365 /* Create a list of operations to perform for every sorted element.
6366 * Operations can be GET/DEL/INCR/DECR */
6367 operations
= listCreate();
6368 listSetFreeMethod(operations
,zfree
);
6371 /* Now we need to protect sortval incrementing its count, in the future
6372 * SORT may have options able to overwrite/delete keys during the sorting
6373 * and the sorted key itself may get destroyed */
6374 incrRefCount(sortval
);
6376 /* The SORT command has an SQL-alike syntax, parse it */
6377 while(j
< c
->argc
) {
6378 int leftargs
= c
->argc
-j
-1;
6379 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6381 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6383 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6385 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6386 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6387 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6389 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6390 storekey
= c
->argv
[j
+1];
6392 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6393 sortby
= c
->argv
[j
+1];
6394 /* If the BY pattern does not contain '*', i.e. it is constant,
6395 * we don't need to sort nor to lookup the weight keys. */
6396 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6398 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6399 listAddNodeTail(operations
,createSortOperation(
6400 REDIS_SORT_GET
,c
->argv
[j
+1]));
6404 decrRefCount(sortval
);
6405 listRelease(operations
);
6406 addReply(c
,shared
.syntaxerr
);
6412 /* Load the sorting vector with all the objects to sort */
6413 switch(sortval
->type
) {
6414 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6415 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6416 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6417 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6419 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6422 if (sortval
->type
== REDIS_LIST
) {
6423 list
*list
= sortval
->ptr
;
6427 listRewind(list
,&li
);
6428 while((ln
= listNext(&li
))) {
6429 robj
*ele
= ln
->value
;
6430 vector
[j
].obj
= ele
;
6431 vector
[j
].u
.score
= 0;
6432 vector
[j
].u
.cmpobj
= NULL
;
6440 if (sortval
->type
== REDIS_SET
) {
6443 zset
*zs
= sortval
->ptr
;
6447 di
= dictGetIterator(set
);
6448 while((setele
= dictNext(di
)) != NULL
) {
6449 vector
[j
].obj
= dictGetEntryKey(setele
);
6450 vector
[j
].u
.score
= 0;
6451 vector
[j
].u
.cmpobj
= NULL
;
6454 dictReleaseIterator(di
);
6456 redisAssert(j
== vectorlen
);
6458 /* Now it's time to load the right scores in the sorting vector */
6459 if (dontsort
== 0) {
6460 for (j
= 0; j
< vectorlen
; j
++) {
6464 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6465 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6467 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6469 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6470 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6472 /* Don't need to decode the object if it's
6473 * integer-encoded (the only encoding supported) so
6474 * far. We can just cast it */
6475 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6476 vector
[j
].u
.score
= (long)byval
->ptr
;
6478 redisAssert(1 != 1);
6483 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6484 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6486 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6487 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6489 redisAssert(1 != 1);
6496 /* We are ready to sort the vector... perform a bit of sanity check
6497 * on the LIMIT option too. We'll use a partial version of quicksort. */
6498 start
= (limit_start
< 0) ? 0 : limit_start
;
6499 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6500 if (start
>= vectorlen
) {
6501 start
= vectorlen
-1;
6504 if (end
>= vectorlen
) end
= vectorlen
-1;
6506 if (dontsort
== 0) {
6507 server
.sort_desc
= desc
;
6508 server
.sort_alpha
= alpha
;
6509 server
.sort_bypattern
= sortby
? 1 : 0;
6510 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6511 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6513 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6516 /* Send command output to the output buffer, performing the specified
6517 * GET/DEL/INCR/DECR operations if any. */
6518 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6519 if (storekey
== NULL
) {
6520 /* STORE option not specified, send the sorting result to client */
6521 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6522 for (j
= start
; j
<= end
; j
++) {
6526 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6527 listRewind(operations
,&li
);
6528 while((ln
= listNext(&li
))) {
6529 redisSortOperation
*sop
= ln
->value
;
6530 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6533 if (sop
->type
== REDIS_SORT_GET
) {
6534 if (!val
|| val
->type
!= REDIS_STRING
) {
6535 addReply(c
,shared
.nullbulk
);
6537 addReplyBulk(c
,val
);
6540 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6545 robj
*listObject
= createListObject();
6546 list
*listPtr
= (list
*) listObject
->ptr
;
6548 /* STORE option specified, set the sorting result as a List object */
6549 for (j
= start
; j
<= end
; j
++) {
6554 listAddNodeTail(listPtr
,vector
[j
].obj
);
6555 incrRefCount(vector
[j
].obj
);
6557 listRewind(operations
,&li
);
6558 while((ln
= listNext(&li
))) {
6559 redisSortOperation
*sop
= ln
->value
;
6560 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6563 if (sop
->type
== REDIS_SORT_GET
) {
6564 if (!val
|| val
->type
!= REDIS_STRING
) {
6565 listAddNodeTail(listPtr
,createStringObject("",0));
6567 listAddNodeTail(listPtr
,val
);
6571 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6575 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6576 incrRefCount(storekey
);
6578 /* Note: we add 1 because the DB is dirty anyway since even if the
6579 * SORT result is empty a new key is set and maybe the old content
6581 server
.dirty
+= 1+outputlen
;
6582 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6586 decrRefCount(sortval
);
6587 listRelease(operations
);
6588 for (j
= 0; j
< vectorlen
; j
++) {
6589 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6590 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger values
 * are divided by the biggest power of 1024 not exceeding them and printed
 * with two decimals followed by the unit letter (K, M, G, T, P, E).
 *
 * Every input writes a NUL terminated string into 's'. Amounts of 1 TiB and
 * more used to fall through every branch and leave 's' untouched. The output
 * never exceeds 8 characters plus the terminator ("1024.00G" style), which
 * was already the longest string the function could produce. */
static void bytesToHuman(char *s, unsigned long long n) {
    const char *unit = "KMGTPE";
    unsigned long long divisor = 1024ULL;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* Move to the next unit while the value is still >= 1024 of the current
     * one. The scan stops at 'E' (2^60): a 64 bit amount cannot reach the
     * following unit, so 'divisor' never overflows. */
    while (unit[1] != '\0' && n/divisor >= 1024) {
        divisor *= 1024;
        unit++;
    }
    sprintf(s,"%.2f%c",(double)n/divisor,*unit);
}
6616 /* Create the string returned by the INFO command. This is decoupled
6617 * from the INFO command itself as we need to report the same information
6618 * on memory corruption problems. */
6619 static sds
genRedisInfoString(void) {
6621 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6625 bytesToHuman(hmem
,zmalloc_used_memory());
6626 info
= sdscatprintf(sdsempty(),
6627 "redis_version:%s\r\n"
6629 "multiplexing_api:%s\r\n"
6630 "process_id:%ld\r\n"
6631 "uptime_in_seconds:%ld\r\n"
6632 "uptime_in_days:%ld\r\n"
6633 "connected_clients:%d\r\n"
6634 "connected_slaves:%d\r\n"
6635 "blocked_clients:%d\r\n"
6636 "used_memory:%zu\r\n"
6637 "used_memory_human:%s\r\n"
6638 "changes_since_last_save:%lld\r\n"
6639 "bgsave_in_progress:%d\r\n"
6640 "last_save_time:%ld\r\n"
6641 "bgrewriteaof_in_progress:%d\r\n"
6642 "total_connections_received:%lld\r\n"
6643 "total_commands_processed:%lld\r\n"
6644 "expired_keys:%lld\r\n"
6645 "hash_max_zipmap_entries:%ld\r\n"
6646 "hash_max_zipmap_value:%ld\r\n"
6650 (sizeof(long) == 8) ? "64" : "32",
6655 listLength(server
.clients
)-listLength(server
.slaves
),
6656 listLength(server
.slaves
),
6657 server
.blpop_blocked_clients
,
6658 zmalloc_used_memory(),
6661 server
.bgsavechildpid
!= -1,
6663 server
.bgrewritechildpid
!= -1,
6664 server
.stat_numconnections
,
6665 server
.stat_numcommands
,
6666 server
.stat_expiredkeys
,
6667 server
.hash_max_zipmap_entries
,
6668 server
.hash_max_zipmap_value
,
6669 server
.vm_enabled
!= 0,
6670 server
.masterhost
== NULL
? "master" : "slave"
6672 if (server
.masterhost
) {
6673 info
= sdscatprintf(info
,
6674 "master_host:%s\r\n"
6675 "master_port:%d\r\n"
6676 "master_link_status:%s\r\n"
6677 "master_last_io_seconds_ago:%d\r\n"
6680 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6682 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6685 if (server
.vm_enabled
) {
6687 info
= sdscatprintf(info
,
6688 "vm_conf_max_memory:%llu\r\n"
6689 "vm_conf_page_size:%llu\r\n"
6690 "vm_conf_pages:%llu\r\n"
6691 "vm_stats_used_pages:%llu\r\n"
6692 "vm_stats_swapped_objects:%llu\r\n"
6693 "vm_stats_swappin_count:%llu\r\n"
6694 "vm_stats_swappout_count:%llu\r\n"
6695 "vm_stats_io_newjobs_len:%lu\r\n"
6696 "vm_stats_io_processing_len:%lu\r\n"
6697 "vm_stats_io_processed_len:%lu\r\n"
6698 "vm_stats_io_active_threads:%lu\r\n"
6699 "vm_stats_blocked_clients:%lu\r\n"
6700 ,(unsigned long long) server
.vm_max_memory
,
6701 (unsigned long long) server
.vm_page_size
,
6702 (unsigned long long) server
.vm_pages
,
6703 (unsigned long long) server
.vm_stats_used_pages
,
6704 (unsigned long long) server
.vm_stats_swapped_objects
,
6705 (unsigned long long) server
.vm_stats_swapins
,
6706 (unsigned long long) server
.vm_stats_swapouts
,
6707 (unsigned long) listLength(server
.io_newjobs
),
6708 (unsigned long) listLength(server
.io_processing
),
6709 (unsigned long) listLength(server
.io_processed
),
6710 (unsigned long) server
.io_active_threads
,
6711 (unsigned long) server
.vm_blocked_clients
6715 for (j
= 0; j
< server
.dbnum
; j
++) {
6716 long long keys
, vkeys
;
6718 keys
= dictSize(server
.db
[j
].dict
);
6719 vkeys
= dictSize(server
.db
[j
].expires
);
6720 if (keys
|| vkeys
) {
6721 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6728 static void infoCommand(redisClient
*c
) {
6729 sds info
= genRedisInfoString();
6730 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6731 (unsigned long)sdslen(info
)));
6732 addReplySds(c
,info
);
6733 addReply(c
,shared
.crlf
);
6736 static void monitorCommand(redisClient
*c
) {
6737 /* ignore MONITOR if already slave or in monitor mode */
6738 if (c
->flags
& REDIS_SLAVE
) return;
6740 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6742 listAddNodeTail(server
.monitors
,c
);
6743 addReply(c
,shared
.ok
);
6746 /* ================================= Expire ================================= */
6747 static int removeExpire(redisDb
*db
, robj
*key
) {
6748 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6755 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6756 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6764 /* Return the expire time of the specified key, or -1 if no expire
6765 * is associated with this key (i.e. the key is non volatile) */
6766 static time_t getExpire(redisDb
*db
, robj
*key
) {
6769 /* No expire? return ASAP */
6770 if (dictSize(db
->expires
) == 0 ||
6771 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6773 return (time_t) dictGetEntryVal(de
);
6776 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6780 /* No expire? return ASAP */
6781 if (dictSize(db
->expires
) == 0 ||
6782 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6784 /* Lookup the expire */
6785 when
= (time_t) dictGetEntryVal(de
);
6786 if (time(NULL
) <= when
) return 0;
6788 /* Delete the key */
6789 dictDelete(db
->expires
,key
);
6790 server
.stat_expiredkeys
++;
6791 return dictDelete(db
->dict
,key
) == DICT_OK
;
6794 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6797 /* No expire? return ASAP */
6798 if (dictSize(db
->expires
) == 0 ||
6799 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6801 /* Delete the key */
6803 server
.stat_expiredkeys
++;
6804 dictDelete(db
->expires
,key
);
6805 return dictDelete(db
->dict
,key
) == DICT_OK
;
6808 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6811 de
= dictFind(c
->db
->dict
,key
);
6813 addReply(c
,shared
.czero
);
6817 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6818 addReply(c
, shared
.cone
);
6821 time_t when
= time(NULL
)+seconds
;
6822 if (setExpire(c
->db
,key
,when
)) {
6823 addReply(c
,shared
.cone
);
6826 addReply(c
,shared
.czero
);
6832 static void expireCommand(redisClient
*c
) {
6833 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6836 static void expireatCommand(redisClient
*c
) {
6837 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6840 static void ttlCommand(redisClient
*c
) {
6844 expire
= getExpire(c
->db
,c
->argv
[1]);
6846 ttl
= (int) (expire
-time(NULL
));
6847 if (ttl
< 0) ttl
= -1;
6849 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6852 /* ================================ MULTI/EXEC ============================== */
6854 /* Client state initialization for MULTI/EXEC */
6855 static void initClientMultiState(redisClient
*c
) {
6856 c
->mstate
.commands
= NULL
;
6857 c
->mstate
.count
= 0;
6860 /* Release all the resources associated with MULTI/EXEC state */
6861 static void freeClientMultiState(redisClient
*c
) {
6864 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6866 multiCmd
*mc
= c
->mstate
.commands
+j
;
6868 for (i
= 0; i
< mc
->argc
; i
++)
6869 decrRefCount(mc
->argv
[i
]);
6872 zfree(c
->mstate
.commands
);
6875 /* Add a new command into the MULTI commands queue */
6876 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6880 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6881 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6882 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6885 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6886 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6887 for (j
= 0; j
< c
->argc
; j
++)
6888 incrRefCount(mc
->argv
[j
]);
6892 static void multiCommand(redisClient
*c
) {
6893 c
->flags
|= REDIS_MULTI
;
6894 addReply(c
,shared
.ok
);
6897 static void discardCommand(redisClient
*c
) {
6898 if (!(c
->flags
& REDIS_MULTI
)) {
6899 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6903 freeClientMultiState(c
);
6904 initClientMultiState(c
);
6905 c
->flags
&= (~REDIS_MULTI
);
6906 addReply(c
,shared
.ok
);
6909 static void execCommand(redisClient
*c
) {
6914 if (!(c
->flags
& REDIS_MULTI
)) {
6915 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6919 orig_argv
= c
->argv
;
6920 orig_argc
= c
->argc
;
6921 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6922 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6923 c
->argc
= c
->mstate
.commands
[j
].argc
;
6924 c
->argv
= c
->mstate
.commands
[j
].argv
;
6925 call(c
,c
->mstate
.commands
[j
].cmd
);
6927 c
->argv
= orig_argv
;
6928 c
->argc
= orig_argc
;
6929 freeClientMultiState(c
);
6930 initClientMultiState(c
);
6931 c
->flags
&= (~REDIS_MULTI
);
6934 /* =========================== Blocking Operations ========================= */
6936 /* Currently Redis blocking operations support is limited to list POP ops,
6937 * so the current implementation is not fully generic, but it is also not
6938 * completely specific so it will not require a rewrite to support new
6939 * kind of blocking operations in the future.
6941 * Still it's important to note that list blocking operations can be already
6942 * used as a notification mechanism in order to implement other blocking
6943 * operations at application level, so there must be a very strong evidence
6944 * of usefulness and generality before new blocking operations are implemented.
6946 * This is how the current blocking POP works, we use BLPOP as example:
6947 * - If the user calls BLPOP and the key exists and contains a non empty list
6948 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6949 * if there is no need to block.
6950 * - If instead BLPOP is called and the key does not exist or the list is
6951 * empty we need to block. In order to do so we remove the notification for
6952 * new data to read in the client socket (so that we'll not serve new
6953 * requests if the blocking request is not served). Also we put the client
6954 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6955 * blocking for these keys.
6956 * - If a PUSH operation against a key with blocked clients waiting is
6957 * performed, we serve the first in the list: basically instead of pushing
6958 * the new element inside the list we return it to the (first / oldest)
6959 * blocking client, unblock the client, and remove it from the list.
6961 * The above comment and the source code should be enough in order to understand
6962 * the implementation and modify / fix it later.
6965 /* Set a client in blocking mode for the specified key, with the specified
6967 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6972 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6973 c
->blockingkeysnum
= numkeys
;
6974 c
->blockingto
= timeout
;
6975 for (j
= 0; j
< numkeys
; j
++) {
6976 /* Add the key in the client structure, to map clients -> keys */
6977 c
->blockingkeys
[j
] = keys
[j
];
6978 incrRefCount(keys
[j
]);
6980 /* And in the other "side", to map keys -> clients */
6981 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
6985 /* For every key we take a list of clients blocked for it */
6987 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
6988 incrRefCount(keys
[j
]);
6989 assert(retval
== DICT_OK
);
6991 l
= dictGetEntryVal(de
);
6993 listAddNodeTail(l
,c
);
6995 /* Mark the client as a blocked client */
6996 c
->flags
|= REDIS_BLOCKED
;
6997 server
.blpop_blocked_clients
++;
7000 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7001 static void unblockClientWaitingData(redisClient
*c
) {
7006 assert(c
->blockingkeys
!= NULL
);
7007 /* The client may wait for multiple keys, so unblock it for every key. */
7008 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7009 /* Remove this client from the list of clients waiting for this key. */
7010 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7012 l
= dictGetEntryVal(de
);
7013 listDelNode(l
,listSearchKey(l
,c
));
7014 /* If the list is empty we need to remove it to avoid wasting memory */
7015 if (listLength(l
) == 0)
7016 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7017 decrRefCount(c
->blockingkeys
[j
]);
7019 /* Cleanup the client structure */
7020 zfree(c
->blockingkeys
);
7021 c
->blockingkeys
= NULL
;
7022 c
->flags
&= (~REDIS_BLOCKED
);
7023 server
.blpop_blocked_clients
--;
7024 /* We want to process data if there is some command waiting
7025 * in the input buffer. Note that this is safe even if
7026 * unblockClientWaitingData() gets called from freeClient() because
7027 * freeClient() will be smart enough to call this function
7028 * *after* c->querybuf was set to NULL. */
7029 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7032 /* This should be called from any function PUSHing into lists.
7033 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7034 * 'ele' is the element pushed.
7036 * If the function returns 0 there was no client waiting for a list push
7039 * If the function returns 1 there was a client waiting for a list push
7040 * against this key, the element was passed to this client thus it's not
7041 * needed to actually add it to the list and the caller should return asap. */
7042 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7043 struct dictEntry
*de
;
7044 redisClient
*receiver
;
7048 de
= dictFind(c
->db
->blockingkeys
,key
);
7049 if (de
== NULL
) return 0;
7050 l
= dictGetEntryVal(de
);
7053 receiver
= ln
->value
;
7055 addReplySds(receiver
,sdsnew("*2\r\n"));
7056 addReplyBulk(receiver
,key
);
7057 addReplyBulk(receiver
,ele
);
7058 unblockClientWaitingData(receiver
);
7062 /* Blocking RPOP/LPOP */
7063 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7068 for (j
= 1; j
< c
->argc
-1; j
++) {
7069 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7071 if (o
->type
!= REDIS_LIST
) {
7072 addReply(c
,shared
.wrongtypeerr
);
7075 list
*list
= o
->ptr
;
7076 if (listLength(list
) != 0) {
7077 /* If the list contains elements fall back to the usual
7078 * non-blocking POP operation */
7079 robj
*argv
[2], **orig_argv
;
7082 /* We need to alter the command arguments before to call
7083 * popGenericCommand() as the command takes a single key. */
7084 orig_argv
= c
->argv
;
7085 orig_argc
= c
->argc
;
7086 argv
[1] = c
->argv
[j
];
7090 /* Also the return value is different, we need to output
7091 * the multi bulk reply header and the key name. The
7092 * "real" command will add the last element (the value)
7093 * for us. If this sounds like a hack to you it's just
7094 * because it is... */
7095 addReplySds(c
,sdsnew("*2\r\n"));
7096 addReplyBulk(c
,argv
[1]);
7097 popGenericCommand(c
,where
);
7099 /* Fix the client structure with the original stuff */
7100 c
->argv
= orig_argv
;
7101 c
->argc
= orig_argc
;
7107 /* If the list is empty or the key does not exists we must block */
7108 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7109 if (timeout
> 0) timeout
+= time(NULL
);
7110 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7113 static void blpopCommand(redisClient
*c
) {
7114 blockingPopGenericCommand(c
,REDIS_HEAD
);
7117 static void brpopCommand(redisClient
*c
) {
7118 blockingPopGenericCommand(c
,REDIS_TAIL
);
7121 /* =============================== Replication ============================= */
7123 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7124 ssize_t nwritten
, ret
= size
;
7125 time_t start
= time(NULL
);
7129 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7130 nwritten
= write(fd
,ptr
,size
);
7131 if (nwritten
== -1) return -1;
7135 if ((time(NULL
)-start
) > timeout
) {
7143 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7144 ssize_t nread
, totread
= 0;
7145 time_t start
= time(NULL
);
7149 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7150 nread
= read(fd
,ptr
,size
);
7151 if (nread
== -1) return -1;
7156 if ((time(NULL
)-start
) > timeout
) {
7164 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7171 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7174 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7185 static void syncCommand(redisClient
*c
) {
7186 /* ignore SYNC if already slave or in monitor mode */
7187 if (c
->flags
& REDIS_SLAVE
) return;
7189 /* SYNC can't be issued when the server has pending data to send to
7190 * the client about already issued commands. We need a fresh reply
7191 * buffer registering the differences between the BGSAVE and the current
7192 * dataset, so that we can copy to other slaves if needed. */
7193 if (listLength(c
->reply
) != 0) {
7194 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7198 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7199 /* Here we need to check if there is a background saving operation
7200 * in progress, or if it is required to start one */
7201 if (server
.bgsavechildpid
!= -1) {
7202 /* Ok a background save is in progress. Let's check if it is a good
7203 * one for replication, i.e. if there is another slave that is
7204 * registering differences since the server forked to save */
7209 listRewind(server
.slaves
,&li
);
7210 while((ln
= listNext(&li
))) {
7212 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7215 /* Perfect, the server is already registering differences for
7216 * another slave. Set the right state, and copy the buffer. */
7217 listRelease(c
->reply
);
7218 c
->reply
= listDup(slave
->reply
);
7219 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7220 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7222 /* No way, we need to wait for the next BGSAVE in order to
7223 * register differences */
7224 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7225 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7228 /* Ok we don't have a BGSAVE in progress, let's start one */
7229 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7230 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7231 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7232 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7235 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7238 c
->flags
|= REDIS_SLAVE
;
7240 listAddNodeTail(server
.slaves
,c
);
7244 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7245 redisClient
*slave
= privdata
;
7247 REDIS_NOTUSED(mask
);
7248 char buf
[REDIS_IOBUF_LEN
];
7249 ssize_t nwritten
, buflen
;
7251 if (slave
->repldboff
== 0) {
7252 /* Write the bulk write count before transferring the DB. In theory here
7253 * we don't know how much room there is in the output buffer of the
7254 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7255 * operations) will never be smaller than the few bytes we need. */
7258 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7260 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7268 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7269 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7271 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7272 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7276 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7277 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7282 slave
->repldboff
+= nwritten
;
7283 if (slave
->repldboff
== slave
->repldbsize
) {
7284 close(slave
->repldbfd
);
7285 slave
->repldbfd
= -1;
7286 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7287 slave
->replstate
= REDIS_REPL_ONLINE
;
7288 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7289 sendReplyToClient
, slave
) == AE_ERR
) {
7293 addReplySds(slave
,sdsempty());
7294 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7298 /* This function is called at the end of every background saving.
7299 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7300 * otherwise REDIS_ERR is passed to the function.
7302 * The goal of this function is to handle slaves waiting for a successful
7303 * background saving in order to perform non-blocking synchronization. */
7304 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7306 int startbgsave
= 0;
7309 listRewind(server
.slaves
,&li
);
7310 while((ln
= listNext(&li
))) {
7311 redisClient
*slave
= ln
->value
;
7313 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7315 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7316 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7317 struct redis_stat buf
;
7319 if (bgsaveerr
!= REDIS_OK
) {
7321 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7324 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7325 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7327 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7330 slave
->repldboff
= 0;
7331 slave
->repldbsize
= buf
.st_size
;
7332 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7333 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7334 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7341 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7344 listRewind(server
.slaves
,&li
);
7345 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7346 while((ln
= listNext(&li
))) {
7347 redisClient
*slave
= ln
->value
;
7349 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7356 static int syncWithMaster(void) {
7357 char buf
[1024], tmpfile
[256], authcmd
[1024];
7359 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7360 int dfd
, maxtries
= 5;
7363 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7368 /* AUTH with the master if required. */
7369 if(server
.masterauth
) {
7370 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7371 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7373 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7377 /* Read the AUTH result. */
7378 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7380 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7384 if (buf
[0] != '+') {
7386 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7391 /* Issue the SYNC command */
7392 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7394 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7398 /* Read the bulk write count */
7399 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7401 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7405 if (buf
[0] != '$') {
7407 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7410 dumpsize
= strtol(buf
+1,NULL
,10);
7411 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7412 /* Read the bulk write data on a temp file */
7414 snprintf(tmpfile
,256,
7415 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7416 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7417 if (dfd
!= -1) break;
7422 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7426 int nread
, nwritten
;
7428 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7430 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7436 nwritten
= write(dfd
,buf
,nread
);
7437 if (nwritten
== -1) {
7438 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7446 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7447 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7453 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7454 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7458 server
.master
= createClient(fd
);
7459 server
.master
->flags
|= REDIS_MASTER
;
7460 server
.master
->authenticated
= 1;
7461 server
.replstate
= REDIS_REPL_CONNECTED
;
7465 static void slaveofCommand(redisClient
*c
) {
7466 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7467 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7468 if (server
.masterhost
) {
7469 sdsfree(server
.masterhost
);
7470 server
.masterhost
= NULL
;
7471 if (server
.master
) freeClient(server
.master
);
7472 server
.replstate
= REDIS_REPL_NONE
;
7473 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7476 sdsfree(server
.masterhost
);
7477 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7478 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7479 if (server
.master
) freeClient(server
.master
);
7480 server
.replstate
= REDIS_REPL_CONNECT
;
7481 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7482 server
.masterhost
, server
.masterport
);
7484 addReply(c
,shared
.ok
);
7487 /* ============================ Maxmemory directive ======================== */
7489 /* Try to free one object from the pre-allocated objects free list.
7490 * This is useful under low mem conditions as by default we take 1 million
7491 * free objects allocated. On success REDIS_OK is returned, otherwise
7493 static int tryFreeOneObjectFromFreelist(void) {
7496 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7497 if (listLength(server
.objfreelist
)) {
7498 listNode
*head
= listFirst(server
.objfreelist
);
7499 o
= listNodeValue(head
);
7500 listDelNode(server
.objfreelist
,head
);
7501 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7505 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7510 /* This function gets called when 'maxmemory' is set on the config file to limit
7511 * the max memory used by the server, and we are out of memory.
7512 * This function will try to, in order:
7514 * - Free objects from the free list
7515 * - Try to remove keys with an EXPIRE set
7517 * If it is not possible to free enough memory to reach used-memory < maxmemory
7518 * the server will start refusing commands that will enlarge even more the
7521 static void freeMemoryIfNeeded(void) {
7522 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7523 int j
, k
, freed
= 0;
7525 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7526 for (j
= 0; j
< server
.dbnum
; j
++) {
7528 robj
*minkey
= NULL
;
7529 struct dictEntry
*de
;
7531 if (dictSize(server
.db
[j
].expires
)) {
7533 /* From a sample of three keys drop the one nearest to
7534 * the natural expire */
7535 for (k
= 0; k
< 3; k
++) {
7538 de
= dictGetRandomKey(server
.db
[j
].expires
);
7539 t
= (time_t) dictGetEntryVal(de
);
7540 if (minttl
== -1 || t
< minttl
) {
7541 minkey
= dictGetEntryKey(de
);
7545 deleteKey(server
.db
+j
,minkey
);
7548 if (!freed
) return; /* nothing to free... */
7552 /* ============================== Append Only file ========================== */
7554 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7555 sds buf
= sdsempty();
7561 /* The DB this command was targeting is not the same as the last command
7562 * we appended. Issuing a SELECT command is needed. */
7563 if (dictid
!= server
.appendseldb
) {
7566 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7567 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7568 (unsigned long)strlen(seldb
),seldb
);
7569 server
.appendseldb
= dictid
;
7572 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7573 * EXPIREs into EXPIREATs calls */
7574 if (cmd
->proc
== expireCommand
) {
7577 tmpargv
[0] = createStringObject("EXPIREAT",8);
7578 tmpargv
[1] = argv
[1];
7579 incrRefCount(argv
[1]);
7580 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7581 tmpargv
[2] = createObject(REDIS_STRING
,
7582 sdscatprintf(sdsempty(),"%ld",when
));
7586 /* Append the actual command */
7587 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7588 for (j
= 0; j
< argc
; j
++) {
7591 o
= getDecodedObject(o
);
7592 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7593 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7594 buf
= sdscatlen(buf
,"\r\n",2);
7598 /* Free the objects from the modified argv for EXPIREAT */
7599 if (cmd
->proc
== expireCommand
) {
7600 for (j
= 0; j
< 3; j
++)
7601 decrRefCount(argv
[j
]);
7604 /* We want to perform a single write. This should be guaranteed atomic
7605 * at least if the filesystem we are writing is a real physical one.
7606 * While this will save us against the server being killed I don't think
7607 * there is much to do about the whole server stopping for power problems
7609 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7610 if (nwritten
!= (signed)sdslen(buf
)) {
7611 /* Ooops, we are in trouble. The best thing to do for now is
7612 * to simply exit instead of giving the illusion that everything is
7613 * working as expected. */
7614 if (nwritten
== -1) {
7615 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7617 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7621 /* If a background append only file rewriting is in progress we want to
7622 * accumulate the differences between the child DB and the current one
7623 * in a buffer, so that when the child process will do its work we
7624 * can append the differences to the new append only file. */
7625 if (server
.bgrewritechildpid
!= -1)
7626 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7630 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7631 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7632 now
-server
.lastfsync
> 1))
7634 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7635 server
.lastfsync
= now
;
7639 /* In Redis commands are always executed in the context of a client, so in
7640 * order to load the append only file we need to create a fake client. */
7641 static struct redisClient
*createFakeClient(void) {
7642 struct redisClient
*c
= zmalloc(sizeof(*c
));
7646 c
->querybuf
= sdsempty();
7650 /* We set the fake client as a slave waiting for the synchronization
7651 * so that Redis will not try to send replies to this client. */
7652 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7653 c
->reply
= listCreate();
7654 listSetFreeMethod(c
->reply
,decrRefCount
);
7655 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7659 static void freeFakeClient(struct redisClient
*c
) {
7660 sdsfree(c
->querybuf
);
7661 listRelease(c
->reply
);
7665 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7666 * error (the append only file is zero-length) REDIS_ERR is returned. On
7667 * fatal error an error message is logged and the program exits. */
7668 int loadAppendOnlyFile(char *filename
) {
7669 struct redisClient
*fakeClient
;
7670 FILE *fp
= fopen(filename
,"r");
7671 struct redis_stat sb
;
7672 unsigned long long loadedkeys
= 0;
7674 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7678 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7682 fakeClient
= createFakeClient();
7689 struct redisCommand
*cmd
;
7691 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7697 if (buf
[0] != '*') goto fmterr
;
7699 argv
= zmalloc(sizeof(robj
*)*argc
);
7700 for (j
= 0; j
< argc
; j
++) {
7701 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7702 if (buf
[0] != '$') goto fmterr
;
7703 len
= strtol(buf
+1,NULL
,10);
7704 argsds
= sdsnewlen(NULL
,len
);
7705 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7706 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7707 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7710 /* Command lookup */
7711 cmd
= lookupCommand(argv
[0]->ptr
);
7713 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7716 /* Try object sharing and encoding */
7717 if (server
.shareobjects
) {
7719 for(j
= 1; j
< argc
; j
++)
7720 argv
[j
] = tryObjectSharing(argv
[j
]);
7722 if (cmd
->flags
& REDIS_CMD_BULK
)
7723 tryObjectEncoding(argv
[argc
-1]);
7724 /* Run the command in the context of a fake client */
7725 fakeClient
->argc
= argc
;
7726 fakeClient
->argv
= argv
;
7727 cmd
->proc(fakeClient
);
7728 /* Discard the reply objects list from the fake client */
7729 while(listLength(fakeClient
->reply
))
7730 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7731 /* Clean up, ready for the next command */
7732 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7734 /* Handle swapping while loading big datasets when VM is on */
7736 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7737 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7738 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7743 freeFakeClient(fakeClient
);
7748 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7750 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7754 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7758 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7759 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7763 /* Avoid the incr/decr ref count business if possible to help
7764 * copy-on-write (we are often in a child process when this function
7766 * Also makes sure that key objects don't get incrRefCount-ed when VM
7768 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7769 obj
= getDecodedObject(obj
);
7772 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7773 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7774 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7776 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7777 if (decrrc
) decrRefCount(obj
);
7780 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload; the bytes are written verbatim, so
 * embedded NUL characters are fine.
 *
 * Returns 1 on success, 0 if any of the fwrite() calls fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned, so it has to be formatted with %lu. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* Skip the payload write for empty strings: fwrite() of zero bytes
     * returns 0, which would be mistaken for an error. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the "%.17g" representation of 'd'.
 * Returns 1 on success, 0 if a fwrite() fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t plen;

    /* The payload buffer already carries its trailing CRLF, so the count
     * announced in the header is its length minus those two bytes. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    plen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)plen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,plen,1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * The payload is the decimal representation of 'l'.
 * Returns 1 on success, 0 if a fwrite() fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t plen;

    /* The payload buffer already carries its trailing CRLF, so the count
     * announced in the header is its length minus those two bytes. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    plen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)plen-2);

    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,plen,1,fp) == 0) return 0;
    return 1;
}
7818 /* Write a sequence of commands able to fully rebuild the dataset into
7819 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7820 static int rewriteAppendOnlyFile(char *filename
) {
7821 dictIterator
*di
= NULL
;
7826 time_t now
= time(NULL
);
7828 /* Note that we have to use a different temp name here compared to the
7829 * one used by rewriteAppendOnlyFileBackground() function. */
7830 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7831 fp
= fopen(tmpfile
,"w");
7833 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7836 for (j
= 0; j
< server
.dbnum
; j
++) {
7837 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7838 redisDb
*db
= server
.db
+j
;
7840 if (dictSize(d
) == 0) continue;
7841 di
= dictGetIterator(d
);
7847 /* SELECT the new DB */
7848 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7849 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7851 /* Iterate this DB writing every entry */
7852 while((de
= dictNext(di
)) != NULL
) {
7857 key
= dictGetEntryKey(de
);
7858 /* If the value for this key is swapped, load a preview in memory.
7859 * We use a "swapped" flag to remember if we need to free the
7860 * value object instead to just increment the ref count anyway
7861 * in order to avoid copy-on-write of pages if we are forked() */
7862 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7863 key
->storage
== REDIS_VM_SWAPPING
) {
7864 o
= dictGetEntryVal(de
);
7867 o
= vmPreviewObject(key
);
7870 expiretime
= getExpire(db
,key
);
7872 /* Save the key and associated value */
7873 if (o
->type
== REDIS_STRING
) {
7874 /* Emit a SET command */
7875 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7876 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7878 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7879 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7880 } else if (o
->type
== REDIS_LIST
) {
7881 /* Emit the RPUSHes needed to rebuild the list */
7882 list
*list
= o
->ptr
;
7886 listRewind(list
,&li
);
7887 while((ln
= listNext(&li
))) {
7888 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7889 robj
*eleobj
= listNodeValue(ln
);
7891 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7892 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7893 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7895 } else if (o
->type
== REDIS_SET
) {
7896 /* Emit the SADDs needed to rebuild the set */
7898 dictIterator
*di
= dictGetIterator(set
);
7901 while((de
= dictNext(di
)) != NULL
) {
7902 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7903 robj
*eleobj
= dictGetEntryKey(de
);
7905 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7906 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7907 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7909 dictReleaseIterator(di
);
7910 } else if (o
->type
== REDIS_ZSET
) {
7911 /* Emit the ZADDs needed to rebuild the sorted set */
7913 dictIterator
*di
= dictGetIterator(zs
->dict
);
7916 while((de
= dictNext(di
)) != NULL
) {
7917 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7918 robj
*eleobj
= dictGetEntryKey(de
);
7919 double *score
= dictGetEntryVal(de
);
7921 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7922 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7923 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7924 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7926 dictReleaseIterator(di
);
7927 } else if (o
->type
== REDIS_HASH
) {
7928 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7930 /* Emit the HSETs needed to rebuild the hash */
7931 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7932 unsigned char *p
= zipmapRewind(o
->ptr
);
7933 unsigned char *field
, *val
;
7934 unsigned int flen
, vlen
;
7936 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7937 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7938 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7939 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7941 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7945 dictIterator
*di
= dictGetIterator(o
->ptr
);
7948 while((de
= dictNext(di
)) != NULL
) {
7949 robj
*field
= dictGetEntryKey(de
);
7950 robj
*val
= dictGetEntryVal(de
);
7952 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7953 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7954 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7955 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7957 dictReleaseIterator(di
);
7962 /* Save the expire time */
7963 if (expiretime
!= -1) {
7964 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7965 /* If this key is already expired skip it */
7966 if (expiretime
< now
) continue;
7967 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7968 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7969 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7971 if (swapped
) decrRefCount(o
);
7973 dictReleaseIterator(di
);
7976 /* Make sure data will not remain on the OS's output buffers */
7981 /* Use RENAME to make sure the DB file is changed atomically only
7982 * if the generate DB file is ok. */
7983 if (rename(tmpfile
,filename
) == -1) {
7984 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
7988 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
7994 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
7995 if (di
) dictReleaseIterator(di
);
7999 /* This is how rewriting of the append only file in background works:
8001 * 1) The user calls BGREWRITEAOF
8002 * 2) Redis calls this function, that forks():
8003 * 2a) the child rewrite the append only file in a temp file.
8004 * 2b) the parent accumulates differences in server.bgrewritebuf.
8005 * 3) When the child has finished '2a' it exits.
8006 * 4) The parent will trap the exit code, if it's OK, will append the
8007 * data accumulated into server.bgrewritebuf into the temp file, and
8008 * finally will rename(2) the temp file in the actual file name.
8009 * Then the new file is reopened as the new append only file. Profit!
8011 static int rewriteAppendOnlyFileBackground(void) {
8014 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8015 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8016 if ((childpid
= fork()) == 0) {
8020 if (server
.vm_enabled
) vmReopenSwapFile();
8022 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8023 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8030 if (childpid
== -1) {
8031 redisLog(REDIS_WARNING
,
8032 "Can't rewrite append only file in background: fork: %s",
8036 redisLog(REDIS_NOTICE
,
8037 "Background append only file rewriting started by pid %d",childpid
);
8038 server
.bgrewritechildpid
= childpid
;
8039 /* We set appendseldb to -1 in order to force the next call to the
8040 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8041 * accumulated by the parent into server.bgrewritebuf will start
8042 * with a SELECT statement and it will be safe to merge. */
8043 server
.appendseldb
= -1;
8046 return REDIS_OK
; /* unreached */
8049 static void bgrewriteaofCommand(redisClient
*c
) {
8050 if (server
.bgrewritechildpid
!= -1) {
8051 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8054 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8055 char *status
= "+Background append only file rewriting started\r\n";
8056 addReplySds(c
,sdsnew(status
));
8058 addReply(c
,shared
.err
);
/* Remove the temp file used by the background append only file rewrite
 * performed by the child process with pid 'childpid'. The name is built
 * with the same "temp-rewriteaof-bg-<pid>.aof" pattern used when the
 * background rewrite creates the file. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8069 /* Virtual Memory is composed mainly of two subsystems:
8070 * - Blocking Virtual Memory
8071 * - Threaded Virtual Memory I/O
8072 * The two parts are not fully decoupled, but functions are split among two
8073 * different sections of the source code (delimited by comments) in order to
8074 * make more clear what functionality is about the blocking VM and what about
8075 * the threaded (not blocking) VM.
8079 * Redis VM is a blocking VM (one that blocks reading swapped values from
8080 * disk into memory when a value swapped out is needed in memory) that is made
8081 * unblocking by trying to examine the command argument vector in order to
8082 * load in background values that will likely be needed in order to exec
8083 * the command. The command is executed only once all the relevant keys
8084 * are loaded into memory.
8086 * This basically is almost as simple as a blocking VM, but almost as parallel
8087 * as a fully non-blocking VM.
8090 /* =================== Virtual Memory - Blocking Side ====================== */
8092 /* substitute the first occurrence of '%p' with the process pid in the
8093 * swap file name. */
8094 static void expandVmSwapFilename(void) {
8095 char *p
= strstr(server
.vm_swap_file
,"%p");
8101 new = sdscat(new,server
.vm_swap_file
);
8102 new = sdscatprintf(new,"%ld",(long) getpid());
8103 new = sdscat(new,p
+2);
8104 zfree(server
.vm_swap_file
);
8105 server
.vm_swap_file
= new;
8108 static void vmInit(void) {
8113 if (server
.vm_max_threads
!= 0)
8114 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8116 expandVmSwapFilename();
8117 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8118 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8119 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8121 if (server
.vm_fp
== NULL
) {
8122 redisLog(REDIS_WARNING
,
8123 "Impossible to open the swap file: %s. Exiting.",
8127 server
.vm_fd
= fileno(server
.vm_fp
);
8128 server
.vm_next_page
= 0;
8129 server
.vm_near_pages
= 0;
8130 server
.vm_stats_used_pages
= 0;
8131 server
.vm_stats_swapped_objects
= 0;
8132 server
.vm_stats_swapouts
= 0;
8133 server
.vm_stats_swapins
= 0;
8134 totsize
= server
.vm_pages
*server
.vm_page_size
;
8135 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8136 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8137 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8141 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8143 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8144 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8145 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8146 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8148 /* Initialize threaded I/O (used by Virtual Memory) */
8149 server
.io_newjobs
= listCreate();
8150 server
.io_processing
= listCreate();
8151 server
.io_processed
= listCreate();
8152 server
.io_ready_clients
= listCreate();
8153 pthread_mutex_init(&server
.io_mutex
,NULL
);
8154 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8155 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8156 server
.io_active_threads
= 0;
8157 if (pipe(pipefds
) == -1) {
8158 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8162 server
.io_ready_pipe_read
= pipefds
[0];
8163 server
.io_ready_pipe_write
= pipefds
[1];
8164 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8165 /* LZF requires a lot of stack */
8166 pthread_attr_init(&server
.io_threads_attr
);
8167 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8168 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8169 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8170 /* Listen for events in the threaded I/O pipe */
8171 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8172 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8173 oom("creating file event");
8176 /* Mark the page as used */
8177 static void vmMarkPageUsed(off_t page
) {
8178 off_t byte
= page
/8;
8180 redisAssert(vmFreePage(page
) == 1);
8181 server
.vm_bitmap
[byte
] |= 1<<bit
;
8184 /* Mark N contiguous pages as used, with 'page' being the first. */
8185 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8188 for (j
= 0; j
< count
; j
++)
8189 vmMarkPageUsed(page
+j
);
8190 server
.vm_stats_used_pages
+= count
;
8191 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8192 (long long)count
, (long long)page
);
8195 /* Mark the page as free */
8196 static void vmMarkPageFree(off_t page
) {
8197 off_t byte
= page
/8;
8199 redisAssert(vmFreePage(page
) == 0);
8200 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8203 /* Mark N contiguous pages as free, with 'page' being the first. */
8204 static void vmMarkPagesFree(off_t page
, off_t count
) {
8207 for (j
= 0; j
< count
; j
++)
8208 vmMarkPageFree(page
+j
);
8209 server
.vm_stats_used_pages
-= count
;
8210 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8211 (long long)count
, (long long)page
);
8214 /* Test if the page is free */
8215 static int vmFreePage(off_t page
) {
8216 off_t byte
= page
/8;
8218 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8221 /* Find N contiguous free pages storing the first page of the cluster in *first.
8222 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8223 * REDIS_ERR is returned.
8225 * This function uses a simple algorithm: we try to allocate
8226 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8227 * again from the start of the swap file searching for free spaces.
8229 * If it looks pretty clear that there are no free pages near our offset
8230 * we try to find less populated places doing a forward jump of
8231 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8232 * without hurry, and then we jump again and so forth...
8234 * This function can be improved using a free list to avoid to guess
8235 * too much, since we could collect data about freed pages.
8237 * note: I implemented this function just after watching an episode of
8238 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8240 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8241 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8243 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8244 server
.vm_near_pages
= 0;
8245 server
.vm_next_page
= 0;
8247 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8248 base
= server
.vm_next_page
;
8250 while(offset
< server
.vm_pages
) {
8251 off_t
this = base
+offset
;
8253 /* If we overflow, restart from page zero */
8254 if (this >= server
.vm_pages
) {
8255 this -= server
.vm_pages
;
8257 /* Just overflowed, what we found on tail is no longer
8258 * interesting, as it's no longer contiguous. */
8262 if (vmFreePage(this)) {
8263 /* This is a free page */
8265 /* Already got N free pages? Return to the caller, with success */
8267 *first
= this-(n
-1);
8268 server
.vm_next_page
= this+1;
8269 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8273 /* The current one is not a free page */
8277 /* Fast-forward if the current page is not free and we already
8278 * searched enough near this place. */
8280 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8281 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8283 /* Note that even if we rewind after the jump, we don't need
8284 * to make sure numfree is set to zero as we only jump *if* it
8285 * is set to zero. */
8287 /* Otherwise just check the next page */
8294 /* Write the specified object at the specified page of the swap file */
8295 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8296 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8297 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8298 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8299 redisLog(REDIS_WARNING
,
8300 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8304 rdbSaveObject(server
.vm_fp
,o
);
8305 fflush(server
.vm_fp
);
8306 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8310 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8311 * needed to later retrieve the object into the key object.
8312 * If we can't find enough contiguous empty pages to swap the object on disk
8313 * REDIS_ERR is returned. */
8314 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8315 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8318 assert(key
->storage
== REDIS_VM_MEMORY
);
8319 assert(key
->refcount
== 1);
8320 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8321 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8322 key
->vm
.page
= page
;
8323 key
->vm
.usedpages
= pages
;
8324 key
->storage
= REDIS_VM_SWAPPED
;
8325 key
->vtype
= val
->type
;
8326 decrRefCount(val
); /* Deallocate the object from memory. */
8327 vmMarkPagesUsed(page
,pages
);
8328 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8329 (unsigned char*) key
->ptr
,
8330 (unsigned long long) page
, (unsigned long long) pages
);
8331 server
.vm_stats_swapped_objects
++;
8332 server
.vm_stats_swapouts
++;
8336 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8339 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8340 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8341 redisLog(REDIS_WARNING
,
8342 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8346 o
= rdbLoadObject(type
,server
.vm_fp
);
8348 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8351 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8355 /* Load the value object relative to the 'key' object from swap to memory.
8356 * The newly allocated object is returned.
8358 * If preview is true the unserialized object is returned to the caller but
8359 * no changes are made to the key object, nor the pages are marked as freed */
8360 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8363 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8364 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8366 key
->storage
= REDIS_VM_MEMORY
;
8367 key
->vm
.atime
= server
.unixtime
;
8368 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8369 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8370 (unsigned char*) key
->ptr
);
8371 server
.vm_stats_swapped_objects
--;
8373 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8374 (unsigned char*) key
->ptr
);
8376 server
.vm_stats_swapins
++;
8380 /* Plain object loading, from swap to memory */
8381 static robj
*vmLoadObject(robj
*key
) {
8382 /* If we are loading the object in background, stop it, we
8383 * need to load this object synchronously ASAP. */
8384 if (key
->storage
== REDIS_VM_LOADING
)
8385 vmCancelThreadedIOJob(key
);
8386 return vmGenericLoadObject(key
,0);
8389 /* Just load the value on disk, without to modify the key.
8390 * This is useful when we want to perform some operation on the value
8391 * without to really bring it from swap to memory, like while saving the
8392 * dataset or rewriting the append only log. */
8393 static robj
*vmPreviewObject(robj
*key
) {
8394 return vmGenericLoadObject(key
,1);
8397 /* How good a candidate is this object for swapping?
8398 * The better candidate it is, the greater the returned value.
8400 * Currently we try to perform a fast estimation of the object size in
8401 * memory, and combine it with aging information.
8403 * Basically swappability = idle-time * log(estimated size)
8405 * Bigger objects are preferred over smaller objects, but not
8406 * proportionally, this is why we use the logarithm. This algorithm is
8407 * just a first try and will probably be tuned later. */
8408 static double computeObjectSwappability(robj
*o
) {
8409 time_t age
= server
.unixtime
- o
->vm
.atime
;
8413 struct dictEntry
*de
;
8416 if (age
<= 0) return 0;
8419 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8422 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8427 listNode
*ln
= listFirst(l
);
8429 asize
= sizeof(list
);
8431 robj
*ele
= ln
->value
;
8434 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8435 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8437 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8442 z
= (o
->type
== REDIS_ZSET
);
8443 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8445 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8446 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8451 de
= dictGetRandomKey(d
);
8452 ele
= dictGetEntryKey(de
);
8453 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8454 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8456 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8457 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8461 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8462 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8463 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8464 unsigned int klen
, vlen
;
8465 unsigned char *key
, *val
;
8467 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8471 asize
= len
*(klen
+vlen
+3);
8472 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8474 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8479 de
= dictGetRandomKey(d
);
8480 ele
= dictGetEntryKey(de
);
8481 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8482 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8484 ele
= dictGetEntryVal(de
);
8485 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8486 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8488 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8493 return (double)age
*log(1+asize
);
8496 /* Try to swap an object that's a good candidate for swapping.
8497 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8498 * to swap any object at all.
8500 * If 'usethreads' is true, Redis will try to swap the object in background
8501 * using I/O threads. */
8502 static int vmSwapOneObject(int usethreads
) {
8504 struct dictEntry
*best
= NULL
;
8505 double best_swappability
= 0;
8506 redisDb
*best_db
= NULL
;
8509 for (j
= 0; j
< server
.dbnum
; j
++) {
8510 redisDb
*db
= server
.db
+j
;
8511 /* Why maxtries is set to 100?
8512 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8513 * are swappable objects */
8516 if (dictSize(db
->dict
) == 0) continue;
8517 for (i
= 0; i
< 5; i
++) {
8519 double swappability
;
8521 if (maxtries
) maxtries
--;
8522 de
= dictGetRandomKey(db
->dict
);
8523 key
= dictGetEntryKey(de
);
8524 val
= dictGetEntryVal(de
);
8525 /* Only swap objects that are currently in memory.
8527 * Also don't swap shared objects if threaded VM is on, as we
8528 * try to ensure that the main thread does not touch the
8529 * object while the I/O thread is using it, but we can't
8530 * control other keys without adding additional mutex. */
8531 if (key
->storage
!= REDIS_VM_MEMORY
||
8532 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8533 if (maxtries
) i
--; /* don't count this try */
8536 swappability
= computeObjectSwappability(val
);
8537 if (!best
|| swappability
> best_swappability
) {
8539 best_swappability
= swappability
;
8544 if (best
== NULL
) return REDIS_ERR
;
8545 key
= dictGetEntryKey(best
);
8546 val
= dictGetEntryVal(best
);
8548 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8549 key
->ptr
, best_swappability
);
8551 /* Unshare the key if needed */
8552 if (key
->refcount
> 1) {
8553 robj
*newkey
= dupStringObject(key
);
8555 key
= dictGetEntryKey(best
) = newkey
;
8559 vmSwapObjectThreaded(key
,val
,best_db
);
8562 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8563 dictGetEntryVal(best
) = NULL
;
/* Try to swap one object out, blocking until done: this is
 * vmSwapOneObject() called with 'usethreads' set to zero, and the return
 * value is the one of that call. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Try to swap one object out using the threaded I/O: this is
 * vmSwapOneObject() called with 'usethreads' set to one, and the return
 * value is the one of that call. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8579 /* Return true if it's safe to swap out objects in a given moment.
8580 * Basically we don't want to swap objects out while there is a BGSAVE
8581 * or a BGAEOREWRITE running in backgroud. */
8582 static int vmCanSwapOut(void) {
8583 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8586 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8587 * and was deleted. Otherwise 0 is returned. */
8588 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8592 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8593 foundkey
= dictGetEntryKey(de
);
8594 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8599 /* =================== Virtual Memory - Threaded I/O ======================= */
8601 static void freeIOJob(iojob
*j
) {
8602 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8603 j
->type
== REDIS_IOJOB_DO_SWAP
||
8604 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8605 decrRefCount(j
->val
);
8606 decrRefCount(j
->key
);
8610 /* Every time a thread finished a Job, it writes a byte into the write side
8611 * of an unix pipe in order to "awake" the main thread, and this function
8613 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8617 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8619 REDIS_NOTUSED(mask
);
8620 REDIS_NOTUSED(privdata
);
8622 /* For every byte we read in the read side of the pipe, there is one
8623 * I/O job completed to process. */
8624 while((retval
= read(fd
,buf
,1)) == 1) {
8628 struct dictEntry
*de
;
8630 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8632 /* Get the processed element (the oldest one) */
8634 assert(listLength(server
.io_processed
) != 0);
8635 if (toprocess
== -1) {
8636 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8637 if (toprocess
<= 0) toprocess
= 1;
8639 ln
= listFirst(server
.io_processed
);
8641 listDelNode(server
.io_processed
,ln
);
8643 /* If this job is marked as canceled, just ignore it */
8648 /* Post process it in the main thread, as there are things we
8649 * can do just here to avoid race conditions and/or invasive locks */
8650 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8651 de
= dictFind(j
->db
->dict
,j
->key
);
8653 key
= dictGetEntryKey(de
);
8654 if (j
->type
== REDIS_IOJOB_LOAD
) {
8657 /* Key loaded, bring it at home */
8658 key
->storage
= REDIS_VM_MEMORY
;
8659 key
->vm
.atime
= server
.unixtime
;
8660 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8661 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8662 (unsigned char*) key
->ptr
);
8663 server
.vm_stats_swapped_objects
--;
8664 server
.vm_stats_swapins
++;
8665 dictGetEntryVal(de
) = j
->val
;
8666 incrRefCount(j
->val
);
8669 /* Handle clients waiting for this key to be loaded. */
8670 handleClientsBlockedOnSwappedKey(db
,key
);
8671 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8672 /* Now we know the amount of pages required to swap this object.
8673 * Let's find some space for it, and queue this task again
8674 * rebranded as REDIS_IOJOB_DO_SWAP. */
8675 if (!vmCanSwapOut() ||
8676 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8678 /* Ooops... no space or we can't swap as there is
8679 * a fork()ed Redis trying to save stuff on disk. */
8681 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8683 /* Note that we need to mark this pages as used now,
8684 * if the job will be canceled, we'll mark them as freed
8686 vmMarkPagesUsed(j
->page
,j
->pages
);
8687 j
->type
= REDIS_IOJOB_DO_SWAP
;
8692 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8695 /* Key swapped. We can finally free some memory. */
8696 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8697 printf("key->storage: %d\n",key
->storage
);
8698 printf("key->name: %s\n",(char*)key
->ptr
);
8699 printf("key->refcount: %d\n",key
->refcount
);
8700 printf("val: %p\n",(void*)j
->val
);
8701 printf("val->type: %d\n",j
->val
->type
);
8702 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8704 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8705 val
= dictGetEntryVal(de
);
8706 key
->vm
.page
= j
->page
;
8707 key
->vm
.usedpages
= j
->pages
;
8708 key
->storage
= REDIS_VM_SWAPPED
;
8709 key
->vtype
= j
->val
->type
;
8710 decrRefCount(val
); /* Deallocate the object from memory. */
8711 dictGetEntryVal(de
) = NULL
;
8712 redisLog(REDIS_DEBUG
,
8713 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8714 (unsigned char*) key
->ptr
,
8715 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8716 server
.vm_stats_swapped_objects
++;
8717 server
.vm_stats_swapouts
++;
8719 /* Put a few more swap requests in queue if we are still
8721 if (trytoswap
&& vmCanSwapOut() &&
8722 zmalloc_used_memory() > server
.vm_max_memory
)
8727 more
= listLength(server
.io_newjobs
) <
8728 (unsigned) server
.vm_max_threads
;
8730 /* Don't waste CPU time if swappable objects are rare. */
8731 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8739 if (processed
== toprocess
) return;
8741 if (retval
< 0 && errno
!= EAGAIN
) {
8742 redisLog(REDIS_WARNING
,
8743 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8748 static void lockThreadedIO(void) {
8749 pthread_mutex_lock(&server
.io_mutex
);
8752 static void unlockThreadedIO(void) {
8753 pthread_mutex_unlock(&server
.io_mutex
);
8756 /* Remove the specified object from the threaded I/O queue if still not
8757 * processed, otherwise make sure to flag it as canceled. */
8758 static void vmCancelThreadedIOJob(robj
*o
) {
8760 server
.io_newjobs
, /* 0 */
8761 server
.io_processing
, /* 1 */
8762 server
.io_processed
/* 2 */
8766 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8769 /* Search for a matching key in one of the queues */
8770 for (i
= 0; i
< 3; i
++) {
8774 listRewind(lists
[i
],&li
);
8775 while ((ln
= listNext(&li
)) != NULL
) {
8776 iojob
*job
= ln
->value
;
8778 if (job
->canceled
) continue; /* Skip this, already canceled. */
8779 if (compareStringObjects(job
->key
,o
) == 0) {
8780 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8781 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8782 /* Mark the pages as free since the swap didn't happen
8783 * or happened but is now discarded. */
8784 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8785 vmMarkPagesFree(job
->page
,job
->pages
);
8786 /* Cancel the job. It depends on the list the job is
8789 case 0: /* io_newjobs */
8790 /* If the job has not been processed yet, the best thing to do
8791 * is to remove it from the queue altogether */
8793 listDelNode(lists
[i
],ln
);
8795 case 1: /* io_processing */
8796 /* Oh Shi- the thread is messing with the Job:
8798 * Probably it's accessing the object if this is a
8799 * PREPARE_SWAP or DO_SWAP job.
8800 * If it's a LOAD job it may be reading from disk and
8801 * if we don't wait for the job to terminate before to
8802 * cancel it, maybe in a few microseconds data can be
8803 * corrupted in this pages. So the short story is:
8805 * Better to wait for the job to move into the
8806 * next queue (processed)... */
8808 /* We try again and again until the job is completed. */
8810 /* But let's wait some time for the I/O thread
8811 * to finish with this job. After all this condition
8812 * should be very rare. */
8815 case 2: /* io_processed */
8816 /* The job was already processed, that's easy...
8817 * just mark it as canceled so that we'll ignore it
8818 * when processing completed jobs. */
8822 /* Finally we have to adjust the storage type of the object
8823 * in order to "UNDO" the operation. */
8824 if (o
->storage
== REDIS_VM_LOADING
)
8825 o
->storage
= REDIS_VM_SWAPPED
;
8826 else if (o
->storage
== REDIS_VM_SWAPPING
)
8827 o
->storage
= REDIS_VM_MEMORY
;
8834 assert(1 != 1); /* We should never reach this */
8837 static void *IOThreadEntryPoint(void *arg
) {
8842 pthread_detach(pthread_self());
8844 /* Get a new job to process */
8846 if (listLength(server
.io_newjobs
) == 0) {
8847 /* No new jobs in queue, exit. */
8848 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8849 (long) pthread_self());
8850 server
.io_active_threads
--;
8854 ln
= listFirst(server
.io_newjobs
);
8856 listDelNode(server
.io_newjobs
,ln
);
8857 /* Add the job in the processing queue */
8858 j
->thread
= pthread_self();
8859 listAddNodeTail(server
.io_processing
,j
);
8860 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8862 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8863 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8865 /* Process the Job */
8866 if (j
->type
== REDIS_IOJOB_LOAD
) {
8867 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8868 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8869 FILE *fp
= fopen("/dev/null","w+");
8870 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8872 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8873 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8877 /* Done: insert the job into the processed queue */
8878 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8879 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8881 listDelNode(server
.io_processing
,ln
);
8882 listAddNodeTail(server
.io_processed
,j
);
8885 /* Signal the main thread there is new stuff to process */
8886 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8888 return NULL
; /* never reached */
8891 static void spawnIOThread(void) {
8893 sigset_t mask
, omask
;
8897 sigaddset(&mask
,SIGCHLD
);
8898 sigaddset(&mask
,SIGHUP
);
8899 sigaddset(&mask
,SIGPIPE
);
8900 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8901 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8902 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8906 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8907 server
.io_active_threads
++;
8910 /* We need to wait for the last thread to exit before we are able to
8911 * fork() in order to BGSAVE or BGREWRITEAOF. */
8912 static void waitEmptyIOJobsQueue(void) {
8914 int io_processed_len
;
8917 if (listLength(server
.io_newjobs
) == 0 &&
8918 listLength(server
.io_processing
) == 0 &&
8919 server
.io_active_threads
== 0)
8924 /* While waiting for empty jobs queue condition we post-process some
8925 * finished jobs, as I/O threads may be hanging trying to write against
8926 * the io_ready_pipe_write FD but there are so many pending jobs that
8928 io_processed_len
= listLength(server
.io_processed
);
8930 if (io_processed_len
) {
8931 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8932 usleep(1000); /* 1 millisecond */
8934 usleep(10000); /* 10 milliseconds */
/* Open the file named by server.vm_swap_file again in "r+b" mode and store
 * the new stream in server.vm_fp and its descriptor in server.vm_fd.
 * A failed fopen() is reported through redisLog() at REDIS_WARNING level. */
8939 static void vmReopenSwapFile(void) {
8940 /* Note: we don't close the old one as we are in the child process
8941 * and don't want to mess at all with the original file object. */
8942 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
/* fopen() failed: log the name of the swap file we could not open. */
8943 if (server
.vm_fp
== NULL
) {
8944 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8945 server
.vm_swap_file
);
/* Keep the cached file descriptor in sync with the new stream. */
8948 server
.vm_fd
= fileno(server
.vm_fp
);
8951 /* Log the job 'j' at REDIS_DEBUG level and append it to the tail of
 * the server.io_newjobs list.
 *
 * This function must be called with the threaded I/O lock held. */
8952 static void queueIOJob(iojob
*j
) {
8953 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8954 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8955 listAddNodeTail(server
.io_newjobs
,j
);
/* Compare the number of active I/O threads against the configured
 * maximum (server.vm_max_threads). */
8956 if (server
.io_active_threads
< server
.vm_max_threads
)
8960 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8963 assert(key
->storage
== REDIS_VM_MEMORY
);
8964 assert(key
->refcount
== 1);
8966 j
= zmalloc(sizeof(*j
));
8967 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8969 j
->key
= dupStringObject(key
);
8973 j
->thread
= (pthread_t
) -1;
8974 key
->storage
= REDIS_VM_SWAPPING
;
8982 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
8984 /* This function makes the client 'c' wait for the key 'key' to be loaded.
8985 * If there is not already a job loading the key, it is created.
8986 * The key is added to the io_keys list in the client structure, and also
8987 * in the hash table mapping swapped keys to waiting clients, that is,
8988 * the io_keys dictionary of the client's database (c->db->io_keys). */
8989 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
8990 struct dictEntry
*de
;
8994 /* If the key does not exist or is already in RAM we don't need to
8995 * block the client at all. */
8996 de
= dictFind(c
->db
->dict
,key
);
8997 if (de
== NULL
) return 0;
8998 o
= dictGetEntryKey(de
);
8999 if (o
->storage
== REDIS_VM_MEMORY
) {
9001 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9002 /* We were swapping the key, undo it! */
9003 vmCancelThreadedIOJob(o
);
9007 /* OK: the key is either swapped, or being loaded just now. */
9009 /* Add the key to the list of keys this client is waiting for.
9010 * This maps clients to keys they are waiting for. */
9011 listAddNodeTail(c
->io_keys
,key
);
9014 /* Add the client to the swapped keys => clients waiting map. */
9015 de
= dictFind(c
->db
->io_keys
,key
);
9019 /* For every key we take a list of clients blocked for it */
9021 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9023 assert(retval
== DICT_OK
);
9025 l
= dictGetEntryVal(de
);
9027 listAddNodeTail(l
,c
);
9029 /* Are we already loading the key from disk? If not create a job */
9030 if (o
->storage
== REDIS_VM_SWAPPED
) {
9033 o
->storage
= REDIS_VM_LOADING
;
9034 j
= zmalloc(sizeof(*j
));
9035 j
->type
= REDIS_IOJOB_LOAD
;
9037 j
->key
= dupStringObject(key
);
9038 j
->key
->vtype
= o
->vtype
;
9039 j
->page
= o
->vm
.page
;
9042 j
->thread
= (pthread_t
) -1;
9050 /* Preload keys needed for the ZUNION and ZINTER commands. */
9051 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9053 num
= atoi(c
->argv
[2]->ptr
);
9054 for (i
= 0; i
< num
; i
++) {
9055 waitForSwappedKey(c
,c
->argv
[3+i
]);
9059 /* Is this client attempting to run a command against swapped keys?
9060 * If so, block it ASAP, load the keys in background, then resume it.
9062 * The important idea about this function is that it can fail! If keys will
9063 * still be swapped when the client is resumed, the key lookups will
9064 * just block loading keys from disk. In practical terms this should only
9065 * happen with SORT BY command or if there is a bug in this function.
9067 * Return 1 if the client is marked as blocked, 0 if the client can
9068 * continue as the keys it is going to access appear to be in memory. */
9069 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9072 if (cmd
->vm_preload_proc
!= NULL
) {
9073 cmd
->vm_preload_proc(c
);
9075 if (cmd
->vm_firstkey
== 0) return 0;
9076 last
= cmd
->vm_lastkey
;
9077 if (last
< 0) last
= c
->argc
+last
;
9078 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9079 waitForSwappedKey(c
,c
->argv
[j
]);
9082 /* If the client was blocked for at least one key, mark it as blocked. */
9083 if (listLength(c
->io_keys
)) {
9084 c
->flags
|= REDIS_IO_WAIT
;
9085 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9086 server
.vm_blocked_clients
++;
9093 /* Remove the 'key' from the list of blocked keys for a given client.
9095 * The function returns 1 when there are no longer blocking keys after
9096 * the current one was removed (and the client can be unblocked). */
9097 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9101 struct dictEntry
*de
;
9103 /* Remove the key from the list of keys this client is waiting for. */
9104 listRewind(c
->io_keys
,&li
);
9105 while ((ln
= listNext(&li
)) != NULL
) {
9106 if (compareStringObjects(ln
->value
,key
) == 0) {
9107 listDelNode(c
->io_keys
,ln
);
9113 /* Remove the client from the key => waiting clients map. */
9114 de
= dictFind(c
->db
->io_keys
,key
);
9116 l
= dictGetEntryVal(de
);
9117 ln
= listSearchKey(l
,c
);
9120 if (listLength(l
) == 0)
9121 dictDelete(c
->db
->io_keys
,key
);
9123 return listLength(c
->io_keys
) == 0;
9126 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9127 struct dictEntry
*de
;
9132 de
= dictFind(db
->io_keys
,key
);
9135 l
= dictGetEntryVal(de
);
9136 len
= listLength(l
);
9137 /* Note: we can't use something like while(listLength(l)) as the list
9138 * can be freed by the calling function when we remove the last element. */
9141 redisClient
*c
= ln
->value
;
9143 if (dontWaitForSwappedKey(c
,key
)) {
9144 /* Put the client in the list of clients ready to go as we
9145 * loaded all the keys about it. */
9146 listAddNodeTail(server
.io_ready_clients
,c
);
9151 /* =========================== Remote Configuration ========================= */
9153 static void configSetCommand(redisClient
*c
) {
9154 robj
*o
= getDecodedObject(c
->argv
[3]);
9155 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9156 zfree(server
.dbfilename
);
9157 server
.dbfilename
= zstrdup(o
->ptr
);
9158 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9159 zfree(server
.requirepass
);
9160 server
.requirepass
= zstrdup(o
->ptr
);
9161 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9162 zfree(server
.masterauth
);
9163 server
.masterauth
= zstrdup(o
->ptr
);
9164 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9165 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9167 addReplySds(c
,sdscatprintf(sdsempty(),
9168 "-ERR not supported CONFIG parameter %s\r\n",
9169 (char*)c
->argv
[2]->ptr
));
9174 addReply(c
,shared
.ok
);
9177 static void configGetCommand(redisClient
*c
) {
9178 robj
*o
= getDecodedObject(c
->argv
[2]);
9179 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9180 char *pattern
= o
->ptr
;
9184 decrRefCount(lenobj
);
9186 if (stringmatch(pattern
,"dbfilename",0)) {
9187 addReplyBulkCString(c
,"dbfilename");
9188 addReplyBulkCString(c
,server
.dbfilename
);
9191 if (stringmatch(pattern
,"requirepass",0)) {
9192 addReplyBulkCString(c
,"requirepass");
9193 addReplyBulkCString(c
,server
.requirepass
);
9196 if (stringmatch(pattern
,"masterauth",0)) {
9197 addReplyBulkCString(c
,"masterauth");
9198 addReplyBulkCString(c
,server
.masterauth
);
9201 if (stringmatch(pattern
,"maxmemory",0)) {
9204 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9205 addReplyBulkCString(c
,"maxmemory");
9206 addReplyBulkCString(c
,buf
);
9210 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9213 static void configCommand(redisClient
*c
) {
9214 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9215 if (c
->argc
!= 4) goto badarity
;
9216 configSetCommand(c
);
9217 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9218 if (c
->argc
!= 3) goto badarity
;
9219 configGetCommand(c
);
9220 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9221 if (c
->argc
!= 2) goto badarity
;
9222 server
.stat_numcommands
= 0;
9223 server
.stat_numconnections
= 0;
9224 server
.stat_expiredkeys
= 0;
9225 server
.stat_starttime
= time(NULL
);
9226 addReply(c
,shared
.ok
);
9228 addReplySds(c
,sdscatprintf(sdsempty(),
9229 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9234 addReplySds(c
,sdscatprintf(sdsempty(),
9235 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9236 (char*) c
->argv
[1]->ptr
));
9239 /* ================================= Debugging ============================== */
9241 static void debugCommand(redisClient
*c
) {
9242 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9244 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9245 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9246 addReply(c
,shared
.err
);
9250 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9251 addReply(c
,shared
.err
);
9254 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9255 addReply(c
,shared
.ok
);
9256 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9258 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9259 addReply(c
,shared
.err
);
9262 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9263 addReply(c
,shared
.ok
);
9264 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9265 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9269 addReply(c
,shared
.nokeyerr
);
9272 key
= dictGetEntryKey(de
);
9273 val
= dictGetEntryVal(de
);
9274 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9275 key
->storage
== REDIS_VM_SWAPPING
)) {
9279 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9280 strenc
= strencoding
[val
->encoding
];
9282 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9285 addReplySds(c
,sdscatprintf(sdsempty(),
9286 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9287 "encoding:%s serializedlength:%lld\r\n",
9288 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9289 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9291 addReplySds(c
,sdscatprintf(sdsempty(),
9292 "+Key at:%p refcount:%d, value swapped at: page %llu "
9293 "using %llu pages\r\n",
9294 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9295 (unsigned long long) key
->vm
.usedpages
));
9297 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9298 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9301 if (!server
.vm_enabled
) {
9302 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9306 addReply(c
,shared
.nokeyerr
);
9309 key
= dictGetEntryKey(de
);
9310 val
= dictGetEntryVal(de
);
9311 /* If the key is shared we want to create a copy */
9312 if (key
->refcount
> 1) {
9313 robj
*newkey
= dupStringObject(key
);
9315 key
= dictGetEntryKey(de
) = newkey
;
9318 if (key
->storage
!= REDIS_VM_MEMORY
) {
9319 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9320 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9321 dictGetEntryVal(de
) = NULL
;
9322 addReply(c
,shared
.ok
);
9324 addReply(c
,shared
.err
);
9327 addReplySds(c
,sdsnew(
9328 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
9332 static void _redisAssert(char *estr
, char *file
, int line
) {
9333 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9334 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9335 #ifdef HAVE_BACKTRACE
9336 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9341 /* =================================== Main! ================================ */
9344 int linuxOvercommitMemoryValue(void) {
9345 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9349 if (fgets(buf
,64,fp
) == NULL
) {
9358 void linuxOvercommitMemoryWarning(void) {
9359 if (linuxOvercommitMemoryValue() == 0) {
9360 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9363 #endif /* __linux__ */
9365 static void daemonize(void) {
9369 if (fork() != 0) exit(0); /* parent exits */
9370 setsid(); /* create a new session */
9372 /* Every output goes to /dev/null. If Redis is daemonized but
9373 * the 'logfile' is set to 'stdout' in the configuration file
9374 * it will not log at all. */
9375 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9376 dup2(fd
, STDIN_FILENO
);
9377 dup2(fd
, STDOUT_FILENO
);
9378 dup2(fd
, STDERR_FILENO
);
9379 if (fd
> STDERR_FILENO
) close(fd
);
9381 /* Try to write the pid file */
9382 fp
= fopen(server
.pidfile
,"w");
9384 fprintf(fp
,"%d\n",getpid());
/* Print "Redis server version <REDIS_VERSION>" followed by a newline on
 * standard output. */
9389 static void version() {
9390 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on standard error: one form taking the
 * path of a configuration file, one form taking "-" to read the
 * configuration from standard input. */
9394 static void usage() {
9395 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9396 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9400 int main(int argc
, char **argv
) {
9405 if (strcmp(argv
[1], "-v") == 0 ||
9406 strcmp(argv
[1], "--version") == 0) version();
9407 if (strcmp(argv
[1], "--help") == 0) usage();
9408 resetServerSaveParams();
9409 loadServerConfig(argv
[1]);
9410 } else if ((argc
> 2)) {
9413 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9415 if (server
.daemonize
) daemonize();
9417 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9419 linuxOvercommitMemoryWarning();
9422 if (server
.appendonly
) {
9423 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9424 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9426 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9427 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9429 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9430 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9432 aeDeleteEventLoop(server
.el
);
9436 /* ============================= Backtrace support ========================= */
9438 #ifdef HAVE_BACKTRACE
9439 static char *findFuncName(void *pointer
, unsigned long *offset
);
9441 static void *getMcontextEip(ucontext_t
*uc
) {
9442 #if defined(__FreeBSD__)
9443 return (void*) uc
->uc_mcontext
.mc_eip
;
9444 #elif defined(__dietlibc__)
9445 return (void*) uc
->uc_mcontext
.eip
;
9446 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9448 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9450 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9452 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9453 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9454 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9456 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9458 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9459 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9460 #elif defined(__ia64__) /* Linux IA64 */
9461 return (void*) uc
->uc_mcontext
.sc_ip
;
9467 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9469 char **messages
= NULL
;
9470 int i
, trace_size
= 0;
9471 unsigned long offset
=0;
9472 ucontext_t
*uc
= (ucontext_t
*) secret
;
9474 REDIS_NOTUSED(info
);
9476 redisLog(REDIS_WARNING
,
9477 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9478 infostring
= genRedisInfoString();
9479 redisLog(REDIS_WARNING
, "%s",infostring
);
9480 /* It's not safe to sdsfree() the returned string under memory
9481 * corruption conditions. Let it leak as we are going to abort */
9483 trace_size
= backtrace(trace
, 100);
9484 /* overwrite sigaction with caller's address */
9485 if (getMcontextEip(uc
) != NULL
) {
9486 trace
[1] = getMcontextEip(uc
);
9488 messages
= backtrace_symbols(trace
, trace_size
);
9490 for (i
=1; i
<trace_size
; ++i
) {
9491 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9493 p
= strchr(messages
[i
],'+');
9494 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9495 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9497 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9500 /* free(messages); Don't call free() with possibly corrupted memory. */
9504 static void setupSigSegvAction(void) {
9505 struct sigaction act
;
9507 sigemptyset (&act
.sa_mask
);
9508 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9509 * is used. Otherwise, sa_handler is used */
9510 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9511 act
.sa_sigaction
= segvHandler
;
9512 sigaction (SIGSEGV
, &act
, NULL
);
9513 sigaction (SIGBUS
, &act
, NULL
);
9514 sigaction (SIGFPE
, &act
, NULL
);
9515 sigaction (SIGILL
, &act
, NULL
);
9516 sigaction (SIGBUS
, &act
, NULL
);
9520 #include "staticsymbols.h"
9521 /* This function tries to convert a pointer into a function name. It's used in
9522 * order to provide a backtrace under segmentation fault that's able to
9523 * display functions declared as static (otherwise the backtrace is useless). */
9524 static char *findFuncName(void *pointer
, unsigned long *offset
){
9526 unsigned long off
, minoff
= 0;
9528 /* Try to match against the Symbol with the smallest offset */
9529 for (i
=0; symsTable
[i
].pointer
; i
++) {
9530 unsigned long lp
= (unsigned long) pointer
;
9532 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9533 off
=lp
-symsTable
[i
].pointer
;
9534 if (ret
< 0 || off
< minoff
) {
9540 if (ret
== -1) return NULL
;
9542 return symsTable
[ret
].name
;
9544 #else /* HAVE_BACKTRACE */
9545 static void setupSigSegvAction(void) {
9547 #endif /* HAVE_BACKTRACE */