2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Object encodings. Some kinds of objects like Strings and Hashes can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. To store 32 bits lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specify the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte, most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0
153 #define REDIS_RDB_14BITLEN 1
154 #define REDIS_RDB_32BITLEN 2
155 #define REDIS_RDB_ENCVAL 3
156 #define REDIS_RDB_LENERR UINT_MAX
158 /* When a length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * according to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF (see lzf.h) */
166 /* Virtual memory object->where field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
222 /* Anti-warning macro... */
223 #define REDIS_NOTUSED(V) ((void) V)
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way:
 * when the expression _e is false, _redisAssert() is called with the
 * stringified expression, the source file name and the line number, and the
 * process is then terminated with _exit(1). When _e is true the macro
 * evaluates to a void no-op. */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure */
246 struct redisObjectVM
{
247 off_t page
; /* the page at which the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
; /* One of the REDIS_ENCODING_* values */
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
330 dict
*pubsub_classes
; /* Classes a client is interested in (SUBSCRIBE) */
338 /* Global server state structure */
343 dict
*sharingpool
; /* Pool used for object sharing */
344 unsigned int sharingpoolsize
;
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function ran */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of the last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to take this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job
 * NOTE(review): no fields with these names are visible here;
 * presumably this refers to the io_newjobs / io_processing /
 * io_processed lists above -- confirm. */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_classes
; /* Associate classes to list of subscribed clients */
445 typedef void redisCommandProc(redisClient
*c
);
446 struct redisCommand
{
448 redisCommandProc
*proc
;
451 /* Use a function to determine which keys need to be loaded
452 * in the background prior to executing this command. Takes precedence
453 * over vm_firstkey and others, ignored when NULL */
454 redisCommandProc
*vm_preload_proc
;
455 /* What keys should be loaded in background when calling this command? */
456 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
457 int vm_lastkey
; /* The last argument that's a key */
458 int vm_keystep
; /* The step between first and last key */
461 struct redisFunctionSym
{
463 unsigned long pointer
;
466 typedef struct _redisSortObject
{
474 typedef struct _redisSortOperation
{
477 } redisSortOperation
;
479 /* ZSETs use a specialized version of Skiplists */
481 typedef struct zskiplistNode
{
482 struct zskiplistNode
**forward
;
483 struct zskiplistNode
*backward
;
489 typedef struct zskiplist
{
490 struct zskiplistNode
*header
, *tail
;
491 unsigned long length
;
495 typedef struct zset
{
500 /* Our shared "common" objects */
502 struct sharedObjectsStruct
{
503 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
504 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
505 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
506 *outofrangeerr
, *plus
,
507 *select0
, *select1
, *select2
, *select3
, *select4
,
508 *select5
, *select6
, *select7
, *select8
, *select9
,
509 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
;
512 /* Global vars that are actually used as constants. The following double
513 * values are used for double on-disk serialization, and are initialized
514 * at runtime to avoid strange compiler optimizations. */
516 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
518 /* VM threaded I/O request message */
519 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
520 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
521 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
522 typedef struct iojob
{
523 int type
; /* Request type, REDIS_IOJOB_* */
524 redisDb
*db
;/* Redis database */
525 robj
*key
; /* This I/O request is about swapping this key */
526 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
527 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
528 off_t page
; /* Swap page where to read/write the object */
529 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
530 int canceled
; /* True if this command was canceled by blocking side of VM */
531 pthread_t thread
; /* ID of the thread processing this entry */
534 /*================================ Prototypes =============================== */
536 static void freeStringObject(robj
*o
);
537 static void freeListObject(robj
*o
);
538 static void freeSetObject(robj
*o
);
539 static void decrRefCount(void *o
);
540 static robj
*createObject(int type
, void *ptr
);
541 static void freeClient(redisClient
*c
);
542 static int rdbLoad(char *filename
);
543 static void addReply(redisClient
*c
, robj
*obj
);
544 static void addReplySds(redisClient
*c
, sds s
);
545 static void incrRefCount(robj
*o
);
546 static int rdbSaveBackground(char *filename
);
547 static robj
*createStringObject(char *ptr
, size_t len
);
548 static robj
*dupStringObject(robj
*o
);
549 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
550 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
551 static int syncWithMaster(void);
552 static robj
*tryObjectSharing(robj
*o
);
553 static int tryObjectEncoding(robj
*o
);
554 static robj
*getDecodedObject(robj
*o
);
555 static int removeExpire(redisDb
*db
, robj
*key
);
556 static int expireIfNeeded(redisDb
*db
, robj
*key
);
557 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
558 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
559 static int deleteKey(redisDb
*db
, robj
*key
);
560 static time_t getExpire(redisDb
*db
, robj
*key
);
561 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
562 static void updateSlavesWaitingBgsave(int bgsaveerr
);
563 static void freeMemoryIfNeeded(void);
564 static int processCommand(redisClient
*c
);
565 static void setupSigSegvAction(void);
566 static void rdbRemoveTempFile(pid_t childpid
);
567 static void aofRemoveTempFile(pid_t childpid
);
568 static size_t stringObjectLen(robj
*o
);
569 static void processInputBuffer(redisClient
*c
);
570 static zskiplist
*zslCreate(void);
571 static void zslFree(zskiplist
*zsl
);
572 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
573 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
574 static void initClientMultiState(redisClient
*c
);
575 static void freeClientMultiState(redisClient
*c
);
576 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
577 static void unblockClientWaitingData(redisClient
*c
);
578 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
579 static void vmInit(void);
580 static void vmMarkPagesFree(off_t page
, off_t count
);
581 static robj
*vmLoadObject(robj
*key
);
582 static robj
*vmPreviewObject(robj
*key
);
583 static int vmSwapOneObjectBlocking(void);
584 static int vmSwapOneObjectThreaded(void);
585 static int vmCanSwapOut(void);
586 static int tryFreeOneObjectFromFreelist(void);
587 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
588 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
589 static void vmCancelThreadedIOJob(robj
*o
);
590 static void lockThreadedIO(void);
591 static void unlockThreadedIO(void);
592 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
593 static void freeIOJob(iojob
*j
);
594 static void queueIOJob(iojob
*j
);
595 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
596 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
597 static void waitEmptyIOJobsQueue(void);
598 static void vmReopenSwapFile(void);
599 static int vmFreePage(off_t page
);
600 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
601 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
602 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
603 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
604 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
605 static struct redisCommand
*lookupCommand(char *name
);
606 static void call(redisClient
*c
, struct redisCommand
*cmd
);
607 static void resetClient(redisClient
*c
);
608 static void convertToRealHash(robj
*o
);
609 static int pubsubUnsubscribeAll(redisClient
*c
, int notify
);
612 static void authCommand(redisClient
*c
);
613 static void pingCommand(redisClient
*c
);
614 static void echoCommand(redisClient
*c
);
615 static void setCommand(redisClient
*c
);
616 static void setnxCommand(redisClient
*c
);
617 static void getCommand(redisClient
*c
);
618 static void delCommand(redisClient
*c
);
619 static void existsCommand(redisClient
*c
);
620 static void incrCommand(redisClient
*c
);
621 static void decrCommand(redisClient
*c
);
622 static void incrbyCommand(redisClient
*c
);
623 static void decrbyCommand(redisClient
*c
);
624 static void selectCommand(redisClient
*c
);
625 static void randomkeyCommand(redisClient
*c
);
626 static void keysCommand(redisClient
*c
);
627 static void dbsizeCommand(redisClient
*c
);
628 static void lastsaveCommand(redisClient
*c
);
629 static void saveCommand(redisClient
*c
);
630 static void bgsaveCommand(redisClient
*c
);
631 static void bgrewriteaofCommand(redisClient
*c
);
632 static void shutdownCommand(redisClient
*c
);
633 static void moveCommand(redisClient
*c
);
634 static void renameCommand(redisClient
*c
);
635 static void renamenxCommand(redisClient
*c
);
636 static void lpushCommand(redisClient
*c
);
637 static void rpushCommand(redisClient
*c
);
638 static void lpopCommand(redisClient
*c
);
639 static void rpopCommand(redisClient
*c
);
640 static void llenCommand(redisClient
*c
);
641 static void lindexCommand(redisClient
*c
);
642 static void lrangeCommand(redisClient
*c
);
643 static void ltrimCommand(redisClient
*c
);
644 static void typeCommand(redisClient
*c
);
645 static void lsetCommand(redisClient
*c
);
646 static void saddCommand(redisClient
*c
);
647 static void sremCommand(redisClient
*c
);
648 static void smoveCommand(redisClient
*c
);
649 static void sismemberCommand(redisClient
*c
);
650 static void scardCommand(redisClient
*c
);
651 static void spopCommand(redisClient
*c
);
652 static void srandmemberCommand(redisClient
*c
);
653 static void sinterCommand(redisClient
*c
);
654 static void sinterstoreCommand(redisClient
*c
);
655 static void sunionCommand(redisClient
*c
);
656 static void sunionstoreCommand(redisClient
*c
);
657 static void sdiffCommand(redisClient
*c
);
658 static void sdiffstoreCommand(redisClient
*c
);
659 static void syncCommand(redisClient
*c
);
660 static void flushdbCommand(redisClient
*c
);
661 static void flushallCommand(redisClient
*c
);
662 static void sortCommand(redisClient
*c
);
663 static void lremCommand(redisClient
*c
);
664 static void rpoplpushcommand(redisClient
*c
);
665 static void infoCommand(redisClient
*c
);
666 static void mgetCommand(redisClient
*c
);
667 static void monitorCommand(redisClient
*c
);
668 static void expireCommand(redisClient
*c
);
669 static void expireatCommand(redisClient
*c
);
670 static void getsetCommand(redisClient
*c
);
671 static void ttlCommand(redisClient
*c
);
672 static void slaveofCommand(redisClient
*c
);
673 static void debugCommand(redisClient
*c
);
674 static void msetCommand(redisClient
*c
);
675 static void msetnxCommand(redisClient
*c
);
676 static void zaddCommand(redisClient
*c
);
677 static void zincrbyCommand(redisClient
*c
);
678 static void zrangeCommand(redisClient
*c
);
679 static void zrangebyscoreCommand(redisClient
*c
);
680 static void zcountCommand(redisClient
*c
);
681 static void zrevrangeCommand(redisClient
*c
);
682 static void zcardCommand(redisClient
*c
);
683 static void zremCommand(redisClient
*c
);
684 static void zscoreCommand(redisClient
*c
);
685 static void zremrangebyscoreCommand(redisClient
*c
);
686 static void multiCommand(redisClient
*c
);
687 static void execCommand(redisClient
*c
);
688 static void discardCommand(redisClient
*c
);
689 static void blpopCommand(redisClient
*c
);
690 static void brpopCommand(redisClient
*c
);
691 static void appendCommand(redisClient
*c
);
692 static void substrCommand(redisClient
*c
);
693 static void zrankCommand(redisClient
*c
);
694 static void zrevrankCommand(redisClient
*c
);
695 static void hsetCommand(redisClient
*c
);
696 static void hgetCommand(redisClient
*c
);
697 static void hdelCommand(redisClient
*c
);
698 static void hlenCommand(redisClient
*c
);
699 static void zremrangebyrankCommand(redisClient
*c
);
700 static void zunionCommand(redisClient
*c
);
701 static void zinterCommand(redisClient
*c
);
702 static void hkeysCommand(redisClient
*c
);
703 static void hvalsCommand(redisClient
*c
);
704 static void hgetallCommand(redisClient
*c
);
705 static void hexistsCommand(redisClient
*c
);
706 static void configCommand(redisClient
*c
);
707 static void hincrbyCommand(redisClient
*c
);
708 static void subscribeCommand(redisClient
*c
);
709 static void unsubscribeCommand(redisClient
*c
);
710 static void publishCommand(redisClient
*c
);
712 /*================================= Globals ================================= */
715 static struct redisServer server
; /* server global state */
716 static struct redisCommand cmdTable
[] = {
717 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
718 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
719 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
720 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
721 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
723 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
725 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
726 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
727 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
728 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
729 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
730 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
731 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
732 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
733 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
736 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
738 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
739 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
740 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
742 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
743 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
744 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
748 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
749 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
750 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
751 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
752 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
753 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
755 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
760 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
761 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
768 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
769 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
772 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
773 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
778 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
780 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
782 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
783 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
785 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
789 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
791 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
795 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
796 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
799 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
808 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
814 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"publish",publishCommand
,3,REDIS_CMD_BULK
,NULL
,0,0,0},
817 {NULL
,NULL
,0,0,NULL
,0,0,0}
820 /*============================ Utility functions ============================ */
822 /* Glob-style pattern matching. */
823 static int stringmatchlen(const char *pattern
, int patternLen
,
824 const char *string
, int stringLen
, int nocase
)
829 while (pattern
[1] == '*') {
834 return 1; /* match */
836 if (stringmatchlen(pattern
+1, patternLen
-1,
837 string
, stringLen
, nocase
))
838 return 1; /* match */
842 return 0; /* no match */
846 return 0; /* no match */
856 not = pattern
[0] == '^';
863 if (pattern
[0] == '\\') {
866 if (pattern
[0] == string
[0])
868 } else if (pattern
[0] == ']') {
870 } else if (patternLen
== 0) {
874 } else if (pattern
[1] == '-' && patternLen
>= 3) {
875 int start
= pattern
[0];
876 int end
= pattern
[2];
884 start
= tolower(start
);
890 if (c
>= start
&& c
<= end
)
894 if (pattern
[0] == string
[0])
897 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
907 return 0; /* no match */
913 if (patternLen
>= 2) {
920 if (pattern
[0] != string
[0])
921 return 0; /* no match */
923 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
924 return 0; /* no match */
932 if (stringLen
== 0) {
933 while(*pattern
== '*') {
940 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match for NUL-terminated strings.
 *
 * Measures 'pattern' and 'string' with strlen() and hands both, together
 * with their lengths and the 'nocase' flag, to stringmatchlen(). The value
 * returned is whatever stringmatchlen() returns. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);

    return stringmatchlen(pattern, plen, string, slen, nocase);
}
949 static void redisLog(int level
, const char *fmt
, ...) {
953 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
957 if (level
>= server
.verbosity
) {
963 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
964 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
965 vfprintf(fp
, fmt
, ap
);
971 if (server
.logfile
) fclose(fp
);
974 /*====================== Hash table type implementation ==================== */
976 /* This is a hash table type that uses the SDS dynamic strings library as
977 * keys and redis objects as values (objects can hold SDS strings,
980 static void dictVanillaFree(void *privdata
, void *val
)
982 DICT_NOTUSED(privdata
);
986 static void dictListDestructor(void *privdata
, void *val
)
988 DICT_NOTUSED(privdata
);
989 listRelease((list
*)val
);
992 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
996 DICT_NOTUSED(privdata
);
998 l1
= sdslen((sds
)key1
);
999 l2
= sdslen((sds
)key2
);
1000 if (l1
!= l2
) return 0;
1001 return memcmp(key1
, key2
, l1
) == 0;
1004 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1006 DICT_NOTUSED(privdata
);
1008 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
1012 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1015 const robj
*o1
= key1
, *o2
= key2
;
1016 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1019 static unsigned int dictObjHash(const void *key
) {
1020 const robj
*o
= key
;
1021 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1024 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1027 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1030 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1031 o2
->encoding
== REDIS_ENCODING_INT
&&
1032 o1
->ptr
== o2
->ptr
) return 1;
1034 o1
= getDecodedObject(o1
);
1035 o2
= getDecodedObject(o2
);
1036 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1042 static unsigned int dictEncObjHash(const void *key
) {
1043 robj
*o
= (robj
*) key
;
1045 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1046 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1048 if (o
->encoding
== REDIS_ENCODING_INT
) {
1052 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1053 return dictGenHashFunction((unsigned char*)buf
, len
);
1057 o
= getDecodedObject(o
);
1058 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1065 /* Sets type and expires */
1066 static dictType setDictType
= {
1067 dictEncObjHash
, /* hash function */
1070 dictEncObjKeyCompare
, /* key compare */
1071 dictRedisObjectDestructor
, /* key destructor */
1072 NULL
/* val destructor */
1075 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1076 static dictType zsetDictType
= {
1077 dictEncObjHash
, /* hash function */
1080 dictEncObjKeyCompare
, /* key compare */
1081 dictRedisObjectDestructor
, /* key destructor */
1082 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1086 static dictType dbDictType
= {
1087 dictObjHash
, /* hash function */
1090 dictObjKeyCompare
, /* key compare */
1091 dictRedisObjectDestructor
, /* key destructor */
1092 dictRedisObjectDestructor
/* val destructor */
1096 static dictType keyptrDictType
= {
1097 dictObjHash
, /* hash function */
1100 dictObjKeyCompare
, /* key compare */
1101 dictRedisObjectDestructor
, /* key destructor */
1102 NULL
/* val destructor */
1105 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1106 static dictType hashDictType
= {
1107 dictEncObjHash
, /* hash function */
1110 dictEncObjKeyCompare
, /* key compare */
1111 dictRedisObjectDestructor
, /* key destructor */
1112 dictRedisObjectDestructor
/* val destructor */
1115 /* Keylist hash table type has unencoded redis objects as keys and
1116 * lists as values. It's used for blocking operations (BLPOP) and to
1117 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1118 static dictType keylistDictType
= {
1119 dictObjHash
, /* hash function */
1122 dictObjKeyCompare
, /* key compare */
1123 dictRedisObjectDestructor
, /* key destructor */
1124 dictListDestructor
/* val destructor */
1127 static void version();
1129 /* ========================= Random utility functions ======================= */
1131 /* Redis generally does not try to recover from out of memory conditions
1132 * when allocating objects or strings, it is not clear if it will be possible
1133 * to report this condition to the client since the networking layer itself
1134 * is based on heap allocation for send buffers, so we simply abort.
1135 * At least the code will be simpler to read... */
1136 static void oom(const char *msg
) {
1137 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1142 /* ====================== Redis server networking stuff ===================== */
1143 static void closeTimedoutClients(void) {
1146 time_t now
= time(NULL
);
1149 listRewind(server
.clients
,&li
);
1150 while ((ln
= listNext(&li
)) != NULL
) {
1151 c
= listNodeValue(ln
);
1152 if (server
.maxidletime
&&
1153 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1154 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1155 (now
- c
->lastinteraction
> server
.maxidletime
))
1157 redisLog(REDIS_VERBOSE
,"Closing idle client");
1159 } else if (c
->flags
& REDIS_BLOCKED
) {
1160 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1161 addReply(c
,shared
.nullmultibulk
);
1162 unblockClientWaitingData(c
);
1168 static int htNeedsResize(dict
*dict
) {
1169 long long size
, used
;
1171 size
= dictSlots(dict
);
1172 used
= dictSize(dict
);
1173 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1174 (used
*100/size
< REDIS_HT_MINFILL
));
1177 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1178 * we resize the hash table to save memory */
1179 static void tryResizeHashTables(void) {
1182 for (j
= 0; j
< server
.dbnum
; j
++) {
1183 if (htNeedsResize(server
.db
[j
].dict
)) {
1184 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1185 dictResize(server
.db
[j
].dict
);
1186 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1188 if (htNeedsResize(server
.db
[j
].expires
))
1189 dictResize(server
.db
[j
].expires
);
1193 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1194 void backgroundSaveDoneHandler(int statloc
) {
1195 int exitcode
= WEXITSTATUS(statloc
);
1196 int bysignal
= WIFSIGNALED(statloc
);
1198 if (!bysignal
&& exitcode
== 0) {
1199 redisLog(REDIS_NOTICE
,
1200 "Background saving terminated with success");
1202 server
.lastsave
= time(NULL
);
1203 } else if (!bysignal
&& exitcode
!= 0) {
1204 redisLog(REDIS_WARNING
, "Background saving error");
1206 redisLog(REDIS_WARNING
,
1207 "Background saving terminated by signal");
1208 rdbRemoveTempFile(server
.bgsavechildpid
);
1210 server
.bgsavechildpid
= -1;
1211 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1212 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1213 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1216 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1218 void backgroundRewriteDoneHandler(int statloc
) {
1219 int exitcode
= WEXITSTATUS(statloc
);
1220 int bysignal
= WIFSIGNALED(statloc
);
1222 if (!bysignal
&& exitcode
== 0) {
1226 redisLog(REDIS_NOTICE
,
1227 "Background append only file rewriting terminated with success");
1228 /* Now it's time to flush the differences accumulated by the parent */
1229 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1230 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1232 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1235 /* Flush our data... */
1236 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1237 (signed) sdslen(server
.bgrewritebuf
)) {
1238 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1242 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1243 /* Now our work is to rename the temp file into the stable file. And
1244 * switch the file descriptor used by the server for append only. */
1245 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1246 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1250 /* Mission completed... almost */
1251 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1252 if (server
.appendfd
!= -1) {
1253 /* If append only is actually enabled... */
1254 close(server
.appendfd
);
1255 server
.appendfd
= fd
;
1257 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1258 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1260 /* If append only is disabled we just generate a dump in this
1261 * format. Why not? */
1264 } else if (!bysignal
&& exitcode
!= 0) {
1265 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1267 redisLog(REDIS_WARNING
,
1268 "Background append only file rewriting terminated by signal");
1271 sdsfree(server
.bgrewritebuf
);
1272 server
.bgrewritebuf
= sdsempty();
1273 aofRemoveTempFile(server
.bgrewritechildpid
);
1274 server
.bgrewritechildpid
= -1;
1277 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1278 int j
, loops
= server
.cronloops
++;
1279 REDIS_NOTUSED(eventLoop
);
1281 REDIS_NOTUSED(clientData
);
1283 /* We take a cached value of the unix time in the global state because
1284 * with virtual memory and aging we need to store the current time
1285 * in objects at every object access, and accuracy is not needed.
1286 * To access a global var is faster than calling time(NULL) */
1287 server
.unixtime
= time(NULL
);
1289 /* Show some info about non-empty databases */
1290 for (j
= 0; j
< server
.dbnum
; j
++) {
1291 long long size
, used
, vkeys
;
1293 size
= dictSlots(server
.db
[j
].dict
);
1294 used
= dictSize(server
.db
[j
].dict
);
1295 vkeys
= dictSize(server
.db
[j
].expires
);
1296 if (!(loops
% 50) && (used
|| vkeys
)) {
1297 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1298 /* dictPrintStats(server.dict); */
1302 /* We don't want to resize the hash tables while a background saving
1303 * is in progress: the saving child is created using fork() that is
1304 * implemented with a copy-on-write semantic in most modern systems, so
1305 * if we resize the HT while there is the saving child at work actually
1306 * a lot of memory movements in the parent will cause a lot of pages
1308 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1310 /* Show information about connected clients */
1311 if (!(loops
% 50)) {
1312 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1313 listLength(server
.clients
)-listLength(server
.slaves
),
1314 listLength(server
.slaves
),
1315 zmalloc_used_memory(),
1316 dictSize(server
.sharingpool
));
1319 /* Close connections of timedout clients */
1320 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1321 closeTimedoutClients();
1323 /* Check if a background saving or AOF rewrite in progress terminated */
1324 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1328 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1329 if (pid
== server
.bgsavechildpid
) {
1330 backgroundSaveDoneHandler(statloc
);
1332 backgroundRewriteDoneHandler(statloc
);
1336 /* If there is not a background saving in progress check if
1337 * we have to save now */
1338 time_t now
= time(NULL
);
1339 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1340 struct saveparam
*sp
= server
.saveparams
+j
;
1342 if (server
.dirty
>= sp
->changes
&&
1343 now
-server
.lastsave
> sp
->seconds
) {
1344 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1345 sp
->changes
, sp
->seconds
);
1346 rdbSaveBackground(server
.dbfilename
);
1352 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1353 * will use few CPU cycles if there are few expiring keys, otherwise
1354 * it will get more aggressive to avoid that too much memory is used by
1355 * keys that can be removed from the keyspace. */
1356 for (j
= 0; j
< server
.dbnum
; j
++) {
1358 redisDb
*db
= server
.db
+j
;
1360 /* Continue to expire if at the end of the cycle more than 25%
1361 * of the keys were expired. */
1363 long num
= dictSize(db
->expires
);
1364 time_t now
= time(NULL
);
1367 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1368 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1373 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1374 t
= (time_t) dictGetEntryVal(de
);
1376 deleteKey(db
,dictGetEntryKey(de
));
1378 server
.stat_expiredkeys
++;
1381 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1384 /* Swap a few keys on disk if we are over the memory limit and VM
1385 * is enabled. Try to free objects from the free list first. */
1386 if (vmCanSwapOut()) {
1387 while (server
.vm_enabled
&& zmalloc_used_memory() >
1388 server
.vm_max_memory
)
1392 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1393 retval
= (server
.vm_max_threads
== 0) ?
1394 vmSwapOneObjectBlocking() :
1395 vmSwapOneObjectThreaded();
1396 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1397 zmalloc_used_memory() >
1398 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1400 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1402 /* Note that when using threaded I/O we free just one object,
1403 * because when the I/O thread in charge of swapping this
1404 * object out finishes, the handler of completed jobs
1405 * will try to swap more objects if we are still out of memory. */
1406 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1410 /* Check if we should connect to a MASTER */
1411 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1412 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1413 if (syncWithMaster() == REDIS_OK
) {
1414 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1420 /* This function gets called every time Redis is entering the
1421 * main loop of the event driven library, that is, before going to sleep
1422 * waiting for ready file descriptors. */
1423 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1424 REDIS_NOTUSED(eventLoop
);
1426 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1430 listRewind(server
.io_ready_clients
,&li
);
1431 while((ln
= listNext(&li
))) {
1432 redisClient
*c
= ln
->value
;
1433 struct redisCommand
*cmd
;
1435 /* Resume the client. */
1436 listDelNode(server
.io_ready_clients
,ln
);
1437 c
->flags
&= (~REDIS_IO_WAIT
);
1438 server
.vm_blocked_clients
--;
1439 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1440 readQueryFromClient
, c
);
1441 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1442 assert(cmd
!= NULL
);
1445 /* There may be more data to process in the input buffer. */
1446 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1447 processInputBuffer(c
);
1452 static void createSharedObjects(void) {
1453 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1454 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1455 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1456 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1457 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1458 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1459 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1460 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1461 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1462 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1463 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1464 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1465 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1466 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1467 "-ERR no such key\r\n"));
1468 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1469 "-ERR syntax error\r\n"));
1470 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1471 "-ERR source and destination objects are the same\r\n"));
1472 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1473 "-ERR index out of range\r\n"));
1474 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1475 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1476 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1477 shared
.select0
= createStringObject("select 0\r\n",10);
1478 shared
.select1
= createStringObject("select 1\r\n",10);
1479 shared
.select2
= createStringObject("select 2\r\n",10);
1480 shared
.select3
= createStringObject("select 3\r\n",10);
1481 shared
.select4
= createStringObject("select 4\r\n",10);
1482 shared
.select5
= createStringObject("select 5\r\n",10);
1483 shared
.select6
= createStringObject("select 6\r\n",10);
1484 shared
.select7
= createStringObject("select 7\r\n",10);
1485 shared
.select8
= createStringObject("select 8\r\n",10);
1486 shared
.select9
= createStringObject("select 9\r\n",10);
1487 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1488 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1489 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",17);
1490 shared
.mbulk3
= createStringObject("*3\r\n",4);
1493 static void appendServerSaveParams(time_t seconds
, int changes
) {
1494 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1495 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1496 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1497 server
.saveparamslen
++;
1500 static void resetServerSaveParams() {
1501 zfree(server
.saveparams
);
1502 server
.saveparams
= NULL
;
1503 server
.saveparamslen
= 0;
1506 static void initServerConfig() {
1507 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1508 server
.port
= REDIS_SERVERPORT
;
1509 server
.verbosity
= REDIS_VERBOSE
;
1510 server
.maxidletime
= REDIS_MAXIDLETIME
;
1511 server
.saveparams
= NULL
;
1512 server
.logfile
= NULL
; /* NULL = log on standard output */
1513 server
.bindaddr
= NULL
;
1514 server
.glueoutputbuf
= 1;
1515 server
.daemonize
= 0;
1516 server
.appendonly
= 0;
1517 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1518 server
.lastfsync
= time(NULL
);
1519 server
.appendfd
= -1;
1520 server
.appendseldb
= -1; /* Make sure the first time will not match */
1521 server
.pidfile
= zstrdup("/var/run/redis.pid");
1522 server
.dbfilename
= zstrdup("dump.rdb");
1523 server
.appendfilename
= zstrdup("appendonly.aof");
1524 server
.requirepass
= NULL
;
1525 server
.shareobjects
= 0;
1526 server
.rdbcompression
= 1;
1527 server
.sharingpoolsize
= 1024;
1528 server
.maxclients
= 0;
1529 server
.blpop_blocked_clients
= 0;
1530 server
.maxmemory
= 0;
1531 server
.vm_enabled
= 0;
1532 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1533 server
.vm_page_size
= 256; /* 256 bytes per page */
1534 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1535 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1536 server
.vm_max_threads
= 4;
1537 server
.vm_blocked_clients
= 0;
1538 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1539 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1541 resetServerSaveParams();
1543 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1544 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1545 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1546 /* Replication related */
1548 server
.masterauth
= NULL
;
1549 server
.masterhost
= NULL
;
1550 server
.masterport
= 6379;
1551 server
.master
= NULL
;
1552 server
.replstate
= REDIS_REPL_NONE
;
1554 /* Double constants initialization */
1556 R_PosInf
= 1.0/R_Zero
;
1557 R_NegInf
= -1.0/R_Zero
;
1558 R_Nan
= R_Zero
/R_Zero
;
1561 static void initServer() {
1564 signal(SIGHUP
, SIG_IGN
);
1565 signal(SIGPIPE
, SIG_IGN
);
1566 setupSigSegvAction();
1568 server
.devnull
= fopen("/dev/null","w");
1569 if (server
.devnull
== NULL
) {
1570 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1573 server
.clients
= listCreate();
1574 server
.slaves
= listCreate();
1575 server
.monitors
= listCreate();
1576 server
.objfreelist
= listCreate();
1577 createSharedObjects();
1578 server
.el
= aeCreateEventLoop();
1579 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1580 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1581 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1582 if (server
.fd
== -1) {
1583 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1586 for (j
= 0; j
< server
.dbnum
; j
++) {
1587 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1588 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1589 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1590 if (server
.vm_enabled
)
1591 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1592 server
.db
[j
].id
= j
;
1594 server
.pubsub_classes
= dictCreate(&keylistDictType
,NULL
);
1595 server
.cronloops
= 0;
1596 server
.bgsavechildpid
= -1;
1597 server
.bgrewritechildpid
= -1;
1598 server
.bgrewritebuf
= sdsempty();
1599 server
.lastsave
= time(NULL
);
1601 server
.stat_numcommands
= 0;
1602 server
.stat_numconnections
= 0;
1603 server
.stat_expiredkeys
= 0;
1604 server
.stat_starttime
= time(NULL
);
1605 server
.unixtime
= time(NULL
);
1606 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1607 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1608 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1610 if (server
.appendonly
) {
1611 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1612 if (server
.appendfd
== -1) {
1613 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1619 if (server
.vm_enabled
) vmInit();
1622 /* Empty the whole database */
1623 static long long emptyDb() {
1625 long long removed
= 0;
1627 for (j
= 0; j
< server
.dbnum
; j
++) {
1628 removed
+= dictSize(server
.db
[j
].dict
);
1629 dictEmpty(server
.db
[j
].dict
);
1630 dictEmpty(server
.db
[j
].expires
);
1635 static int yesnotoi(char *s
) {
1636 if (!strcasecmp(s
,"yes")) return 1;
1637 else if (!strcasecmp(s
,"no")) return 0;
1641 /* I agree, this is a very rudimentary way to load a configuration...
1642 will improve later if the config gets more complex */
1643 static void loadServerConfig(char *filename
) {
1645 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1648 char *errormsg
= "Fatal error, can't open config file '%s'";
1649 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1650 sprintf(errorbuf
, errormsg
, filename
);
1652 if (filename
[0] == '-' && filename
[1] == '\0')
1655 if ((fp
= fopen(filename
,"r")) == NULL
) {
1656 redisLog(REDIS_WARNING
, errorbuf
);
1661 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1667 line
= sdstrim(line
," \t\r\n");
1669 /* Skip comments and blank lines*/
1670 if (line
[0] == '#' || line
[0] == '\0') {
1675 /* Split into arguments */
1676 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1677 sdstolower(argv
[0]);
1679 /* Execute config directives */
1680 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1681 server
.maxidletime
= atoi(argv
[1]);
1682 if (server
.maxidletime
< 0) {
1683 err
= "Invalid timeout value"; goto loaderr
;
1685 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1686 server
.port
= atoi(argv
[1]);
1687 if (server
.port
< 1 || server
.port
> 65535) {
1688 err
= "Invalid port"; goto loaderr
;
1690 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1691 server
.bindaddr
= zstrdup(argv
[1]);
1692 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1693 int seconds
= atoi(argv
[1]);
1694 int changes
= atoi(argv
[2]);
1695 if (seconds
< 1 || changes
< 0) {
1696 err
= "Invalid save parameters"; goto loaderr
;
1698 appendServerSaveParams(seconds
,changes
);
1699 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1700 if (chdir(argv
[1]) == -1) {
1701 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1702 argv
[1], strerror(errno
));
1705 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1706 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1707 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1708 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1709 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1711 err
= "Invalid log level. Must be one of debug, notice, warning";
1714 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1717 server
.logfile
= zstrdup(argv
[1]);
1718 if (!strcasecmp(server
.logfile
,"stdout")) {
1719 zfree(server
.logfile
);
1720 server
.logfile
= NULL
;
1722 if (server
.logfile
) {
1723 /* Test if we are able to open the file. The server will not
1724 * be able to abort just for this problem later... */
1725 logfp
= fopen(server
.logfile
,"a");
1726 if (logfp
== NULL
) {
1727 err
= sdscatprintf(sdsempty(),
1728 "Can't open the log file: %s", strerror(errno
));
1733 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1734 server
.dbnum
= atoi(argv
[1]);
1735 if (server
.dbnum
< 1) {
1736 err
= "Invalid number of databases"; goto loaderr
;
1738 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1739 loadServerConfig(argv
[1]);
1740 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1741 server
.maxclients
= atoi(argv
[1]);
1742 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1743 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1744 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1745 server
.masterhost
= sdsnew(argv
[1]);
1746 server
.masterport
= atoi(argv
[2]);
1747 server
.replstate
= REDIS_REPL_CONNECT
;
1748 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1749 server
.masterauth
= zstrdup(argv
[1]);
1750 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1751 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1752 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1754 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1755 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1756 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1758 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1759 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1760 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1762 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1763 server
.sharingpoolsize
= atoi(argv
[1]);
1764 if (server
.sharingpoolsize
< 1) {
1765 err
= "invalid object sharing pool size"; goto loaderr
;
1767 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1768 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1769 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1771 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1772 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1773 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1775 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1776 if (!strcasecmp(argv
[1],"no")) {
1777 server
.appendfsync
= APPENDFSYNC_NO
;
1778 } else if (!strcasecmp(argv
[1],"always")) {
1779 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1780 } else if (!strcasecmp(argv
[1],"everysec")) {
1781 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1783 err
= "argument must be 'no', 'always' or 'everysec'";
1786 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1787 server
.requirepass
= zstrdup(argv
[1]);
1788 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1789 zfree(server
.pidfile
);
1790 server
.pidfile
= zstrdup(argv
[1]);
1791 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1792 zfree(server
.dbfilename
);
1793 server
.dbfilename
= zstrdup(argv
[1]);
1794 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1795 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1796 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1798 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1799 zfree(server
.vm_swap_file
);
1800 server
.vm_swap_file
= zstrdup(argv
[1]);
1801 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1802 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1803 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1804 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1805 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1806 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1807 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1808 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1809 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1810 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1811 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1812 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1813 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1814 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1816 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1818 for (j
= 0; j
< argc
; j
++)
1823 if (fp
!= stdin
) fclose(fp
);
1827 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1828 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1829 fprintf(stderr
, ">>> '%s'\n", line
);
1830 fprintf(stderr
, "%s\n", err
);
1834 static void freeClientArgv(redisClient
*c
) {
1837 for (j
= 0; j
< c
->argc
; j
++)
1838 decrRefCount(c
->argv
[j
]);
1839 for (j
= 0; j
< c
->mbargc
; j
++)
1840 decrRefCount(c
->mbargv
[j
]);
1845 static void freeClient(redisClient
*c
) {
1848 /* Note that if the client we are freeing is blocked into a blocking
1849 * call, we have to set querybuf to NULL *before* to call
1850 * unblockClientWaitingData() to avoid processInputBuffer() will get
1851 * called. Also it is important to remove the file events after
1852 * this, because this call adds the READABLE event. */
1853 sdsfree(c
->querybuf
);
1855 if (c
->flags
& REDIS_BLOCKED
)
1856 unblockClientWaitingData(c
);
1858 /* Unsubscribe from all the pubsub classes */
1859 pubsubUnsubscribeAll(c
,0);
1860 dictRelease(c
->pubsub_classes
);
1861 /* Obvious cleanup */
1862 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1863 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1864 listRelease(c
->reply
);
1867 /* Remove from the list of clients */
1868 ln
= listSearchKey(server
.clients
,c
);
1869 redisAssert(ln
!= NULL
);
1870 listDelNode(server
.clients
,ln
);
1871 /* Remove from the list of clients waiting for swapped keys */
1872 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1873 ln
= listSearchKey(server
.io_ready_clients
,c
);
1875 listDelNode(server
.io_ready_clients
,ln
);
1876 server
.vm_blocked_clients
--;
1879 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1880 ln
= listFirst(c
->io_keys
);
1881 dontWaitForSwappedKey(c
,ln
->value
);
1883 listRelease(c
->io_keys
);
1884 /* Master/slave cleanup */
1885 if (c
->flags
& REDIS_SLAVE
) {
1886 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1888 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1889 ln
= listSearchKey(l
,c
);
1890 redisAssert(ln
!= NULL
);
1893 if (c
->flags
& REDIS_MASTER
) {
1894 server
.master
= NULL
;
1895 server
.replstate
= REDIS_REPL_CONNECT
;
1897 /* Release memory */
1900 freeClientMultiState(c
);
1904 #define GLUEREPLY_UP_TO (1024)
1905 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1907 char buf
[GLUEREPLY_UP_TO
];
1912 listRewind(c
->reply
,&li
);
1913 while((ln
= listNext(&li
))) {
1917 objlen
= sdslen(o
->ptr
);
1918 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1919 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1921 listDelNode(c
->reply
,ln
);
1923 if (copylen
== 0) return;
1927 /* Now the output buffer is empty, add the new single element */
1928 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1929 listAddNodeHead(c
->reply
,o
);
1932 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1933 redisClient
*c
= privdata
;
1934 int nwritten
= 0, totwritten
= 0, objlen
;
1937 REDIS_NOTUSED(mask
);
1939 /* Use writev() if we have enough buffers to send */
1940 if (!server
.glueoutputbuf
&&
1941 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1942 !(c
->flags
& REDIS_MASTER
))
1944 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1948 while(listLength(c
->reply
)) {
1949 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1950 glueReplyBuffersIfNeeded(c
);
1952 o
= listNodeValue(listFirst(c
->reply
));
1953 objlen
= sdslen(o
->ptr
);
1956 listDelNode(c
->reply
,listFirst(c
->reply
));
1960 if (c
->flags
& REDIS_MASTER
) {
1961 /* Don't reply to a master */
1962 nwritten
= objlen
- c
->sentlen
;
1964 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1965 if (nwritten
<= 0) break;
1967 c
->sentlen
+= nwritten
;
1968 totwritten
+= nwritten
;
1969 /* If we fully sent the object on head go to the next one */
1970 if (c
->sentlen
== objlen
) {
1971 listDelNode(c
->reply
,listFirst(c
->reply
));
1974 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
1975 * bytes: in a single threaded server it's a good idea to serve
1976 * other clients as well, even if a very large request comes from a
1977 * super fast link that is always able to accept data (in a real world
1978 * scenario think about 'KEYS *' against the loopback interface) */
1979 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1981 if (nwritten
== -1) {
1982 if (errno
== EAGAIN
) {
1985 redisLog(REDIS_VERBOSE
,
1986 "Error writing to client: %s", strerror(errno
));
1991 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1992 if (listLength(c
->reply
) == 0) {
1994 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1998 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2000 redisClient
*c
= privdata
;
2001 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2003 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2004 int offset
, ion
= 0;
2006 REDIS_NOTUSED(mask
);
2009 while (listLength(c
->reply
)) {
2010 offset
= c
->sentlen
;
2014 /* fill-in the iov[] array */
2015 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2016 o
= listNodeValue(node
);
2017 objlen
= sdslen(o
->ptr
);
2019 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2022 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2023 break; /* no more iovecs */
2025 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2026 iov
[ion
].iov_len
= objlen
- offset
;
2027 willwrite
+= objlen
- offset
;
2028 offset
= 0; /* just for the first item */
2035 /* write all collected blocks at once */
2036 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2037 if (errno
!= EAGAIN
) {
2038 redisLog(REDIS_VERBOSE
,
2039 "Error writing to client: %s", strerror(errno
));
2046 totwritten
+= nwritten
;
2047 offset
= c
->sentlen
;
2049 /* remove written robjs from c->reply */
2050 while (nwritten
&& listLength(c
->reply
)) {
2051 o
= listNodeValue(listFirst(c
->reply
));
2052 objlen
= sdslen(o
->ptr
);
2054 if(nwritten
>= objlen
- offset
) {
2055 listDelNode(c
->reply
, listFirst(c
->reply
));
2056 nwritten
-= objlen
- offset
;
2060 c
->sentlen
+= nwritten
;
2068 c
->lastinteraction
= time(NULL
);
2070 if (listLength(c
->reply
) == 0) {
2072 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2076 static struct redisCommand
*lookupCommand(char *name
) {
2078 while(cmdTable
[j
].name
!= NULL
) {
2079 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2085 /* resetClient prepare the client to process the next command */
2086 static void resetClient(redisClient
*c
) {
2092 /* Call() is the core of Redis execution of a command */
2093 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2096 dirty
= server
.dirty
;
2098 if (server
.appendonly
&& server
.dirty
-dirty
)
2099 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2100 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2101 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2102 if (listLength(server
.monitors
))
2103 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2104 server
.stat_numcommands
++;
2107 /* If this function gets called we already read a whole
2108 * command, arguments are in the client argv/argc fields.
2109 * processCommand() executes the command or prepares the
2110 * server for a bulk read from the client.
2112 * If 1 is returned the client is still alive and valid and
2113 * other operations can be performed by the caller. Otherwise
2114 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2115 static int processCommand(redisClient
*c
) {
2116 struct redisCommand
*cmd
;
2118 /* Free some memory if needed (maxmemory setting) */
2119 if (server
.maxmemory
) freeMemoryIfNeeded();
2121 /* Handle the multi bulk command type. This is an alternative protocol
2122 * supported by Redis in order to receive commands that are composed of
2123 * multiple binary-safe "bulk" arguments. The latency of processing is
2124 * a bit higher but this allows things like multi-sets, so if this
2125 * protocol is used only for MSET and similar commands this is a big win. */
2126 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2127 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2128 if (c
->multibulk
<= 0) {
2132 decrRefCount(c
->argv
[c
->argc
-1]);
2136 } else if (c
->multibulk
) {
2137 if (c
->bulklen
== -1) {
2138 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2139 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2143 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2144 decrRefCount(c
->argv
[0]);
2145 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2147 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2152 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2156 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2157 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2161 if (c
->multibulk
== 0) {
2165 /* Here we need to swap the multi-bulk argc/argv with the
2166 * normal argc/argv of the client structure. */
2168 c
->argv
= c
->mbargv
;
2169 c
->mbargv
= auxargv
;
2172 c
->argc
= c
->mbargc
;
2173 c
->mbargc
= auxargc
;
2175 /* We need to set bulklen to something different than -1
2176 * in order for the code below to process the command without
2177 * to try to read the last argument of a bulk command as
2178 * a special argument. */
2180 /* continue below and process the command */
2187 /* -- end of multi bulk commands processing -- */
2189 /* The QUIT command is handled as a special case. Normal command
2190 * procs are unable to close the client connection safely */
2191 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2196 /* Now lookup the command and check ASAP about trivial error conditions
2197 * such wrong arity, bad command name and so forth. */
2198 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2201 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2202 (char*)c
->argv
[0]->ptr
));
2205 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2206 (c
->argc
< -cmd
->arity
)) {
2208 sdscatprintf(sdsempty(),
2209 "-ERR wrong number of arguments for '%s' command\r\n",
2213 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2214 /* This is a bulk command, we have to read the last argument yet. */
2215 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2217 decrRefCount(c
->argv
[c
->argc
-1]);
2218 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2220 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2225 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2226 /* It is possible that the bulk read is already in the
2227 * buffer. Check this condition and handle it accordingly.
2228 * This is just a fast path, alternative to call processInputBuffer().
2229 * It's a good idea since the code is small and this condition
2230 * happens most of the times. */
2231 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2232 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2234 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2236 /* Otherwise return... there is to read the last argument
2237 * from the socket. */
2241 /* Let's try to share objects on the command arguments vector */
2242 if (server
.shareobjects
) {
2244 for(j
= 1; j
< c
->argc
; j
++)
2245 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2247 /* Let's try to encode the bulk object to save space. */
2248 if (cmd
->flags
& REDIS_CMD_BULK
)
2249 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2251 /* Check if the user is authenticated */
2252 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2253 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2258 /* Handle the maxmemory directive */
2259 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2260 zmalloc_used_memory() > server
.maxmemory
)
2262 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2267 /* Exec the command */
2268 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2269 queueMultiCommand(c
,cmd
);
2270 addReply(c
,shared
.queued
);
2272 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2273 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2277 /* Prepare the client for the next command */
2282 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2287 /* We need 1+(ARGS*3) objects since commands are using the new protocol:
2288 * we need 1 object for the first "*<count>\r\n" multibulk count, then
2289 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2290 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2293 if (argc
<= REDIS_STATIC_ARGS
) {
2296 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2299 lenobj
= createObject(REDIS_STRING
,
2300 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2301 lenobj
->refcount
= 0;
2302 outv
[outc
++] = lenobj
;
2303 for (j
= 0; j
< argc
; j
++) {
2304 lenobj
= createObject(REDIS_STRING
,
2305 sdscatprintf(sdsempty(),"$%lu\r\n",
2306 (unsigned long) stringObjectLen(argv
[j
])));
2307 lenobj
->refcount
= 0;
2308 outv
[outc
++] = lenobj
;
2309 outv
[outc
++] = argv
[j
];
2310 outv
[outc
++] = shared
.crlf
;
2313 /* Increment all the refcounts at start and decrement at end in order to
2314 * be sure to free objects if there is no slave in a replication state
2315 * able to be feed with commands */
2316 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2317 listRewind(slaves
,&li
);
2318 while((ln
= listNext(&li
))) {
2319 redisClient
*slave
= ln
->value
;
2321 /* Don't feed slaves that are still waiting for BGSAVE to start */
2322 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2324 /* Feed all the other slaves, MONITORs and so on */
2325 if (slave
->slaveseldb
!= dictid
) {
2329 case 0: selectcmd
= shared
.select0
; break;
2330 case 1: selectcmd
= shared
.select1
; break;
2331 case 2: selectcmd
= shared
.select2
; break;
2332 case 3: selectcmd
= shared
.select3
; break;
2333 case 4: selectcmd
= shared
.select4
; break;
2334 case 5: selectcmd
= shared
.select5
; break;
2335 case 6: selectcmd
= shared
.select6
; break;
2336 case 7: selectcmd
= shared
.select7
; break;
2337 case 8: selectcmd
= shared
.select8
; break;
2338 case 9: selectcmd
= shared
.select9
; break;
2340 selectcmd
= createObject(REDIS_STRING
,
2341 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2342 selectcmd
->refcount
= 0;
2345 addReply(slave
,selectcmd
);
2346 slave
->slaveseldb
= dictid
;
2348 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2350 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2351 if (outv
!= static_outv
) zfree(outv
);
2354 static void processInputBuffer(redisClient
*c
) {
2356 /* Before processing the input buffer, make sure the client is not
2357 * waiting for a blocking operation such as BLPOP. Note that on the first
2358 * iteration the client is never blocked, otherwise processInputBuffer()
2359 * would not be called at all, but after the execution of the first commands
2360 * in the input buffer the client may be blocked, and the "goto again"
2361 * will try to reiterate. The following line will make it return asap. */
2362 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2363 if (c
->bulklen
== -1) {
2364 /* Read the first line of the query */
2365 char *p
= strchr(c
->querybuf
,'\n');
2372 query
= c
->querybuf
;
2373 c
->querybuf
= sdsempty();
2374 querylen
= 1+(p
-(query
));
2375 if (sdslen(query
) > querylen
) {
2376 /* leave data after the first line of the query in the buffer */
2377 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2379 *p
= '\0'; /* remove "\n" */
2380 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2381 sdsupdatelen(query
);
2383 /* Now we can split the query in arguments */
2384 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2387 if (c
->argv
) zfree(c
->argv
);
2388 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2390 for (j
= 0; j
< argc
; j
++) {
2391 if (sdslen(argv
[j
])) {
2392 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2400 /* Execute the command. If the client is still valid
2401 * after processCommand() return and there is something
2402 * on the query buffer try to process the next command. */
2403 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2405 /* Nothing to process, argc == 0. Just process the query
2406 * buffer if it's not empty or return to the caller */
2407 if (sdslen(c
->querybuf
)) goto again
;
2410 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2411 redisLog(REDIS_VERBOSE
, "Client protocol error");
2416 /* Bulk read handling. Note that if we are at this point
2417 the client already sent a command terminated with a newline,
2418 we are reading the bulk data that is actually the last
2419 argument of the command. */
2420 int qbl
= sdslen(c
->querybuf
);
2422 if (c
->bulklen
<= qbl
) {
2423 /* Copy everything but the final CRLF as final argument */
2424 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2426 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2427 /* Process the command. If the client is still valid after
2428 * the processing and there is more data in the buffer
2429 * try to parse it. */
2430 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2436 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2437 redisClient
*c
= (redisClient
*) privdata
;
2438 char buf
[REDIS_IOBUF_LEN
];
2441 REDIS_NOTUSED(mask
);
2443 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2445 if (errno
== EAGAIN
) {
2448 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2452 } else if (nread
== 0) {
2453 redisLog(REDIS_VERBOSE
, "Client closed connection");
2458 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2459 c
->lastinteraction
= time(NULL
);
2463 processInputBuffer(c
);
2466 static int selectDb(redisClient
*c
, int id
) {
2467 if (id
< 0 || id
>= server
.dbnum
)
2469 c
->db
= &server
.db
[id
];
2473 static void *dupClientReplyValue(void *o
) {
2474 incrRefCount((robj
*)o
);
2478 static redisClient
*createClient(int fd
) {
2479 redisClient
*c
= zmalloc(sizeof(*c
));
2481 anetNonBlock(NULL
,fd
);
2482 anetTcpNoDelay(NULL
,fd
);
2483 if (!c
) return NULL
;
2486 c
->querybuf
= sdsempty();
2495 c
->lastinteraction
= time(NULL
);
2496 c
->authenticated
= 0;
2497 c
->replstate
= REDIS_REPL_NONE
;
2498 c
->reply
= listCreate();
2499 listSetFreeMethod(c
->reply
,decrRefCount
);
2500 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2501 c
->blockingkeys
= NULL
;
2502 c
->blockingkeysnum
= 0;
2503 c
->io_keys
= listCreate();
2504 c
->pubsub_classes
= dictCreate(&setDictType
,NULL
);
2505 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2506 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2507 readQueryFromClient
, c
) == AE_ERR
) {
2511 listAddNodeTail(server
.clients
,c
);
2512 initClientMultiState(c
);
2516 static void addReply(redisClient
*c
, robj
*obj
) {
2517 if (listLength(c
->reply
) == 0 &&
2518 (c
->replstate
== REDIS_REPL_NONE
||
2519 c
->replstate
== REDIS_REPL_ONLINE
) &&
2520 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2521 sendReplyToClient
, c
) == AE_ERR
) return;
2523 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2524 obj
= dupStringObject(obj
);
2525 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2527 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2530 static void addReplySds(redisClient
*c
, sds s
) {
2531 robj
*o
= createObject(REDIS_STRING
,s
);
2536 static void addReplyDouble(redisClient
*c
, double d
) {
2539 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2540 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2541 (unsigned long) strlen(buf
),buf
));
2544 static void addReplyLong(redisClient
*c
, long l
) {
2549 addReply(c
,shared
.czero
);
2551 } else if (l
== 1) {
2552 addReply(c
,shared
.cone
);
2555 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2556 addReplySds(c
,sdsnewlen(buf
,len
));
2559 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2564 addReply(c
,shared
.czero
);
2566 } else if (ul
== 1) {
2567 addReply(c
,shared
.cone
);
2570 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2571 addReplySds(c
,sdsnewlen(buf
,len
));
2574 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2577 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2578 len
= sdslen(obj
->ptr
);
2580 long n
= (long)obj
->ptr
;
2582 /* Compute how many bytes will take this integer as a radix 10 string */
2588 while((n
= n
/10) != 0) {
2592 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2595 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2596 addReplyBulkLen(c
,obj
);
2598 addReply(c
,shared
.crlf
);
2601 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2602 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2604 addReply(c
,shared
.nullbulk
);
2606 robj
*o
= createStringObject(s
,strlen(s
));
2612 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2617 REDIS_NOTUSED(mask
);
2618 REDIS_NOTUSED(privdata
);
2620 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2621 if (cfd
== AE_ERR
) {
2622 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2625 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2626 if ((c
= createClient(cfd
)) == NULL
) {
2627 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2628 close(cfd
); /* May be already closed, just ingore errors */
2631 /* If maxclient directive is set and this is one client more... close the
2632 * connection. Note that we create the client instead to check before
2633 * for this condition, since now the socket is already set in nonblocking
2634 * mode and we can send an error for free using the Kernel I/O */
2635 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2636 char *err
= "-ERR max number of clients reached\r\n";
2638 /* That's a best effort error message, don't check write errors */
2639 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2640 /* Nothing to do, Just to avoid the warning... */
2645 server
.stat_numconnections
++;
2648 /* ======================= Redis objects implementation ===================== */
2650 static robj
*createObject(int type
, void *ptr
) {
2653 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2654 if (listLength(server
.objfreelist
)) {
2655 listNode
*head
= listFirst(server
.objfreelist
);
2656 o
= listNodeValue(head
);
2657 listDelNode(server
.objfreelist
,head
);
2658 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2660 if (server
.vm_enabled
) {
2661 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2662 o
= zmalloc(sizeof(*o
));
2664 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2668 o
->encoding
= REDIS_ENCODING_RAW
;
2671 if (server
.vm_enabled
) {
2672 /* Note that this code may run in the context of an I/O thread
2673 * and accessing to server.unixtime in theory is an error
2674 * (no locks). But in practice this is safe, and even if we read
2675 * garbage Redis will not fail, as it's just a statistical info */
2676 o
->vm
.atime
= server
.unixtime
;
2677 o
->storage
= REDIS_VM_MEMORY
;
2682 static robj
*createStringObject(char *ptr
, size_t len
) {
2683 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2686 static robj
*dupStringObject(robj
*o
) {
2687 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2688 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2691 static robj
*createListObject(void) {
2692 list
*l
= listCreate();
2694 listSetFreeMethod(l
,decrRefCount
);
2695 return createObject(REDIS_LIST
,l
);
2698 static robj
*createSetObject(void) {
2699 dict
*d
= dictCreate(&setDictType
,NULL
);
2700 return createObject(REDIS_SET
,d
);
2703 static robj
*createHashObject(void) {
2704 /* All the Hashes start as zipmaps. Will be automatically converted
2705 * into hash tables if there are enough elements or big elements
2707 unsigned char *zm
= zipmapNew();
2708 robj
*o
= createObject(REDIS_HASH
,zm
);
2709 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2713 static robj
*createZsetObject(void) {
2714 zset
*zs
= zmalloc(sizeof(*zs
));
2716 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2717 zs
->zsl
= zslCreate();
2718 return createObject(REDIS_ZSET
,zs
);
2721 static void freeStringObject(robj
*o
) {
2722 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2727 static void freeListObject(robj
*o
) {
2728 listRelease((list
*) o
->ptr
);
2731 static void freeSetObject(robj
*o
) {
2732 dictRelease((dict
*) o
->ptr
);
2735 static void freeZsetObject(robj
*o
) {
2738 dictRelease(zs
->dict
);
2743 static void freeHashObject(robj
*o
) {
2744 switch (o
->encoding
) {
2745 case REDIS_ENCODING_HT
:
2746 dictRelease((dict
*) o
->ptr
);
2748 case REDIS_ENCODING_ZIPMAP
:
2757 static void incrRefCount(robj
*o
) {
2758 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2762 static void decrRefCount(void *obj
) {
2765 /* Object is a key of a swapped out value, or in the process of being
2767 if (server
.vm_enabled
&&
2768 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2770 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2771 redisAssert(o
->refcount
== 1);
2773 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2774 redisAssert(o
->type
== REDIS_STRING
);
2775 freeStringObject(o
);
2776 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2777 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2778 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2779 !listAddNodeHead(server
.objfreelist
,o
))
2781 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2782 server
.vm_stats_swapped_objects
--;
2785 /* Object is in memory, or in the process of being swapped out. */
2786 if (--(o
->refcount
) == 0) {
2787 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2788 vmCancelThreadedIOJob(obj
);
2790 case REDIS_STRING
: freeStringObject(o
); break;
2791 case REDIS_LIST
: freeListObject(o
); break;
2792 case REDIS_SET
: freeSetObject(o
); break;
2793 case REDIS_ZSET
: freeZsetObject(o
); break;
2794 case REDIS_HASH
: freeHashObject(o
); break;
2795 default: redisAssert(0); break;
2797 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2798 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2799 !listAddNodeHead(server
.objfreelist
,o
))
2801 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2805 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2806 dictEntry
*de
= dictFind(db
->dict
,key
);
2808 robj
*key
= dictGetEntryKey(de
);
2809 robj
*val
= dictGetEntryVal(de
);
2811 if (server
.vm_enabled
) {
2812 if (key
->storage
== REDIS_VM_MEMORY
||
2813 key
->storage
== REDIS_VM_SWAPPING
)
2815 /* If we were swapping the object out, stop it, this key
2817 if (key
->storage
== REDIS_VM_SWAPPING
)
2818 vmCancelThreadedIOJob(key
);
2819 /* Update the access time of the key for the aging algorithm. */
2820 key
->vm
.atime
= server
.unixtime
;
2822 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2824 /* Our value was swapped on disk. Bring it at home. */
2825 redisAssert(val
== NULL
);
2826 val
= vmLoadObject(key
);
2827 dictGetEntryVal(de
) = val
;
2829 /* Clients blocked by the VM subsystem may be waiting for
2831 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2840 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2841 expireIfNeeded(db
,key
);
2842 return lookupKey(db
,key
);
2845 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2846 deleteIfVolatile(db
,key
);
2847 return lookupKey(db
,key
);
2850 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2851 robj
*o
= lookupKeyRead(c
->db
, key
);
2852 if (!o
) addReply(c
,reply
);
2856 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2857 robj
*o
= lookupKeyWrite(c
->db
, key
);
2858 if (!o
) addReply(c
,reply
);
2862 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2863 if (o
->type
!= type
) {
2864 addReply(c
,shared
.wrongtypeerr
);
2870 static int deleteKey(redisDb
*db
, robj
*key
) {
2873 /* We need to protect key from destruction: after the first dictDelete()
2874 * it may happen that 'key' is no longer valid if we don't increment
2875 * it's count. This may happen when we get the object reference directly
2876 * from the hash table with dictRandomKey() or dict iterators */
2878 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2879 retval
= dictDelete(db
->dict
,key
);
2882 return retval
== DICT_OK
;
2885 /* Try to share an object against the shared objects pool */
2886 static robj
*tryObjectSharing(robj
*o
) {
2887 struct dictEntry
*de
;
2890 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2892 redisAssert(o
->type
== REDIS_STRING
);
2893 de
= dictFind(server
.sharingpool
,o
);
2895 robj
*shared
= dictGetEntryKey(de
);
2897 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2898 dictGetEntryVal(de
) = (void*) c
;
2899 incrRefCount(shared
);
2903 /* Here we are using a stream algorithm: every time an object is
2904 * shared we increment its count, every time there is a miss we
2905 * decrement the counter of a random object. If this object reaches
2906 * zero we remove the object and put the current object instead. */
2907 if (dictSize(server
.sharingpool
) >=
2908 server
.sharingpoolsize
) {
2909 de
= dictGetRandomKey(server
.sharingpool
);
2910 redisAssert(de
!= NULL
);
2911 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2912 dictGetEntryVal(de
) = (void*) c
;
2914 dictDelete(server
.sharingpool
,de
->key
);
2917 c
= 0; /* If the pool is empty we want to add this object */
2922 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2923 redisAssert(retval
== DICT_OK
);
2930 /* Check if the nul-terminated string 's' can be represented by a long
2931 * (that is, is a number that fits into long without any other space or
2932 * character before or after the digits).
2934 * If so, the function returns REDIS_OK and *longval is set to the value
2935 * of the number. Otherwise REDIS_ERR is returned */
2936 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2937 char buf
[32], *endptr
;
2941 value
= strtol(s
, &endptr
, 10);
2942 if (endptr
[0] != '\0') return REDIS_ERR
;
2943 slen
= snprintf(buf
,32,"%ld",value
);
2945 /* If the number converted back into a string is not identical
2946 * then it's not possible to encode the string as integer */
2947 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2948 if (longval
) *longval
= value
;
2952 /* Try to encode a string object in order to save space */
2953 static int tryObjectEncoding(robj
*o
) {
2957 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2958 return REDIS_ERR
; /* Already encoded */
2960 /* It's not safe to encode shared objects: shared objects can be shared
2961 * everywhere in the "object space" of Redis. Encoded objects can only
2962 * appear as "values" (and not, for instance, as keys) */
2963 if (o
->refcount
> 1) return REDIS_ERR
;
2965 /* Currently we try to encode only strings */
2966 redisAssert(o
->type
== REDIS_STRING
);
2968 /* Check if we can represent this string as a long integer */
2969 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2971 /* Ok, this object can be encoded */
2972 o
->encoding
= REDIS_ENCODING_INT
;
2974 o
->ptr
= (void*) value
;
2978 /* Get a decoded version of an encoded object (returned as a new object).
2979 * If the object is already raw-encoded just increment the ref count. */
2980 static robj
*getDecodedObject(robj
*o
) {
2983 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2987 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2990 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2991 dec
= createStringObject(buf
,strlen(buf
));
2994 redisAssert(1 != 1);
2998 /* Compare two string objects via strcmp() or alike.
2999 * Note that the objects may be integer-encoded. In such a case we
3000 * use snprintf() to get a string representation of the numbers on the stack
3001 * and compare the strings, it's much faster than calling getDecodedObject().
3003 * Important note: if objects are not integer encoded, but binary-safe strings,
3004 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3006 static int compareStringObjects(robj
*a
, robj
*b
) {
3007 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3008 char bufa
[128], bufb
[128], *astr
, *bstr
;
3011 if (a
== b
) return 0;
3012 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3013 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3019 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3020 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3026 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3029 static size_t stringObjectLen(robj
*o
) {
3030 redisAssert(o
->type
== REDIS_STRING
);
3031 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3032 return sdslen(o
->ptr
);
3036 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3040 /*============================ RDB saving/loading =========================== */
3042 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3043 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3047 static int rdbSaveTime(FILE *fp
, time_t t
) {
3048 int32_t t32
= (int32_t) t
;
3049 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3053 /* check rdbLoadLen() comments for more info */
3054 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3055 unsigned char buf
[2];
3058 /* Save a 6 bit len */
3059 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3060 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3061 } else if (len
< (1<<14)) {
3062 /* Save a 14 bit len */
3063 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3065 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3067 /* Save a 32 bit len */
3068 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3069 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3071 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3076 /* String objects in the form "2391" "-100" without any space and with a
3077 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3078 * encoded as integers to save space */
3079 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3081 char *endptr
, buf
[32];
3083 /* Check if it's possible to encode this value as a number */
3084 value
= strtoll(s
, &endptr
, 10);
3085 if (endptr
[0] != '\0') return 0;
3086 snprintf(buf
,32,"%lld",value
);
3088 /* If the number converted back into a string is not identical
3089 * then it's not possible to encode the string as integer */
3090 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3092 /* Finally check if it fits in our ranges */
3093 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3094 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3095 enc
[1] = value
&0xFF;
3097 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3098 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3099 enc
[1] = value
&0xFF;
3100 enc
[2] = (value
>>8)&0xFF;
3102 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3103 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3104 enc
[1] = value
&0xFF;
3105 enc
[2] = (value
>>8)&0xFF;
3106 enc
[3] = (value
>>16)&0xFF;
3107 enc
[4] = (value
>>24)&0xFF;
3114 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3115 size_t comprlen
, outlen
;
3119 /* We require at least four bytes compression for this to be worth it */
3120 if (len
<= 4) return 0;
3122 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3123 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3124 if (comprlen
== 0) {
3128 /* Data compressed! Let's save it on disk */
3129 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3130 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3131 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3132 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3133 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3142 /* Save a string object as [len][data] on disk. If the object is a string
3143 * representation of an integer value we try to save it in a special form */
3144 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3147 /* Try integer encoding */
3149 unsigned char buf
[5];
3150 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3151 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3156 /* Try LZF compression - under 20 bytes it's unable to compress even
3157 * aaaaaaaaaaaaaaaaaa so skip it */
3158 if (server
.rdbcompression
&& len
> 20) {
3161 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3162 if (retval
== -1) return -1;
3163 if (retval
> 0) return 0;
3164 /* retval == 0 means data can't be compressed, save the old way */
3167 /* Store verbatim */
3168 if (rdbSaveLen(fp
,len
) == -1) return -1;
3169 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3173 /* Like rdbSaveRawString() but handle encoded objects */
3174 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3177 /* Avoid incr/decr ref count business when possible.
3178 * This plays well with copy-on-write given that we are probably
3179 * in a child process (BGSAVE). Also this makes sure key objects
3180 * of swapped objects are not incRefCount-ed (an assert does not allow
3181 * this in order to avoid bugs) */
3182 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3183 obj
= getDecodedObject(obj
);
3184 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3187 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3192 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3193 * 8 bit integer specifying the length of the representation.
3194 * This 8 bit integer has special values in order to specify the following
3200 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3201 unsigned char buf
[128];
3207 } else if (!isfinite(val
)) {
3209 buf
[0] = (val
< 0) ? 255 : 254;
3211 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3212 buf
[0] = strlen((char*)buf
+1);
3215 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3219 /* Save a Redis object. */
3220 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3221 if (o
->type
== REDIS_STRING
) {
3222 /* Save a string value */
3223 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3224 } else if (o
->type
== REDIS_LIST
) {
3225 /* Save a list value */
3226 list
*list
= o
->ptr
;
3230 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3231 listRewind(list
,&li
);
3232 while((ln
= listNext(&li
))) {
3233 robj
*eleobj
= listNodeValue(ln
);
3235 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3237 } else if (o
->type
== REDIS_SET
) {
3238 /* Save a set value */
3240 dictIterator
*di
= dictGetIterator(set
);
3243 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3244 while((de
= dictNext(di
)) != NULL
) {
3245 robj
*eleobj
= dictGetEntryKey(de
);
3247 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3249 dictReleaseIterator(di
);
3250 } else if (o
->type
== REDIS_ZSET
) {
3251 /* Save a sorted set value */
3253 dictIterator
*di
= dictGetIterator(zs
->dict
);
3256 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3257 while((de
= dictNext(di
)) != NULL
) {
3258 robj
*eleobj
= dictGetEntryKey(de
);
3259 double *score
= dictGetEntryVal(de
);
3261 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3262 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3264 dictReleaseIterator(di
);
3265 } else if (o
->type
== REDIS_HASH
) {
3266 /* Save a hash value */
3267 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3268 unsigned char *p
= zipmapRewind(o
->ptr
);
3269 unsigned int count
= zipmapLen(o
->ptr
);
3270 unsigned char *key
, *val
;
3271 unsigned int klen
, vlen
;
3273 if (rdbSaveLen(fp
,count
) == -1) return -1;
3274 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3275 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3276 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3279 dictIterator
*di
= dictGetIterator(o
->ptr
);
3282 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3283 while((de
= dictNext(di
)) != NULL
) {
3284 robj
*key
= dictGetEntryKey(de
);
3285 robj
*val
= dictGetEntryVal(de
);
3287 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3288 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3290 dictReleaseIterator(di
);
3298 /* Return the length the object will have on disk if saved with
3299 * the rdbSaveObject() function. Currently we use a trick to get
3300 * this length with very little changes to the code. In the future
3301 * we could switch to a faster solution. */
3302 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3303 if (fp
== NULL
) fp
= server
.devnull
;
3305 assert(rdbSaveObject(fp
,o
) != 1);
3309 /* Return the number of pages required to save this object in the swap file */
3310 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3311 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3313 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3316 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3317 static int rdbSave(char *filename
) {
3318 dictIterator
*di
= NULL
;
3323 time_t now
= time(NULL
);
3325 /* Wait for I/O threads to terminate, just in case this is a
3326 * foreground-saving, to avoid seeking the swap file descriptor at the
3328 if (server
.vm_enabled
)
3329 waitEmptyIOJobsQueue();
3331 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3332 fp
= fopen(tmpfile
,"w");
3334 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3337 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3338 for (j
= 0; j
< server
.dbnum
; j
++) {
3339 redisDb
*db
= server
.db
+j
;
3341 if (dictSize(d
) == 0) continue;
3342 di
= dictGetIterator(d
);
3348 /* Write the SELECT DB opcode */
3349 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3350 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3352 /* Iterate this DB writing every entry */
3353 while((de
= dictNext(di
)) != NULL
) {
3354 robj
*key
= dictGetEntryKey(de
);
3355 robj
*o
= dictGetEntryVal(de
);
3356 time_t expiretime
= getExpire(db
,key
);
3358 /* Save the expire time */
3359 if (expiretime
!= -1) {
3360 /* If this key is already expired skip it */
3361 if (expiretime
< now
) continue;
3362 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3363 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3365 /* Save the key and associated value. This requires special
3366 * handling if the value is swapped out. */
3367 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3368 key
->storage
== REDIS_VM_SWAPPING
) {
3369 /* Save type, key, value */
3370 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3371 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3372 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3374 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3376 /* Get a preview of the object in memory */
3377 po
= vmPreviewObject(key
);
3378 /* Save type, key, value */
3379 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3380 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3381 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3382 /* Remove the loaded object from memory */
3386 dictReleaseIterator(di
);
3389 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3391 /* Make sure data will not remain on the OS's output buffers */
3396 /* Use RENAME to make sure the DB file is changed atomically only
3397 * if the generated DB file is ok. */
3398 if (rename(tmpfile
,filename
) == -1) {
3399 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3403 redisLog(REDIS_NOTICE
,"DB saved on disk");
3405 server
.lastsave
= time(NULL
);
3411 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3412 if (di
) dictReleaseIterator(di
);
3416 static int rdbSaveBackground(char *filename
) {
3419 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3420 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3421 if ((childpid
= fork()) == 0) {
3423 if (server
.vm_enabled
) vmReopenSwapFile();
3425 if (rdbSave(filename
) == REDIS_OK
) {
3432 if (childpid
== -1) {
3433 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3437 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3438 server
.bgsavechildpid
= childpid
;
3441 return REDIS_OK
; /* unreached */
3444 static void rdbRemoveTempFile(pid_t childpid
) {
3447 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3451 static int rdbLoadType(FILE *fp
) {
3453 if (fread(&type
,1,1,fp
) == 0) return -1;
3457 static time_t rdbLoadTime(FILE *fp
) {
3459 if (fread(&t32
,4,1,fp
) == 0) return -1;
3460 return (time_t) t32
;
3463 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3464 * of this file for a description of how these are stored on disk.
3466 * isencoded is set to 1 if the value read is not actually a length but
3467 * an "encoding type", check the above comments for more info */
3468 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3469 unsigned char buf
[2];
3473 if (isencoded
) *isencoded
= 0;
3474 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3475 type
= (buf
[0]&0xC0)>>6;
3476 if (type
== REDIS_RDB_6BITLEN
) {
3477 /* Read a 6 bit len */
3479 } else if (type
== REDIS_RDB_ENCVAL
) {
3480 /* Read a 6 bit len encoding type */
3481 if (isencoded
) *isencoded
= 1;
3483 } else if (type
== REDIS_RDB_14BITLEN
) {
3484 /* Read a 14 bit len */
3485 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3486 return ((buf
[0]&0x3F)<<8)|buf
[1];
3488 /* Read a 32 bit len */
3489 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3494 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3495 unsigned char enc
[4];
3498 if (enctype
== REDIS_RDB_ENC_INT8
) {
3499 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3500 val
= (signed char)enc
[0];
3501 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3503 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3504 v
= enc
[0]|(enc
[1]<<8);
3506 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3508 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3509 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3512 val
= 0; /* anti-warning */
3515 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3518 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3519 unsigned int len
, clen
;
3520 unsigned char *c
= NULL
;
3523 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3524 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3525 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3526 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3527 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3528 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3530 return createObject(REDIS_STRING
,val
);
3537 static robj
*rdbLoadStringObject(FILE*fp
) {
3542 len
= rdbLoadLen(fp
,&isencoded
);
3545 case REDIS_RDB_ENC_INT8
:
3546 case REDIS_RDB_ENC_INT16
:
3547 case REDIS_RDB_ENC_INT32
:
3548 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3549 case REDIS_RDB_ENC_LZF
:
3550 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3556 if (len
== REDIS_RDB_LENERR
) return NULL
;
3557 val
= sdsnewlen(NULL
,len
);
3558 if (len
&& fread(val
,len
,1,fp
) == 0) {
3562 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3565 /* For information about double serialization check rdbSaveDoubleValue() */
3566 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3570 if (fread(&len
,1,1,fp
) == 0) return -1;
3572 case 255: *val
= R_NegInf
; return 0;
3573 case 254: *val
= R_PosInf
; return 0;
3574 case 253: *val
= R_Nan
; return 0;
3576 if (fread(buf
,len
,1,fp
) == 0) return -1;
3578 sscanf(buf
, "%lg", val
);
3583 /* Load a Redis object of the specified type from the specified file.
3584 * On success a newly allocated object is returned, otherwise NULL. */
3585 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3588 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3589 if (type
== REDIS_STRING
) {
3590 /* Read string value */
3591 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3592 tryObjectEncoding(o
);
3593 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3594 /* Read list/set value */
3597 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3598 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3599 /* It's faster to expand the dict to the right size asap in order
3600 * to avoid rehashing */
3601 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3602 dictExpand(o
->ptr
,listlen
);
3603 /* Load every single element of the list/set */
3607 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3608 tryObjectEncoding(ele
);
3609 if (type
== REDIS_LIST
) {
3610 listAddNodeTail((list
*)o
->ptr
,ele
);
3612 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3615 } else if (type
== REDIS_ZSET
) {
3616 /* Read sorted set value */
3620 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3621 o
= createZsetObject();
3623 /* Load every single element of the sorted set */
3626 double *score
= zmalloc(sizeof(double));
3628 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3629 tryObjectEncoding(ele
);
3630 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3631 dictAdd(zs
->dict
,ele
,score
);
3632 zslInsert(zs
->zsl
,*score
,ele
);
3633 incrRefCount(ele
); /* added to skiplist */
3635 } else if (type
== REDIS_HASH
) {
3638 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3639 o
= createHashObject();
3640 /* Too many entries? Use a hash table. */
3641 if (hashlen
> server
.hash_max_zipmap_entries
)
3642 convertToRealHash(o
);
3643 /* Load every key/value, then set it into the zipmap or hash
3644 * table, as needed. */
3648 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3649 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3650 /* If we are using a zipmap and the key or the value is too big
3651 * the object is converted to real hash table encoding. */
3652 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3653 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3654 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3656 convertToRealHash(o
);
3659 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3660 unsigned char *zm
= o
->ptr
;
3662 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3663 val
->ptr
,sdslen(val
->ptr
),NULL
);
3668 tryObjectEncoding(key
);
3669 tryObjectEncoding(val
);
3670 dictAdd((dict
*)o
->ptr
,key
,val
);
3679 static int rdbLoad(char *filename
) {
3681 robj
*keyobj
= NULL
;
3683 int type
, retval
, rdbver
;
3684 dict
*d
= server
.db
[0].dict
;
3685 redisDb
*db
= server
.db
+0;
3687 time_t expiretime
= -1, now
= time(NULL
);
3688 long long loadedkeys
= 0;
3690 fp
= fopen(filename
,"r");
3691 if (!fp
) return REDIS_ERR
;
3692 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3694 if (memcmp(buf
,"REDIS",5) != 0) {
3696 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3699 rdbver
= atoi(buf
+5);
3702 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3709 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3710 if (type
== REDIS_EXPIRETIME
) {
3711 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3712 /* We read the time so we need to read the object type again */
3713 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3715 if (type
== REDIS_EOF
) break;
3716 /* Handle SELECT DB opcode as a special case */
3717 if (type
== REDIS_SELECTDB
) {
3718 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3720 if (dbid
>= (unsigned)server
.dbnum
) {
3721 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3724 db
= server
.db
+dbid
;
3729 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3731 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3732 /* Add the new object in the hash table */
3733 retval
= dictAdd(d
,keyobj
,o
);
3734 if (retval
== DICT_ERR
) {
3735 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3738 /* Set the expire time if needed */
3739 if (expiretime
!= -1) {
3740 setExpire(db
,keyobj
,expiretime
);
3741 /* Delete this key if already expired */
3742 if (expiretime
< now
) deleteKey(db
,keyobj
);
3746 /* Handle swapping while loading big datasets when VM is on */
3748 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3749 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3750 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3757 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3758 if (keyobj
) decrRefCount(keyobj
);
3759 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3761 return REDIS_ERR
; /* Just to avoid warning */
3764 /*================================== Commands =============================== */
3766 static void authCommand(redisClient
*c
) {
3767 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3768 c
->authenticated
= 1;
3769 addReply(c
,shared
.ok
);
3771 c
->authenticated
= 0;
3772 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3776 static void pingCommand(redisClient
*c
) {
3777 addReply(c
,shared
.pong
);
3780 static void echoCommand(redisClient
*c
) {
3781 addReplyBulk(c
,c
->argv
[1]);
3784 /*=================================== Strings =============================== */
3786 static void setGenericCommand(redisClient
*c
, int nx
) {
3789 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3790 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3791 if (retval
== DICT_ERR
) {
3793 /* If the key is about a swapped value, we want a new key object
3794 * to overwrite the old. So we delete the old key in the database.
3795 * This will also make sure that swap pages about the old object
3796 * will be marked as free. */
3797 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3798 incrRefCount(c
->argv
[1]);
3799 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3800 incrRefCount(c
->argv
[2]);
3802 addReply(c
,shared
.czero
);
3806 incrRefCount(c
->argv
[1]);
3807 incrRefCount(c
->argv
[2]);
3810 removeExpire(c
->db
,c
->argv
[1]);
3811 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3814 static void setCommand(redisClient
*c
) {
3815 setGenericCommand(c
,0);
3818 static void setnxCommand(redisClient
*c
) {
3819 setGenericCommand(c
,1);
3822 static int getGenericCommand(redisClient
*c
) {
3825 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3828 if (o
->type
!= REDIS_STRING
) {
3829 addReply(c
,shared
.wrongtypeerr
);
3837 static void getCommand(redisClient
*c
) {
3838 getGenericCommand(c
);
3841 static void getsetCommand(redisClient
*c
) {
3842 if (getGenericCommand(c
) == REDIS_ERR
) return;
3843 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3844 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3846 incrRefCount(c
->argv
[1]);
3848 incrRefCount(c
->argv
[2]);
3850 removeExpire(c
->db
,c
->argv
[1]);
3853 static void mgetCommand(redisClient
*c
) {
3856 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3857 for (j
= 1; j
< c
->argc
; j
++) {
3858 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3860 addReply(c
,shared
.nullbulk
);
3862 if (o
->type
!= REDIS_STRING
) {
3863 addReply(c
,shared
.nullbulk
);
3871 static void msetGenericCommand(redisClient
*c
, int nx
) {
3872 int j
, busykeys
= 0;
3874 if ((c
->argc
% 2) == 0) {
3875 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3878 /* Handle the NX flag. The MSETNX semantic is to return zero and set
3879 * nothing at all if at least one key already exists. */
3881 for (j
= 1; j
< c
->argc
; j
+= 2) {
3882 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3888 addReply(c
, shared
.czero
);
3892 for (j
= 1; j
< c
->argc
; j
+= 2) {
3895 tryObjectEncoding(c
->argv
[j
+1]);
3896 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3897 if (retval
== DICT_ERR
) {
3898 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3899 incrRefCount(c
->argv
[j
+1]);
3901 incrRefCount(c
->argv
[j
]);
3902 incrRefCount(c
->argv
[j
+1]);
3904 removeExpire(c
->db
,c
->argv
[j
]);
3906 server
.dirty
+= (c
->argc
-1)/2;
3907 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3910 static void msetCommand(redisClient
*c
) {
3911 msetGenericCommand(c
,0);
3914 static void msetnxCommand(redisClient
*c
) {
3915 msetGenericCommand(c
,1);
3918 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3923 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3927 if (o
->type
!= REDIS_STRING
) {
3932 if (o
->encoding
== REDIS_ENCODING_RAW
)
3933 value
= strtoll(o
->ptr
, &eptr
, 10);
3934 else if (o
->encoding
== REDIS_ENCODING_INT
)
3935 value
= (long)o
->ptr
;
3937 redisAssert(1 != 1);
3942 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3943 tryObjectEncoding(o
);
3944 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3945 if (retval
== DICT_ERR
) {
3946 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3947 removeExpire(c
->db
,c
->argv
[1]);
3949 incrRefCount(c
->argv
[1]);
3952 addReply(c
,shared
.colon
);
3954 addReply(c
,shared
.crlf
);
3957 static void incrCommand(redisClient
*c
) {
3958 incrDecrCommand(c
,1);
3961 static void decrCommand(redisClient
*c
) {
3962 incrDecrCommand(c
,-1);
3965 static void incrbyCommand(redisClient
*c
) {
3966 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3967 incrDecrCommand(c
,incr
);
3970 static void decrbyCommand(redisClient
*c
) {
3971 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3972 incrDecrCommand(c
,-incr
);
3975 static void appendCommand(redisClient
*c
) {
3980 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3982 /* Create the key */
3983 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3984 incrRefCount(c
->argv
[1]);
3985 incrRefCount(c
->argv
[2]);
3986 totlen
= stringObjectLen(c
->argv
[2]);
3990 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3993 o
= dictGetEntryVal(de
);
3994 if (o
->type
!= REDIS_STRING
) {
3995 addReply(c
,shared
.wrongtypeerr
);
3998 /* If the object is specially encoded or shared we have to make
4000 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4001 robj
*decoded
= getDecodedObject(o
);
4003 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4004 decrRefCount(decoded
);
4005 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4008 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4009 o
->ptr
= sdscatlen(o
->ptr
,
4010 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4012 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4013 (unsigned long) c
->argv
[2]->ptr
);
4015 totlen
= sdslen(o
->ptr
);
4018 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4021 static void substrCommand(redisClient
*c
) {
4023 long start
= atoi(c
->argv
[2]->ptr
);
4024 long end
= atoi(c
->argv
[3]->ptr
);
4025 size_t rangelen
, strlen
;
4028 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4029 checkType(c
,o
,REDIS_STRING
)) return;
4031 o
= getDecodedObject(o
);
4032 strlen
= sdslen(o
->ptr
);
4034 /* convert negative indexes */
4035 if (start
< 0) start
= strlen
+start
;
4036 if (end
< 0) end
= strlen
+end
;
4037 if (start
< 0) start
= 0;
4038 if (end
< 0) end
= 0;
4040 /* indexes sanity checks */
4041 if (start
> end
|| (size_t)start
>= strlen
) {
4042 /* Out of range start or start > end result in null reply */
4043 addReply(c
,shared
.nullbulk
);
4047 if ((size_t)end
>= strlen
) end
= strlen
-1;
4048 rangelen
= (end
-start
)+1;
4050 /* Return the result */
4051 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4052 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4053 addReplySds(c
,range
);
4054 addReply(c
,shared
.crlf
);
4058 /* ========================= Type agnostic commands ========================= */
4060 static void delCommand(redisClient
*c
) {
4063 for (j
= 1; j
< c
->argc
; j
++) {
4064 if (deleteKey(c
->db
,c
->argv
[j
])) {
4069 addReplyLong(c
,deleted
);
4072 static void existsCommand(redisClient
*c
) {
4073 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4076 static void selectCommand(redisClient
*c
) {
4077 int id
= atoi(c
->argv
[1]->ptr
);
4079 if (selectDb(c
,id
) == REDIS_ERR
) {
4080 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4082 addReply(c
,shared
.ok
);
4086 static void randomkeyCommand(redisClient
*c
) {
4090 de
= dictGetRandomKey(c
->db
->dict
);
4091 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4094 addReply(c
,shared
.plus
);
4095 addReply(c
,shared
.crlf
);
4097 addReply(c
,shared
.plus
);
4098 addReply(c
,dictGetEntryKey(de
));
4099 addReply(c
,shared
.crlf
);
4103 static void keysCommand(redisClient
*c
) {
4106 sds pattern
= c
->argv
[1]->ptr
;
4107 int plen
= sdslen(pattern
);
4108 unsigned long numkeys
= 0;
4109 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4111 di
= dictGetIterator(c
->db
->dict
);
4113 decrRefCount(lenobj
);
4114 while((de
= dictNext(di
)) != NULL
) {
4115 robj
*keyobj
= dictGetEntryKey(de
);
4117 sds key
= keyobj
->ptr
;
4118 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4119 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4120 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4121 addReplyBulk(c
,keyobj
);
4126 dictReleaseIterator(di
);
4127 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4130 static void dbsizeCommand(redisClient
*c
) {
4132 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4135 static void lastsaveCommand(redisClient
*c
) {
4137 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4140 static void typeCommand(redisClient
*c
) {
4144 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4149 case REDIS_STRING
: type
= "+string"; break;
4150 case REDIS_LIST
: type
= "+list"; break;
4151 case REDIS_SET
: type
= "+set"; break;
4152 case REDIS_ZSET
: type
= "+zset"; break;
4153 case REDIS_HASH
: type
= "+hash"; break;
4154 default: type
= "+unknown"; break;
4157 addReplySds(c
,sdsnew(type
));
4158 addReply(c
,shared
.crlf
);
4161 static void saveCommand(redisClient
*c
) {
4162 if (server
.bgsavechildpid
!= -1) {
4163 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4166 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4167 addReply(c
,shared
.ok
);
4169 addReply(c
,shared
.err
);
4173 static void bgsaveCommand(redisClient
*c
) {
4174 if (server
.bgsavechildpid
!= -1) {
4175 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4178 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4179 char *status
= "+Background saving started\r\n";
4180 addReplySds(c
,sdsnew(status
));
4182 addReply(c
,shared
.err
);
4186 static void shutdownCommand(redisClient
*c
) {
4187 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4188 /* Kill the saving child if there is a background saving in progress.
4189 We want to avoid race conditions, for instance our saving child may
4190 overwrite the synchronous saving done by SHUTDOWN. */
4191 if (server
.bgsavechildpid
!= -1) {
4192 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4193 kill(server
.bgsavechildpid
,SIGKILL
);
4194 rdbRemoveTempFile(server
.bgsavechildpid
);
4196 if (server
.appendonly
) {
4197 /* Append only file: fsync() the AOF and exit */
4198 fsync(server
.appendfd
);
4199 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4202 /* Snapshotting. Perform a SYNC SAVE and exit */
4203 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4204 if (server
.daemonize
)
4205 unlink(server
.pidfile
);
4206 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4207 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4208 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4211 /* Ooops.. error saving! The best we can do is to continue
4212 * operating. Note that if there was a background saving process,
4213 * in the next cron() Redis will be notified that the background
4214 * saving aborted, handling special stuff like slaves pending for
4215 * synchronization... */
4216 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4218 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4223 static void renameGenericCommand(redisClient
*c
, int nx
) {
4226 /* To use the same key as src and dst is probably an error */
4227 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4228 addReply(c
,shared
.sameobjecterr
);
4232 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4236 deleteIfVolatile(c
->db
,c
->argv
[2]);
4237 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4240 addReply(c
,shared
.czero
);
4243 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4245 incrRefCount(c
->argv
[2]);
4247 deleteKey(c
->db
,c
->argv
[1]);
4249 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4252 static void renameCommand(redisClient
*c
) {
4253 renameGenericCommand(c
,0);
4256 static void renamenxCommand(redisClient
*c
) {
4257 renameGenericCommand(c
,1);
4260 static void moveCommand(redisClient
*c
) {
4265 /* Obtain source and target DB pointers */
4268 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4269 addReply(c
,shared
.outofrangeerr
);
4273 selectDb(c
,srcid
); /* Back to the source DB */
4275 /* If the user is moving using as target the same
4276 * DB as the source DB it is probably an error. */
4278 addReply(c
,shared
.sameobjecterr
);
4282 /* Check if the element exists and get a reference */
4283 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4285 addReply(c
,shared
.czero
);
4289 /* Try to add the element to the target DB */
4290 deleteIfVolatile(dst
,c
->argv
[1]);
4291 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4292 addReply(c
,shared
.czero
);
4295 incrRefCount(c
->argv
[1]);
4298 /* OK! key moved, free the entry in the source DB */
4299 deleteKey(src
,c
->argv
[1]);
4301 addReply(c
,shared
.cone
);
4304 /* =================================== Lists ================================ */
4305 static void pushGenericCommand(redisClient
*c
, int where
) {
4309 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4311 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4312 addReply(c
,shared
.cone
);
4315 lobj
= createListObject();
4317 if (where
== REDIS_HEAD
) {
4318 listAddNodeHead(list
,c
->argv
[2]);
4320 listAddNodeTail(list
,c
->argv
[2]);
4322 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4323 incrRefCount(c
->argv
[1]);
4324 incrRefCount(c
->argv
[2]);
4326 if (lobj
->type
!= REDIS_LIST
) {
4327 addReply(c
,shared
.wrongtypeerr
);
4330 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4331 addReply(c
,shared
.cone
);
4335 if (where
== REDIS_HEAD
) {
4336 listAddNodeHead(list
,c
->argv
[2]);
4338 listAddNodeTail(list
,c
->argv
[2]);
4340 incrRefCount(c
->argv
[2]);
4343 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4346 static void lpushCommand(redisClient
*c
) {
4347 pushGenericCommand(c
,REDIS_HEAD
);
4350 static void rpushCommand(redisClient
*c
) {
4351 pushGenericCommand(c
,REDIS_TAIL
);
4354 static void llenCommand(redisClient
*c
) {
4358 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4359 checkType(c
,o
,REDIS_LIST
)) return;
4362 addReplyUlong(c
,listLength(l
));
4365 static void lindexCommand(redisClient
*c
) {
4367 int index
= atoi(c
->argv
[2]->ptr
);
4371 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4372 checkType(c
,o
,REDIS_LIST
)) return;
4375 ln
= listIndex(list
, index
);
4377 addReply(c
,shared
.nullbulk
);
4379 robj
*ele
= listNodeValue(ln
);
4380 addReplyBulk(c
,ele
);
4384 static void lsetCommand(redisClient
*c
) {
4386 int index
= atoi(c
->argv
[2]->ptr
);
4390 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4391 checkType(c
,o
,REDIS_LIST
)) return;
4394 ln
= listIndex(list
, index
);
4396 addReply(c
,shared
.outofrangeerr
);
4398 robj
*ele
= listNodeValue(ln
);
4401 listNodeValue(ln
) = c
->argv
[3];
4402 incrRefCount(c
->argv
[3]);
4403 addReply(c
,shared
.ok
);
4408 static void popGenericCommand(redisClient
*c
, int where
) {
4413 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4414 checkType(c
,o
,REDIS_LIST
)) return;
4417 if (where
== REDIS_HEAD
)
4418 ln
= listFirst(list
);
4420 ln
= listLast(list
);
4423 addReply(c
,shared
.nullbulk
);
4425 robj
*ele
= listNodeValue(ln
);
4426 addReplyBulk(c
,ele
);
4427 listDelNode(list
,ln
);
4428 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4433 static void lpopCommand(redisClient
*c
) {
4434 popGenericCommand(c
,REDIS_HEAD
);
4437 static void rpopCommand(redisClient
*c
) {
4438 popGenericCommand(c
,REDIS_TAIL
);
4441 static void lrangeCommand(redisClient
*c
) {
4443 int start
= atoi(c
->argv
[2]->ptr
);
4444 int end
= atoi(c
->argv
[3]->ptr
);
4451 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4452 checkType(c
,o
,REDIS_LIST
)) return;
4454 llen
= listLength(list
);
4456 /* convert negative indexes */
4457 if (start
< 0) start
= llen
+start
;
4458 if (end
< 0) end
= llen
+end
;
4459 if (start
< 0) start
= 0;
4460 if (end
< 0) end
= 0;
4462 /* indexes sanity checks */
4463 if (start
> end
|| start
>= llen
) {
4464 /* Out of range start or start > end result in empty list */
4465 addReply(c
,shared
.emptymultibulk
);
4468 if (end
>= llen
) end
= llen
-1;
4469 rangelen
= (end
-start
)+1;
4471 /* Return the result in form of a multi-bulk reply */
4472 ln
= listIndex(list
, start
);
4473 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4474 for (j
= 0; j
< rangelen
; j
++) {
4475 ele
= listNodeValue(ln
);
4476 addReplyBulk(c
,ele
);
4481 static void ltrimCommand(redisClient
*c
) {
4483 int start
= atoi(c
->argv
[2]->ptr
);
4484 int end
= atoi(c
->argv
[3]->ptr
);
4486 int j
, ltrim
, rtrim
;
4490 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4491 checkType(c
,o
,REDIS_LIST
)) return;
4493 llen
= listLength(list
);
4495 /* convert negative indexes */
4496 if (start
< 0) start
= llen
+start
;
4497 if (end
< 0) end
= llen
+end
;
4498 if (start
< 0) start
= 0;
4499 if (end
< 0) end
= 0;
4501 /* indexes sanity checks */
4502 if (start
> end
|| start
>= llen
) {
4503 /* Out of range start or start > end result in empty list */
4507 if (end
>= llen
) end
= llen
-1;
4512 /* Remove list elements to perform the trim */
4513 for (j
= 0; j
< ltrim
; j
++) {
4514 ln
= listFirst(list
);
4515 listDelNode(list
,ln
);
4517 for (j
= 0; j
< rtrim
; j
++) {
4518 ln
= listLast(list
);
4519 listDelNode(list
,ln
);
4521 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4523 addReply(c
,shared
.ok
);
4526 static void lremCommand(redisClient
*c
) {
4529 listNode
*ln
, *next
;
4530 int toremove
= atoi(c
->argv
[2]->ptr
);
4534 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4535 checkType(c
,o
,REDIS_LIST
)) return;
4539 toremove
= -toremove
;
4542 ln
= fromtail
? list
->tail
: list
->head
;
4544 robj
*ele
= listNodeValue(ln
);
4546 next
= fromtail
? ln
->prev
: ln
->next
;
4547 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4548 listDelNode(list
,ln
);
4551 if (toremove
&& removed
== toremove
) break;
4555 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4556 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4559 /* This is the semantic of this command:
4560 * RPOPLPUSH srclist dstlist:
4561 * IF LLEN(srclist) > 0
4562 * element = RPOP srclist
4563 * LPUSH dstlist element
4570 * The idea is to be able to get an element from a list in a reliable way
4571 * since the element is not just returned but pushed against another list
4572 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4574 static void rpoplpushcommand(redisClient
*c
) {
4579 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4580 checkType(c
,sobj
,REDIS_LIST
)) return;
4581 srclist
= sobj
->ptr
;
4582 ln
= listLast(srclist
);
4585 addReply(c
,shared
.nullbulk
);
4587 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4588 robj
*ele
= listNodeValue(ln
);
4591 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4592 addReply(c
,shared
.wrongtypeerr
);
4596 /* Add the element to the target list (unless it's directly
4597 * passed to some BLPOP-ing client) */
4598 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4600 /* Create the list if the key does not exist */
4601 dobj
= createListObject();
4602 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4603 incrRefCount(c
->argv
[2]);
4605 dstlist
= dobj
->ptr
;
4606 listAddNodeHead(dstlist
,ele
);
4610 /* Send the element to the client as reply as well */
4611 addReplyBulk(c
,ele
);
4613 /* Finally remove the element from the source list */
4614 listDelNode(srclist
,ln
);
4615 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4620 /* ==================================== Sets ================================ */
4622 static void saddCommand(redisClient
*c
) {
4625 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4627 set
= createSetObject();
4628 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4629 incrRefCount(c
->argv
[1]);
4631 if (set
->type
!= REDIS_SET
) {
4632 addReply(c
,shared
.wrongtypeerr
);
4636 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4637 incrRefCount(c
->argv
[2]);
4639 addReply(c
,shared
.cone
);
4641 addReply(c
,shared
.czero
);
4645 static void sremCommand(redisClient
*c
) {
4648 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4649 checkType(c
,set
,REDIS_SET
)) return;
4651 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4653 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4654 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4655 addReply(c
,shared
.cone
);
4657 addReply(c
,shared
.czero
);
4661 static void smoveCommand(redisClient
*c
) {
4662 robj
*srcset
, *dstset
;
4664 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4665 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4667 /* If the source key does not exist return 0, if it's of the wrong type
4669 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4670 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4673 /* Error if the destination key is not a set as well */
4674 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4675 addReply(c
,shared
.wrongtypeerr
);
4678 /* Remove the element from the source set */
4679 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4680 /* Key not found in the src set! return zero */
4681 addReply(c
,shared
.czero
);
4684 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4685 deleteKey(c
->db
,c
->argv
[1]);
4687 /* Add the element to the destination set */
4689 dstset
= createSetObject();
4690 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4691 incrRefCount(c
->argv
[2]);
4693 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4694 incrRefCount(c
->argv
[3]);
4695 addReply(c
,shared
.cone
);
4698 static void sismemberCommand(redisClient
*c
) {
4701 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4702 checkType(c
,set
,REDIS_SET
)) return;
4704 if (dictFind(set
->ptr
,c
->argv
[2]))
4705 addReply(c
,shared
.cone
);
4707 addReply(c
,shared
.czero
);
4710 static void scardCommand(redisClient
*c
) {
4714 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4715 checkType(c
,o
,REDIS_SET
)) return;
4718 addReplyUlong(c
,dictSize(s
));
4721 static void spopCommand(redisClient
*c
) {
4725 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4726 checkType(c
,set
,REDIS_SET
)) return;
4728 de
= dictGetRandomKey(set
->ptr
);
4730 addReply(c
,shared
.nullbulk
);
4732 robj
*ele
= dictGetEntryKey(de
);
4734 addReplyBulk(c
,ele
);
4735 dictDelete(set
->ptr
,ele
);
4736 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4737 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4742 static void srandmemberCommand(redisClient
*c
) {
4746 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4747 checkType(c
,set
,REDIS_SET
)) return;
4749 de
= dictGetRandomKey(set
->ptr
);
4751 addReply(c
,shared
.nullbulk
);
4753 robj
*ele
= dictGetEntryKey(de
);
4755 addReplyBulk(c
,ele
);
4759 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4760 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4762 return dictSize(*d1
)-dictSize(*d2
);
4765 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4766 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4769 robj
*lenobj
= NULL
, *dstset
= NULL
;
4770 unsigned long j
, cardinality
= 0;
4772 for (j
= 0; j
< setsnum
; j
++) {
4776 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4777 lookupKeyRead(c
->db
,setskeys
[j
]);
4781 if (deleteKey(c
->db
,dstkey
))
4783 addReply(c
,shared
.czero
);
4785 addReply(c
,shared
.nullmultibulk
);
4789 if (setobj
->type
!= REDIS_SET
) {
4791 addReply(c
,shared
.wrongtypeerr
);
4794 dv
[j
] = setobj
->ptr
;
4796 /* Sort sets from the smallest to largest, this will improve our
4797 * algorithm's performance */
4798 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4800 /* The first thing we should output is the total number of elements...
4801 * since this is a multi-bulk write, but at this stage we don't know
4802 * the intersection set size, so we use a trick, append an empty object
4803 * to the output list and save the pointer to later modify it with the
4806 lenobj
= createObject(REDIS_STRING
,NULL
);
4808 decrRefCount(lenobj
);
4810 /* If we have a target key where to store the resulting set
4811 * create this key with an empty set inside */
4812 dstset
= createSetObject();
4815 /* Iterate all the elements of the first (smallest) set, and test
4816 * the element against all the other sets, if at least one set does
4817 * not include the element it is discarded */
4818 di
= dictGetIterator(dv
[0]);
4820 while((de
= dictNext(di
)) != NULL
) {
4823 for (j
= 1; j
< setsnum
; j
++)
4824 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4826 continue; /* at least one set does not contain the member */
4827 ele
= dictGetEntryKey(de
);
4829 addReplyBulk(c
,ele
);
4832 dictAdd(dstset
->ptr
,ele
,NULL
);
4836 dictReleaseIterator(di
);
4839 /* Store the resulting set into the target, if the intersection
4840 * is not an empty set. */
4841 deleteKey(c
->db
,dstkey
);
4842 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4843 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4844 incrRefCount(dstkey
);
4845 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4847 decrRefCount(dstset
);
4848 addReply(c
,shared
.czero
);
4852 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4857 static void sinterCommand(redisClient
*c
) {
4858 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4861 static void sinterstoreCommand(redisClient
*c
) {
4862 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4865 #define REDIS_OP_UNION 0
4866 #define REDIS_OP_DIFF 1
4867 #define REDIS_OP_INTER 2
4869 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4870 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4873 robj
*dstset
= NULL
;
4874 int j
, cardinality
= 0;
4876 for (j
= 0; j
< setsnum
; j
++) {
4880 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4881 lookupKeyRead(c
->db
,setskeys
[j
]);
4886 if (setobj
->type
!= REDIS_SET
) {
4888 addReply(c
,shared
.wrongtypeerr
);
4891 dv
[j
] = setobj
->ptr
;
4894 /* We need a temp set object to store our union. If the dstkey
4895 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4896 * this set object will be the resulting object to set into the target key*/
4897 dstset
= createSetObject();
4899 /* Iterate all the elements of all the sets, add every element a single
4900 * time to the result set */
4901 for (j
= 0; j
< setsnum
; j
++) {
4902 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4903 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4905 di
= dictGetIterator(dv
[j
]);
4907 while((de
= dictNext(di
)) != NULL
) {
4910 /* dictAdd will not add the same element multiple times */
4911 ele
= dictGetEntryKey(de
);
4912 if (op
== REDIS_OP_UNION
|| j
== 0) {
4913 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4917 } else if (op
== REDIS_OP_DIFF
) {
4918 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4923 dictReleaseIterator(di
);
4925 /* result set is empty? Exit asap. */
4926 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4929 /* Output the content of the resulting set, if not in STORE mode */
4931 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4932 di
= dictGetIterator(dstset
->ptr
);
4933 while((de
= dictNext(di
)) != NULL
) {
4936 ele
= dictGetEntryKey(de
);
4937 addReplyBulk(c
,ele
);
4939 dictReleaseIterator(di
);
4940 decrRefCount(dstset
);
4942 /* If we have a target key where to store the resulting set
4943 * create this key with the result set inside */
4944 deleteKey(c
->db
,dstkey
);
4945 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4946 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4947 incrRefCount(dstkey
);
4948 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4950 decrRefCount(dstset
);
4951 addReply(c
,shared
.czero
);
4958 static void sunionCommand(redisClient
*c
) {
4959 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4962 static void sunionstoreCommand(redisClient
*c
) {
4963 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4966 static void sdiffCommand(redisClient
*c
) {
4967 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4970 static void sdiffstoreCommand(redisClient
*c
) {
4971 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4974 /* ==================================== ZSets =============================== */
4976 /* ZSETs are ordered sets using two data structures to hold the same elements
4977 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4980 * The elements are added to a hash table mapping Redis objects to scores.
4981 * At the same time the elements are added to a skip list mapping scores
4982 * to Redis objects (so objects are sorted by scores in this "view"). */
4984 /* This skiplist implementation is almost a C translation of the original
4985 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4986 * Alternative to Balanced Trees", modified in three ways:
4987 * a) this implementation allows for repeated values.
4988 * b) the comparison is not just by key (our 'score') but by satellite data.
4989 * c) there is a back pointer, so it's a doubly linked list with the back
4990 * pointers being only at "level 1". This allows traversing the list
4991 * from tail to head, useful for ZREVRANGE. */
4993 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4994 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4996 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4998 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5004 static zskiplist
*zslCreate(void) {
5008 zsl
= zmalloc(sizeof(*zsl
));
5011 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5012 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5013 zsl
->header
->forward
[j
] = NULL
;
5015 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5016 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5017 zsl
->header
->span
[j
] = 0;
5019 zsl
->header
->backward
= NULL
;
5024 static void zslFreeNode(zskiplistNode
*node
) {
5025 decrRefCount(node
->obj
);
5026 zfree(node
->forward
);
5031 static void zslFree(zskiplist
*zsl
) {
5032 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5034 zfree(zsl
->header
->forward
);
5035 zfree(zsl
->header
->span
);
5038 next
= node
->forward
[0];
5045 static int zslRandomLevel(void) {
5047 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5052 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5053 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5054 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5058 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5059 /* store rank that is crossed to reach the insert position */
5060 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5062 while (x
->forward
[i
] &&
5063 (x
->forward
[i
]->score
< score
||
5064 (x
->forward
[i
]->score
== score
&&
5065 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5066 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5071 /* we assume the key is not already inside, since we allow duplicated
5072 * scores, and the re-insertion of score and redis object should never
5073 * happen since the caller of zslInsert() should test in the hash table
5074 * if the element is already inside or not. */
5075 level
= zslRandomLevel();
5076 if (level
> zsl
->level
) {
5077 for (i
= zsl
->level
; i
< level
; i
++) {
5079 update
[i
] = zsl
->header
;
5080 update
[i
]->span
[i
-1] = zsl
->length
;
5084 x
= zslCreateNode(level
,score
,obj
);
5085 for (i
= 0; i
< level
; i
++) {
5086 x
->forward
[i
] = update
[i
]->forward
[i
];
5087 update
[i
]->forward
[i
] = x
;
5089 /* update span covered by update[i] as x is inserted here */
5091 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5092 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5096 /* increment span for untouched levels */
5097 for (i
= level
; i
< zsl
->level
; i
++) {
5098 update
[i
]->span
[i
-1]++;
5101 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5103 x
->forward
[0]->backward
= x
;
5109 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5110 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5112 for (i
= 0; i
< zsl
->level
; i
++) {
5113 if (update
[i
]->forward
[i
] == x
) {
5115 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5117 update
[i
]->forward
[i
] = x
->forward
[i
];
5119 /* invariant: i > 0, because update[0]->forward[0]
5120 * is always equal to x */
5121 update
[i
]->span
[i
-1] -= 1;
5124 if (x
->forward
[0]) {
5125 x
->forward
[0]->backward
= x
->backward
;
5127 zsl
->tail
= x
->backward
;
5129 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5134 /* Delete an element with matching score/object from the skiplist. */
5135 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5136 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5140 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5141 while (x
->forward
[i
] &&
5142 (x
->forward
[i
]->score
< score
||
5143 (x
->forward
[i
]->score
== score
&&
5144 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5148 /* We may have multiple elements with the same score, what we need
5149 * is to find the element with both the right score and object. */
5151 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5152 zslDeleteNode(zsl
, x
, update
);
5156 return 0; /* not found */
5158 return 0; /* not found */
5161 /* Delete all the elements with score between min and max from the skiplist.
5162 * Min and max are inclusive, so an element with score >= min && score <= max is deleted.
5163 * Note that this function takes the reference to the hash table view of the
5164 * sorted set, in order to remove the elements from the hash table too. */
5165 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5166 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5167 unsigned long removed
= 0;
5171 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5172 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5176 /* We may have multiple elements with the same score, what we need
5177 * is to find the element with both the right score and object. */
5179 while (x
&& x
->score
<= max
) {
5180 zskiplistNode
*next
= x
->forward
[0];
5181 zslDeleteNode(zsl
, x
, update
);
5182 dictDelete(dict
,x
->obj
);
5187 return removed
; /* not found */
5190 /* Delete all the elements with rank between start and end from the skiplist.
5191 * Start and end are inclusive. Note that start and end need to be 1-based */
5192 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5193 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5194 unsigned long traversed
= 0, removed
= 0;
5198 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5199 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5200 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5208 while (x
&& traversed
<= end
) {
5209 zskiplistNode
*next
= x
->forward
[0];
5210 zslDeleteNode(zsl
, x
, update
);
5211 dictDelete(dict
,x
->obj
);
5220 /* Find the first node having a score equal or greater than the specified one.
5221 * Returns NULL if there is no match. */
5222 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5227 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5228 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5231 /* We may have multiple elements with the same score, what we need
5232 * is to find the element with both the right score and object. */
5233 return x
->forward
[0];
5236 /* Find the rank for an element by both score and key.
5237 * Returns 0 when the element cannot be found, rank otherwise.
5238 * Note that the rank is 1-based due to the span of zsl->header to the
5240 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5242 unsigned long rank
= 0;
5246 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5247 while (x
->forward
[i
] &&
5248 (x
->forward
[i
]->score
< score
||
5249 (x
->forward
[i
]->score
== score
&&
5250 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5251 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5255 /* x might be equal to zsl->header, so test if obj is non-NULL */
5256 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5263 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5264 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5266 unsigned long traversed
= 0;
5270 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5271 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5273 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5276 if (traversed
== rank
) {
5283 /* The actual Z-commands implementations */
5285 /* This generic command implements both ZADD and ZINCRBY.
5286 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5287 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5288 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5293 zsetobj
= lookupKeyWrite(c
->db
,key
);
5294 if (zsetobj
== NULL
) {
5295 zsetobj
= createZsetObject();
5296 dictAdd(c
->db
->dict
,key
,zsetobj
);
5299 if (zsetobj
->type
!= REDIS_ZSET
) {
5300 addReply(c
,shared
.wrongtypeerr
);
5306 /* Ok now since we implement both ZADD and ZINCRBY here the code
5307 * needs to handle the two different conditions. It's all about setting
5308 * '*score', that is, the new score to set, to the right value. */
5309 score
= zmalloc(sizeof(double));
5313 /* Read the old score. If the element was not present starts from 0 */
5314 de
= dictFind(zs
->dict
,ele
);
5316 double *oldscore
= dictGetEntryVal(de
);
5317 *score
= *oldscore
+ scoreval
;
5325 /* What follows is a simple remove and re-insert operation that is common
5326 * to both ZADD and ZINCRBY... */
5327 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5328 /* case 1: New element */
5329 incrRefCount(ele
); /* added to hash */
5330 zslInsert(zs
->zsl
,*score
,ele
);
5331 incrRefCount(ele
); /* added to skiplist */
5334 addReplyDouble(c
,*score
);
5336 addReply(c
,shared
.cone
);
5341 /* case 2: Score update operation */
5342 de
= dictFind(zs
->dict
,ele
);
5343 redisAssert(de
!= NULL
);
5344 oldscore
= dictGetEntryVal(de
);
5345 if (*score
!= *oldscore
) {
5348 /* Remove and insert the element in the skip list with new score */
5349 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5350 redisAssert(deleted
!= 0);
5351 zslInsert(zs
->zsl
,*score
,ele
);
5353 /* Update the score in the hash table */
5354 dictReplace(zs
->dict
,ele
,score
);
5360 addReplyDouble(c
,*score
);
5362 addReply(c
,shared
.czero
);
5366 static void zaddCommand(redisClient
*c
) {
5369 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5370 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5373 static void zincrbyCommand(redisClient
*c
) {
5376 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5377 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5380 static void zremCommand(redisClient
*c
) {
5387 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5388 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5391 de
= dictFind(zs
->dict
,c
->argv
[2]);
5393 addReply(c
,shared
.czero
);
5396 /* Delete from the skiplist */
5397 oldscore
= dictGetEntryVal(de
);
5398 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5399 redisAssert(deleted
!= 0);
5401 /* Delete from the hash table */
5402 dictDelete(zs
->dict
,c
->argv
[2]);
5403 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5404 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5406 addReply(c
,shared
.cone
);
/* ZREMRANGEBYSCORE handler: deletes the elements of the sorted set at
 * argv[1] selected by zslDeleteRangeByScore() for the bounds min (argv[2])
 * and max (argv[3]).
 *
 * Both bounds are parsed with strtod() and a NULL end pointer, so malformed
 * numbers are not detected here. The count returned by
 * zslDeleteRangeByScore() is added to server.dirty and sent back with
 * addReplyLong(). The key is deleted when the dict ends up empty. */
5409 static void zremrangebyscoreCommand(redisClient
*c
) {
5410 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5411 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5416 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5417 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5420 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5421 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5422 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5423 server
.dirty
+= deleted
;
5424 addReplyLong(c
,deleted
);
/* ZREMRANGEBYRANK handler: deletes the elements of the sorted set at argv[1]
 * whose 0-based rank lies between start (argv[2]) and end (argv[3]).
 *
 * Negative indexes are counted from the end of the set (llen is added) and
 * then clamped to 0; an end past the last element is clamped to llen-1.
 * When start > end or start >= llen the visible reply is shared.czero.
 * The number of deleted elements is added to server.dirty and returned with
 * addReplyLong().
 * NOTE(review): both indexes are parsed with atoi(), which gives no error
 * indication for non-numeric input. */
5427 static void zremrangebyrankCommand(redisClient
*c
) {
5428 int start
= atoi(c
->argv
[2]->ptr
);
5429 int end
= atoi(c
->argv
[3]->ptr
);
5435 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5436 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5438 llen
= zs
->zsl
->length
;
5440 /* convert negative indexes */
5441 if (start
< 0) start
= llen
+start
;
5442 if (end
< 0) end
= llen
+end
;
5443 if (start
< 0) start
= 0;
5444 if (end
< 0) end
= 0;
5446 /* indexes sanity checks */
5447 if (start
> end
|| start
>= llen
) {
5448 addReply(c
,shared
.czero
);
5451 if (end
>= llen
) end
= llen
-1;
5453 /* increment start and end because zsl*Rank functions
5454 * use 1-based rank */
5455 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5456 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5457 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5458 server
.dirty
+= deleted
;
5459 addReplyLong(c
, deleted
);
5467 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5468 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5469 unsigned long size1
, size2
;
5470 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5471 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5472 return size1
- size2
;
5475 #define REDIS_AGGR_SUM 1
5476 #define REDIS_AGGR_MIN 2
5477 #define REDIS_AGGR_MAX 3
/* Folds 'val' into '*target' according to 'aggregate':
 *   REDIS_AGGR_SUM - *target becomes *target + val
 *   REDIS_AGGR_MIN - *target becomes the smaller of the two
 *   REDIS_AGGR_MAX - *target becomes the larger of the two
 * The trailing redisAssert(0 != 0) can never hold; presumably it is the
 * fallback for an unknown 'aggregate' value (the branch header is not
 * visible in this listing). */
5479 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5480 if (aggregate
== REDIS_AGGR_SUM
) {
5481 *target
= *target
+ val
;
5482 } else if (aggregate
== REDIS_AGGR_MIN
) {
5483 *target
= val
< *target
? val
: *target
;
5484 } else if (aggregate
== REDIS_AGGR_MAX
) {
5485 *target
= val
> *target
? val
: *target
;
5488 redisAssert(0 != 0);
/* Shared implementation of ZUNION and ZINTER.
 *
 * c:      the client; argv[2] holds the number of input keys, the input keys
 *         start at argv[3], and optional "weights" / "aggregate" arguments
 *         may follow them.
 * dstkey: key under which the resulting sorted set is stored.
 * op:     REDIS_OP_INTER or REDIS_OP_UNION (anything else trips the
 *         redisAssert near the end).
 *
 * Visible flow: read and validate the input keys into the 'src' array
 * (dict pointer + weight, default weight 1.0), parse the optional arguments,
 * sort 'src' by cardinality, build the result into a fresh zset object and
 * finally replace 'dstkey' with it when the result is non-empty. The reply
 * is the result length, or shared.czero.
 * NOTE(review): the key count is parsed with atoi() and the weights with
 * strtod(..., NULL), so malformed numbers are not detected. */
5492 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5494 int aggregate
= REDIS_AGGR_SUM
;
5501 /* expect zsetnum input keys to be given */
5502 zsetnum
= atoi(c
->argv
[2]->ptr
);
5504 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5508 /* test if the expected number of keys would overflow */
5509 if (3+zsetnum
> c
->argc
) {
5510 addReply(c
,shared
.syntaxerr
);
5514 /* read keys to be used for input */
5515 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5516 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5517 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5521 if (zsetobj
->type
!= REDIS_ZSET
) {
5523 addReply(c
,shared
.wrongtypeerr
);
5526 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5529 /* default all weights to 1 */
5530 src
[i
].weight
= 1.0;
5533 /* parse optional extra arguments */
5535 int remaining
= c
->argc
- j
;
/* "weights" needs one value per input key, hence the zsetnum + 1 test */
5538 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5540 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5541 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5543 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5545 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5546 aggregate
= REDIS_AGGR_SUM
;
5547 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5548 aggregate
= REDIS_AGGR_MIN
;
5549 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5550 aggregate
= REDIS_AGGR_MAX
;
5553 addReply(c
,shared
.syntaxerr
);
5559 addReply(c
,shared
.syntaxerr
);
5565 /* sort sets from the smallest to largest, this will improve our
5566 * algorithm's performance */
5567 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5569 dstobj
= createZsetObject();
5570 dstzset
= dstobj
->ptr
;
5572 if (op
== REDIS_OP_INTER
) {
5573 /* skip going over all entries if the smallest zset is NULL or empty */
5574 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5575 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5576 * from small to large, all src[i > 0].dict are non-empty too */
5577 di
= dictGetIterator(src
[0].dict
);
5578 while((de
= dictNext(di
)) != NULL
) {
/* each candidate gets its own heap-allocated score, seeded with the
 * weighted score from the smallest set */
5579 double *score
= zmalloc(sizeof(double)), value
;
5580 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5582 for (j
= 1; j
< zsetnum
; j
++) {
5583 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5585 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5586 zunionInterAggregate(score
, value
, aggregate
);
5592 /* skip entry when not present in every source dict */
5596 robj
*o
= dictGetEntryKey(de
);
5597 dictAdd(dstzset
->dict
,o
,score
);
5598 incrRefCount(o
); /* added to dictionary */
5599 zslInsert(dstzset
->zsl
,*score
,o
);
5600 incrRefCount(o
); /* added to skiplist */
5603 dictReleaseIterator(di
);
5605 } else if (op
== REDIS_OP_UNION
) {
5606 for (i
= 0; i
< zsetnum
; i
++) {
5607 if (!src
[i
].dict
) continue;
5609 di
= dictGetIterator(src
[i
].dict
);
5610 while((de
= dictNext(di
)) != NULL
) {
5611 /* skip key when already processed */
5612 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5614 double *score
= zmalloc(sizeof(double)), value
;
5615 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5617 /* because the zsets are sorted by size, it's only possible
5618 * for sets at larger indices to hold this entry */
5619 for (j
= (i
+1); j
< zsetnum
; j
++) {
5620 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5622 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5623 zunionInterAggregate(score
, value
, aggregate
);
5627 robj
*o
= dictGetEntryKey(de
);
5628 dictAdd(dstzset
->dict
,o
,score
);
5629 incrRefCount(o
); /* added to dictionary */
5630 zslInsert(dstzset
->zsl
,*score
,o
);
5631 incrRefCount(o
); /* added to skiplist */
5633 dictReleaseIterator(di
);
5636 /* unknown operator */
5637 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
/* The old value of dstkey is removed unconditionally, before it is known
 * whether the result is empty. */
5640 deleteKey(c
->db
,dstkey
);
5641 if (dstzset
->zsl
->length
) {
5642 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5643 incrRefCount(dstkey
);
5644 addReplyLong(c
, dstzset
->zsl
->length
);
/* NOTE(review): dstzset is dstobj->ptr (assigned right after
 * createZsetObject() above), i.e. the zset payload and not the object
 * wrapper; releasing the unused result presumably should go through
 * dstobj instead - confirm. */
5647 decrRefCount(dstzset
);
5648 addReply(c
, shared
.czero
);
/* ZUNION handler: runs zunionInterGenericCommand() with argv[1] as the
 * destination key and REDIS_OP_UNION as the operation. */
5653 static void zunionCommand(redisClient
*c
) {
5654 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
/* ZINTER handler: runs zunionInterGenericCommand() with argv[1] as the
 * destination key and REDIS_OP_INTER as the operation. */
5657 static void zinterCommand(redisClient
*c
) {
5658 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
/* Shared implementation of ZRANGE (reverse == 0) and ZREVRANGE
 * (reverse == 1).
 *
 * argv[2] / argv[3] are the start / end indexes (parsed with atoi(), so
 * non-numeric input is not reported). A fifth argument is accepted only if
 * it is "withscores" (case-insensitive); any other extra argument produces
 * shared.syntaxerr.
 *
 * Negative indexes are counted from the end and clamped to 0, 'end' is
 * clamped to llen-1, and an out-of-range or inverted interval yields
 * shared.emptymultibulk. The reply is a multi-bulk whose announced length
 * is rangelen, or rangelen*2 when scores are requested. */
5661 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5663 int start
= atoi(c
->argv
[2]->ptr
);
5664 int end
= atoi(c
->argv
[3]->ptr
);
5673 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5675 } else if (c
->argc
>= 5) {
5676 addReply(c
,shared
.syntaxerr
);
5680 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5681 checkType(c
,o
,REDIS_ZSET
)) return;
5686 /* convert negative indexes */
5687 if (start
< 0) start
= llen
+start
;
5688 if (end
< 0) end
= llen
+end
;
5689 if (start
< 0) start
= 0;
5690 if (end
< 0) end
= 0;
5692 /* indexes sanity checks */
5693 if (start
> end
|| start
>= llen
) {
5694 /* Out of range start or start > end result in empty list */
5695 addReply(c
,shared
.emptymultibulk
);
5698 if (end
>= llen
) end
= llen
-1;
5699 rangelen
= (end
-start
)+1;
5701 /* check if starting point is trivial, before searching
5702 * the element in log(N) time */
/* Two starting points are visible: one from the tail (rank llen-start) and
 * one from the head (rank start+1); presumably the first is the reverse
 * case, matching the backward/forward step at the end of the loop. */
5704 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5707 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5710 /* Return the result in form of a multi-bulk reply */
5711 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5712 withscores
? (rangelen
*2) : rangelen
));
5713 for (j
= 0; j
< rangelen
; j
++) {
5715 addReplyBulk(c
,ele
);
5717 addReplyDouble(c
,ln
->score
);
5718 ln
= reverse
? ln
->backward
: ln
->forward
[0];
/* ZRANGE handler: zrangeGenericCommand() with reverse == 0. */
5722 static void zrangeCommand(redisClient
*c
) {
5723 zrangeGenericCommand(c
,0);
/* ZREVRANGE handler: zrangeGenericCommand() with reverse == 1. */
5726 static void zrevrangeCommand(redisClient
*c
) {
5727 zrangeGenericCommand(c
,1);
5730 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5731 * If justcount is non-zero, just the count is returned. */
/* Argument layout handled below: argv[1] key, argv[2] min, argv[3] max,
 * optionally followed by "limit" offset count (argv[4..6]) and/or a trailing
 * "withscores". Accepted argument counts are 4 and 7, each plus one when
 * "withscores" is present; anything else gets the "wrong number of
 * arguments" error. A negative offset is clamped to 0; limit defaults to -1.
 * NOTE(review): min/max are parsed with strtod(..., NULL) and offset/limit
 * with atoi(), so malformed numbers are not detected. */
5732 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5735 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5736 int offset
= 0, limit
= -1;
5740 /* Parse the min-max interval. If one of the values is prefixed
5741 * by the "(" character, it's considered "open". For instance
5742 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5743 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5744 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5745 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5748 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5750 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5751 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5754 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5757 /* Parse "WITHSCORES": note that if the command was called with
5758 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5759 * enter the following paths to parse WITHSCORES and LIMIT. */
5760 if (c
->argc
== 5 || c
->argc
== 8) {
5761 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5766 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5770 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5775 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5776 addReply(c
,shared
.syntaxerr
);
5778 } else if (c
->argc
== (7 + withscores
)) {
5779 offset
= atoi(c
->argv
[5]->ptr
);
5780 limit
= atoi(c
->argv
[6]->ptr
);
5781 if (offset
< 0) offset
= 0;
5784 /* Ok, lookup the key and get the range */
5785 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5787 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5789 if (o
->type
!= REDIS_ZSET
) {
5790 addReply(c
,shared
.wrongtypeerr
);
5792 zset
*zsetobj
= o
->ptr
;
5793 zskiplist
*zsl
= zsetobj
->zsl
;
5795 robj
*ele
, *lenobj
= NULL
;
5796 unsigned long rangelen
= 0;
5798 /* Get the first node with the score >= min, or with
5799 * score > min if 'minex' is true. */
5800 ln
= zslFirstWithScore(zsl
,min
);
5801 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5804 /* No element matching the specified interval */
5805 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5809 /* We don't know in advance how many matching elements there
5810 * are in the list, so we push this object that will represent
5811 * the multi-bulk length in the output buffer, and will "fix"
5814 lenobj
= createObject(REDIS_STRING
,NULL
);
5816 decrRefCount(lenobj
);
5819 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5822 ln
= ln
->forward
[0];
5825 if (limit
== 0) break;
5828 addReplyBulk(c
,ele
);
5830 addReplyDouble(c
,ln
->score
);
5832 ln
= ln
->forward
[0];
5834 if (limit
> 0) limit
--;
5837 addReplyLong(c
,(long)rangelen
);
5839 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5840 withscores
? (rangelen
*2) : rangelen
);
/* ZRANGEBYSCORE handler: genericZrangebyscoreCommand() with justcount == 0,
 * i.e. the matching elements themselves are returned. */
5846 static void zrangebyscoreCommand(redisClient
*c
) {
5847 genericZrangebyscoreCommand(c
,0);
/* ZCOUNT handler: genericZrangebyscoreCommand() with justcount == 1, i.e.
 * only the number of matching elements is returned. */
5850 static void zcountCommand(redisClient
*c
) {
5851 genericZrangebyscoreCommand(c
,1);
/* ZCARD handler: replies with the length of the skiplist of the sorted set
 * at argv[1] (zs->zsl->length). The key is fetched with
 * lookupKeyReadOrReply() using shared.czero as the reply object, and must
 * pass the REDIS_ZSET type check. */
5854 static void zcardCommand(redisClient
*c
) {
5858 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5859 checkType(c
,o
,REDIS_ZSET
)) return;
5862 addReplyUlong(c
,zs
->zsl
->length
);
/* ZSCORE handler: looks up element argv[2] in the dict of the sorted set at
 * argv[1] and replies with its score via addReplyDouble(). The other visible
 * reply is shared.nullbulk (presumably for a missing element; the guarding
 * test is not visible in this listing). */
5865 static void zscoreCommand(redisClient
*c
) {
5870 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5871 checkType(c
,o
,REDIS_ZSET
)) return;
5874 de
= dictFind(zs
->dict
,c
->argv
[2]);
5876 addReply(c
,shared
.nullbulk
);
/* the dict value is a pointer to the element's double score */
5878 double *score
= dictGetEntryVal(de
);
5880 addReplyDouble(c
,*score
);
/* Shared implementation of ZRANK (reverse == 0) and ZREVRANK (reverse == 1).
 *
 * Element argv[2] is looked up in the set's dict to obtain its score, which
 * zslGetRank() needs to locate the node in the skiplist. Two numeric replies
 * are visible: zsl->length - rank and rank-1; presumably the former is the
 * reverse case and the latter converts a 1-based rank to 0-based - confirm.
 * shared.nullbulk is the reply object used for the failure paths. */
5884 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5892 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5893 checkType(c
,o
,REDIS_ZSET
)) return;
5897 de
= dictFind(zs
->dict
,c
->argv
[2]);
5899 addReply(c
,shared
.nullbulk
);
5903 score
= dictGetEntryVal(de
);
5904 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5907 addReplyLong(c
, zsl
->length
- rank
);
5909 addReplyLong(c
, rank
-1);
5912 addReply(c
,shared
.nullbulk
);
/* ZRANK handler: zrankGenericCommand() with reverse == 0. */
5916 static void zrankCommand(redisClient
*c
) {
5917 zrankGenericCommand(c
, 0);
/* ZREVRANK handler: zrankGenericCommand() with reverse == 1. */
5920 static void zrevrankCommand(redisClient
*c
) {
5921 zrankGenericCommand(c
, 1);
5924 /* =================================== Hashes =============================== */
/* HSET handler: sets field argv[2] to value argv[3] in the hash at argv[1].
 *
 * A new hash object is created with createHashObject() and added to the
 * database dict when needed; an existing value of another type yields
 * shared.wrongtypeerr.
 *
 * A zipmap-encoded hash is converted with convertToRealHash() in two cases:
 * before the insert, when the field or the value is a raw string longer than
 * server.hash_max_zipmap_value, and after the insert, when a new field made
 * the zipmap exceed server.hash_max_zipmap_entries.
 *
 * The integer reply is (update == 0): 1 when the field was created, 0 when
 * an existing field was overwritten. */
5925 static void hsetCommand(redisClient
*c
) {
5927 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5930 o
= createHashObject();
5931 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5932 incrRefCount(c
->argv
[1]);
5934 if (o
->type
!= REDIS_HASH
) {
5935 addReply(c
,shared
.wrongtypeerr
);
5939 /* We want to convert the zipmap into an hash table right now if the
5940 * entry to be added is too big. Note that we check if the object
5941 * is integer encoded before to try fetching the length in the test below.
5942 * This is because integers are small, but currently stringObjectLen()
5943 * performs a slow conversion: not worth it. */
5944 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5945 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5946 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5947 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5948 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5950 convertToRealHash(o
);
5953 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5954 unsigned char *zm
= o
->ptr
;
/* the value may be integer encoded; work on a decoded copy so that
 * sdslen() can be applied to it */
5955 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5957 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5958 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5959 decrRefCount(valobj
);
5962 /* And here there is the second check for hash conversion...
5963 * we want to do it only if the operation was not just an update as
5964 * zipmapLen() is O(N). */
5965 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5966 convertToRealHash(o
);
5968 tryObjectEncoding(c
->argv
[2]);
5969 /* note that c->argv[3] is already encoded, as the latest arg
5970 * of a bulk command is always integer encoded if possible. */
5971 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5972 incrRefCount(c
->argv
[2]);
5976 incrRefCount(c
->argv
[3]);
5979 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
/* HINCRBY handler: operates on field argv[2] of the hash at argv[1], using
 * the increment given in argv[3], and replies with the resulting 'value'
 * through addReplyLong().
 *
 * The hash is created when needed; a value of another type yields
 * shared.wrongtypeerr. The current field value is read either from the
 * zipmap (copied into a temporary sds first, because strtoll() needs a
 * NUL-terminated string) or from the hash table entry, then the new value is
 * formatted with "%lld" and written back with zipmapSet() / dictReplace().
 * NOTE(review): the increment and the stored value are parsed with
 * strtoll(..., NULL, 10), so non-numeric text is silently read as whatever
 * prefix strtoll() accepts (0 when there is none). */
5982 static void hincrbyCommand(redisClient
*c
) {
5984 long long value
= 0, incr
= 0;
5985 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5988 o
= createHashObject();
5989 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5990 incrRefCount(c
->argv
[1]);
5992 if (o
->type
!= REDIS_HASH
) {
5993 addReply(c
,shared
.wrongtypeerr
);
5998 robj
*o_incr
= getDecodedObject(c
->argv
[3]);
5999 incr
= strtoll(o_incr
->ptr
, NULL
, 10);
6000 decrRefCount(o_incr
);
6002 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6003 unsigned char *zm
= o
->ptr
;
6004 unsigned char *zval
;
6007 /* Find value if already present in hash */
6008 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6010 /* strtoll needs the char* to have a trailing \0, but
6011 * the zipmap doesn't include them. */
6012 sds szval
= sdsnewlen(zval
, zvlen
);
6013 value
= strtoll(szval
,NULL
,10);
6018 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6019 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6020 (unsigned char*)svalue
,sdslen(svalue
),&update
);
6024 /* Check if the zipmap needs to be converted
6025 * if this was not an update. */
6026 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6027 convertToRealHash(o
);
6032 /* Find value if already present in hash */
6033 de
= dictFind(o
->ptr
,c
->argv
[2]);
6035 hval
= dictGetEntryVal(de
);
/* a stored value is either a raw string to parse or an integer kept
 * directly in the ptr field */
6036 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6037 value
= strtoll(hval
->ptr
,NULL
,10);
6038 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6039 value
= (long)hval
->ptr
;
6041 redisAssert(1 != 1);
6045 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6046 tryObjectEncoding(hval
);
6047 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6048 incrRefCount(c
->argv
[2]);
6053 addReplyLong(c
, value
);
/* HGET handler: replies with the value of field argv[2] in the hash at
 * argv[1]; shared.nullbulk is the reply object used when nothing is found.
 *
 * For a zipmap-encoded hash the field name is decoded first (sdslen() needs
 * a string) and the bulk reply is assembled by hand from "$<len>\r\n", the
 * raw value bytes and shared.crlf. For a hash table the entry is looked up
 * with dictFind(). */
6056 static void hgetCommand(redisClient
*c
) {
6059 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6060 checkType(c
,o
,REDIS_HASH
)) return;
6062 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6063 unsigned char *zm
= o
->ptr
;
6068 field
= getDecodedObject(c
->argv
[2]);
6069 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6070 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6071 addReplySds(c
,sdsnewlen(val
,vlen
));
6072 addReply(c
,shared
.crlf
);
6073 decrRefCount(field
);
6076 addReply(c
,shared
.nullbulk
);
6077 decrRefCount(field
);
6081 struct dictEntry
*de
;
6083 de
= dictFind(o
->ptr
,c
->argv
[2]);
6085 addReply(c
,shared
.nullbulk
);
6087 robj
*e
= dictGetEntryVal(de
);
/* HDEL handler: removes field argv[2] from the hash at argv[1].
 *
 * Zipmap encoding: zipmapDel() returns the (possibly reallocated) zipmap,
 * which is stored back into o->ptr, and reports through 'deleted' whether
 * something was removed. Hash table encoding: 'deleted' is whether
 * dictDelete() returned DICT_OK, and the table is shrunk if htNeedsResize()
 * asks for it. In both cases the key itself is deleted once the hash is
 * empty.
 *
 * server.dirty is incremented only when a field was removed; the reply is
 * shared.cone or shared.czero accordingly. */
6094 static void hdelCommand(redisClient
*c
) {
6098 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6099 checkType(c
,o
,REDIS_HASH
)) return;
6101 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6102 robj
*field
= getDecodedObject(c
->argv
[2]);
6104 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6105 (unsigned char*) field
->ptr
,
6106 sdslen(field
->ptr
), &deleted
);
6107 decrRefCount(field
);
6108 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6109 deleteKey(c
->db
,c
->argv
[1]);
6111 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6112 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6113 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6115 if (deleted
) server
.dirty
++;
6116 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
/* HLEN handler: replies with the number of fields of the hash at argv[1],
 * taken from zipmapLen() for a zipmap-encoded hash and from dictSize()
 * otherwise. */
6119 static void hlenCommand(redisClient
*c
) {
6123 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6124 checkType(c
,o
,REDIS_HASH
)) return;
6126 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6127 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6128 addReplyUlong(c
,len
);
6131 #define REDIS_GETALL_KEYS 1
6132 #define REDIS_GETALL_VALS 2
/* Shared implementation of HKEYS, HVALS and HGETALL.
 *
 * flags: bitwise OR of REDIS_GETALL_KEYS and/or REDIS_GETALL_VALS, selecting
 *        whether field names, values or both are emitted for every entry.
 *
 * The multi-bulk length is not known up front: 'lenobj' is created with a
 * NULL payload and only receives its "*<count>\r\n" text at the very end,
 * once 'count' is final. Zipmap entries are wrapped in temporary string
 * objects for addReplyBulk(); hash table entries are sent as they are. */
6133 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6135 unsigned long count
= 0;
6137 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6138 || checkType(c
,o
,REDIS_HASH
)) return;
6140 lenobj
= createObject(REDIS_STRING
,NULL
);
6142 decrRefCount(lenobj
);
6144 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6145 unsigned char *p
= zipmapRewind(o
->ptr
);
6146 unsigned char *field
, *val
;
6147 unsigned int flen
, vlen
;
6149 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6152 if (flags
& REDIS_GETALL_KEYS
) {
6153 aux
= createStringObject((char*)field
,flen
);
6154 addReplyBulk(c
,aux
);
6158 if (flags
& REDIS_GETALL_VALS
) {
6159 aux
= createStringObject((char*)val
,vlen
);
6160 addReplyBulk(c
,aux
);
6166 dictIterator
*di
= dictGetIterator(o
->ptr
);
6169 while((de
= dictNext(di
)) != NULL
) {
6170 robj
*fieldobj
= dictGetEntryKey(de
);
6171 robj
*valobj
= dictGetEntryVal(de
);
6173 if (flags
& REDIS_GETALL_KEYS
) {
6174 addReplyBulk(c
,fieldobj
);
6177 if (flags
& REDIS_GETALL_VALS
) {
6178 addReplyBulk(c
,valobj
);
6182 dictReleaseIterator(di
);
6184 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
/* HKEYS handler: genericHgetallCommand() emitting field names only. */
6187 static void hkeysCommand(redisClient
*c
) {
6188 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
/* HVALS handler: genericHgetallCommand() emitting values only. */
6191 static void hvalsCommand(redisClient
*c
) {
6192 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
/* HGETALL handler: genericHgetallCommand() emitting both field names and
 * values. */
6195 static void hgetallCommand(redisClient
*c
) {
6196 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
/* HEXISTS handler: replies shared.cone when field argv[2] exists in the hash
 * at argv[1] and shared.czero otherwise. Existence is tested with
 * zipmapExists() on a decoded copy of the field name for zipmap-encoded
 * hashes, and with dictFind() for hash tables. */
6199 static void hexistsCommand(redisClient
*c
) {
6203 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6204 checkType(c
,o
,REDIS_HASH
)) return;
6206 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6208 unsigned char *zm
= o
->ptr
;
6210 field
= getDecodedObject(c
->argv
[2]);
6211 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6212 decrRefCount(field
);
6214 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6216 addReply(c
,exists
? shared
.cone
: shared
.czero
);
/* Converts the hash object 'o' from its zipmap representation to a real
 * hash table.
 *
 * A new dict of type hashDictType is created and every zipmap entry is
 * copied into it as a pair of string objects (both passed through
 * tryObjectEncoding()). Finally the object's encoding is switched to
 * REDIS_ENCODING_HT.
 * NOTE(review): the precondition is checked with plain assert() rather than
 * the redisAssert() used by the surrounding code, and only after the new
 * dict has already been created. */
6219 static void convertToRealHash(robj
*o
) {
6220 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6221 unsigned int klen
, vlen
;
6222 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6224 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6225 p
= zipmapRewind(zm
);
6226 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6227 robj
*keyobj
, *valobj
;
6229 keyobj
= createStringObject((char*)key
,klen
);
6230 valobj
= createStringObject((char*)val
,vlen
);
6231 tryObjectEncoding(keyobj
);
6232 tryObjectEncoding(valobj
);
6233 dictAdd(dict
,keyobj
,valobj
);
6235 o
->encoding
= REDIS_ENCODING_HT
;
6240 /* ========================= Non type-specific commands ==================== */
/* FLUSHDB handler: empties both the key dict and the expires dict of the
 * client's currently selected database and replies shared.ok. The number of
 * keys present before the flush is added to server.dirty. */
6242 static void flushdbCommand(redisClient
*c
) {
6243 server
.dirty
+= dictSize(c
->db
->dict
);
6244 dictEmpty(c
->db
->dict
);
6245 dictEmpty(c
->db
->expires
);
6246 addReply(c
,shared
.ok
);
/* FLUSHALL handler: calls emptyDb(), adds its return value to server.dirty
 * and replies shared.ok. If a background save child is running
 * (server.bgsavechildpid != -1) it is killed with SIGKILL and its temporary
 * file removed; then the dataset is saved synchronously with rdbSave().
 * Note that the OK reply is queued before the kill / save steps run. */
6249 static void flushallCommand(redisClient
*c
) {
6250 server
.dirty
+= emptyDb();
6251 addReply(c
,shared
.ok
);
6252 if (server
.bgsavechildpid
!= -1) {
6253 kill(server
.bgsavechildpid
,SIGKILL
);
6254 rdbRemoveTempFile(server
.bgsavechildpid
);
6256 rdbSave(server
.dbfilename
);
/* Allocates a redisSortOperation with zmalloc() and stores 'pattern' in it.
 * The pattern pointer is stored as given (no copy or reference count change
 * is visible here). sortCommand() registers zfree() as the free method of
 * the list holding these operations. */
6260 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6261 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6263 so
->pattern
= pattern
;
6267 /* Return the value associated to the key with a name obtained
6268 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
/* db:      database searched with lookupKeyRead().
 * pattern: string object holding the pattern; the special pattern "#" is
 *          handled separately (see below).
 * subst:   object whose decoded string replaces the '*'.
 *
 * The key name is assembled as prefix + subst + postfix inside a stack
 * buffer of REDIS_SORTKEY_MAX+1 bytes; NULL is returned when the combined
 * length would not fit. */
6269 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6273 int prefixlen
, sublen
, postfixlen
;
6274 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6278 char buf
[REDIS_SORTKEY_MAX
+1];
6281 /* If the pattern is "#" return the substitution object itself in order
6282 * to implement the "SORT ... GET #" feature. */
6283 spat
= pattern
->ptr
;
6284 if (spat
[0] == '#' && spat
[1] == '\0') {
6288 /* The substitution object may be specially encoded. If so we create
6289 * a decoded object on the fly. Otherwise getDecodedObject will just
6290 * increment the ref count, that we'll decrement later. */
6291 subst
= getDecodedObject(subst
);
/* NOTE(review): this early return happens after getDecodedObject() took a
 * reference on 'subst' and no decrRefCount(subst) is visible on this path,
 * unlike the two exits below - looks like a reference leak, confirm. */
6294 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6295 p
= strchr(spat
,'*');
6297 decrRefCount(subst
);
6302 sublen
= sdslen(ssub
);
6303 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6304 memcpy(keyname
.buf
,spat
,prefixlen
);
6305 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6306 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6307 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6308 keyname
.len
= prefixlen
+sublen
+postfixlen
;
/* the string pointer handed to the object is the address of keyname plus
 * two longs, i.e. past the header fields that precede the character data */
6310 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6311 decrRefCount(subst
);
6313 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6314 return lookupKeyRead(db
,&keyobj
);
6317 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6318 * the additional parameter is not standard but a BSD-specific we have to
6319 * pass sorting parameters via the global 'server' structure */
/* s1, s2: pointers to the two redisSortObject elements being compared.
 *
 * Parameters read from 'server':
 *   sort_alpha     - zero: compare the precomputed u.score values;
 *                    non-zero: compare strings with strcoll().
 *   sort_bypattern - in alpha mode, compare the u.cmpobj objects (either of
 *                    which may be NULL) instead of the elements themselves.
 *   sort_desc      - non-zero negates the result. */
6320 static int sortCompare(const void *s1
, const void *s2
) {
6321 const redisSortObject
*so1
= s1
, *so2
= s2
;
6324 if (!server
.sort_alpha
) {
6325 /* Numeric sorting. Here it's trivial as we precomputed scores */
6326 if (so1
->u
.score
> so2
->u
.score
) {
6328 } else if (so1
->u
.score
< so2
->u
.score
) {
6334 /* Alphanumeric sorting */
6335 if (server
.sort_bypattern
) {
6336 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6337 /* At least one compare object is NULL */
6338 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6340 else if (so1
->u
.cmpobj
== NULL
)
6345 /* We have both the objects, use strcoll */
6346 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6349 /* Compare elements directly */
6352 dec1
= getDecodedObject(so1
->obj
);
6353 dec2
= getDecodedObject(so2
->obj
);
6354 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6359 return server
.sort_desc
? -cmp
: cmp
;
6362 /* The SORT command is the most complex command in Redis. Warning: this code
6363 * is optimized for speed and a bit less for readability */
/* Visible stages:
 *  1. Look up argv[1]; it must be a list, a set or a sorted set.
 *  2. Parse the options ASC, DESC, ALPHA, LIMIT start count, STORE key,
 *     BY pattern and GET pattern (repeatable); anything else is a syntax
 *     error.
 *  3. Copy the elements into 'vector' and, unless 'dontsort' is set, load
 *     the value each element is sorted by.
 *  4. Normalize the LIMIT range into [start, end] and sort, publishing the
 *     comparison parameters through server.sort_desc / sort_alpha /
 *     sort_bypattern for sortCompare().
 *  5. Emit the result to the client, or build a list object and store it
 *     under the STORE key.
 *  6. Release sortval, the operations list and any per-element cmpobj.
 * NOTE(review): LIMIT arguments are parsed with atoi(), so non-numeric
 * values are not reported. */
6364 static void sortCommand(redisClient
*c
) {
6367 int desc
= 0, alpha
= 0;
6368 int limit_start
= 0, limit_count
= -1, start
, end
;
6369 int j
, dontsort
= 0, vectorlen
;
6370 int getop
= 0; /* GET operation counter */
6371 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6372 redisSortObject
*vector
; /* Resulting vector to sort */
6374 /* Lookup the key to sort. It must be of the right types */
6375 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6376 if (sortval
== NULL
) {
6377 addReply(c
,shared
.nullmultibulk
);
6380 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6381 sortval
->type
!= REDIS_ZSET
)
6383 addReply(c
,shared
.wrongtypeerr
);
6387 /* Create a list of operations to perform for every sorted element.
6388 * Operations can be GET/DEL/INCR/DECR */
6389 operations
= listCreate();
6390 listSetFreeMethod(operations
,zfree
);
6393 /* Now we need to protect sortval incrementing its count, in the future
6394 * SORT may have options able to overwrite/delete keys during the sorting
6395 * and the sorted key itself may get destroyed */
6396 incrRefCount(sortval
);
6398 /* The SORT command has an SQL-alike syntax, parse it */
6399 while(j
< c
->argc
) {
/* number of arguments that follow the current option name */
6400 int leftargs
= c
->argc
-j
-1;
6401 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6403 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6405 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6407 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6408 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6409 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6411 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6412 storekey
= c
->argv
[j
+1];
6414 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6415 sortby
= c
->argv
[j
+1];
6416 /* If the BY pattern does not contain '*', i.e. it is constant,
6417 * we don't need to sort nor to lookup the weight keys. */
6418 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6420 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6421 listAddNodeTail(operations
,createSortOperation(
6422 REDIS_SORT_GET
,c
->argv
[j
+1]));
/* unknown option: undo the setup done so far before replying */
6426 decrRefCount(sortval
);
6427 listRelease(operations
);
6428 addReply(c
,shared
.syntaxerr
);
6434 /* Load the sorting vector with all the objects to sort */
6435 switch(sortval
->type
) {
6436 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6437 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6438 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6439 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6441 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6444 if (sortval
->type
== REDIS_LIST
) {
6445 list
*list
= sortval
->ptr
;
6449 listRewind(list
,&li
);
6450 while((ln
= listNext(&li
))) {
6451 robj
*ele
= ln
->value
;
6452 vector
[j
].obj
= ele
;
6453 vector
[j
].u
.score
= 0;
6454 vector
[j
].u
.cmpobj
= NULL
;
6462 if (sortval
->type
== REDIS_SET
) {
6465 zset
*zs
= sortval
->ptr
;
6469 di
= dictGetIterator(set
);
6470 while((setele
= dictNext(di
)) != NULL
) {
6471 vector
[j
].obj
= dictGetEntryKey(setele
);
6472 vector
[j
].u
.score
= 0;
6473 vector
[j
].u
.cmpobj
= NULL
;
6476 dictReleaseIterator(di
);
6478 redisAssert(j
== vectorlen
);
6480 /* Now it's time to load the right scores in the sorting vector */
6481 if (dontsort
== 0) {
6482 for (j
= 0; j
< vectorlen
; j
++) {
6486 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6487 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6489 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6491 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6492 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6494 /* Don't need to decode the object if it's
6495 * integer-encoded (the only encoding supported) so
6496 * far. We can just cast it */
6497 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6498 vector
[j
].u
.score
= (long)byval
->ptr
;
6500 redisAssert(1 != 1);
6505 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6506 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6508 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6509 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6511 redisAssert(1 != 1);
6518 /* We are ready to sort the vector... perform a bit of sanity check
6519 * on the LIMIT option too. We'll use a partial version of quicksort. */
6520 start
= (limit_start
< 0) ? 0 : limit_start
;
6521 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6522 if (start
>= vectorlen
) {
6523 start
= vectorlen
-1;
6526 if (end
>= vectorlen
) end
= vectorlen
-1;
6528 if (dontsort
== 0) {
6529 server
.sort_desc
= desc
;
6530 server
.sort_alpha
= alpha
;
6531 server
.sort_bypattern
= sortby
? 1 : 0;
/* the partial sort is chosen only with a BY pattern and a LIMIT range
 * narrower than the whole vector */
6532 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6533 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6535 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6538 /* Send command output to the output buffer, performing the specified
6539 * GET/DEL/INCR/DECR operations if any. */
6540 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6541 if (storekey
== NULL
) {
6542 /* STORE option not specified, send the sorting result to client */
6543 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6544 for (j
= start
; j
<= end
; j
++) {
6548 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6549 listRewind(operations
,&li
);
6550 while((ln
= listNext(&li
))) {
6551 redisSortOperation
*sop
= ln
->value
;
6552 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6555 if (sop
->type
== REDIS_SORT_GET
) {
6556 if (!val
|| val
->type
!= REDIS_STRING
) {
6557 addReply(c
,shared
.nullbulk
);
6559 addReplyBulk(c
,val
);
6562 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6567 robj
*listObject
= createListObject();
6568 list
*listPtr
= (list
*) listObject
->ptr
;
6570 /* STORE option specified, set the sorting result as a List object */
6571 for (j
= start
; j
<= end
; j
++) {
6576 listAddNodeTail(listPtr
,vector
[j
].obj
);
6577 incrRefCount(vector
[j
].obj
);
6579 listRewind(operations
,&li
);
6580 while((ln
= listNext(&li
))) {
6581 redisSortOperation
*sop
= ln
->value
;
6582 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6585 if (sop
->type
== REDIS_SORT_GET
) {
6586 if (!val
|| val
->type
!= REDIS_STRING
) {
6587 listAddNodeTail(listPtr
,createStringObject("",0));
6589 listAddNodeTail(listPtr
,val
);
6593 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6597 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6598 incrRefCount(storekey
);
6600 /* Note: we add 1 because the DB is dirty anyway since even if the
6601 * SORT result is empty a new key is set and maybe the old content
6603 server
.dirty
+= 1+outputlen
;
6604 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6608 decrRefCount(sortval
);
6609 listRelease(operations
);
6610 for (j
= 0; j
< vectorlen
; j
++) {
6611 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6612 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * s: destination buffer owned by the caller. The text is written with
 *    sprintf(), so the buffer must be able to hold the longest output:
 *    a 20 digit number, the unit suffix and the terminating NUL.
 * n: the amount of bytes to format.
 *
 * Values below 1024 are printed as an integer followed by 'B'; larger values
 * are scaled by powers of 1024 to K, M, G, T or P and printed with two
 * decimals. Amounts of 1024^4 bytes and above used to match no branch at
 * all, leaving 's' unwritten; they are now handled, with a plain byte count
 * as the last resort for values of 1024^6 and beyond. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too large for the units above: fall back to the raw count */
        sprintf(s,"%lluB",n);
    }
}
6638 /* Create the string returned by the INFO command. This is decoupled
6639 * by the INFO command itself as we need to report the same information
6640 * on memory corruption problems. */
6641 static sds
genRedisInfoString(void) {
6643 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6647 bytesToHuman(hmem
,zmalloc_used_memory());
6648 info
= sdscatprintf(sdsempty(),
6649 "redis_version:%s\r\n"
6651 "multiplexing_api:%s\r\n"
6652 "process_id:%ld\r\n"
6653 "uptime_in_seconds:%ld\r\n"
6654 "uptime_in_days:%ld\r\n"
6655 "connected_clients:%d\r\n"
6656 "connected_slaves:%d\r\n"
6657 "blocked_clients:%d\r\n"
6658 "used_memory:%zu\r\n"
6659 "used_memory_human:%s\r\n"
6660 "changes_since_last_save:%lld\r\n"
6661 "bgsave_in_progress:%d\r\n"
6662 "last_save_time:%ld\r\n"
6663 "bgrewriteaof_in_progress:%d\r\n"
6664 "total_connections_received:%lld\r\n"
6665 "total_commands_processed:%lld\r\n"
6666 "expired_keys:%lld\r\n"
6667 "hash_max_zipmap_entries:%ld\r\n"
6668 "hash_max_zipmap_value:%ld\r\n"
6672 (sizeof(long) == 8) ? "64" : "32",
6677 listLength(server
.clients
)-listLength(server
.slaves
),
6678 listLength(server
.slaves
),
6679 server
.blpop_blocked_clients
,
6680 zmalloc_used_memory(),
6683 server
.bgsavechildpid
!= -1,
6685 server
.bgrewritechildpid
!= -1,
6686 server
.stat_numconnections
,
6687 server
.stat_numcommands
,
6688 server
.stat_expiredkeys
,
6689 server
.hash_max_zipmap_entries
,
6690 server
.hash_max_zipmap_value
,
6691 server
.vm_enabled
!= 0,
6692 server
.masterhost
== NULL
? "master" : "slave"
6694 if (server
.masterhost
) {
6695 info
= sdscatprintf(info
,
6696 "master_host:%s\r\n"
6697 "master_port:%d\r\n"
6698 "master_link_status:%s\r\n"
6699 "master_last_io_seconds_ago:%d\r\n"
6702 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6704 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6707 if (server
.vm_enabled
) {
6709 info
= sdscatprintf(info
,
6710 "vm_conf_max_memory:%llu\r\n"
6711 "vm_conf_page_size:%llu\r\n"
6712 "vm_conf_pages:%llu\r\n"
6713 "vm_stats_used_pages:%llu\r\n"
6714 "vm_stats_swapped_objects:%llu\r\n"
6715 "vm_stats_swappin_count:%llu\r\n"
6716 "vm_stats_swappout_count:%llu\r\n"
6717 "vm_stats_io_newjobs_len:%lu\r\n"
6718 "vm_stats_io_processing_len:%lu\r\n"
6719 "vm_stats_io_processed_len:%lu\r\n"
6720 "vm_stats_io_active_threads:%lu\r\n"
6721 "vm_stats_blocked_clients:%lu\r\n"
6722 ,(unsigned long long) server
.vm_max_memory
,
6723 (unsigned long long) server
.vm_page_size
,
6724 (unsigned long long) server
.vm_pages
,
6725 (unsigned long long) server
.vm_stats_used_pages
,
6726 (unsigned long long) server
.vm_stats_swapped_objects
,
6727 (unsigned long long) server
.vm_stats_swapins
,
6728 (unsigned long long) server
.vm_stats_swapouts
,
6729 (unsigned long) listLength(server
.io_newjobs
),
6730 (unsigned long) listLength(server
.io_processing
),
6731 (unsigned long) listLength(server
.io_processed
),
6732 (unsigned long) server
.io_active_threads
,
6733 (unsigned long) server
.vm_blocked_clients
6737 for (j
= 0; j
< server
.dbnum
; j
++) {
6738 long long keys
, vkeys
;
6740 keys
= dictSize(server
.db
[j
].dict
);
6741 vkeys
= dictSize(server
.db
[j
].expires
);
6742 if (keys
|| vkeys
) {
6743 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6750 static void infoCommand(redisClient
*c
) {
6751 sds info
= genRedisInfoString();
6752 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6753 (unsigned long)sdslen(info
)));
6754 addReplySds(c
,info
);
6755 addReply(c
,shared
.crlf
);
/* MONITOR command: unless the client already has the REDIS_SLAVE flag set,
 * flag it as both REDIS_SLAVE and REDIS_MONITOR, append it to the
 * server.monitors list and reply with the shared OK object. */
6758 static void monitorCommand(redisClient
*c
) {
6759 /* ignore MONITOR if already slave or in monitor mode */
6760 if (c
->flags
& REDIS_SLAVE
) return;
6762 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6764 listAddNodeTail(server
.monitors
,c
);
6765 addReply(c
,shared
.ok
);
6768 /* ================================= Expire ================================= */
6769 static int removeExpire(redisDb
*db
, robj
*key
) {
6770 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6777 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6778 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6786 /* Return the expire time of the specified key, or -1 if no expire
6787 * is associated with this key (i.e. the key is non volatile) */
6788 static time_t getExpire(redisDb
*db
, robj
*key
) {
6791 /* No expire? return ASAP */
6792 if (dictSize(db
->expires
) == 0 ||
6793 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6795 return (time_t) dictGetEntryVal(de
);
6798 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6802 /* No expire? return ASAP */
6803 if (dictSize(db
->expires
) == 0 ||
6804 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6806 /* Lookup the expire */
6807 when
= (time_t) dictGetEntryVal(de
);
6808 if (time(NULL
) <= when
) return 0;
6810 /* Delete the key */
6811 dictDelete(db
->expires
,key
);
6812 server
.stat_expiredkeys
++;
6813 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' if it has an entry in db->expires.
 *
 * Returns 0 without modifying anything when db->expires is empty or holds
 * no entry for 'key'. Otherwise server.stat_expiredkeys is incremented,
 * the key is deleted from db->expires, and the return value tells whether
 * deleting it from db->dict returned DICT_OK. No comparison against the
 * current time is made in the statements below. */
6816 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6819 /* No expire? return ASAP */
6820 if (dictSize(db
->expires
) == 0 ||
6821 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6823 /* Delete the key */
6825 server
.stat_expiredkeys
++;
6826 dictDelete(db
->expires
,key
);
6827 return dictDelete(db
->dict
,key
) == DICT_OK
;
6830 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6833 de
= dictFind(c
->db
->dict
,key
);
6835 addReply(c
,shared
.czero
);
6839 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6840 addReply(c
, shared
.cone
);
6843 time_t when
= time(NULL
)+seconds
;
6844 if (setExpire(c
->db
,key
,when
)) {
6845 addReply(c
,shared
.cone
);
6848 addReply(c
,shared
.czero
);
6854 static void expireCommand(redisClient
*c
) {
6855 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6858 static void expireatCommand(redisClient
*c
) {
6859 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
/* TTL command: reply with an integer that is the expire time of argv[1],
 * as returned by getExpire(), minus the current time. A negative result
 * is reported as -1. */
6862 static void ttlCommand(redisClient
*c
) {
6866 expire
= getExpire(c
->db
,c
->argv
[1]);
/* Seconds left before the expire time; clamp negative values to -1. */
6868 ttl
= (int) (expire
-time(NULL
));
6869 if (ttl
< 0) ttl
= -1;
/* Integer reply. */
6871 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6874 /* ================================ MULTI/EXEC ============================== */
6876 /* Client state initialization for MULTI/EXEC */
6877 static void initClientMultiState(redisClient
*c
) {
6878 c
->mstate
.commands
= NULL
;
6879 c
->mstate
.count
= 0;
6882 /* Release all the resources associated with MULTI/EXEC state */
/* For each of the c->mstate.count queued commands the reference count of
 * every argument object is decremented, then the commands array itself is
 * released with zfree(). */
6883 static void freeClientMultiState(redisClient
*c
) {
6886 for (j
= 0; j
< c
->mstate
.count
; j
++) {
/* mc is the j-th queued command. */
6888 multiCmd
*mc
= c
->mstate
.commands
+j
;
/* Drop the references taken on the arguments of this command. */
6890 for (i
= 0; i
< mc
->argc
; i
++)
6891 decrRefCount(mc
->argv
[i
]);
6894 zfree(c
->mstate
.commands
);
6897 /* Add a new command into the MULTI commands queue */
/* The argument vector of the client (c->argv, c->argc elements) is copied
 * into the new queue entry and the reference count of every argument object
 * is incremented. */
6898 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
/* Grow the commands array so that it has room for one more entry. */
6902 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6903 sizeof(multiCmd
)*(c
->mstate
.count
+1));
/* mc points to the entry just past the c->mstate.count existing ones. */
6904 mc
= c
->mstate
.commands
+c
->mstate
.count
;
/* The entry gets its own array of argument pointers, copied from the
 * client; every copied pointer takes a reference on its object. */
6907 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6908 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6909 for (j
= 0; j
< c
->argc
; j
++)
6910 incrRefCount(mc
->argv
[j
]);
6914 static void multiCommand(redisClient
*c
) {
6915 c
->flags
|= REDIS_MULTI
;
6916 addReply(c
,shared
.ok
);
6919 static void discardCommand(redisClient
*c
) {
6920 if (!(c
->flags
& REDIS_MULTI
)) {
6921 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6925 freeClientMultiState(c
);
6926 initClientMultiState(c
);
6927 c
->flags
&= (~REDIS_MULTI
);
6928 addReply(c
,shared
.ok
);
6931 static void execCommand(redisClient
*c
) {
6936 if (!(c
->flags
& REDIS_MULTI
)) {
6937 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6941 orig_argv
= c
->argv
;
6942 orig_argc
= c
->argc
;
6943 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6944 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6945 c
->argc
= c
->mstate
.commands
[j
].argc
;
6946 c
->argv
= c
->mstate
.commands
[j
].argv
;
6947 call(c
,c
->mstate
.commands
[j
].cmd
);
6949 c
->argv
= orig_argv
;
6950 c
->argc
= orig_argc
;
6951 freeClientMultiState(c
);
6952 initClientMultiState(c
);
6953 c
->flags
&= (~REDIS_MULTI
);
6956 /* =========================== Blocking Operations ========================= */
6958 /* Currently Redis blocking operations support is limited to list POP ops,
6959 * so the current implementation is not fully generic, but it is also not
6960 * completely specific so it will not require a rewrite to support new
6961 * kind of blocking operations in the future.
6963 * Still it's important to note that list blocking operations can be already
6964 * used as a notification mechanism in order to implement other blocking
6965 * operations at application level, so there must be a very strong evidence
6966 * of usefulness and generality before new blocking operations are implemented.
6968 * This is how the current blocking POP works, we use BLPOP as example:
6969 * - If the user calls BLPOP and the key exists and contains a non empty list
6970 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6971 * if there is no need to block.
6972 * - If instead BLPOP is called and the key does not exist or the list is
6973 * empty we need to block. In order to do so we remove the notification for
6974 * new data to read in the client socket (so that we'll not serve new
6975 * requests if the blocking request is not served). Also we put the client
6976 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6977 * blocked on those keys.
6978 * - If a PUSH operation against a key with blocked clients waiting is
6979 * performed, we serve the first in the list: basically instead of pushing
6980 * the new element inside the list we return it to the (first / oldest)
6981 * blocking client, unblock the client, and remove it from the list.
6983 * The above comment and the source code should be enough in order to understand
6984 * the implementation and modify / fix it later.
6987 /* Set a client in blocking mode for the specified key, with the specified
6989 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6994 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6995 c
->blockingkeysnum
= numkeys
;
6996 c
->blockingto
= timeout
;
6997 for (j
= 0; j
< numkeys
; j
++) {
6998 /* Add the key in the client structure, to map clients -> keys */
6999 c
->blockingkeys
[j
] = keys
[j
];
7000 incrRefCount(keys
[j
]);
7002 /* And in the other "side", to map keys -> clients */
7003 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7007 /* For every key we take a list of clients blocked for it */
7009 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7010 incrRefCount(keys
[j
]);
7011 assert(retval
== DICT_OK
);
7013 l
= dictGetEntryVal(de
);
7015 listAddNodeTail(l
,c
);
7017 /* Mark the client as a blocked client */
7018 c
->flags
|= REDIS_BLOCKED
;
7019 server
.blpop_blocked_clients
++;
7022 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
/* The client is removed from the waiting list of every key stored in
 * c->blockingkeys, the reference held on each of those keys is released,
 * the keys array is freed, the REDIS_BLOCKED flag is cleared and
 * server.blpop_blocked_clients is decremented. If the client has pending
 * data in its query buffer, that data is processed before returning. */
7023 static void unblockClientWaitingData(redisClient
*c
) {
7028 assert(c
->blockingkeys
!= NULL
);
7029 /* The client may wait for multiple keys, so unblock it for every key. */
7030 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7031 /* Remove this client from the list of clients waiting for this key. */
7032 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7034 l
= dictGetEntryVal(de
);
7035 listDelNode(l
,listSearchKey(l
,c
));
7036 /* If the list is empty we need to remove it to avoid wasting memory */
7037 if (listLength(l
) == 0)
7038 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
/* Release the reference this client held on the key. */
7039 decrRefCount(c
->blockingkeys
[j
]);
7041 /* Cleanup the client structure */
7042 zfree(c
->blockingkeys
);
7043 c
->blockingkeys
= NULL
;
7044 c
->flags
&= (~REDIS_BLOCKED
);
7045 server
.blpop_blocked_clients
--;
7046 /* We want to process data if there is some command waiting
7047 * in the input buffer. Note that this is safe even if
7048 * unblockClientWaitingData() gets called from freeClient() because
7049 * freeClient() will be smart enough to call this function
7050 * *after* c->querybuf was set to NULL. */
7051 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7054 /* This should be called from any function PUSHing into lists.
7055 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7056 * 'ele' is the element pushed.
7058 * If the function returns 0 there was no client waiting for a list push
7061 * If the function returns 1 there was a client waiting for a list push
7062 * against this key, the element was passed to this client thus it's not
7063 * needed to actually add it to the list and the caller should return asap. */
7064 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7065 struct dictEntry
*de
;
7066 redisClient
*receiver
;
/* No entry for this key in c->db->blockingkeys: nobody is waiting. */
7070 de
= dictFind(c
->db
->blockingkeys
,key
);
7071 if (de
== NULL
) return 0;
/* The entry value is the list of clients blocked on this key. */
7072 l
= dictGetEntryVal(de
);
7075 receiver
= ln
->value
;
/* The receiver gets a two elements multi bulk reply, the key followed by
 * the pushed element, and is then unblocked. */
7077 addReplySds(receiver
,sdsnew("*2\r\n"));
7078 addReplyBulk(receiver
,key
);
7079 addReplyBulk(receiver
,ele
);
7080 unblockClientWaitingData(receiver
);
7084 /* Blocking RPOP/LPOP */
7085 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7090 for (j
= 1; j
< c
->argc
-1; j
++) {
7091 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7093 if (o
->type
!= REDIS_LIST
) {
7094 addReply(c
,shared
.wrongtypeerr
);
7097 list
*list
= o
->ptr
;
7098 if (listLength(list
) != 0) {
7099 /* If the list contains elements fall back to the usual
7100 * non-blocking POP operation */
7101 robj
*argv
[2], **orig_argv
;
7104 /* We need to alter the command arguments before to call
7105 * popGenericCommand() as the command takes a single key. */
7106 orig_argv
= c
->argv
;
7107 orig_argc
= c
->argc
;
7108 argv
[1] = c
->argv
[j
];
7112 /* Also the return value is different, we need to output
7113 * the multi bulk reply header and the key name. The
7114 * "real" command will add the last element (the value)
7115 * for us. If this souds like an hack to you it's just
7116 * because it is... */
7117 addReplySds(c
,sdsnew("*2\r\n"));
7118 addReplyBulk(c
,argv
[1]);
7119 popGenericCommand(c
,where
);
7121 /* Fix the client structure with the original stuff */
7122 c
->argv
= orig_argv
;
7123 c
->argc
= orig_argc
;
7129 /* If the list is empty or the key does not exists we must block */
7130 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7131 if (timeout
> 0) timeout
+= time(NULL
);
7132 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7135 static void blpopCommand(redisClient
*c
) {
7136 blockingPopGenericCommand(c
,REDIS_HEAD
);
7139 static void brpopCommand(redisClient
*c
) {
7140 blockingPopGenericCommand(c
,REDIS_TAIL
);
7143 /* =============================== Replication ============================= */
7145 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7146 ssize_t nwritten
, ret
= size
;
7147 time_t start
= time(NULL
);
7151 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7152 nwritten
= write(fd
,ptr
,size
);
7153 if (nwritten
== -1) return -1;
7157 if ((time(NULL
)-start
) > timeout
) {
7165 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7166 ssize_t nread
, totread
= 0;
7167 time_t start
= time(NULL
);
7171 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7172 nread
= read(fd
,ptr
,size
);
7173 if (nread
== -1) return -1;
7178 if ((time(NULL
)-start
) > timeout
) {
7186 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7193 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7196 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7207 static void syncCommand(redisClient
*c
) {
7208 /* ignore SYNC if already slave or in monitor mode */
7209 if (c
->flags
& REDIS_SLAVE
) return;
7211 /* SYNC can't be issued when the server has pending data to send to
7212 * the client about already issued commands. We need a fresh reply
7213 * buffer registering the differences between the BGSAVE and the current
7214 * dataset, so that we can copy to other slaves if needed. */
7215 if (listLength(c
->reply
) != 0) {
7216 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7220 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7221 /* Here we need to check if there is a background saving operation
7222 * in progress, or if it is required to start one */
7223 if (server
.bgsavechildpid
!= -1) {
7224 /* Ok a background save is in progress. Let's check if it is a good
7225 * one for replication, i.e. if there is another slave that is
7226 * registering differences since the server forked to save */
7231 listRewind(server
.slaves
,&li
);
7232 while((ln
= listNext(&li
))) {
7234 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7237 /* Perfect, the server is already registering differences for
7238 * another slave. Set the right state, and copy the buffer. */
7239 listRelease(c
->reply
);
7240 c
->reply
= listDup(slave
->reply
);
7241 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7242 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7244 /* No way, we need to wait for the next BGSAVE in order to
7245 * register differences */
7246 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7247 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7250 /* Ok we don't have a BGSAVE in progress, let's start one */
7251 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7252 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7253 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7254 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7257 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7260 c
->flags
|= REDIS_SLAVE
;
7262 listAddNodeTail(server
.slaves
,c
);
7266 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7267 redisClient
*slave
= privdata
;
7269 REDIS_NOTUSED(mask
);
7270 char buf
[REDIS_IOBUF_LEN
];
7271 ssize_t nwritten
, buflen
;
7273 if (slave
->repldboff
== 0) {
7274 /* Write the bulk write count before to transfer the DB. In theory here
7275 * we don't know how much room there is in the output buffer of the
7276 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7277 * operations) will never be smaller than the few bytes we need. */
7280 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7282 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7290 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7291 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7293 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7294 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7298 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7299 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7304 slave
->repldboff
+= nwritten
;
7305 if (slave
->repldboff
== slave
->repldbsize
) {
7306 close(slave
->repldbfd
);
7307 slave
->repldbfd
= -1;
7308 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7309 slave
->replstate
= REDIS_REPL_ONLINE
;
7310 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7311 sendReplyToClient
, slave
) == AE_ERR
) {
7315 addReplySds(slave
,sdsempty());
7316 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7320 /* This function is called at the end of every background saving.
7321 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7322 * otherwise REDIS_ERR is passed to the function.
7324 * The goal of this function is to handle slaves waiting for a successful
7325 * background saving in order to perform non-blocking synchronization. */
7326 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7328 int startbgsave
= 0;
7331 listRewind(server
.slaves
,&li
);
7332 while((ln
= listNext(&li
))) {
7333 redisClient
*slave
= ln
->value
;
7335 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7337 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7338 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7339 struct redis_stat buf
;
7341 if (bgsaveerr
!= REDIS_OK
) {
7343 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7346 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7347 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7349 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7352 slave
->repldboff
= 0;
7353 slave
->repldbsize
= buf
.st_size
;
7354 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7355 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7356 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7363 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7366 listRewind(server
.slaves
,&li
);
7367 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7368 while((ln
= listNext(&li
))) {
7369 redisClient
*slave
= ln
->value
;
7371 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7378 static int syncWithMaster(void) {
7379 char buf
[1024], tmpfile
[256], authcmd
[1024];
7381 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7382 int dfd
, maxtries
= 5;
7385 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7390 /* AUTH with the master if required. */
7391 if(server
.masterauth
) {
7392 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7393 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7395 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7399 /* Read the AUTH result. */
7400 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7402 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7406 if (buf
[0] != '+') {
7408 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7413 /* Issue the SYNC command */
7414 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7416 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7420 /* Read the bulk write count */
7421 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7423 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7427 if (buf
[0] != '$') {
7429 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7432 dumpsize
= strtol(buf
+1,NULL
,10);
7433 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7434 /* Read the bulk write data on a temp file */
7436 snprintf(tmpfile
,256,
7437 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7438 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7439 if (dfd
!= -1) break;
7444 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7448 int nread
, nwritten
;
7450 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7452 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7458 nwritten
= write(dfd
,buf
,nread
);
7459 if (nwritten
== -1) {
7460 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7468 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7469 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7475 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7476 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7480 server
.master
= createClient(fd
);
7481 server
.master
->flags
|= REDIS_MASTER
;
7482 server
.master
->authenticated
= 1;
7483 server
.replstate
= REDIS_REPL_CONNECTED
;
7487 static void slaveofCommand(redisClient
*c
) {
7488 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7489 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7490 if (server
.masterhost
) {
7491 sdsfree(server
.masterhost
);
7492 server
.masterhost
= NULL
;
7493 if (server
.master
) freeClient(server
.master
);
7494 server
.replstate
= REDIS_REPL_NONE
;
7495 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7498 sdsfree(server
.masterhost
);
7499 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7500 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7501 if (server
.master
) freeClient(server
.master
);
7502 server
.replstate
= REDIS_REPL_CONNECT
;
7503 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7504 server
.masterhost
, server
.masterport
);
7506 addReply(c
,shared
.ok
);
7509 /* ============================ Maxmemory directive ======================== */
7511 /* Try to free one object from the pre-allocated objects free list.
7512 * This is useful under low mem conditions as by default we take 1 million
7513 * free objects allocated. On success REDIS_OK is returned, otherwise
7515 static int tryFreeOneObjectFromFreelist(void) {
7518 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7519 if (listLength(server
.objfreelist
)) {
7520 listNode
*head
= listFirst(server
.objfreelist
);
7521 o
= listNodeValue(head
);
7522 listDelNode(server
.objfreelist
,head
);
7523 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7527 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7532 /* This function gets called when 'maxmemory' is set on the config file to limit
7533 * the max memory used by the server, and we are out of memory.
7534 * This function will try to, in order:
7536 * - Free objects from the free list
7537 * - Try to remove keys with an EXPIRE set
7539 * It is not possible to free enough memory to reach used-memory < maxmemory
7540 * the server will start refusing commands that will enlarge even more the
7543 static void freeMemoryIfNeeded(void) {
7544 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7545 int j
, k
, freed
= 0;
7547 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7548 for (j
= 0; j
< server
.dbnum
; j
++) {
7550 robj
*minkey
= NULL
;
7551 struct dictEntry
*de
;
7553 if (dictSize(server
.db
[j
].expires
)) {
7555 /* From a sample of three keys drop the one nearest to
7556 * the natural expire */
7557 for (k
= 0; k
< 3; k
++) {
7560 de
= dictGetRandomKey(server
.db
[j
].expires
);
7561 t
= (time_t) dictGetEntryVal(de
);
7562 if (minttl
== -1 || t
< minttl
) {
7563 minkey
= dictGetEntryKey(de
);
7567 deleteKey(server
.db
+j
,minkey
);
7570 if (!freed
) return; /* nothing to free... */
7574 /* ============================== Append Only file ========================== */
7576 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7577 sds buf
= sdsempty();
7583 /* The DB this command was targeting is not the same as the one of the last
7584 * command we appended, so a SELECT command must be issued first. */
7585 if (dictid
!= server
.appendseldb
) {
7588 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7589 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7590 (unsigned long)strlen(seldb
),seldb
);
7591 server
.appendseldb
= dictid
;
7594 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7595 * EXPIREs into EXPIREATs calls */
7596 if (cmd
->proc
== expireCommand
) {
7599 tmpargv
[0] = createStringObject("EXPIREAT",8);
7600 tmpargv
[1] = argv
[1];
7601 incrRefCount(argv
[1]);
7602 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7603 tmpargv
[2] = createObject(REDIS_STRING
,
7604 sdscatprintf(sdsempty(),"%ld",when
));
7608 /* Append the actual command */
7609 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7610 for (j
= 0; j
< argc
; j
++) {
7613 o
= getDecodedObject(o
);
7614 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7615 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7616 buf
= sdscatlen(buf
,"\r\n",2);
7620 /* Free the objects from the modified argv for EXPIREAT */
7621 if (cmd
->proc
== expireCommand
) {
7622 for (j
= 0; j
< 3; j
++)
7623 decrRefCount(argv
[j
]);
7626 /* We want to perform a single write. This should be guaranteed atomic
7627 * at least if the filesystem we are writing is a real physical one.
7628 * While this will save us against the server being killed I don't think
7629 * there is much to do about the whole server stopping for power problems
7631 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7632 if (nwritten
!= (signed)sdslen(buf
)) {
7633 /* Ooops, we are in troubles. The best thing to do for now is
7634 * to simply exit instead to give the illusion that everything is
7635 * working as expected. */
7636 if (nwritten
== -1) {
7637 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7639 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7643 /* If a background append only file rewriting is in progress we want to
7644 * accumulate the differences between the child DB and the current one
7645 * in a buffer, so that when the child process will do its work we
7646 * can append the differences to the new append only file. */
7647 if (server
.bgrewritechildpid
!= -1)
7648 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7652 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7653 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7654 now
-server
.lastfsync
> 1))
7656 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7657 server
.lastfsync
= now
;
7661 /* In Redis commands are always executed in the context of a client, so in
7662 * order to load the append only file we need to create a fake client. */
/* The client structure is allocated with zmalloc() and returned to the
 * caller. */
7663 static struct redisClient
*createFakeClient(void) {
7664 struct redisClient
*c
= zmalloc(sizeof(*c
));
/* Start with an empty query buffer. */
7668 c
->querybuf
= sdsempty();
7672 /* We set the fake client as a slave waiting for the synchronization
7673 * so that Redis will not try to send replies to this client. */
7674 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
/* Empty reply list: its elements are released with decrRefCount() and
 * duplicated with dupClientReplyValue(). */
7675 c
->reply
= listCreate();
7676 listSetFreeMethod(c
->reply
,decrRefCount
);
7677 listSetDupMethod(c
->reply
,dupClientReplyValue
);
/* Release the resources of a fake client: its query buffer is freed with
 * sdsfree() and its reply list is released with listRelease(). */
7681 static void freeFakeClient(struct redisClient
*c
) {
7682 sdsfree(c
->querybuf
);
7683 listRelease(c
->reply
);
7687 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
7688 * error (the append only file is zero-length) REDIS_ERR is returned. On
7689 * fatal error an error message is logged and the program exits. */
7690 int loadAppendOnlyFile(char *filename
) {
7691 struct redisClient
*fakeClient
;
7692 FILE *fp
= fopen(filename
,"r");
7693 struct redis_stat sb
;
7694 unsigned long long loadedkeys
= 0;
7696 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7700 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7704 fakeClient
= createFakeClient();
7711 struct redisCommand
*cmd
;
7713 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7719 if (buf
[0] != '*') goto fmterr
;
7721 argv
= zmalloc(sizeof(robj
*)*argc
);
7722 for (j
= 0; j
< argc
; j
++) {
7723 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7724 if (buf
[0] != '$') goto fmterr
;
7725 len
= strtol(buf
+1,NULL
,10);
7726 argsds
= sdsnewlen(NULL
,len
);
7727 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7728 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7729 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7732 /* Command lookup */
7733 cmd
= lookupCommand(argv
[0]->ptr
);
7735 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7738 /* Try object sharing and encoding */
7739 if (server
.shareobjects
) {
7741 for(j
= 1; j
< argc
; j
++)
7742 argv
[j
] = tryObjectSharing(argv
[j
]);
7744 if (cmd
->flags
& REDIS_CMD_BULK
)
7745 tryObjectEncoding(argv
[argc
-1]);
7746 /* Run the command in the context of a fake client */
7747 fakeClient
->argc
= argc
;
7748 fakeClient
->argv
= argv
;
7749 cmd
->proc(fakeClient
);
7750 /* Discard the reply objects list from the fake client */
7751 while(listLength(fakeClient
->reply
))
7752 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7753 /* Clean up, ready for the next command */
7754 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7756 /* Handle swapping while loading big datasets when VM is on */
7758 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7759 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7760 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7765 freeFakeClient(fakeClient
);
7770 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7772 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7776 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7780 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7781 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7785 /* Avoid the incr/decr ref count business if possible to help
7786 * copy-on-write (we are often in a child process when this function
7788 * Also makes sure that key objects don't get incrRefCount-ed when VM
7790 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7791 obj
= getDecodedObject(obj
);
7794 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7795 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7796 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7798 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7799 if (decrrc
) decrRefCount(obj
);
7802 if (decrrc
) decrRefCount(obj
);
/* Write binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (not necessarily NUL terminated).
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be formatted with %lu, not %ld. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted with "%.17g". Returns 1 on success, 0 on write
 * error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    unsigned long digits;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    digits = (unsigned long)strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",digits);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,strlen(payload),1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    unsigned long digits;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    digits = (unsigned long)strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",digits);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,strlen(payload),1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
7840 /* Write a sequence of commands able to fully rebuild the dataset into
7841 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7842 static int rewriteAppendOnlyFile(char *filename
) {
7843 dictIterator
*di
= NULL
;
7848 time_t now
= time(NULL
);
7850 /* Note that we have to use a different temp name here compared to the
7851 * one used by rewriteAppendOnlyFileBackground() function. */
7852 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7853 fp
= fopen(tmpfile
,"w");
7855 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7858 for (j
= 0; j
< server
.dbnum
; j
++) {
7859 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7860 redisDb
*db
= server
.db
+j
;
7862 if (dictSize(d
) == 0) continue;
7863 di
= dictGetIterator(d
);
7869 /* SELECT the new DB */
7870 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7871 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7873 /* Iterate this DB writing every entry */
7874 while((de
= dictNext(di
)) != NULL
) {
7879 key
= dictGetEntryKey(de
);
7880 /* If the value for this key is swapped, load a preview in memory.
7881 * We use a "swapped" flag to remember if we need to free the
7882 * value object instead to just increment the ref count anyway
7883 * in order to avoid copy-on-write of pages if we are forked() */
7884 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7885 key
->storage
== REDIS_VM_SWAPPING
) {
7886 o
= dictGetEntryVal(de
);
7889 o
= vmPreviewObject(key
);
7892 expiretime
= getExpire(db
,key
);
7894 /* Save the key and associated value */
7895 if (o
->type
== REDIS_STRING
) {
7896 /* Emit a SET command */
7897 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7898 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7900 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7901 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7902 } else if (o
->type
== REDIS_LIST
) {
7903 /* Emit the RPUSHes needed to rebuild the list */
7904 list
*list
= o
->ptr
;
7908 listRewind(list
,&li
);
7909 while((ln
= listNext(&li
))) {
7910 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7911 robj
*eleobj
= listNodeValue(ln
);
7913 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7914 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7915 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7917 } else if (o
->type
== REDIS_SET
) {
7918 /* Emit the SADDs needed to rebuild the set */
7920 dictIterator
*di
= dictGetIterator(set
);
7923 while((de
= dictNext(di
)) != NULL
) {
7924 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7925 robj
*eleobj
= dictGetEntryKey(de
);
7927 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7928 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7929 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7931 dictReleaseIterator(di
);
7932 } else if (o
->type
== REDIS_ZSET
) {
7933 /* Emit the ZADDs needed to rebuild the sorted set */
7935 dictIterator
*di
= dictGetIterator(zs
->dict
);
7938 while((de
= dictNext(di
)) != NULL
) {
7939 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7940 robj
*eleobj
= dictGetEntryKey(de
);
7941 double *score
= dictGetEntryVal(de
);
7943 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7944 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7945 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7946 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7948 dictReleaseIterator(di
);
7949 } else if (o
->type
== REDIS_HASH
) {
7950 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7952 /* Emit the HSETs needed to rebuild the hash */
7953 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7954 unsigned char *p
= zipmapRewind(o
->ptr
);
7955 unsigned char *field
, *val
;
7956 unsigned int flen
, vlen
;
7958 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7959 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7960 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7961 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7963 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7967 dictIterator
*di
= dictGetIterator(o
->ptr
);
7970 while((de
= dictNext(di
)) != NULL
) {
7971 robj
*field
= dictGetEntryKey(de
);
7972 robj
*val
= dictGetEntryVal(de
);
7974 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7975 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7976 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7977 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7979 dictReleaseIterator(di
);
7984 /* Save the expire time */
7985 if (expiretime
!= -1) {
7986 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7987 /* If this key is already expired skip it */
7988 if (expiretime
< now
) continue;
7989 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7990 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7991 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7993 if (swapped
) decrRefCount(o
);
7995 dictReleaseIterator(di
);
7998 /* Make sure data will not remain on the OS's output buffers */
8003 /* Use RENAME to make sure the DB file is changed atomically only
8004 * if the generate DB file is ok. */
8005 if (rename(tmpfile
,filename
) == -1) {
8006 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8010 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8016 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8017 if (di
) dictReleaseIterator(di
);
8021 /* This is how rewriting of the append only file in background works:
8023 * 1) The user calls BGREWRITEAOF
8024 * 2) Redis calls this function, that forks():
8025 * 2a) the child rewrite the append only file in a temp file.
8026 * 2b) the parent accumulates differences in server.bgrewritebuf.
8027 * 3) When the child finished '2a' exists.
8028 * 4) The parent will trap the exit code, if it's OK, will append the
8029 * data accumulated into server.bgrewritebuf into the temp file, and
8030 * finally will rename(2) the temp file in the actual file name.
8031 * The the new file is reopened as the new append only file. Profit!
8033 static int rewriteAppendOnlyFileBackground(void) {
8036 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8037 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8038 if ((childpid
= fork()) == 0) {
8042 if (server
.vm_enabled
) vmReopenSwapFile();
8044 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8045 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8052 if (childpid
== -1) {
8053 redisLog(REDIS_WARNING
,
8054 "Can't rewrite append only file in background: fork: %s",
8058 redisLog(REDIS_NOTICE
,
8059 "Background append only file rewriting started by pid %d",childpid
);
8060 server
.bgrewritechildpid
= childpid
;
8061 /* We set appendseldb to -1 in order to force the next call to the
8062 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8063 * accumulated by the parent into server.bgrewritebuf will start
8064 * with a SELECT statement and it will be safe to merge. */
8065 server
.appendseldb
= -1;
8068 return REDIS_OK
; /* unreached */
8071 static void bgrewriteaofCommand(redisClient
*c
) {
8072 if (server
.bgrewritechildpid
!= -1) {
8073 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8076 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8077 char *status
= "+Background append only file rewriting started\r\n";
8078 addReplySds(c
,sdsnew(status
));
8080 addReply(c
,shared
.err
);
/* Remove the temp file written by the background rewriting child having
 * the specified pid. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8091 /* Virtual Memory is composed mainly of two subsystems:
8092 * - Blocking Virtual Memory
8093 * - Threaded Virtual Memory I/O
8094 * The two parts are not fully decoupled, but functions are split among two
8095 * different sections of the source code (delimited by comments) in order to
8096 * make more clear what functionality is about the blocking VM and what about
8097 * the threaded (not blocking) VM.
8101 * Redis VM is a blocking VM (one that blocks reading swapped values from
8102 * disk into memory when a value swapped out is needed in memory) that is made
8103 * unblocking by trying to examine the command argument vector in order to
8104 * load in background values that will likely be needed in order to exec
8105 * the command. The command is executed only once all the relevant keys
8106 * are loaded into memory.
8108 * This basically is almost as simple as a blocking VM, but almost as parallel
8109 * as a fully non-blocking VM.
8112 /* =================== Virtual Memory - Blocking Side ====================== */
8114 /* substitute the first occurrence of '%p' with the process pid in the
8115 * swap file name. */
8116 static void expandVmSwapFilename(void) {
8117 char *p
= strstr(server
.vm_swap_file
,"%p");
8123 new = sdscat(new,server
.vm_swap_file
);
8124 new = sdscatprintf(new,"%ld",(long) getpid());
8125 new = sdscat(new,p
+2);
8126 zfree(server
.vm_swap_file
);
8127 server
.vm_swap_file
= new;
8130 static void vmInit(void) {
8135 if (server
.vm_max_threads
!= 0)
8136 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8138 expandVmSwapFilename();
8139 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8140 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8141 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8143 if (server
.vm_fp
== NULL
) {
8144 redisLog(REDIS_WARNING
,
8145 "Impossible to open the swap file: %s. Exiting.",
8149 server
.vm_fd
= fileno(server
.vm_fp
);
8150 server
.vm_next_page
= 0;
8151 server
.vm_near_pages
= 0;
8152 server
.vm_stats_used_pages
= 0;
8153 server
.vm_stats_swapped_objects
= 0;
8154 server
.vm_stats_swapouts
= 0;
8155 server
.vm_stats_swapins
= 0;
8156 totsize
= server
.vm_pages
*server
.vm_page_size
;
8157 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8158 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8159 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8163 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8165 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8166 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8167 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8168 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8170 /* Initialize threaded I/O (used by Virtual Memory) */
8171 server
.io_newjobs
= listCreate();
8172 server
.io_processing
= listCreate();
8173 server
.io_processed
= listCreate();
8174 server
.io_ready_clients
= listCreate();
8175 pthread_mutex_init(&server
.io_mutex
,NULL
);
8176 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8177 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8178 server
.io_active_threads
= 0;
8179 if (pipe(pipefds
) == -1) {
8180 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8184 server
.io_ready_pipe_read
= pipefds
[0];
8185 server
.io_ready_pipe_write
= pipefds
[1];
8186 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8187 /* LZF requires a lot of stack */
8188 pthread_attr_init(&server
.io_threads_attr
);
8189 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8190 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8191 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8192 /* Listen for events in the threaded I/O pipe */
8193 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8194 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8195 oom("creating file event");
8198 /* Mark the page as used */
8199 static void vmMarkPageUsed(off_t page
) {
8200 off_t byte
= page
/8;
8202 redisAssert(vmFreePage(page
) == 1);
8203 server
.vm_bitmap
[byte
] |= 1<<bit
;
8206 /* Mark N contiguous pages as used, with 'page' being the first. */
8207 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8210 for (j
= 0; j
< count
; j
++)
8211 vmMarkPageUsed(page
+j
);
8212 server
.vm_stats_used_pages
+= count
;
8213 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8214 (long long)count
, (long long)page
);
8217 /* Mark the page as free */
8218 static void vmMarkPageFree(off_t page
) {
8219 off_t byte
= page
/8;
8221 redisAssert(vmFreePage(page
) == 0);
8222 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8225 /* Mark N contiguous pages as free, with 'page' being the first. */
8226 static void vmMarkPagesFree(off_t page
, off_t count
) {
8229 for (j
= 0; j
< count
; j
++)
8230 vmMarkPageFree(page
+j
);
8231 server
.vm_stats_used_pages
-= count
;
8232 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8233 (long long)count
, (long long)page
);
8236 /* Test if the page is free */
8237 static int vmFreePage(off_t page
) {
8238 off_t byte
= page
/8;
8240 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8243 /* Find N contiguous free pages storing the first page of the cluster in *first.
8244 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8245 * REDIS_ERR is returned.
8247 * This function uses a simple algorithm: we try to allocate
8248 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8249 * again from the start of the swap file searching for free spaces.
8251 * If it looks pretty clear that there are no free pages near our offset
8252 * we try to find less populated places doing a forward jump of
8253 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8254 * without hurry, and then we jump again and so forth...
8256 * This function can be improved using a free list to avoid to guess
8257 * too much, since we could collect data about freed pages.
8259 * note: I implemented this function just after watching an episode of
8260 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8262 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8263 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8265 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8266 server
.vm_near_pages
= 0;
8267 server
.vm_next_page
= 0;
8269 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8270 base
= server
.vm_next_page
;
8272 while(offset
< server
.vm_pages
) {
8273 off_t
this = base
+offset
;
8275 /* If we overflow, restart from page zero */
8276 if (this >= server
.vm_pages
) {
8277 this -= server
.vm_pages
;
8279 /* Just overflowed, what we found on tail is no longer
8280 * interesting, as it's no longer contiguous. */
8284 if (vmFreePage(this)) {
8285 /* This is a free page */
8287 /* Already got N free pages? Return to the caller, with success */
8289 *first
= this-(n
-1);
8290 server
.vm_next_page
= this+1;
8291 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8295 /* The current one is not a free page */
8299 /* Fast-forward if the current page is not free and we already
8300 * searched enough near this place. */
8302 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8303 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8305 /* Note that even if we rewind after the jump, we are don't need
8306 * to make sure numfree is set to zero as we only jump *if* it
8307 * is set to zero. */
8309 /* Otherwise just check the next page */
8316 /* Write the specified object at the specified page of the swap file */
8317 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8318 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8319 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8320 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8321 redisLog(REDIS_WARNING
,
8322 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8326 rdbSaveObject(server
.vm_fp
,o
);
8327 fflush(server
.vm_fp
);
8328 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8332 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8333 * needed to later retrieve the object into the key object.
8334 * If we can't find enough contiguous empty pages to swap the object on disk
8335 * REDIS_ERR is returned. */
8336 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8337 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8340 assert(key
->storage
== REDIS_VM_MEMORY
);
8341 assert(key
->refcount
== 1);
8342 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8343 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8344 key
->vm
.page
= page
;
8345 key
->vm
.usedpages
= pages
;
8346 key
->storage
= REDIS_VM_SWAPPED
;
8347 key
->vtype
= val
->type
;
8348 decrRefCount(val
); /* Deallocate the object from memory. */
8349 vmMarkPagesUsed(page
,pages
);
8350 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8351 (unsigned char*) key
->ptr
,
8352 (unsigned long long) page
, (unsigned long long) pages
);
8353 server
.vm_stats_swapped_objects
++;
8354 server
.vm_stats_swapouts
++;
8358 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8361 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8362 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8363 redisLog(REDIS_WARNING
,
8364 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8368 o
= rdbLoadObject(type
,server
.vm_fp
);
8370 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8373 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8377 /* Load the value object relative to the 'key' object from swap to memory.
8378 * The newly allocated object is returned.
8380 * If preview is true the unserialized object is returned to the caller but
8381 * no changes are made to the key object, nor the pages are marked as freed */
8382 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8385 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8386 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8388 key
->storage
= REDIS_VM_MEMORY
;
8389 key
->vm
.atime
= server
.unixtime
;
8390 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8391 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8392 (unsigned char*) key
->ptr
);
8393 server
.vm_stats_swapped_objects
--;
8395 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8396 (unsigned char*) key
->ptr
);
8398 server
.vm_stats_swapins
++;
8402 /* Plain object loading, from swap to memory */
8403 static robj
*vmLoadObject(robj
*key
) {
8404 /* If we are loading the object in background, stop it, we
8405 * need to load this object synchronously ASAP. */
8406 if (key
->storage
== REDIS_VM_LOADING
)
8407 vmCancelThreadedIOJob(key
);
8408 return vmGenericLoadObject(key
,0);
8411 /* Just load the value on disk, without to modify the key.
8412 * This is useful when we want to perform some operation on the value
8413 * without to really bring it from swap to memory, like while saving the
8414 * dataset or rewriting the append only log. */
8415 static robj
*vmPreviewObject(robj
*key
) {
8416 return vmGenericLoadObject(key
,1);
8419 /* How a good candidate is this object for swapping?
8420 * The better candidate it is, the greater the returned value.
8422 * Currently we try to perform a fast estimation of the object size in
8423 * memory, and combine it with aging informations.
8425 * Basically swappability = idle-time * log(estimated size)
8427 * Bigger objects are preferred over smaller objects, but not
8428 * proportionally, this is why we use the logarithm. This algorithm is
8429 * just a first try and will probably be tuned later. */
8430 static double computeObjectSwappability(robj
*o
) {
8431 time_t age
= server
.unixtime
- o
->vm
.atime
;
8435 struct dictEntry
*de
;
8438 if (age
<= 0) return 0;
8441 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8444 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8449 listNode
*ln
= listFirst(l
);
8451 asize
= sizeof(list
);
8453 robj
*ele
= ln
->value
;
8456 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8457 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8459 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8464 z
= (o
->type
== REDIS_ZSET
);
8465 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8467 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8468 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8473 de
= dictGetRandomKey(d
);
8474 ele
= dictGetEntryKey(de
);
8475 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8476 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8478 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8479 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8483 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8484 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8485 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8486 unsigned int klen
, vlen
;
8487 unsigned char *key
, *val
;
8489 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8493 asize
= len
*(klen
+vlen
+3);
8494 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8496 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8501 de
= dictGetRandomKey(d
);
8502 ele
= dictGetEntryKey(de
);
8503 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8504 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8506 ele
= dictGetEntryVal(de
);
8507 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8508 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8510 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8515 return (double)age
*log(1+asize
);
8518 /* Try to swap an object that's a good candidate for swapping.
8519 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8520 * to swap any object at all.
8522 * If 'usethreaded' is true, Redis will try to swap the object in background
8523 * using I/O threads. */
8524 static int vmSwapOneObject(int usethreads
) {
8526 struct dictEntry
*best
= NULL
;
8527 double best_swappability
= 0;
8528 redisDb
*best_db
= NULL
;
8531 for (j
= 0; j
< server
.dbnum
; j
++) {
8532 redisDb
*db
= server
.db
+j
;
8533 /* Why maxtries is set to 100?
8534 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8535 * are swappable objects */
8538 if (dictSize(db
->dict
) == 0) continue;
8539 for (i
= 0; i
< 5; i
++) {
8541 double swappability
;
8543 if (maxtries
) maxtries
--;
8544 de
= dictGetRandomKey(db
->dict
);
8545 key
= dictGetEntryKey(de
);
8546 val
= dictGetEntryVal(de
);
8547 /* Only swap objects that are currently in memory.
8549 * Also don't swap shared objects if threaded VM is on, as we
8550 * try to ensure that the main thread does not touch the
8551 * object while the I/O thread is using it, but we can't
8552 * control other keys without adding additional mutex. */
8553 if (key
->storage
!= REDIS_VM_MEMORY
||
8554 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8555 if (maxtries
) i
--; /* don't count this try */
8558 swappability
= computeObjectSwappability(val
);
8559 if (!best
|| swappability
> best_swappability
) {
8561 best_swappability
= swappability
;
8566 if (best
== NULL
) return REDIS_ERR
;
8567 key
= dictGetEntryKey(best
);
8568 val
= dictGetEntryVal(best
);
8570 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8571 key
->ptr
, best_swappability
);
8573 /* Unshare the key if needed */
8574 if (key
->refcount
> 1) {
8575 robj
*newkey
= dupStringObject(key
);
8577 key
= dictGetEntryKey(best
) = newkey
;
8581 vmSwapObjectThreaded(key
,val
,best_db
);
8584 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8585 dictGetEntryVal(best
) = NULL
;
/* Swap one object out in the calling thread. Returns what
 * vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads. Returns what
 * vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8601 /* Return true if it's safe to swap out objects in a given moment.
8602 * Basically we don't want to swap objects out while there is a BGSAVE
8603 * or a BGAEOREWRITE running in backgroud. */
8604 static int vmCanSwapOut(void) {
8605 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8608 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8609 * and was deleted. Otherwise 0 is returned. */
8610 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8614 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8615 foundkey
= dictGetEntryKey(de
);
8616 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8621 /* =================== Virtual Memory - Threaded I/O ======================= */
8623 static void freeIOJob(iojob
*j
) {
8624 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8625 j
->type
== REDIS_IOJOB_DO_SWAP
||
8626 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8627 decrRefCount(j
->val
);
8628 decrRefCount(j
->key
);
8632 /* Every time a thread finished a Job, it writes a byte into the write side
8633 * of an unix pipe in order to "awake" the main thread, and this function
8635 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8639 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8641 REDIS_NOTUSED(mask
);
8642 REDIS_NOTUSED(privdata
);
8644 /* For every byte we read in the read side of the pipe, there is one
8645 * I/O job completed to process. */
8646 while((retval
= read(fd
,buf
,1)) == 1) {
8650 struct dictEntry
*de
;
8652 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8654 /* Get the processed element (the oldest one) */
8656 assert(listLength(server
.io_processed
) != 0);
8657 if (toprocess
== -1) {
8658 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8659 if (toprocess
<= 0) toprocess
= 1;
8661 ln
= listFirst(server
.io_processed
);
8663 listDelNode(server
.io_processed
,ln
);
8665 /* If this job is marked as canceled, just ignore it */
8670 /* Post process it in the main thread, as there are things we
8671 * can do just here to avoid race conditions and/or invasive locks */
8672 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8673 de
= dictFind(j
->db
->dict
,j
->key
);
8675 key
= dictGetEntryKey(de
);
8676 if (j
->type
== REDIS_IOJOB_LOAD
) {
8679 /* Key loaded, bring it at home */
8680 key
->storage
= REDIS_VM_MEMORY
;
8681 key
->vm
.atime
= server
.unixtime
;
8682 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8683 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8684 (unsigned char*) key
->ptr
);
8685 server
.vm_stats_swapped_objects
--;
8686 server
.vm_stats_swapins
++;
8687 dictGetEntryVal(de
) = j
->val
;
8688 incrRefCount(j
->val
);
8691 /* Handle clients waiting for this key to be loaded. */
8692 handleClientsBlockedOnSwappedKey(db
,key
);
8693 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8694 /* Now we know the amount of pages required to swap this object.
8695 * Let's find some space for it, and queue this task again
8696 * rebranded as REDIS_IOJOB_DO_SWAP. */
8697 if (!vmCanSwapOut() ||
8698 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8700 /* Ooops... no space or we can't swap as there is
8701 * a fork()ed Redis trying to save stuff on disk. */
8703 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8705 /* Note that we need to mark this pages as used now,
8706 * if the job will be canceled, we'll mark them as freed
8708 vmMarkPagesUsed(j
->page
,j
->pages
);
8709 j
->type
= REDIS_IOJOB_DO_SWAP
;
8714 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8717 /* Key swapped. We can finally free some memory. */
8718 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8719 printf("key->storage: %d\n",key
->storage
);
8720 printf("key->name: %s\n",(char*)key
->ptr
);
8721 printf("key->refcount: %d\n",key
->refcount
);
8722 printf("val: %p\n",(void*)j
->val
);
8723 printf("val->type: %d\n",j
->val
->type
);
8724 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8726 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8727 val
= dictGetEntryVal(de
);
8728 key
->vm
.page
= j
->page
;
8729 key
->vm
.usedpages
= j
->pages
;
8730 key
->storage
= REDIS_VM_SWAPPED
;
8731 key
->vtype
= j
->val
->type
;
8732 decrRefCount(val
); /* Deallocate the object from memory. */
8733 dictGetEntryVal(de
) = NULL
;
8734 redisLog(REDIS_DEBUG
,
8735 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8736 (unsigned char*) key
->ptr
,
8737 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8738 server
.vm_stats_swapped_objects
++;
8739 server
.vm_stats_swapouts
++;
8741 /* Put a few more swap requests in queue if we are still
8743 if (trytoswap
&& vmCanSwapOut() &&
8744 zmalloc_used_memory() > server
.vm_max_memory
)
8749 more
= listLength(server
.io_newjobs
) <
8750 (unsigned) server
.vm_max_threads
;
8752 /* Don't waste CPU time if swappable objects are rare. */
8753 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8761 if (processed
== toprocess
) return;
8763 if (retval
< 0 && errno
!= EAGAIN
) {
8764 redisLog(REDIS_WARNING
,
8765 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8770 static void lockThreadedIO(void) {
8771 pthread_mutex_lock(&server
.io_mutex
);
8774 static void unlockThreadedIO(void) {
8775 pthread_mutex_unlock(&server
.io_mutex
);
8778 /* Remove the specified object from the threaded I/O queue if still not
8779 * processed, otherwise make sure to flag it as canceled. */
/* NOTE(review): several lines of this function are not present in this
 * listing (local declarations, the switch on the list index, locking), so
 * the summary below covers only what the visible code shows.
 *
 * - 'o' must be in REDIS_VM_LOADING or REDIS_VM_SWAPPING state (asserted).
 * - The three queues are scanned in index order: 0 = io_newjobs,
 *   1 = io_processing, 2 = io_processed. Jobs already flagged as canceled
 *   are skipped; the first job whose key compares equal to 'o' is handled.
 * - For a DO_SWAP job found in any list but io_processing, the swap pages
 *   reserved for it are marked free again.
 * - The object storage state is rolled back: LOADING becomes SWAPPED,
 *   SWAPPING becomes MEMORY.
 * - Falling out of the search without a match hits assert(1 != 1). */
8780 static void vmCancelThreadedIOJob(robj
*o
) {
8782 server
.io_newjobs
, /* 0 */
8783 server
.io_processing
, /* 1 */
8784 server
.io_processed
/* 2 */
8788 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8791 /* Search for a matching key in one of the queues */
8792 for (i
= 0; i
< 3; i
++) {
8796 listRewind(lists
[i
],&li
);
8797 while ((ln
= listNext(&li
)) != NULL
) {
8798 iojob
*job
= ln
->value
;
8800 if (job
->canceled
) continue; /* Skip this, already canceled. */
8801 if (compareStringObjects(job
->key
,o
) == 0) {
8802 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8803 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8804 /* Mark the pages as free since the swap didn't happen
8805 * or happened but is now discarded. */
8806 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8807 vmMarkPagesFree(job
->page
,job
->pages
);
8808 /* Cancel the job. It depends on the list the job is
8811 case 0: /* io_newjobs */
8812 /* If the job was yet not processed the best thing to do
8813 * is to remove it from the queue at all */
8815 listDelNode(lists
[i
],ln
);
8817 case 1: /* io_processing */
8818 /* Oh Shi- the thread is messing with the Job:
8820 * Probably it's accessing the object if this is a
8821 * PREPARE_SWAP or DO_SWAP job.
8822 * If it's a LOAD job it may be reading from disk and
8823 * if we don't wait for the job to terminate before to
8824 * cancel it, maybe in a few microseconds data can be
8825 * corrupted in this pages. So the short story is:
8827 * Better to wait for the job to move into the
8828 * next queue (processed)... */
8830 /* We try again and again until the job is completed. */
8832 /* But let's wait some time for the I/O thread
8833 * to finish with this job. After all this condition
8834 * should be very rare. */
8837 case 2: /* io_processed */
8838 /* The job was already processed, that's easy...
8839 * just mark it as canceled so that we'll ignore it
8840 * when processing completed jobs. */
8844 /* Finally we have to adjust the storage type of the object
8845 * in order to "UNDO" the operation. */
8846 if (o
->storage
== REDIS_VM_LOADING
)
8847 o
->storage
= REDIS_VM_SWAPPED
;
8848 else if (o
->storage
== REDIS_VM_SWAPPING
)
8849 o
->storage
= REDIS_VM_MEMORY
;
8856 assert(1 != 1); /* We should never reach this */
8859 static void *IOThreadEntryPoint(void *arg
) {
8864 pthread_detach(pthread_self());
8866 /* Get a new job to process */
8868 if (listLength(server
.io_newjobs
) == 0) {
8869 /* No new jobs in queue, exit. */
8870 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8871 (long) pthread_self());
8872 server
.io_active_threads
--;
8876 ln
= listFirst(server
.io_newjobs
);
8878 listDelNode(server
.io_newjobs
,ln
);
8879 /* Add the job in the processing queue */
8880 j
->thread
= pthread_self();
8881 listAddNodeTail(server
.io_processing
,j
);
8882 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8884 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8885 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8887 /* Process the Job */
8888 if (j
->type
== REDIS_IOJOB_LOAD
) {
8889 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8890 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8891 FILE *fp
= fopen("/dev/null","w+");
8892 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8894 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8895 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8899 /* Done: insert the job into the processed queue */
8900 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8901 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8903 listDelNode(server
.io_processing
,ln
);
8904 listAddNodeTail(server
.io_processed
,j
);
8907 /* Signal the main thread there is new stuff to process */
8908 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8910 return NULL
; /* never reached */
8913 static void spawnIOThread(void) {
8915 sigset_t mask
, omask
;
8919 sigaddset(&mask
,SIGCHLD
);
8920 sigaddset(&mask
,SIGHUP
);
8921 sigaddset(&mask
,SIGPIPE
);
8922 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8923 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8924 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8928 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8929 server
.io_active_threads
++;
8932 /* We need to wait for the last thread to exit before we are able to
8933 * fork() in order to BGSAVE or BGREWRITEAOF. */
8934 static void waitEmptyIOJobsQueue(void) {
8936 int io_processed_len
;
8939 if (listLength(server
.io_newjobs
) == 0 &&
8940 listLength(server
.io_processing
) == 0 &&
8941 server
.io_active_threads
== 0)
8946 /* While waiting for empty jobs queue condition we post-process some
8947 * finshed job, as I/O threads may be hanging trying to write against
8948 * the io_ready_pipe_write FD but there are so much pending jobs that
8950 io_processed_len
= listLength(server
.io_processed
);
8952 if (io_processed_len
) {
8953 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8954 usleep(1000); /* 1 millisecond */
8956 usleep(10000); /* 10 milliseconds */
8961 static void vmReopenSwapFile(void) {
8962 /* Note: we don't close the old one as we are in the child process
8963 * and don't want to mess at all with the original file object. */
8964 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8965 if (server
.vm_fp
== NULL
) {
8966 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8967 server
.vm_swap_file
);
8970 server
.vm_fd
= fileno(server
.vm_fp
);
8973 /* This function must be called while with threaded IO locked */
8974 static void queueIOJob(iojob
*j
) {
8975 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8976 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8977 listAddNodeTail(server
.io_newjobs
,j
);
8978 if (server
.io_active_threads
< server
.vm_max_threads
)
8982 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8985 assert(key
->storage
== REDIS_VM_MEMORY
);
8986 assert(key
->refcount
== 1);
8988 j
= zmalloc(sizeof(*j
));
8989 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8991 j
->key
= dupStringObject(key
);
8995 j
->thread
= (pthread_t
) -1;
8996 key
->storage
= REDIS_VM_SWAPPING
;
9004 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9006 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9007 * If there is not already a job loading the key, it is created.
9008 * The key is added to the io_keys list in the client structure, and also
9009 * in the hash table mapping swapped keys to waiting clients, that is,
9010 * server.io_waited_keys. */
/* NOTE(review): a number of lines of this function (local declarations,
 * some branches and return statements, the final job queueing) are not
 * present in this listing; the outline below is limited to visible code.
 *
 * 1. Look 'key' up in c->db->dict; a missing key returns 0 at once.
 * 2. Inspect the storage state of the stored key object: a key that is
 *    being swapped out has its swap job canceled.
 * 3. Otherwise append 'key' to c->io_keys and add the client to the list
 *    of waiting clients kept in c->db->io_keys, creating the entry when
 *    there is none.
 * 4. If the key is in REDIS_VM_SWAPPED state, flag it REDIS_VM_LOADING and
 *    build a REDIS_IOJOB_LOAD job for the page recorded in o->vm.page. */
9011 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9012 struct dictEntry
*de
;
9016 /* If the key does not exist or is already in RAM we don't need to
9017 * block the client at all. */
9018 de
= dictFind(c
->db
->dict
,key
);
9019 if (de
== NULL
) return 0;
9020 o
= dictGetEntryKey(de
);
9021 if (o
->storage
== REDIS_VM_MEMORY
) {
9023 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9024 /* We were swapping the key, undo it! */
9025 vmCancelThreadedIOJob(o
);
9029 /* OK: the key is either swapped, or being loaded just now. */
9031 /* Add the key to the list of keys this client is waiting for.
9032 * This maps clients to keys they are waiting for. */
9033 listAddNodeTail(c
->io_keys
,key
);
9036 /* Add the client to the swapped keys => clients waiting map. */
9037 de
= dictFind(c
->db
->io_keys
,key
);
9041 /* For every key we take a list of clients blocked for it */
9043 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9045 assert(retval
== DICT_OK
);
9047 l
= dictGetEntryVal(de
);
9049 listAddNodeTail(l
,c
);
9051 /* Are we already loading the key from disk? If not create a job */
9052 if (o
->storage
== REDIS_VM_SWAPPED
) {
9055 o
->storage
= REDIS_VM_LOADING
;
9056 j
= zmalloc(sizeof(*j
));
9057 j
->type
= REDIS_IOJOB_LOAD
;
9059 j
->key
= dupStringObject(key
);
9060 j
->key
->vtype
= o
->vtype
;
9061 j
->page
= o
->vm
.page
;
9064 j
->thread
= (pthread_t
) -1;
9072 /* Preload keys needed for the ZUNION and ZINTER commands. */
9073 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9075 num
= atoi(c
->argv
[2]->ptr
);
9076 for (i
= 0; i
< num
; i
++) {
9077 waitForSwappedKey(c
,c
->argv
[3+i
]);
9081 /* Is this client attempting to run a command against swapped keys?
9082 * If so, block it ASAP, load the keys in background, then resume it.
9084 * The important idea about this function is that it can fail! If keys will
9085 * still be swapped when the client is resumed, this key lookups will
9086 * just block loading keys from disk. In practical terms this should only
9087 * happen with SORT BY command or if there is a bug in this function.
9089 * Return 1 if the client is marked as blocked, 0 if the client can
9090 * continue as the keys it is going to access appear to be in memory. */
9091 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9094 if (cmd
->vm_preload_proc
!= NULL
) {
9095 cmd
->vm_preload_proc(c
);
9097 if (cmd
->vm_firstkey
== 0) return 0;
9098 last
= cmd
->vm_lastkey
;
9099 if (last
< 0) last
= c
->argc
+last
;
9100 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9101 waitForSwappedKey(c
,c
->argv
[j
]);
9104 /* If the client was blocked for at least one key, mark it as blocked. */
9105 if (listLength(c
->io_keys
)) {
9106 c
->flags
|= REDIS_IO_WAIT
;
9107 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9108 server
.vm_blocked_clients
++;
9115 /* Remove the 'key' from the list of blocked keys for a given client.
9117 * The function returns 1 when there are no longer blocking keys after
9118 * the current one was removed (and the client can be unblocked). */
/* NOTE(review): local declarations, the break out of the search loop, the
 * assertions and the removal of the client node from the per-key list are
 * not present in this listing.
 *
 * Visible steps: the first element of c->io_keys that compares equal to
 * 'key' is deleted; then the list of waiting clients stored for 'key' in
 * c->db->io_keys is fetched and, when it is empty, the dictionary entry is
 * deleted. The result is non-zero when c->io_keys has become empty. */
9119 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9123 struct dictEntry
*de
;
9125 /* Remove the key from the list of keys this client is waiting for. */
9126 listRewind(c
->io_keys
,&li
);
9127 while ((ln
= listNext(&li
)) != NULL
) {
9128 if (compareStringObjects(ln
->value
,key
) == 0) {
9129 listDelNode(c
->io_keys
,ln
);
9135 /* Remove the client from the key => waiting clients map. */
9136 de
= dictFind(c
->db
->io_keys
,key
);
9138 l
= dictGetEntryVal(de
);
9139 ln
= listSearchKey(l
,c
);
9142 if (listLength(l
) == 0)
9143 dictDelete(c
->db
->io_keys
,key
);
9145 return listLength(c
->io_keys
) == 0;
/* Called for a 'key' of 'db' that clients may be waiting for.
 *
 * The list of clients waiting for 'key' is read from db->io_keys and its
 * length is sampled once up front (see the note below on why). Every
 * client for which dontWaitForSwappedKey() reports that no other key is
 * pending is appended to server.io_ready_clients.
 *
 * NOTE(review): the local declarations, the check on the dictFind()
 * result and the loop header are not present in this listing. */
9148 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9149 struct dictEntry
*de
;
9154 de
= dictFind(db
->io_keys
,key
);
9157 l
= dictGetEntryVal(de
);
9158 len
= listLength(l
);
9159 /* Note: we can't use something like while(listLength(l)) as the list
9160 * can be freed by the calling function when we remove the last element. */
9163 redisClient
*c
= ln
->value
;
9165 if (dontWaitForSwappedKey(c
,key
)) {
9166 /* Put the client in the list of clients ready to go as we
9167 * loaded all the keys about it. */
9168 listAddNodeTail(server
.io_ready_clients
,c
);
9173 /* =========================== Remote Configuration ========================= */
9175 static void configSetCommand(redisClient
*c
) {
9176 robj
*o
= getDecodedObject(c
->argv
[3]);
9177 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9178 zfree(server
.dbfilename
);
9179 server
.dbfilename
= zstrdup(o
->ptr
);
9180 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9181 zfree(server
.requirepass
);
9182 server
.requirepass
= zstrdup(o
->ptr
);
9183 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9184 zfree(server
.masterauth
);
9185 server
.masterauth
= zstrdup(o
->ptr
);
9186 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9187 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9189 addReplySds(c
,sdscatprintf(sdsempty(),
9190 "-ERR not supported CONFIG parameter %s\r\n",
9191 (char*)c
->argv
[2]->ptr
));
9196 addReply(c
,shared
.ok
);
/* CONFIG GET <pattern>.
 *
 * argv[2] is matched with stringmatch() against each supported parameter
 * name (dbfilename, requirepass, masterauth, maxmemory); every match adds
 * two bulk replies, the name and the current value.
 *
 * 'lenobj' is created with a NULL payload and its text, "*<count>\r\n"
 * with count = matches*2, is filled in only on the last visible line,
 * after the matches have been emitted.
 * NOTE(review): the lines that queue 'lenobj' on the client, count the
 * matches and release 'o' are not present in this listing, so statement
 * order matters here and must not be changed without the full source.
 * NOTE(review): the maxmemory value is formatted with a trailing "\n". */
9199 static void configGetCommand(redisClient
*c
) {
9200 robj
*o
= getDecodedObject(c
->argv
[2]);
9201 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9202 char *pattern
= o
->ptr
;
9206 decrRefCount(lenobj
);
9208 if (stringmatch(pattern
,"dbfilename",0)) {
9209 addReplyBulkCString(c
,"dbfilename");
9210 addReplyBulkCString(c
,server
.dbfilename
);
9213 if (stringmatch(pattern
,"requirepass",0)) {
9214 addReplyBulkCString(c
,"requirepass");
9215 addReplyBulkCString(c
,server
.requirepass
);
9218 if (stringmatch(pattern
,"masterauth",0)) {
9219 addReplyBulkCString(c
,"masterauth");
9220 addReplyBulkCString(c
,server
.masterauth
);
9223 if (stringmatch(pattern
,"maxmemory",0)) {
9226 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9227 addReplyBulkCString(c
,"maxmemory");
9228 addReplyBulkCString(c
,buf
);
9232 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9235 static void configCommand(redisClient
*c
) {
9236 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9237 if (c
->argc
!= 4) goto badarity
;
9238 configSetCommand(c
);
9239 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9240 if (c
->argc
!= 3) goto badarity
;
9241 configGetCommand(c
);
9242 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9243 if (c
->argc
!= 2) goto badarity
;
9244 server
.stat_numcommands
= 0;
9245 server
.stat_numconnections
= 0;
9246 server
.stat_expiredkeys
= 0;
9247 server
.stat_starttime
= time(NULL
);
9248 addReply(c
,shared
.ok
);
9250 addReplySds(c
,sdscatprintf(sdsempty(),
9251 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9256 addReplySds(c
,sdscatprintf(sdsempty(),
9257 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9258 (char*) c
->argv
[1]->ptr
));
9261 /* =========================== Pubsub implementation ======================== */
9263 /* Subscribe a client to a class. Returns 1 if the operation succeeded, or
9264 * 0 if the client was already subscribed to that class. */
9265 static int pubsubSubscribe(redisClient
*c
, robj
*class) {
9266 struct dictEntry
*de
;
9267 list
*clients
= NULL
;
9270 /* Add the class to the client -> classes hash table */
9271 if (dictAdd(c
->pubsub_classes
,class,NULL
) == DICT_OK
) {
9273 incrRefCount(class);
9274 /* Add the client to the class -> list of clients hash table */
9275 de
= dictFind(server
.pubsub_classes
,class);
9277 clients
= listCreate();
9278 dictAdd(server
.pubsub_classes
,class,clients
);
9279 incrRefCount(class);
9281 clients
= dictGetEntryVal(de
);
9283 listAddNodeTail(clients
,c
);
9285 /* Notify the client */
9286 addReply(c
,shared
.mbulk3
);
9287 addReply(c
,shared
.subscribebulk
);
9288 addReplyBulk(c
,class);
9289 addReplyLong(c
,dictSize(c
->pubsub_classes
));
9293 /* Unsubscribe a client from a class. Returns 1 if the operation succeeded, or
9294 * 0 if the client was not subscribed to the specified class. */
9295 static int pubsubUnsubscribe(redisClient
*c
, robj
*class, int notify
) {
9296 struct dictEntry
*de
;
9301 /* Remove the class from the client -> classes hash table */
9302 if (dictDelete(c
->pubsub_classes
,class) == DICT_OK
) {
9304 /* Remove the client from the class -> clients list hash table */
9305 de
= dictFind(server
.pubsub_classes
,class);
9307 clients
= dictGetEntryVal(de
);
9308 ln
= listSearchKey(clients
,c
);
9310 listDelNode(clients
,ln
);
9312 /* Notify the client */
9314 addReply(c
,shared
.mbulk3
);
9315 addReply(c
,shared
.unsubscribebulk
);
9316 addReplyBulk(c
,class);
9317 addReplyLong(c
,dictSize(c
->pubsub_classes
));
9322 /* Unsubscribe from all the classes. Return the number of classes the
9323 * client was subscribed to. */
9324 static int pubsubUnsubscribeAll(redisClient
*c
, int notify
) {
9325 dictIterator
*di
= dictGetIterator(c
->pubsub_classes
);
9329 while((de
= dictNext(di
)) != NULL
) {
9330 robj
*class = dictGetEntryKey(de
);
9332 count
+= pubsubUnsubscribe(c
,class,notify
);
9334 dictReleaseIterator(di
);
9338 /* Publish a message */
9339 static int pubsubPublishMessage(robj
*class, robj
*message
) {
9341 struct dictEntry
*de
;
9343 de
= dictFind(server
.pubsub_classes
,class);
9345 list
*list
= dictGetEntryVal(de
);
9349 listRewind(list
,&li
);
9350 while ((ln
= listNext(&li
)) != NULL
) {
9351 redisClient
*c
= ln
->value
;
9353 addReply(c
,shared
.mbulk3
);
9354 addReply(c
,shared
.messagebulk
);
9355 addReplyBulk(c
,class);
9356 addReplyBulk(c
,message
);
9363 static void subscribeCommand(redisClient
*c
) {
9366 for (j
= 1; j
< c
->argc
; j
++)
9367 pubsubSubscribe(c
,c
->argv
[j
]);
9370 static void unsubscribeCommand(redisClient
*c
) {
9372 pubsubUnsubscribeAll(c
,1);
9377 for (j
= 1; j
< c
->argc
; j
++)
9378 pubsubUnsubscribe(c
,c
->argv
[j
],1);
9382 static void publishCommand(redisClient
*c
) {
9383 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9384 addReplyLong(c
,receivers
);
9387 /* ================================= Debugging ============================== */
/* DEBUG <subcommand> [<key>].
 *
 * argv[1] is matched case-insensitively against the following names:
 *   segfault      - (body not present in this listing)
 *   reload        - rdbSave() then rdbLoad() of server.dbfilename; any
 *                   failure replies with the generic error
 *   loadaof       - loadAppendOnlyFile() on server.appendfilename
 *   object <key>  - reports addresses and refcounts of key and value plus
 *                   encoding and serialized length, or the swap page and
 *                   page count when the value is not in memory
 *   swapout <key> - requires VM to be enabled and the key to be in memory;
 *                   swaps the value out with vmSwapObjectBlocking()
 * Anything else gets a syntax error reply.
 *
 * NOTE(review): many lines of this function are not present in this
 * listing (local declarations, returns, closing braces).
 * NOTE(review): the syntax error text does not list LOADAOF although the
 * subcommand is handled above. */
9389 static void debugCommand(redisClient
*c
) {
9390 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9392 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9393 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9394 addReply(c
,shared
.err
);
9398 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9399 addReply(c
,shared
.err
);
9402 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9403 addReply(c
,shared
.ok
);
9404 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9406 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9407 addReply(c
,shared
.err
);
9410 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9411 addReply(c
,shared
.ok
);
9412 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9413 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9417 addReply(c
,shared
.nokeyerr
);
9420 key
= dictGetEntryKey(de
);
9421 val
= dictGetEntryVal(de
);
9422 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9423 key
->storage
== REDIS_VM_SWAPPING
)) {
9427 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9428 strenc
= strencoding
[val
->encoding
];
9430 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9433 addReplySds(c
,sdscatprintf(sdsempty(),
9434 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9435 "encoding:%s serializedlength:%lld\r\n",
9436 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9437 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9439 addReplySds(c
,sdscatprintf(sdsempty(),
9440 "+Key at:%p refcount:%d, value swapped at: page %llu "
9441 "using %llu pages\r\n",
9442 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9443 (unsigned long long) key
->vm
.usedpages
));
9445 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9446 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9449 if (!server
.vm_enabled
) {
9450 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9454 addReply(c
,shared
.nokeyerr
);
9457 key
= dictGetEntryKey(de
);
9458 val
= dictGetEntryVal(de
);
9459 /* If the key is shared we want to create a copy */
9460 if (key
->refcount
> 1) {
9461 robj
*newkey
= dupStringObject(key
);
9463 key
= dictGetEntryKey(de
) = newkey
;
9466 if (key
->storage
!= REDIS_VM_MEMORY
) {
9467 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9468 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9469 dictGetEntryVal(de
) = NULL
;
9470 addReply(c
,shared
.ok
);
9472 addReply(c
,shared
.err
);
9475 addReplySds(c
,sdsnew(
9476 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
/* Report a failed assertion.
 *
 * 'estr' is the text of the failed expression, 'file' and 'line' its
 * location; all three are written to the log at warning level. When
 * HAVE_BACKTRACE is defined a further message announces that a SIGSEGV is
 * going to be forced in order to print the stack trace.
 * NOTE(review): the statement that actually triggers the fault is not
 * present in this listing. */
9480 static void _redisAssert(char *estr
, char *file
, int line
) {
9481 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9482 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9483 #ifdef HAVE_BACKTRACE
9484 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9489 /* =================================== Main! ================================ */
/* Read the first line of /proc/sys/vm/overcommit_memory and return it
 * converted with atoi(). Returns -1 when the file can't be opened or no
 * line can be read from it. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,64,procfile) != NULL) value = atoi(line);
    fclose(procfile);
    return value;
}
9506 void linuxOvercommitMemoryWarning(void) {
9507 if (linuxOvercommitMemoryValue() == 0) {
9508 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9511 #endif /* __linux__ */
9513 static void daemonize(void) {
9517 if (fork() != 0) exit(0); /* parent exits */
9518 setsid(); /* create a new session */
9520 /* Every output goes to /dev/null. If Redis is daemonized but
9521 * the 'logfile' is set to 'stdout' in the configuration file
9522 * it will not log at all. */
9523 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9524 dup2(fd
, STDIN_FILENO
);
9525 dup2(fd
, STDOUT_FILENO
);
9526 dup2(fd
, STDERR_FILENO
);
9527 if (fd
> STDERR_FILENO
) close(fd
);
9529 /* Try to write the pid file */
9530 fp
= fopen(server
.pidfile
,"w");
9532 fprintf(fp
,"%d\n",getpid());
9537 static void version() {
9538 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on standard error and exit with
 * status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
/* Server entry point.
 *
 * Visible flow: argv[1] is checked for "-v" / "--version" (version()) and
 * "--help" (usage()), otherwise it is passed to loadServerConfig() after
 * the save parameters are reset. Without a config file a warning is
 * logged. The server daemonizes if configured to do so, logs the startup
 * banner, checks the overcommit setting, loads the dataset from the append
 * only file when server.appendonly is set and from the RDB file otherwise,
 * then installs beforeSleep() as the event loop's before-sleep hook.
 * aeDeleteEventLoop() is the last visible call.
 *
 * NOTE(review): several lines (local declarations, server initialisation,
 * the argc tests, the event loop itself, the return) are not present in
 * this listing. */
9548 int main(int argc
, char **argv
) {
9553 if (strcmp(argv
[1], "-v") == 0 ||
9554 strcmp(argv
[1], "--version") == 0) version();
9555 if (strcmp(argv
[1], "--help") == 0) usage();
9556 resetServerSaveParams();
9557 loadServerConfig(argv
[1]);
9558 } else if ((argc
> 2)) {
9561 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9563 if (server
.daemonize
) daemonize();
9565 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9567 linuxOvercommitMemoryWarning();
9570 if (server
.appendonly
) {
9571 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9572 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9574 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9575 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9577 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9578 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9580 aeDeleteEventLoop(server
.el
);
9584 /* ============================= Backtrace support ========================= */
9586 #ifdef HAVE_BACKTRACE
9587 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the instruction pointer saved in the machine
 * context of 'uc'. The field that holds it differs per platform, so the
 * right one is selected at compile time: FreeBSD, dietlibc, Mac OS X
 * (before and from 10.6, rip or eip variants), Linux on x86 / x86-64, and
 * Linux on IA64.
 * NOTE(review): the inner #if/#else/#endif lines of the first Apple
 * branch and the final fallback branch are not present in this listing. */
9589 static void *getMcontextEip(ucontext_t
*uc
) {
9590 #if defined(__FreeBSD__)
9591 return (void*) uc
->uc_mcontext
.mc_eip
;
9592 #elif defined(__dietlibc__)
9593 return (void*) uc
->uc_mcontext
.eip
;
9594 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9596 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9598 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9600 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9601 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9602 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9604 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9606 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9607 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9608 #elif defined(__ia64__) /* Linux IA64 */
9609 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Signal handler installed with SA_SIGINFO by setupSigSegvAction().
 *
 * 'sig' is the signal number, 'info' is unused and 'secret' is the
 * ucontext_t of the interrupted code. The handler logs a banner with the
 * Redis version and the signal, the output of genRedisInfoString(), and a
 * backtrace of up to 100 frames. Slot 1 of the trace is overwritten with
 * the address returned by getMcontextEip() when that is not NULL. For each
 * frame findFuncName() is tried first so that static functions get a
 * name; otherwise the string from backtrace_symbols() is logged.
 *
 * Memory obtained here is deliberately never freed, since the heap may be
 * corrupted.
 * NOTE(review): some declarations and the final lines of the handler are
 * not present in this listing.
 * NOTE(review): 'offset' is cast to unsigned int but printed with %d. */
9615 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9617 char **messages
= NULL
;
9618 int i
, trace_size
= 0;
9619 unsigned long offset
=0;
9620 ucontext_t
*uc
= (ucontext_t
*) secret
;
9622 REDIS_NOTUSED(info
);
9624 redisLog(REDIS_WARNING
,
9625 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9626 infostring
= genRedisInfoString();
9627 redisLog(REDIS_WARNING
, "%s",infostring
);
9628 /* It's not safe to sdsfree() the returned string under memory
9629 * corruption conditions. Let it leak as we are going to abort */
9631 trace_size
= backtrace(trace
, 100);
9632 /* overwrite sigaction with caller's address */
9633 if (getMcontextEip(uc
) != NULL
) {
9634 trace
[1] = getMcontextEip(uc
);
9636 messages
= backtrace_symbols(trace
, trace_size
);
9638 for (i
=1; i
<trace_size
; ++i
) {
9639 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9641 p
= strchr(messages
[i
],'+');
9642 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9643 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9645 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9648 /* free(messages); Don't call free() with possibly corrupted memory. */
9652 static void setupSigSegvAction(void) {
9653 struct sigaction act
;
9655 sigemptyset (&act
.sa_mask
);
9656 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9657 * is used. Otherwise, sa_handler is used */
9658 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9659 act
.sa_sigaction
= segvHandler
;
9660 sigaction (SIGSEGV
, &act
, NULL
);
9661 sigaction (SIGBUS
, &act
, NULL
);
9662 sigaction (SIGFPE
, &act
, NULL
);
9663 sigaction (SIGILL
, &act
, NULL
);
9664 sigaction (SIGBUS
, &act
, NULL
);
9668 #include "staticsymbols.h"
9669 /* This function tries to convert a pointer into a function name. It's used in
9670 * order to provide a backtrace under segmentation fault that's able to
9671 * display functions declared as static (otherwise the backtrace is useless). */
9672 static char *findFuncName(void *pointer
, unsigned long *offset
){
9674 unsigned long off
, minoff
= 0;
9676 /* Try to match against the Symbol with the smallest offset */
9677 for (i
=0; symsTable
[i
].pointer
; i
++) {
9678 unsigned long lp
= (unsigned long) pointer
;
9680 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9681 off
=lp
-symsTable
[i
].pointer
;
9682 if (ret
< 0 || off
< minoff
) {
9688 if (ret
== -1) return NULL
;
9690 return symsTable
[ret
].name
;
9692 #else /* HAVE_BACKTRACE */
9693 static void setupSigSegvAction(void) {
9695 #endif /* HAVE_BACKTRACE */