2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration: compile-time constants */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout: 60*5 = 300 (presumably seconds - confirm) */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64) /* 1024*64 = 65536 */
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command: 268435456 */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
/* Command flags. The values are distinct bits (1, 2, 4) and are OR-ed together
 * in the command table, e.g. REDIS_CMD_BULK|REDIS_CMD_DENYOOM. */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. To store 32 bit lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specifies the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte, most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0 /* two MSB = 00 */
153 #define REDIS_RDB_14BITLEN 1 /* two MSB = 01 */
154 #define REDIS_RDB_32BITLEN 2 /* two MSB = 10 */
155 #define REDIS_RDB_ENCVAL 3 /* two MSB = 11: specially encoded object */
156 #define REDIS_RDB_LENERR UINT_MAX /* presumably the "could not read a length" marker - confirm against the length-loading code */
158 /* When a length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * according to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
166 /* Virtual memory: values of the object 'storage' field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4) /* 1024*1024*4 = 4194304 */
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1 /* i.e. 1 percent */
/* Client flags. Each value is a distinct bit (1, 2, 4, 8, 16, 32) so several
 * can be OR-ed together in the 'flags' field of redisClient. */
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
222 /* Anti-warning macro... */
223 #define REDIS_NOTUSED(V) ((void) V)
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way:
 * redisAssert(_e) evaluates _e; when it is true the macro expands to a no-op
 * ((void)0). When it is false it calls _redisAssert() with the stringified
 * expression (#_e), __FILE__ and __LINE__, and then calls _exit(1). */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Failure reporter used by redisAssert():
 *   estr - text of the expression that failed
 *   file - source file name of the failing assertion
 *   line - source line number of the failing assertion */
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure: per-object swap bookkeeping, embedded in
 * struct redisObject as its 'vm' member. */
246 struct redisObjectVM
{
247 off_t page
; /* the page at which the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
;
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
330 dict
*pubsub_classes
; /* Classes a client is interested in (SUBSCRIBE) */
338 /* Global server state structure */
343 dict
*sharingpool
; /* Pool used for object sharing */
344 unsigned int sharingpoolsize
;
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of the last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to take this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_classes
; /* Associate classes to list of subscribed clients */
/* Function type shared by the command table callbacks: takes a pointer to a
 * redisClient and returns nothing. struct redisCommand uses it for both its
 * 'proc' and 'vm_preload_proc' members. */
445 typedef void redisCommandProc(redisClient
*c
);
/* One entry of the command table (see cmdTable). */
446 struct redisCommand
{
448 redisCommandProc
*proc
; /* command handler, e.g. getCommand for the "get" entry of cmdTable */
451 /* Use a function to determine which keys need to be loaded
452 * in the background prior to executing this command. Takes precedence
453 * over vm_firstkey and others, ignored when NULL */
454 redisCommandProc
*vm_preload_proc
;
455 /* What keys should be loaded in background when calling this command? */
456 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
457 int vm_lastkey
; /* The last argument that's a key.
 * NOTE(review): cmdTable also uses negative values here (e.g. -1 for
 * "mget"); presumably counted from the end of the arguments - confirm. */
458 int vm_keystep
; /* The step between first and last key */
461 struct redisFunctionSym
{
463 unsigned long pointer
;
466 typedef struct _redisSortObject
{
474 typedef struct _redisSortOperation
{
477 } redisSortOperation
;
479 /* ZSETs use a specialized version of Skiplists */
481 typedef struct zskiplistNode
{
482 struct zskiplistNode
**forward
;
483 struct zskiplistNode
*backward
;
489 typedef struct zskiplist
{
490 struct zskiplistNode
*header
, *tail
;
491 unsigned long length
;
495 typedef struct zset
{
500 /* Our shared "common" objects */
502 struct sharedObjectsStruct
{
503 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
504 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
505 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
506 *outofrangeerr
, *plus
,
507 *select0
, *select1
, *select2
, *select3
, *select4
,
508 *select5
, *select6
, *select7
, *select8
, *select9
,
509 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
;
512 /* Global vars that are actually used as constants. The following double
513 * values are used for double on-disk serialization, and are initialized
514 * at runtime to avoid strange compiler optimizations. */
516 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
518 /* VM threaded I/O request message. The REDIS_IOJOB_* values below are the
 * possible contents of the 'type' field of struct iojob. */
519 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
520 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
521 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
522 typedef struct iojob
{
523 int type
; /* Request type, REDIS_IOJOB_* */
524 redisDb
*db
;/* Redis database */
525 robj
*key
; /* This I/O request is about swapping this key */
526 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
527 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
528 off_t page
; /* Swap page where to read/write the object */
529 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
530 int canceled
; /* True if this command was canceled by blocking side of VM */
531 pthread_t thread
; /* ID of the thread processing this entry */
534 /*================================ Prototypes =============================== */
536 static void freeStringObject(robj
*o
);
537 static void freeListObject(robj
*o
);
538 static void freeSetObject(robj
*o
);
539 static void decrRefCount(void *o
);
540 static robj
*createObject(int type
, void *ptr
);
541 static void freeClient(redisClient
*c
);
542 static int rdbLoad(char *filename
);
543 static void addReply(redisClient
*c
, robj
*obj
);
544 static void addReplySds(redisClient
*c
, sds s
);
545 static void incrRefCount(robj
*o
);
546 static int rdbSaveBackground(char *filename
);
547 static robj
*createStringObject(char *ptr
, size_t len
);
548 static robj
*dupStringObject(robj
*o
);
549 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
550 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
551 static int syncWithMaster(void);
552 static robj
*tryObjectSharing(robj
*o
);
553 static int tryObjectEncoding(robj
*o
);
554 static robj
*getDecodedObject(robj
*o
);
555 static int removeExpire(redisDb
*db
, robj
*key
);
556 static int expireIfNeeded(redisDb
*db
, robj
*key
);
557 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
558 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
559 static int deleteKey(redisDb
*db
, robj
*key
);
560 static time_t getExpire(redisDb
*db
, robj
*key
);
561 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
562 static void updateSlavesWaitingBgsave(int bgsaveerr
);
563 static void freeMemoryIfNeeded(void);
564 static int processCommand(redisClient
*c
);
565 static void setupSigSegvAction(void);
566 static void rdbRemoveTempFile(pid_t childpid
);
567 static void aofRemoveTempFile(pid_t childpid
);
568 static size_t stringObjectLen(robj
*o
);
569 static void processInputBuffer(redisClient
*c
);
570 static zskiplist
*zslCreate(void);
571 static void zslFree(zskiplist
*zsl
);
572 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
573 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
574 static void initClientMultiState(redisClient
*c
);
575 static void freeClientMultiState(redisClient
*c
);
576 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
577 static void unblockClientWaitingData(redisClient
*c
);
578 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
579 static void vmInit(void);
580 static void vmMarkPagesFree(off_t page
, off_t count
);
581 static robj
*vmLoadObject(robj
*key
);
582 static robj
*vmPreviewObject(robj
*key
);
583 static int vmSwapOneObjectBlocking(void);
584 static int vmSwapOneObjectThreaded(void);
585 static int vmCanSwapOut(void);
586 static int tryFreeOneObjectFromFreelist(void);
587 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
588 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
589 static void vmCancelThreadedIOJob(robj
*o
);
590 static void lockThreadedIO(void);
591 static void unlockThreadedIO(void);
592 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
593 static void freeIOJob(iojob
*j
);
594 static void queueIOJob(iojob
*j
);
595 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
596 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
597 static void waitEmptyIOJobsQueue(void);
598 static void vmReopenSwapFile(void);
599 static int vmFreePage(off_t page
);
600 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
601 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
602 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
603 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
604 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
605 static struct redisCommand
*lookupCommand(char *name
);
606 static void call(redisClient
*c
, struct redisCommand
*cmd
);
607 static void resetClient(redisClient
*c
);
608 static void convertToRealHash(robj
*o
);
609 static int pubsubUnsubscribeAll(redisClient
*c
, int notify
);
612 static void authCommand(redisClient
*c
);
613 static void pingCommand(redisClient
*c
);
614 static void echoCommand(redisClient
*c
);
615 static void setCommand(redisClient
*c
);
616 static void setnxCommand(redisClient
*c
);
617 static void getCommand(redisClient
*c
);
618 static void delCommand(redisClient
*c
);
619 static void existsCommand(redisClient
*c
);
620 static void incrCommand(redisClient
*c
);
621 static void decrCommand(redisClient
*c
);
622 static void incrbyCommand(redisClient
*c
);
623 static void decrbyCommand(redisClient
*c
);
624 static void selectCommand(redisClient
*c
);
625 static void randomkeyCommand(redisClient
*c
);
626 static void keysCommand(redisClient
*c
);
627 static void dbsizeCommand(redisClient
*c
);
628 static void lastsaveCommand(redisClient
*c
);
629 static void saveCommand(redisClient
*c
);
630 static void bgsaveCommand(redisClient
*c
);
631 static void bgrewriteaofCommand(redisClient
*c
);
632 static void shutdownCommand(redisClient
*c
);
633 static void moveCommand(redisClient
*c
);
634 static void renameCommand(redisClient
*c
);
635 static void renamenxCommand(redisClient
*c
);
636 static void lpushCommand(redisClient
*c
);
637 static void rpushCommand(redisClient
*c
);
638 static void lpopCommand(redisClient
*c
);
639 static void rpopCommand(redisClient
*c
);
640 static void llenCommand(redisClient
*c
);
641 static void lindexCommand(redisClient
*c
);
642 static void lrangeCommand(redisClient
*c
);
643 static void ltrimCommand(redisClient
*c
);
644 static void typeCommand(redisClient
*c
);
645 static void lsetCommand(redisClient
*c
);
646 static void saddCommand(redisClient
*c
);
647 static void sremCommand(redisClient
*c
);
648 static void smoveCommand(redisClient
*c
);
649 static void sismemberCommand(redisClient
*c
);
650 static void scardCommand(redisClient
*c
);
651 static void spopCommand(redisClient
*c
);
652 static void srandmemberCommand(redisClient
*c
);
653 static void sinterCommand(redisClient
*c
);
654 static void sinterstoreCommand(redisClient
*c
);
655 static void sunionCommand(redisClient
*c
);
656 static void sunionstoreCommand(redisClient
*c
);
657 static void sdiffCommand(redisClient
*c
);
658 static void sdiffstoreCommand(redisClient
*c
);
659 static void syncCommand(redisClient
*c
);
660 static void flushdbCommand(redisClient
*c
);
661 static void flushallCommand(redisClient
*c
);
662 static void sortCommand(redisClient
*c
);
663 static void lremCommand(redisClient
*c
);
/* NOTE(review): this handler is spelled with a lowercase 'c' ("...command"),
 * unlike every other *Command prototype in this list. cmdTable refers to it
 * by this exact spelling for the "rpoplpush" entry, so any rename has to
 * change the prototype, the definition and the table together. */
664 static void rpoplpushcommand(redisClient
*c
);
665 static void infoCommand(redisClient
*c
);
666 static void mgetCommand(redisClient
*c
);
667 static void monitorCommand(redisClient
*c
);
668 static void expireCommand(redisClient
*c
);
669 static void expireatCommand(redisClient
*c
);
670 static void getsetCommand(redisClient
*c
);
671 static void ttlCommand(redisClient
*c
);
672 static void slaveofCommand(redisClient
*c
);
673 static void debugCommand(redisClient
*c
);
674 static void msetCommand(redisClient
*c
);
675 static void msetnxCommand(redisClient
*c
);
676 static void zaddCommand(redisClient
*c
);
677 static void zincrbyCommand(redisClient
*c
);
678 static void zrangeCommand(redisClient
*c
);
679 static void zrangebyscoreCommand(redisClient
*c
);
680 static void zcountCommand(redisClient
*c
);
681 static void zrevrangeCommand(redisClient
*c
);
682 static void zcardCommand(redisClient
*c
);
683 static void zremCommand(redisClient
*c
);
684 static void zscoreCommand(redisClient
*c
);
685 static void zremrangebyscoreCommand(redisClient
*c
);
686 static void multiCommand(redisClient
*c
);
687 static void execCommand(redisClient
*c
);
688 static void discardCommand(redisClient
*c
);
689 static void blpopCommand(redisClient
*c
);
690 static void brpopCommand(redisClient
*c
);
691 static void appendCommand(redisClient
*c
);
692 static void substrCommand(redisClient
*c
);
693 static void zrankCommand(redisClient
*c
);
694 static void zrevrankCommand(redisClient
*c
);
695 static void hsetCommand(redisClient
*c
);
696 static void hgetCommand(redisClient
*c
);
697 static void hdelCommand(redisClient
*c
);
698 static void hlenCommand(redisClient
*c
);
699 static void zremrangebyrankCommand(redisClient
*c
);
700 static void zunionCommand(redisClient
*c
);
701 static void zinterCommand(redisClient
*c
);
702 static void hkeysCommand(redisClient
*c
);
703 static void hvalsCommand(redisClient
*c
);
704 static void hgetallCommand(redisClient
*c
);
705 static void hexistsCommand(redisClient
*c
);
706 static void configCommand(redisClient
*c
);
707 static void hincrbyCommand(redisClient
*c
);
708 static void subscribeCommand(redisClient
*c
);
709 static void unsubscribeCommand(redisClient
*c
);
710 static void publishCommand(redisClient
*c
);
712 /*================================= Globals ================================= */
715 static struct redisServer server
; /* server global state */
716 static struct redisCommand cmdTable
[] = {
717 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
718 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
719 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
720 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
721 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
723 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
725 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
726 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
727 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
728 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
729 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
730 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
731 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
732 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
733 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
736 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
738 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
739 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
740 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
742 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
743 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
744 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
748 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
749 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
750 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
751 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
752 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
753 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
755 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
760 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
761 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
768 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
769 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
772 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
773 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
778 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
780 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
782 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
783 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
785 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
789 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
791 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
795 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
796 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
799 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
808 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
814 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"publish",publishCommand
,3,REDIS_CMD_BULK
,NULL
,0,0,0},
817 {NULL
,NULL
,0,0,NULL
,0,0,0}
820 /*============================ Utility functions ============================ */
822 /* Glob-style pattern matching. */
823 static int stringmatchlen(const char *pattern
, int patternLen
,
824 const char *string
, int stringLen
, int nocase
)
829 while (pattern
[1] == '*') {
834 return 1; /* match */
836 if (stringmatchlen(pattern
+1, patternLen
-1,
837 string
, stringLen
, nocase
))
838 return 1; /* match */
842 return 0; /* no match */
846 return 0; /* no match */
856 not = pattern
[0] == '^';
863 if (pattern
[0] == '\\') {
866 if (pattern
[0] == string
[0])
868 } else if (pattern
[0] == ']') {
870 } else if (patternLen
== 0) {
874 } else if (pattern
[1] == '-' && patternLen
>= 3) {
875 int start
= pattern
[0];
876 int end
= pattern
[2];
884 start
= tolower(start
);
890 if (c
>= start
&& c
<= end
)
894 if (pattern
[0] == string
[0])
897 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
907 return 0; /* no match */
913 if (patternLen
>= 2) {
920 if (pattern
[0] != string
[0])
921 return 0; /* no match */
923 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
924 return 0; /* no match */
932 if (stringLen
== 0) {
933 while(*pattern
== '*') {
940 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated strings:
 * the length of both the pattern and the subject string is taken with
 * strlen(), 'nocase' is forwarded untouched.
 *
 * Returns the value returned by stringmatchlen() (1 on match, 0 otherwise). */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern, patternLen, string, stringLen, nocase);
}
949 static void redisLog(int level
, const char *fmt
, ...) {
953 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
957 if (level
>= server
.verbosity
) {
963 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
964 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
965 vfprintf(fp
, fmt
, ap
);
971 if (server
.logfile
) fclose(fp
);
974 /*====================== Hash table type implementation ==================== */
976 /* This is a hash table type that uses the SDS dynamic strings library as
977 * keys and redis objects as values (objects can hold SDS strings,
980 static void dictVanillaFree(void *privdata
, void *val
)
982 DICT_NOTUSED(privdata
);
986 static void dictListDestructor(void *privdata
, void *val
)
988 DICT_NOTUSED(privdata
);
989 listRelease((list
*)val
);
992 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
996 DICT_NOTUSED(privdata
);
998 l1
= sdslen((sds
)key1
);
999 l2
= sdslen((sds
)key2
);
1000 if (l1
!= l2
) return 0;
1001 return memcmp(key1
, key2
, l1
) == 0;
1004 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1006 DICT_NOTUSED(privdata
);
1008 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1012 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1015 const robj
*o1
= key1
, *o2
= key2
;
1016 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1019 static unsigned int dictObjHash(const void *key
) {
1020 const robj
*o
= key
;
1021 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1024 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1027 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1030 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1031 o2
->encoding
== REDIS_ENCODING_INT
&&
1032 o1
->ptr
== o2
->ptr
) return 1;
1034 o1
= getDecodedObject(o1
);
1035 o2
= getDecodedObject(o2
);
1036 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1042 static unsigned int dictEncObjHash(const void *key
) {
1043 robj
*o
= (robj
*) key
;
1045 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1046 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1048 if (o
->encoding
== REDIS_ENCODING_INT
) {
1052 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1053 return dictGenHashFunction((unsigned char*)buf
, len
);
1057 o
= getDecodedObject(o
);
1058 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1065 /* Sets type and expires */
1066 static dictType setDictType
= {
1067 dictEncObjHash
, /* hash function */
1070 dictEncObjKeyCompare
, /* key compare */
1071 dictRedisObjectDestructor
, /* key destructor */
1072 NULL
/* val destructor */
1075 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1076 static dictType zsetDictType
= {
1077 dictEncObjHash
, /* hash function */
1080 dictEncObjKeyCompare
, /* key compare */
1081 dictRedisObjectDestructor
, /* key destructor */
1082 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1086 static dictType dbDictType
= {
1087 dictObjHash
, /* hash function */
1090 dictObjKeyCompare
, /* key compare */
1091 dictRedisObjectDestructor
, /* key destructor */
1092 dictRedisObjectDestructor
/* val destructor */
1096 static dictType keyptrDictType
= {
1097 dictObjHash
, /* hash function */
1100 dictObjKeyCompare
, /* key compare */
1101 dictRedisObjectDestructor
, /* key destructor */
1102 NULL
/* val destructor */
1105 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1106 static dictType hashDictType
= {
1107 dictEncObjHash
, /* hash function */
1110 dictEncObjKeyCompare
, /* key compare */
1111 dictRedisObjectDestructor
, /* key destructor */
1112 dictRedisObjectDestructor
/* val destructor */
1115 /* Keylist hash table type has unencoded redis objects as keys and
1116 * lists as values. It's used for blocking operations (BLPOP) and to
1117 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1118 static dictType keylistDictType
= {
1119 dictObjHash
, /* hash function */
1122 dictObjKeyCompare
, /* key compare */
1123 dictRedisObjectDestructor
, /* key destructor */
1124 dictListDestructor
/* val destructor */
1127 static void version();
1129 /* ========================= Random utility functions ======================= */
1131 /* Redis generally does not try to recover from out of memory conditions
1132 * when allocating objects or strings, it is not clear if it will be possible
1133 * to report this condition to the client since the networking layer itself
1134 * is based on heap allocation for send buffers, so we simply abort.
1135 * At least the code will be simpler to read... */
1136 static void oom(const char *msg
) {
1137 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1142 /* ====================== Redis server networking stuff ===================== */
1143 static void closeTimedoutClients(void) {
1146 time_t now
= time(NULL
);
1149 listRewind(server
.clients
,&li
);
1150 while ((ln
= listNext(&li
)) != NULL
) {
1151 c
= listNodeValue(ln
);
1152 if (server
.maxidletime
&&
1153 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1154 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1155 dictSize(c
->pubsub_classes
) == 0 && /* no timeout for pubsub */
1156 (now
- c
->lastinteraction
> server
.maxidletime
))
1158 redisLog(REDIS_VERBOSE
,"Closing idle client");
1160 } else if (c
->flags
& REDIS_BLOCKED
) {
1161 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1162 addReply(c
,shared
.nullmultibulk
);
1163 unblockClientWaitingData(c
);
1169 static int htNeedsResize(dict
*dict
) {
1170 long long size
, used
;
1172 size
= dictSlots(dict
);
1173 used
= dictSize(dict
);
1174 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1175 (used
*100/size
< REDIS_HT_MINFILL
));
1178 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1179 * we resize the hash table to save memory */
1180 static void tryResizeHashTables(void) {
1183 for (j
= 0; j
< server
.dbnum
; j
++) {
1184 if (htNeedsResize(server
.db
[j
].dict
)) {
1185 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1186 dictResize(server
.db
[j
].dict
);
1187 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1189 if (htNeedsResize(server
.db
[j
].expires
))
1190 dictResize(server
.db
[j
].expires
);
1194 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1195 void backgroundSaveDoneHandler(int statloc
) {
1196 int exitcode
= WEXITSTATUS(statloc
);
1197 int bysignal
= WIFSIGNALED(statloc
);
1199 if (!bysignal
&& exitcode
== 0) {
1200 redisLog(REDIS_NOTICE
,
1201 "Background saving terminated with success");
1203 server
.lastsave
= time(NULL
);
1204 } else if (!bysignal
&& exitcode
!= 0) {
1205 redisLog(REDIS_WARNING
, "Background saving error");
1207 redisLog(REDIS_WARNING
,
1208 "Background saving terminated by signal");
1209 rdbRemoveTempFile(server
.bgsavechildpid
);
1211 server
.bgsavechildpid
= -1;
1212 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1213 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1214 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1217 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1219 void backgroundRewriteDoneHandler(int statloc
) {
1220 int exitcode
= WEXITSTATUS(statloc
);
1221 int bysignal
= WIFSIGNALED(statloc
);
1223 if (!bysignal
&& exitcode
== 0) {
1227 redisLog(REDIS_NOTICE
,
1228 "Background append only file rewriting terminated with success");
1229 /* Now it's time to flush the differences accumulated by the parent */
1230 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1231 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1233 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1236 /* Flush our data... */
1237 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1238 (signed) sdslen(server
.bgrewritebuf
)) {
1239 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1243 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1244 /* Now our work is to rename the temp file into the stable file. And
1245 * switch the file descriptor used by the server for append only. */
1246 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1247 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1251 /* Mission completed... almost */
1252 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1253 if (server
.appendfd
!= -1) {
1254 /* If append only is actually enabled... */
1255 close(server
.appendfd
);
1256 server
.appendfd
= fd
;
1258 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1259 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1261 /* If append only is disabled we just generate a dump in this
1262 * format. Why not? */
1265 } else if (!bysignal
&& exitcode
!= 0) {
1266 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1268 redisLog(REDIS_WARNING
,
1269 "Background append only file rewriting terminated by signal");
1272 sdsfree(server
.bgrewritebuf
);
1273 server
.bgrewritebuf
= sdsempty();
1274 aofRemoveTempFile(server
.bgrewritechildpid
);
1275 server
.bgrewritechildpid
= -1;
1278 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1279 int j
, loops
= server
.cronloops
++;
1280 REDIS_NOTUSED(eventLoop
);
1282 REDIS_NOTUSED(clientData
);
1284 /* We take a cached value of the unix time in the global state because
1285 * with virtual memory and aging there is the need to store the current time
1286 * in objects at every object access, and accuracy is not needed.
1287 * To access a global var is faster than calling time(NULL) */
1288 server
.unixtime
= time(NULL
);
1290 /* Show some info about non-empty databases */
1291 for (j
= 0; j
< server
.dbnum
; j
++) {
1292 long long size
, used
, vkeys
;
1294 size
= dictSlots(server
.db
[j
].dict
);
1295 used
= dictSize(server
.db
[j
].dict
);
1296 vkeys
= dictSize(server
.db
[j
].expires
);
1297 if (!(loops
% 50) && (used
|| vkeys
)) {
1298 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1299 /* dictPrintStats(server.dict); */
1303 /* We don't want to resize the hash tables while a background saving
1304 * is in progress: the saving child is created using fork() that is
1305 * implemented with a copy-on-write semantic in most modern systems, so
1306 * if we resize the HT while there is the saving child at work actually
1307 * a lot of memory movements in the parent will cause a lot of pages
1309 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1311 /* Show information about connected clients */
1312 if (!(loops
% 50)) {
1313 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1314 listLength(server
.clients
)-listLength(server
.slaves
),
1315 listLength(server
.slaves
),
1316 zmalloc_used_memory(),
1317 dictSize(server
.sharingpool
));
1320 /* Close connections of timedout clients */
1321 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1322 closeTimedoutClients();
1324 /* Check if a background saving or AOF rewrite in progress terminated */
1325 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1329 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1330 if (pid
== server
.bgsavechildpid
) {
1331 backgroundSaveDoneHandler(statloc
);
1333 backgroundRewriteDoneHandler(statloc
);
1337 /* If there is not a background saving in progress check if
1338 * we have to save now */
1339 time_t now
= time(NULL
);
1340 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1341 struct saveparam
*sp
= server
.saveparams
+j
;
1343 if (server
.dirty
>= sp
->changes
&&
1344 now
-server
.lastsave
> sp
->seconds
) {
1345 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1346 sp
->changes
, sp
->seconds
);
1347 rdbSaveBackground(server
.dbfilename
);
1353 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1354 * will use few CPU cycles if there are few expiring keys, otherwise
1355 * it will get more aggressive to avoid that too much memory is used by
1356 * keys that can be removed from the keyspace. */
1357 for (j
= 0; j
< server
.dbnum
; j
++) {
1359 redisDb
*db
= server
.db
+j
;
1361 /* Continue to expire if at the end of the cycle more than 25%
1362 * of the keys were expired. */
1364 long num
= dictSize(db
->expires
);
1365 time_t now
= time(NULL
);
1368 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1369 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1374 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1375 t
= (time_t) dictGetEntryVal(de
);
1377 deleteKey(db
,dictGetEntryKey(de
));
1379 server
.stat_expiredkeys
++;
1382 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1385 /* Swap a few keys on disk if we are over the memory limit and VM
1386 * is enabled. Try to free objects from the free list first. */
1387 if (vmCanSwapOut()) {
1388 while (server
.vm_enabled
&& zmalloc_used_memory() >
1389 server
.vm_max_memory
)
1393 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1394 retval
= (server
.vm_max_threads
== 0) ?
1395 vmSwapOneObjectBlocking() :
1396 vmSwapOneObjectThreaded();
1397 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1398 zmalloc_used_memory() >
1399 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1401 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1403 /* Note that when using threaded I/O we free just one object,
1404 * because anyway when the I/O thread in charge to swap this
1405 * object out will finish, the handler of completed jobs
1406 * will try to swap more objects if we are still out of memory. */
1407 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1411 /* Check if we should connect to a MASTER */
1412 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1413 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1414 if (syncWithMaster() == REDIS_OK
) {
1415 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1421 /* This function gets called every time Redis is entering the
1422 * main loop of the event driven library, that is, before to sleep
1423 * for ready file descriptors. */
1424 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1425 REDIS_NOTUSED(eventLoop
);
1427 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1431 listRewind(server
.io_ready_clients
,&li
);
1432 while((ln
= listNext(&li
))) {
1433 redisClient
*c
= ln
->value
;
1434 struct redisCommand
*cmd
;
1436 /* Resume the client. */
1437 listDelNode(server
.io_ready_clients
,ln
);
1438 c
->flags
&= (~REDIS_IO_WAIT
);
1439 server
.vm_blocked_clients
--;
1440 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1441 readQueryFromClient
, c
);
1442 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1443 assert(cmd
!= NULL
);
1446 /* There may be more data to process in the input buffer. */
1447 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1448 processInputBuffer(c
);
1453 static void createSharedObjects(void) {
1454 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1455 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1456 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1457 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1458 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1459 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1460 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1461 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1462 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1463 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1464 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1465 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1466 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1467 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1468 "-ERR no such key\r\n"));
1469 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1470 "-ERR syntax error\r\n"));
1471 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1472 "-ERR source and destination objects are the same\r\n"));
1473 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1474 "-ERR index out of range\r\n"));
1475 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1476 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1477 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1478 shared
.select0
= createStringObject("select 0\r\n",10);
1479 shared
.select1
= createStringObject("select 1\r\n",10);
1480 shared
.select2
= createStringObject("select 2\r\n",10);
1481 shared
.select3
= createStringObject("select 3\r\n",10);
1482 shared
.select4
= createStringObject("select 4\r\n",10);
1483 shared
.select5
= createStringObject("select 5\r\n",10);
1484 shared
.select6
= createStringObject("select 6\r\n",10);
1485 shared
.select7
= createStringObject("select 7\r\n",10);
1486 shared
.select8
= createStringObject("select 8\r\n",10);
1487 shared
.select9
= createStringObject("select 9\r\n",10);
1488 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1489 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1490 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1491 shared
.mbulk3
= createStringObject("*3\r\n",4);
1494 static void appendServerSaveParams(time_t seconds
, int changes
) {
1495 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1496 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1497 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1498 server
.saveparamslen
++;
1501 static void resetServerSaveParams() {
1502 zfree(server
.saveparams
);
1503 server
.saveparams
= NULL
;
1504 server
.saveparamslen
= 0;
1507 static void initServerConfig() {
1508 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1509 server
.port
= REDIS_SERVERPORT
;
1510 server
.verbosity
= REDIS_VERBOSE
;
1511 server
.maxidletime
= REDIS_MAXIDLETIME
;
1512 server
.saveparams
= NULL
;
1513 server
.logfile
= NULL
; /* NULL = log on standard output */
1514 server
.bindaddr
= NULL
;
1515 server
.glueoutputbuf
= 1;
1516 server
.daemonize
= 0;
1517 server
.appendonly
= 0;
1518 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1519 server
.lastfsync
= time(NULL
);
1520 server
.appendfd
= -1;
1521 server
.appendseldb
= -1; /* Make sure the first time will not match */
1522 server
.pidfile
= zstrdup("/var/run/redis.pid");
1523 server
.dbfilename
= zstrdup("dump.rdb");
1524 server
.appendfilename
= zstrdup("appendonly.aof");
1525 server
.requirepass
= NULL
;
1526 server
.shareobjects
= 0;
1527 server
.rdbcompression
= 1;
1528 server
.sharingpoolsize
= 1024;
1529 server
.maxclients
= 0;
1530 server
.blpop_blocked_clients
= 0;
1531 server
.maxmemory
= 0;
1532 server
.vm_enabled
= 0;
1533 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1534 server
.vm_page_size
= 256; /* 256 bytes per page */
1535 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1536 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1537 server
.vm_max_threads
= 4;
1538 server
.vm_blocked_clients
= 0;
1539 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1540 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1542 resetServerSaveParams();
1544 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1545 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1546 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1547 /* Replication related */
1549 server
.masterauth
= NULL
;
1550 server
.masterhost
= NULL
;
1551 server
.masterport
= 6379;
1552 server
.master
= NULL
;
1553 server
.replstate
= REDIS_REPL_NONE
;
1555 /* Double constants initialization */
1557 R_PosInf
= 1.0/R_Zero
;
1558 R_NegInf
= -1.0/R_Zero
;
1559 R_Nan
= R_Zero
/R_Zero
;
1562 static void initServer() {
1565 signal(SIGHUP
, SIG_IGN
);
1566 signal(SIGPIPE
, SIG_IGN
);
1567 setupSigSegvAction();
1569 server
.devnull
= fopen("/dev/null","w");
1570 if (server
.devnull
== NULL
) {
1571 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1574 server
.clients
= listCreate();
1575 server
.slaves
= listCreate();
1576 server
.monitors
= listCreate();
1577 server
.objfreelist
= listCreate();
1578 createSharedObjects();
1579 server
.el
= aeCreateEventLoop();
1580 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1581 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1582 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1583 if (server
.fd
== -1) {
1584 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1587 for (j
= 0; j
< server
.dbnum
; j
++) {
1588 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1589 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1590 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1591 if (server
.vm_enabled
)
1592 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1593 server
.db
[j
].id
= j
;
1595 server
.pubsub_classes
= dictCreate(&keylistDictType
,NULL
);
1596 server
.cronloops
= 0;
1597 server
.bgsavechildpid
= -1;
1598 server
.bgrewritechildpid
= -1;
1599 server
.bgrewritebuf
= sdsempty();
1600 server
.lastsave
= time(NULL
);
1602 server
.stat_numcommands
= 0;
1603 server
.stat_numconnections
= 0;
1604 server
.stat_expiredkeys
= 0;
1605 server
.stat_starttime
= time(NULL
);
1606 server
.unixtime
= time(NULL
);
1607 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1608 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1609 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1611 if (server
.appendonly
) {
1612 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1613 if (server
.appendfd
== -1) {
1614 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1620 if (server
.vm_enabled
) vmInit();
1623 /* Empty the whole database */
1624 static long long emptyDb() {
1626 long long removed
= 0;
1628 for (j
= 0; j
< server
.dbnum
; j
++) {
1629 removed
+= dictSize(server
.db
[j
].dict
);
1630 dictEmpty(server
.db
[j
].dict
);
1631 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes" / "no" string (compared case-insensitively) into an
 * integer: 1 for "yes", 0 for "no", -1 for anything else. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1642 /* I agree, this is a very rudimentary way to load a configuration...
1643 will improve later if the config gets more complex */
1644 static void loadServerConfig(char *filename
) {
1646 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1649 char *errormsg
= "Fatal error, can't open config file '%s'";
1650 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1651 sprintf(errorbuf
, errormsg
, filename
);
1653 if (filename
[0] == '-' && filename
[1] == '\0')
1656 if ((fp
= fopen(filename
,"r")) == NULL
) {
1657 redisLog(REDIS_WARNING
, errorbuf
);
1662 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1668 line
= sdstrim(line
," \t\r\n");
1670 /* Skip comments and blank lines*/
1671 if (line
[0] == '#' || line
[0] == '\0') {
1676 /* Split into arguments */
1677 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1678 sdstolower(argv
[0]);
1680 /* Execute config directives */
1681 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1682 server
.maxidletime
= atoi(argv
[1]);
1683 if (server
.maxidletime
< 0) {
1684 err
= "Invalid timeout value"; goto loaderr
;
1686 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1687 server
.port
= atoi(argv
[1]);
1688 if (server
.port
< 1 || server
.port
> 65535) {
1689 err
= "Invalid port"; goto loaderr
;
1691 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1692 server
.bindaddr
= zstrdup(argv
[1]);
1693 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1694 int seconds
= atoi(argv
[1]);
1695 int changes
= atoi(argv
[2]);
1696 if (seconds
< 1 || changes
< 0) {
1697 err
= "Invalid save parameters"; goto loaderr
;
1699 appendServerSaveParams(seconds
,changes
);
1700 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1701 if (chdir(argv
[1]) == -1) {
1702 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1703 argv
[1], strerror(errno
));
1706 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1707 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1708 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1709 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1710 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1712 err
= "Invalid log level. Must be one of debug, notice, warning";
1715 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1718 server
.logfile
= zstrdup(argv
[1]);
1719 if (!strcasecmp(server
.logfile
,"stdout")) {
1720 zfree(server
.logfile
);
1721 server
.logfile
= NULL
;
1723 if (server
.logfile
) {
1724 /* Test if we are able to open the file. The server will not
1725 * be able to abort just for this problem later... */
1726 logfp
= fopen(server
.logfile
,"a");
1727 if (logfp
== NULL
) {
1728 err
= sdscatprintf(sdsempty(),
1729 "Can't open the log file: %s", strerror(errno
));
1734 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1735 server
.dbnum
= atoi(argv
[1]);
1736 if (server
.dbnum
< 1) {
1737 err
= "Invalid number of databases"; goto loaderr
;
1739 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1740 loadServerConfig(argv
[1]);
1741 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1742 server
.maxclients
= atoi(argv
[1]);
1743 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1744 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1745 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1746 server
.masterhost
= sdsnew(argv
[1]);
1747 server
.masterport
= atoi(argv
[2]);
1748 server
.replstate
= REDIS_REPL_CONNECT
;
1749 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1750 server
.masterauth
= zstrdup(argv
[1]);
1751 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1752 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1753 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1755 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1756 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1757 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1759 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1760 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1761 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1763 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1764 server
.sharingpoolsize
= atoi(argv
[1]);
1765 if (server
.sharingpoolsize
< 1) {
1766 err
= "invalid object sharing pool size"; goto loaderr
;
1768 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1769 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1770 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1772 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1773 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1774 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1776 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1777 if (!strcasecmp(argv
[1],"no")) {
1778 server
.appendfsync
= APPENDFSYNC_NO
;
1779 } else if (!strcasecmp(argv
[1],"always")) {
1780 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1781 } else if (!strcasecmp(argv
[1],"everysec")) {
1782 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1784 err
= "argument must be 'no', 'always' or 'everysec'";
1787 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1788 server
.requirepass
= zstrdup(argv
[1]);
1789 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1790 zfree(server
.pidfile
);
1791 server
.pidfile
= zstrdup(argv
[1]);
1792 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1793 zfree(server
.dbfilename
);
1794 server
.dbfilename
= zstrdup(argv
[1]);
1795 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1796 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1797 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1799 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1800 zfree(server
.vm_swap_file
);
1801 server
.vm_swap_file
= zstrdup(argv
[1]);
1802 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1803 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1804 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1805 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1806 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1807 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1808 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1809 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1810 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1811 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1812 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1813 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1814 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1815 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1817 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1819 for (j
= 0; j
< argc
; j
++)
1824 if (fp
!= stdin
) fclose(fp
);
1828 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1829 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1830 fprintf(stderr
, ">>> '%s'\n", line
);
1831 fprintf(stderr
, "%s\n", err
);
1835 static void freeClientArgv(redisClient
*c
) {
1838 for (j
= 0; j
< c
->argc
; j
++)
1839 decrRefCount(c
->argv
[j
]);
1840 for (j
= 0; j
< c
->mbargc
; j
++)
1841 decrRefCount(c
->mbargv
[j
]);
1846 static void freeClient(redisClient
*c
) {
1849 /* Note that if the client we are freeing is blocked into a blocking
1850 * call, we have to set querybuf to NULL *before* to call
1851 * unblockClientWaitingData() to avoid processInputBuffer() will get
1852 * called. Also it is important to remove the file events after
1853 * this, because this call adds the READABLE event. */
1854 sdsfree(c
->querybuf
);
1856 if (c
->flags
& REDIS_BLOCKED
)
1857 unblockClientWaitingData(c
);
1859 /* Unsubscribe from all the pubsub classes */
1860 pubsubUnsubscribeAll(c
,0);
1861 dictRelease(c
->pubsub_classes
);
1862 /* Obvious cleanup */
1863 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1864 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1865 listRelease(c
->reply
);
1868 /* Remove from the list of clients */
1869 ln
= listSearchKey(server
.clients
,c
);
1870 redisAssert(ln
!= NULL
);
1871 listDelNode(server
.clients
,ln
);
1872 /* Remove from the list of clients waiting for swapped keys */
1873 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1874 ln
= listSearchKey(server
.io_ready_clients
,c
);
1876 listDelNode(server
.io_ready_clients
,ln
);
1877 server
.vm_blocked_clients
--;
1880 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1881 ln
= listFirst(c
->io_keys
);
1882 dontWaitForSwappedKey(c
,ln
->value
);
1884 listRelease(c
->io_keys
);
1885 /* Master/slave cleanup */
1886 if (c
->flags
& REDIS_SLAVE
) {
1887 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1889 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1890 ln
= listSearchKey(l
,c
);
1891 redisAssert(ln
!= NULL
);
1894 if (c
->flags
& REDIS_MASTER
) {
1895 server
.master
= NULL
;
1896 server
.replstate
= REDIS_REPL_CONNECT
;
1898 /* Release memory */
1901 freeClientMultiState(c
);
1905 #define GLUEREPLY_UP_TO (1024)
1906 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1908 char buf
[GLUEREPLY_UP_TO
];
1913 listRewind(c
->reply
,&li
);
1914 while((ln
= listNext(&li
))) {
1918 objlen
= sdslen(o
->ptr
);
1919 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1920 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1922 listDelNode(c
->reply
,ln
);
1924 if (copylen
== 0) return;
1928 /* Now the output buffer is empty, add the new single element */
1929 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1930 listAddNodeHead(c
->reply
,o
);
1933 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1934 redisClient
*c
= privdata
;
1935 int nwritten
= 0, totwritten
= 0, objlen
;
1938 REDIS_NOTUSED(mask
);
1940 /* Use writev() if we have enough buffers to send */
1941 if (!server
.glueoutputbuf
&&
1942 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1943 !(c
->flags
& REDIS_MASTER
))
1945 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1949 while(listLength(c
->reply
)) {
1950 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1951 glueReplyBuffersIfNeeded(c
);
1953 o
= listNodeValue(listFirst(c
->reply
));
1954 objlen
= sdslen(o
->ptr
);
1957 listDelNode(c
->reply
,listFirst(c
->reply
));
1961 if (c
->flags
& REDIS_MASTER
) {
1962 /* Don't reply to a master */
1963 nwritten
= objlen
- c
->sentlen
;
1965 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1966 if (nwritten
<= 0) break;
1968 c
->sentlen
+= nwritten
;
1969 totwritten
+= nwritten
;
1970 /* If we fully sent the object on head go to the next one */
1971 if (c
->sentlen
== objlen
) {
1972 listDelNode(c
->reply
,listFirst(c
->reply
));
1975 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
1976 * bytes, in a single threaded server it's a good idea to serve
1977 * other clients as well, even if a very large request comes from
1978 * super fast link that is always able to accept data (in real world
1979 * scenario think about 'KEYS *' against the loopback interfae) */
1980 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1982 if (nwritten
== -1) {
1983 if (errno
== EAGAIN
) {
1986 redisLog(REDIS_VERBOSE
,
1987 "Error writing to client: %s", strerror(errno
));
1992 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1993 if (listLength(c
->reply
) == 0) {
1995 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1999 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2001 redisClient
*c
= privdata
;
2002 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2004 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2005 int offset
, ion
= 0;
2007 REDIS_NOTUSED(mask
);
2010 while (listLength(c
->reply
)) {
2011 offset
= c
->sentlen
;
2015 /* fill-in the iov[] array */
2016 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2017 o
= listNodeValue(node
);
2018 objlen
= sdslen(o
->ptr
);
2020 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2023 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2024 break; /* no more iovecs */
2026 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2027 iov
[ion
].iov_len
= objlen
- offset
;
2028 willwrite
+= objlen
- offset
;
2029 offset
= 0; /* just for the first item */
2036 /* write all collected blocks at once */
2037 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2038 if (errno
!= EAGAIN
) {
2039 redisLog(REDIS_VERBOSE
,
2040 "Error writing to client: %s", strerror(errno
));
2047 totwritten
+= nwritten
;
2048 offset
= c
->sentlen
;
2050 /* remove written robjs from c->reply */
2051 while (nwritten
&& listLength(c
->reply
)) {
2052 o
= listNodeValue(listFirst(c
->reply
));
2053 objlen
= sdslen(o
->ptr
);
2055 if(nwritten
>= objlen
- offset
) {
2056 listDelNode(c
->reply
, listFirst(c
->reply
));
2057 nwritten
-= objlen
- offset
;
2061 c
->sentlen
+= nwritten
;
2069 c
->lastinteraction
= time(NULL
);
2071 if (listLength(c
->reply
) == 0) {
2073 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2077 static struct redisCommand
*lookupCommand(char *name
) {
2079 while(cmdTable
[j
].name
!= NULL
) {
2080 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2086 /* resetClient prepares the client to process the next command */
2087 static void resetClient(redisClient
*c
) {
2093 /* Call() is the core of Redis execution of a command */
2094 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2097 dirty
= server
.dirty
;
2099 if (server
.appendonly
&& server
.dirty
-dirty
)
2100 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2101 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2102 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2103 if (listLength(server
.monitors
))
2104 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2105 server
.stat_numcommands
++;
2108 /* If this function gets called we already read a whole
2109 * command, argments are in the client argv/argc fields.
2110 * processCommand() execute the command or prepare the
2111 * server for a bulk read from the client.
2113 * If 1 is returned the client is still alive and valid and
2114 * and other operations can be performed by the caller. Otherwise
2115 * if 0 is returned the client was destroied (i.e. after QUIT). */
2116 static int processCommand(redisClient
*c
) {
2117 struct redisCommand
*cmd
;
2119 /* Free some memory if needed (maxmemory setting) */
2120 if (server
.maxmemory
) freeMemoryIfNeeded();
2122 /* Handle the multi bulk command type. This is an alternative protocol
2123 * supported by Redis in order to receive commands that are composed of
2124 * multiple binary-safe "bulk" arguments. The latency of processing is
2125 * a bit higher but this allows things like multi-sets, so if this
2126 * protocol is used only for MSET and similar commands this is a big win. */
2127 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2128 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2129 if (c
->multibulk
<= 0) {
2133 decrRefCount(c
->argv
[c
->argc
-1]);
2137 } else if (c
->multibulk
) {
2138 if (c
->bulklen
== -1) {
2139 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2140 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2144 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2145 decrRefCount(c
->argv
[0]);
2146 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2148 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2153 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2157 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2158 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2162 if (c
->multibulk
== 0) {
2166 /* Here we need to swap the multi-bulk argc/argv with the
2167 * normal argc/argv of the client structure. */
2169 c
->argv
= c
->mbargv
;
2170 c
->mbargv
= auxargv
;
2173 c
->argc
= c
->mbargc
;
2174 c
->mbargc
= auxargc
;
2176 /* We need to set bulklen to something different than -1
2177 * in order for the code below to process the command without
2178 * to try to read the last argument of a bulk command as
2179 * a special argument. */
2181 /* continue below and process the command */
2188 /* -- end of multi bulk commands processing -- */
2190 /* The QUIT command is handled as a special case. Normal command
2191 * procs are unable to close the client connection safely */
2192 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2197 /* Now lookup the command and check ASAP about trivial error conditions
2198 * such wrong arity, bad command name and so forth. */
2199 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2202 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2203 (char*)c
->argv
[0]->ptr
));
2206 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2207 (c
->argc
< -cmd
->arity
)) {
2209 sdscatprintf(sdsempty(),
2210 "-ERR wrong number of arguments for '%s' command\r\n",
2214 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2215 /* This is a bulk command, we have to read the last argument yet. */
2216 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2218 decrRefCount(c
->argv
[c
->argc
-1]);
2219 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2221 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2226 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2227 /* It is possible that the bulk read is already in the
2228 * buffer. Check this condition and handle it accordingly.
2229 * This is just a fast path, alternative to call processInputBuffer().
2230 * It's a good idea since the code is small and this condition
2231 * happens most of the times. */
2232 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2233 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2235 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2237 /* Otherwise return... there is to read the last argument
2238 * from the socket. */
2242 /* Let's try to share objects on the command arguments vector */
2243 if (server
.shareobjects
) {
2245 for(j
= 1; j
< c
->argc
; j
++)
2246 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2248 /* Let's try to encode the bulk object to save space. */
2249 if (cmd
->flags
& REDIS_CMD_BULK
)
2250 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2252 /* Check if the user is authenticated */
2253 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2254 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2259 /* Handle the maxmemory directive */
2260 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2261 zmalloc_used_memory() > server
.maxmemory
)
2263 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2268 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2269 if (dictSize(c
->pubsub_classes
) > 0 &&
2270 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
) {
2271 addReplySds(c
,sdsnew("-ERR only SUBSCRIBE / UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2276 /* Exec the command */
2277 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2278 queueMultiCommand(c
,cmd
);
2279 addReply(c
,shared
.queued
);
2281 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2282 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2286 /* Prepare the client for the next command */
2291 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2296 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2297 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2298 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2299 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2302 if (argc
<= REDIS_STATIC_ARGS
) {
2305 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2308 lenobj
= createObject(REDIS_STRING
,
2309 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2310 lenobj
->refcount
= 0;
2311 outv
[outc
++] = lenobj
;
2312 for (j
= 0; j
< argc
; j
++) {
2313 lenobj
= createObject(REDIS_STRING
,
2314 sdscatprintf(sdsempty(),"$%lu\r\n",
2315 (unsigned long) stringObjectLen(argv
[j
])));
2316 lenobj
->refcount
= 0;
2317 outv
[outc
++] = lenobj
;
2318 outv
[outc
++] = argv
[j
];
2319 outv
[outc
++] = shared
.crlf
;
2322 /* Increment all the refcounts at start and decrement at end in order to
2323 * be sure to free objects if there is no slave in a replication state
2324 * able to be feed with commands */
2325 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2326 listRewind(slaves
,&li
);
2327 while((ln
= listNext(&li
))) {
2328 redisClient
*slave
= ln
->value
;
2330 /* Don't feed slaves that are still waiting for BGSAVE to start */
2331 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2333 /* Feed all the other slaves, MONITORs and so on */
2334 if (slave
->slaveseldb
!= dictid
) {
2338 case 0: selectcmd
= shared
.select0
; break;
2339 case 1: selectcmd
= shared
.select1
; break;
2340 case 2: selectcmd
= shared
.select2
; break;
2341 case 3: selectcmd
= shared
.select3
; break;
2342 case 4: selectcmd
= shared
.select4
; break;
2343 case 5: selectcmd
= shared
.select5
; break;
2344 case 6: selectcmd
= shared
.select6
; break;
2345 case 7: selectcmd
= shared
.select7
; break;
2346 case 8: selectcmd
= shared
.select8
; break;
2347 case 9: selectcmd
= shared
.select9
; break;
2349 selectcmd
= createObject(REDIS_STRING
,
2350 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2351 selectcmd
->refcount
= 0;
2354 addReply(slave
,selectcmd
);
2355 slave
->slaveseldb
= dictid
;
2357 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2359 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2360 if (outv
!= static_outv
) zfree(outv
);
2363 static void processInputBuffer(redisClient
*c
) {
2365 /* Before to process the input buffer, make sure the client is not
2366 * waitig for a blocking operation such as BLPOP. Note that the first
2367 * iteration the client is never blocked, otherwise the processInputBuffer
2368 * would not be called at all, but after the execution of the first commands
2369 * in the input buffer the client may be blocked, and the "goto again"
2370 * will try to reiterate. The following line will make it return asap. */
2371 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2372 if (c
->bulklen
== -1) {
2373 /* Read the first line of the query */
2374 char *p
= strchr(c
->querybuf
,'\n');
2381 query
= c
->querybuf
;
2382 c
->querybuf
= sdsempty();
2383 querylen
= 1+(p
-(query
));
2384 if (sdslen(query
) > querylen
) {
2385 /* leave data after the first line of the query in the buffer */
2386 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2388 *p
= '\0'; /* remove "\n" */
2389 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2390 sdsupdatelen(query
);
2392 /* Now we can split the query in arguments */
2393 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2396 if (c
->argv
) zfree(c
->argv
);
2397 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2399 for (j
= 0; j
< argc
; j
++) {
2400 if (sdslen(argv
[j
])) {
2401 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2409 /* Execute the command. If the client is still valid
2410 * after processCommand() return and there is something
2411 * on the query buffer try to process the next command. */
2412 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2414 /* Nothing to process, argc == 0. Just process the query
2415 * buffer if it's not empty or return to the caller */
2416 if (sdslen(c
->querybuf
)) goto again
;
2419 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2420 redisLog(REDIS_VERBOSE
, "Client protocol error");
2425 /* Bulk read handling. Note that if we are at this point
2426 the client already sent a command terminated with a newline,
2427 we are reading the bulk data that is actually the last
2428 argument of the command. */
2429 int qbl
= sdslen(c
->querybuf
);
2431 if (c
->bulklen
<= qbl
) {
2432 /* Copy everything but the final CRLF as final argument */
2433 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2435 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2436 /* Process the command. If the client is still valid after
2437 * the processing and there is more data in the buffer
2438 * try to parse it. */
2439 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2445 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2446 redisClient
*c
= (redisClient
*) privdata
;
2447 char buf
[REDIS_IOBUF_LEN
];
2450 REDIS_NOTUSED(mask
);
2452 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2454 if (errno
== EAGAIN
) {
2457 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2461 } else if (nread
== 0) {
2462 redisLog(REDIS_VERBOSE
, "Client closed connection");
2467 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2468 c
->lastinteraction
= time(NULL
);
2472 processInputBuffer(c
);
2475 static int selectDb(redisClient
*c
, int id
) {
2476 if (id
< 0 || id
>= server
.dbnum
)
2478 c
->db
= &server
.db
[id
];
2482 static void *dupClientReplyValue(void *o
) {
2483 incrRefCount((robj
*)o
);
2487 static redisClient
*createClient(int fd
) {
2488 redisClient
*c
= zmalloc(sizeof(*c
));
2490 anetNonBlock(NULL
,fd
);
2491 anetTcpNoDelay(NULL
,fd
);
2492 if (!c
) return NULL
;
2495 c
->querybuf
= sdsempty();
2504 c
->lastinteraction
= time(NULL
);
2505 c
->authenticated
= 0;
2506 c
->replstate
= REDIS_REPL_NONE
;
2507 c
->reply
= listCreate();
2508 listSetFreeMethod(c
->reply
,decrRefCount
);
2509 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2510 c
->blockingkeys
= NULL
;
2511 c
->blockingkeysnum
= 0;
2512 c
->io_keys
= listCreate();
2513 c
->pubsub_classes
= dictCreate(&setDictType
,NULL
);
2514 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2515 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2516 readQueryFromClient
, c
) == AE_ERR
) {
2520 listAddNodeTail(server
.clients
,c
);
2521 initClientMultiState(c
);
2525 static void addReply(redisClient
*c
, robj
*obj
) {
2526 if (listLength(c
->reply
) == 0 &&
2527 (c
->replstate
== REDIS_REPL_NONE
||
2528 c
->replstate
== REDIS_REPL_ONLINE
) &&
2529 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2530 sendReplyToClient
, c
) == AE_ERR
) return;
2532 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2533 obj
= dupStringObject(obj
);
2534 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2536 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2539 static void addReplySds(redisClient
*c
, sds s
) {
2540 robj
*o
= createObject(REDIS_STRING
,s
);
2545 static void addReplyDouble(redisClient
*c
, double d
) {
2548 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2549 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2550 (unsigned long) strlen(buf
),buf
));
2553 static void addReplyLong(redisClient
*c
, long l
) {
2558 addReply(c
,shared
.czero
);
2560 } else if (l
== 1) {
2561 addReply(c
,shared
.cone
);
2564 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2565 addReplySds(c
,sdsnewlen(buf
,len
));
2568 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2573 addReply(c
,shared
.czero
);
2575 } else if (ul
== 1) {
2576 addReply(c
,shared
.cone
);
2579 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2580 addReplySds(c
,sdsnewlen(buf
,len
));
2583 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2586 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2587 len
= sdslen(obj
->ptr
);
2589 long n
= (long)obj
->ptr
;
2591 /* Compute how many bytes will take this integer as a radix 10 string */
2597 while((n
= n
/10) != 0) {
2601 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2604 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2605 addReplyBulkLen(c
,obj
);
2607 addReply(c
,shared
.crlf
);
2610 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2611 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2613 addReply(c
,shared
.nullbulk
);
2615 robj
*o
= createStringObject(s
,strlen(s
));
2621 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2626 REDIS_NOTUSED(mask
);
2627 REDIS_NOTUSED(privdata
);
2629 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2630 if (cfd
== AE_ERR
) {
2631 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2634 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2635 if ((c
= createClient(cfd
)) == NULL
) {
2636 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2637 close(cfd
); /* May be already closed, just ingore errors */
2640 /* If maxclient directive is set and this is one client more... close the
2641 * connection. Note that we create the client instead to check before
2642 * for this condition, since now the socket is already set in nonblocking
2643 * mode and we can send an error for free using the Kernel I/O */
2644 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2645 char *err
= "-ERR max number of clients reached\r\n";
2647 /* That's a best effort error message, don't check write errors */
2648 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2649 /* Nothing to do, Just to avoid the warning... */
2654 server
.stat_numconnections
++;
2657 /* ======================= Redis objects implementation ===================== */
2659 static robj
*createObject(int type
, void *ptr
) {
2662 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2663 if (listLength(server
.objfreelist
)) {
2664 listNode
*head
= listFirst(server
.objfreelist
);
2665 o
= listNodeValue(head
);
2666 listDelNode(server
.objfreelist
,head
);
2667 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2669 if (server
.vm_enabled
) {
2670 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2671 o
= zmalloc(sizeof(*o
));
2673 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2677 o
->encoding
= REDIS_ENCODING_RAW
;
2680 if (server
.vm_enabled
) {
2681 /* Note that this code may run in the context of an I/O thread
2682 * and accessing to server.unixtime in theory is an error
2683 * (no locks). But in practice this is safe, and even if we read
2684 * garbage Redis will not fail, as it's just a statistical info */
2685 o
->vm
.atime
= server
.unixtime
;
2686 o
->storage
= REDIS_VM_MEMORY
;
2691 static robj
*createStringObject(char *ptr
, size_t len
) {
2692 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2695 static robj
*dupStringObject(robj
*o
) {
2696 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2697 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2700 static robj
*createListObject(void) {
2701 list
*l
= listCreate();
2703 listSetFreeMethod(l
,decrRefCount
);
2704 return createObject(REDIS_LIST
,l
);
2707 static robj
*createSetObject(void) {
2708 dict
*d
= dictCreate(&setDictType
,NULL
);
2709 return createObject(REDIS_SET
,d
);
2712 static robj
*createHashObject(void) {
2713 /* All the Hashes start as zipmaps. Will be automatically converted
2714 * into hash tables if there are enough elements or big elements
2716 unsigned char *zm
= zipmapNew();
2717 robj
*o
= createObject(REDIS_HASH
,zm
);
2718 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2722 static robj
*createZsetObject(void) {
2723 zset
*zs
= zmalloc(sizeof(*zs
));
2725 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2726 zs
->zsl
= zslCreate();
2727 return createObject(REDIS_ZSET
,zs
);
2730 static void freeStringObject(robj
*o
) {
2731 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2736 static void freeListObject(robj
*o
) {
2737 listRelease((list
*) o
->ptr
);
2740 static void freeSetObject(robj
*o
) {
2741 dictRelease((dict
*) o
->ptr
);
2744 static void freeZsetObject(robj
*o
) {
2747 dictRelease(zs
->dict
);
/* Release the payload of a hash object; how depends on the encoding. */
2752 static void freeHashObject(robj
*o
) {
2753 switch (o
->encoding
) {
2754 case REDIS_ENCODING_HT
:
/* Hash table encoding: the payload is a dict. */
2755 dictRelease((dict
*) o
->ptr
);
2757 case REDIS_ENCODING_ZIPMAP
:
2766 static void incrRefCount(robj
*o
) {
2767 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2771 static void decrRefCount(void *obj
) {
2774 /* Object is a key of a swapped out value, or in the process of being
2776 if (server
.vm_enabled
&&
2777 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2779 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2780 redisAssert(o
->refcount
== 1);
2782 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2783 redisAssert(o
->type
== REDIS_STRING
);
2784 freeStringObject(o
);
2785 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2786 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2787 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2788 !listAddNodeHead(server
.objfreelist
,o
))
2790 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2791 server
.vm_stats_swapped_objects
--;
2794 /* Object is in memory, or in the process of being swapped out. */
2795 if (--(o
->refcount
) == 0) {
2796 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2797 vmCancelThreadedIOJob(obj
);
2799 case REDIS_STRING
: freeStringObject(o
); break;
2800 case REDIS_LIST
: freeListObject(o
); break;
2801 case REDIS_SET
: freeSetObject(o
); break;
2802 case REDIS_ZSET
: freeZsetObject(o
); break;
2803 case REDIS_HASH
: freeHashObject(o
); break;
2804 default: redisAssert(0); break;
2806 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2807 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2808 !listAddNodeHead(server
.objfreelist
,o
))
2810 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2814 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2815 dictEntry
*de
= dictFind(db
->dict
,key
);
2817 robj
*key
= dictGetEntryKey(de
);
2818 robj
*val
= dictGetEntryVal(de
);
2820 if (server
.vm_enabled
) {
2821 if (key
->storage
== REDIS_VM_MEMORY
||
2822 key
->storage
== REDIS_VM_SWAPPING
)
2824 /* If we were swapping the object out, stop it, this key
2826 if (key
->storage
== REDIS_VM_SWAPPING
)
2827 vmCancelThreadedIOJob(key
);
2828 /* Update the access time of the key for the aging algorithm. */
2829 key
->vm
.atime
= server
.unixtime
;
2831 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2833 /* Our value was swapped on disk. Bring it at home. */
2834 redisAssert(val
== NULL
);
2835 val
= vmLoadObject(key
);
2836 dictGetEntryVal(de
) = val
;
2838 /* Clients blocked by the VM subsystem may be waiting for
2840 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2849 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2850 expireIfNeeded(db
,key
);
2851 return lookupKey(db
,key
);
2854 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2855 deleteIfVolatile(db
,key
);
2856 return lookupKey(db
,key
);
2859 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2860 robj
*o
= lookupKeyRead(c
->db
, key
);
2861 if (!o
) addReply(c
,reply
);
2865 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2866 robj
*o
= lookupKeyWrite(c
->db
, key
);
2867 if (!o
) addReply(c
,reply
);
2871 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2872 if (o
->type
!= type
) {
2873 addReply(c
,shared
.wrongtypeerr
);
2879 static int deleteKey(redisDb
*db
, robj
*key
) {
2882 /* We need to protect key from destruction: after the first dictDelete()
2883 * it may happen that 'key' is no longer valid if we don't increment
2884 * it's count. This may happen when we get the object reference directly
2885 * from the hash table with dictRandomKey() or dict iterators */
2887 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2888 retval
= dictDelete(db
->dict
,key
);
2891 return retval
== DICT_OK
;
2894 /* Try to share an object against the shared objects pool */
2895 static robj
*tryObjectSharing(robj
*o
) {
2896 struct dictEntry
*de
;
2899 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2901 redisAssert(o
->type
== REDIS_STRING
);
2902 de
= dictFind(server
.sharingpool
,o
);
2904 robj
*shared
= dictGetEntryKey(de
);
2906 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2907 dictGetEntryVal(de
) = (void*) c
;
2908 incrRefCount(shared
);
2912 /* Here we are using a stream algorihtm: Every time an object is
2913 * shared we increment its count, everytime there is a miss we
2914 * recrement the counter of a random object. If this object reaches
2915 * zero we remove the object and put the current object instead. */
2916 if (dictSize(server
.sharingpool
) >=
2917 server
.sharingpoolsize
) {
2918 de
= dictGetRandomKey(server
.sharingpool
);
2919 redisAssert(de
!= NULL
);
2920 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2921 dictGetEntryVal(de
) = (void*) c
;
2923 dictDelete(server
.sharingpool
,de
->key
);
2926 c
= 0; /* If the pool is empty we want to add this object */
2931 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2932 redisAssert(retval
== DICT_OK
);
2939 /* Check if the nul-terminated string 's' can be represented by a long
2940 * (that is, is a number that fits into long without any other space or
2941 * character before or after the digits).
2943 * If so, the function returns REDIS_OK and *longval is set to the value
2944 * of the number. Otherwise REDIS_ERR is returned */
2945 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2946 char buf
[32], *endptr
;
2950 value
= strtol(s
, &endptr
, 10);
2951 if (endptr
[0] != '\0') return REDIS_ERR
;
2952 slen
= snprintf(buf
,32,"%ld",value
);
2954 /* If the number converted back into a string is not identical
2955 * then it's not possible to encode the string as integer */
2956 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2957 if (longval
) *longval
= value
;
2961 /* Try to encode a string object in order to save space */
2962 static int tryObjectEncoding(robj
*o
) {
2966 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2967 return REDIS_ERR
; /* Already encoded */
2969 /* It's not save to encode shared objects: shared objects can be shared
2970 * everywhere in the "object space" of Redis. Encoded objects can only
2971 * appear as "values" (and not, for instance, as keys) */
2972 if (o
->refcount
> 1) return REDIS_ERR
;
2974 /* Currently we try to encode only strings */
2975 redisAssert(o
->type
== REDIS_STRING
);
2977 /* Check if we can represent this string as a long integer */
2978 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2980 /* Ok, this object can be encoded */
2981 o
->encoding
= REDIS_ENCODING_INT
;
2983 o
->ptr
= (void*) value
;
2987 /* Get a decoded version of an encoded object (returned as a new object).
2988 * If the object is already raw-encoded just increment the ref count. */
2989 static robj
*getDecodedObject(robj
*o
) {
2992 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2996 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2999 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3000 dec
= createStringObject(buf
,strlen(buf
));
3003 redisAssert(1 != 1);
3007 /* Compare two string objects via strcmp() or alike.
3008 * Note that the objects may be integer-encoded. In such a case we
3009 * use snprintf() to get a string representation of the numbers on the stack
3010 * and compare the strings, it's much faster than calling getDecodedObject().
3012 * Important note: if objects are not integer encoded, but binary-safe strings,
3013 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3015 static int compareStringObjects(robj
*a
, robj
*b
) {
3016 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3017 char bufa
[128], bufb
[128], *astr
, *bstr
;
3020 if (a
== b
) return 0;
3021 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3022 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3028 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3029 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3035 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3038 static size_t stringObjectLen(robj
*o
) {
3039 redisAssert(o
->type
== REDIS_STRING
);
3040 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3041 return sdslen(o
->ptr
);
3045 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3049 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if fwrite() wrote nothing. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write 't' to 'fp' as a 4 byte signed integer in host byte order.
 * Returns 0 on success, -1 if fwrite() wrote nothing. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t secs = (int32_t) t;
    size_t written = fwrite(&secs,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3062 /* check rdbLoadLen() comments for more info */
3063 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3064 unsigned char buf
[2];
3067 /* Save a 6 bit len */
3068 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3069 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3070 } else if (len
< (1<<14)) {
3071 /* Save a 14 bit len */
3072 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3074 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3076 /* Save a 32 bit len */
3077 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3078 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3080 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3085 /* String objects in the form "2391" "-100" without any space and with a
3086 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3087 * encoded as integers to save space */
3088 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3090 char *endptr
, buf
[32];
3092 /* Check if it's possible to encode this value as a number */
3093 value
= strtoll(s
, &endptr
, 10);
3094 if (endptr
[0] != '\0') return 0;
3095 snprintf(buf
,32,"%lld",value
);
3097 /* If the number converted back into a string is not identical
3098 * then it's not possible to encode the string as integer */
3099 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3101 /* Finally check if it fits in our ranges */
3102 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3103 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3104 enc
[1] = value
&0xFF;
3106 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3107 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3108 enc
[1] = value
&0xFF;
3109 enc
[2] = (value
>>8)&0xFF;
3111 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3112 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3113 enc
[1] = value
&0xFF;
3114 enc
[2] = (value
>>8)&0xFF;
3115 enc
[3] = (value
>>16)&0xFF;
3116 enc
[4] = (value
>>24)&0xFF;
3123 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3124 size_t comprlen
, outlen
;
3128 /* We require at least four bytes compression for this to be worth it */
3129 if (len
<= 4) return 0;
3131 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3132 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3133 if (comprlen
== 0) {
3137 /* Data compressed! Let's save it on disk */
3138 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3139 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3140 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3141 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3142 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3151 /* Save a string objet as [len][data] on disk. If the object is a string
3152 * representation of an integer value we try to safe it in a special form */
3153 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3156 /* Try integer encoding */
3158 unsigned char buf
[5];
3159 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3160 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3165 /* Try LZF compression - under 20 bytes it's unable to compress even
3166 * aaaaaaaaaaaaaaaaaa so skip it */
3167 if (server
.rdbcompression
&& len
> 20) {
3170 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3171 if (retval
== -1) return -1;
3172 if (retval
> 0) return 0;
3173 /* retval == 0 means data can't be compressed, save the old way */
3176 /* Store verbatim */
3177 if (rdbSaveLen(fp
,len
) == -1) return -1;
3178 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3182 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3183 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3186 /* Avoid incr/decr ref count business when possible.
3187 * This plays well with copy-on-write given that we are probably
3188 * in a child process (BGSAVE). Also this makes sure key objects
3189 * of swapped objects are not incRefCount-ed (an assert does not allow
3190 * this in order to avoid bugs) */
3191 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3192 obj
= getDecodedObject(obj
);
3193 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3196 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an
 * unsigned 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions, in which case no string follows:
 * 253: not a number
 * 254: positive infinity
 * 255: negative infinity
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int outlen = 1;

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        char *text = (char*)out+1;

        snprintf(text,sizeof(out)-1,"%.17g",val);
        out[0] = strlen(text);
        outlen = out[0]+1;
    }
    return (fwrite(out,outlen,1,fp) == 0) ? -1 : 0;
}
3228 /* Save a Redis object. */
3229 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3230 if (o
->type
== REDIS_STRING
) {
3231 /* Save a string value */
3232 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3233 } else if (o
->type
== REDIS_LIST
) {
3234 /* Save a list value */
3235 list
*list
= o
->ptr
;
3239 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3240 listRewind(list
,&li
);
3241 while((ln
= listNext(&li
))) {
3242 robj
*eleobj
= listNodeValue(ln
);
3244 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3246 } else if (o
->type
== REDIS_SET
) {
3247 /* Save a set value */
3249 dictIterator
*di
= dictGetIterator(set
);
3252 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3253 while((de
= dictNext(di
)) != NULL
) {
3254 robj
*eleobj
= dictGetEntryKey(de
);
3256 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3258 dictReleaseIterator(di
);
3259 } else if (o
->type
== REDIS_ZSET
) {
3260 /* Save a set value */
3262 dictIterator
*di
= dictGetIterator(zs
->dict
);
3265 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3266 while((de
= dictNext(di
)) != NULL
) {
3267 robj
*eleobj
= dictGetEntryKey(de
);
3268 double *score
= dictGetEntryVal(de
);
3270 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3271 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3273 dictReleaseIterator(di
);
3274 } else if (o
->type
== REDIS_HASH
) {
3275 /* Save a hash value */
3276 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3277 unsigned char *p
= zipmapRewind(o
->ptr
);
3278 unsigned int count
= zipmapLen(o
->ptr
);
3279 unsigned char *key
, *val
;
3280 unsigned int klen
, vlen
;
3282 if (rdbSaveLen(fp
,count
) == -1) return -1;
3283 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3284 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3285 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3288 dictIterator
*di
= dictGetIterator(o
->ptr
);
3291 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3292 while((de
= dictNext(di
)) != NULL
) {
3293 robj
*key
= dictGetEntryKey(de
);
3294 robj
*val
= dictGetEntryVal(de
);
3296 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3297 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3299 dictReleaseIterator(di
);
3307 /* Return the length the object will have on disk if saved with
3308 * the rdbSaveObject() function. Currently we use a trick to get
3309 * this length with very little changes to the code. In the future
3310 * we could switch to a faster solution. */
3311 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3312 if (fp
== NULL
) fp
= server
.devnull
;
3314 assert(rdbSaveObject(fp
,o
) != 1);
3318 /* Return the number of pages required to save this object in the swap file */
3319 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3320 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3322 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3325 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3326 static int rdbSave(char *filename
) {
3327 dictIterator
*di
= NULL
;
3332 time_t now
= time(NULL
);
3334 /* Wait for I/O therads to terminate, just in case this is a
3335 * foreground-saving, to avoid seeking the swap file descriptor at the
3337 if (server
.vm_enabled
)
3338 waitEmptyIOJobsQueue();
3340 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3341 fp
= fopen(tmpfile
,"w");
3343 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3346 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3347 for (j
= 0; j
< server
.dbnum
; j
++) {
3348 redisDb
*db
= server
.db
+j
;
3350 if (dictSize(d
) == 0) continue;
3351 di
= dictGetIterator(d
);
3357 /* Write the SELECT DB opcode */
3358 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3359 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3361 /* Iterate this DB writing every entry */
3362 while((de
= dictNext(di
)) != NULL
) {
3363 robj
*key
= dictGetEntryKey(de
);
3364 robj
*o
= dictGetEntryVal(de
);
3365 time_t expiretime
= getExpire(db
,key
);
3367 /* Save the expire time */
3368 if (expiretime
!= -1) {
3369 /* If this key is already expired skip it */
3370 if (expiretime
< now
) continue;
3371 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3372 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3374 /* Save the key and associated value. This requires special
3375 * handling if the value is swapped out. */
3376 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3377 key
->storage
== REDIS_VM_SWAPPING
) {
3378 /* Save type, key, value */
3379 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3380 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3381 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3383 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3385 /* Get a preview of the object in memory */
3386 po
= vmPreviewObject(key
);
3387 /* Save type, key, value */
3388 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3389 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3390 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3391 /* Remove the loaded object from memory */
3395 dictReleaseIterator(di
);
3398 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3400 /* Make sure data will not remain on the OS's output buffers */
3405 /* Use RENAME to make sure the DB file is changed atomically only
3406 * if the generate DB file is ok. */
3407 if (rename(tmpfile
,filename
) == -1) {
3408 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3412 redisLog(REDIS_NOTICE
,"DB saved on disk");
3414 server
.lastsave
= time(NULL
);
3420 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3421 if (di
) dictReleaseIterator(di
);
3425 static int rdbSaveBackground(char *filename
) {
3428 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3429 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3430 if ((childpid
= fork()) == 0) {
3432 if (server
.vm_enabled
) vmReopenSwapFile();
3434 if (rdbSave(filename
) == REDIS_OK
) {
3441 if (childpid
== -1) {
3442 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3446 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3447 server
.bgsavechildpid
= childpid
;
3450 return REDIS_OK
; /* unreached */
/* Remove the temp file "temp-<childpid>.rdb" from the current working
 * directory. The result of unlink() is ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read one byte from 'fp' and return it as a non negative int.
 * Returns -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a 4 byte signed integer (host byte order) from 'fp' and return it
 * as a time_t. Returns -1 if nothing could be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t secs;

    if (fread(&secs,4,1,fp) == 0) {
        return -1;
    }
    return (time_t) secs;
}
3472 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3473 * of this file for a description of how this are stored on disk.
3475 * isencoded is set to 1 if the readed length is not actually a length but
3476 * an "encoding type", check the above comments for more info */
3477 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3478 unsigned char buf
[2];
3482 if (isencoded
) *isencoded
= 0;
3483 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3484 type
= (buf
[0]&0xC0)>>6;
3485 if (type
== REDIS_RDB_6BITLEN
) {
3486 /* Read a 6 bit len */
3488 } else if (type
== REDIS_RDB_ENCVAL
) {
3489 /* Read a 6 bit len encoding type */
3490 if (isencoded
) *isencoded
= 1;
3492 } else if (type
== REDIS_RDB_14BITLEN
) {
3493 /* Read a 14 bit len */
3494 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3495 return ((buf
[0]&0x3F)<<8)|buf
[1];
3497 /* Read a 32 bit len */
3498 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3503 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3504 unsigned char enc
[4];
3507 if (enctype
== REDIS_RDB_ENC_INT8
) {
3508 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3509 val
= (signed char)enc
[0];
3510 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3512 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3513 v
= enc
[0]|(enc
[1]<<8);
3515 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3517 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3518 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3521 val
= 0; /* anti-warning */
3524 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3527 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3528 unsigned int len
, clen
;
3529 unsigned char *c
= NULL
;
3532 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3533 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3534 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3535 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3536 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3537 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3539 return createObject(REDIS_STRING
,val
);
3546 static robj
*rdbLoadStringObject(FILE*fp
) {
3551 len
= rdbLoadLen(fp
,&isencoded
);
3554 case REDIS_RDB_ENC_INT8
:
3555 case REDIS_RDB_ENC_INT16
:
3556 case REDIS_RDB_ENC_INT32
:
3557 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3558 case REDIS_RDB_ENC_LZF
:
3559 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3565 if (len
== REDIS_RDB_LENERR
) return NULL
;
3566 val
= sdsnewlen(NULL
,len
);
3567 if (len
&& fread(val
,len
,1,fp
) == 0) {
3571 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3574 /* For information about double serialization check rdbSaveDoubleValue() */
3575 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3579 if (fread(&len
,1,1,fp
) == 0) return -1;
3581 case 255: *val
= R_NegInf
; return 0;
3582 case 254: *val
= R_PosInf
; return 0;
3583 case 253: *val
= R_Nan
; return 0;
3585 if (fread(buf
,len
,1,fp
) == 0) return -1;
3587 sscanf(buf
, "%lg", val
);
3592 /* Load a Redis object of the specified type from the specified file.
3593 * On success a newly allocated object is returned, otherwise NULL. */
3594 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3597 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3598 if (type
== REDIS_STRING
) {
3599 /* Read string value */
3600 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3601 tryObjectEncoding(o
);
3602 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3603 /* Read list/set value */
3606 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3607 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3608 /* It's faster to expand the dict to the right size asap in order
3609 * to avoid rehashing */
3610 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3611 dictExpand(o
->ptr
,listlen
);
3612 /* Load every single element of the list/set */
3616 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3617 tryObjectEncoding(ele
);
3618 if (type
== REDIS_LIST
) {
3619 listAddNodeTail((list
*)o
->ptr
,ele
);
3621 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3624 } else if (type
== REDIS_ZSET
) {
3625 /* Read list/set value */
3629 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3630 o
= createZsetObject();
3632 /* Load every single element of the list/set */
3635 double *score
= zmalloc(sizeof(double));
3637 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3638 tryObjectEncoding(ele
);
3639 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3640 dictAdd(zs
->dict
,ele
,score
);
3641 zslInsert(zs
->zsl
,*score
,ele
);
3642 incrRefCount(ele
); /* added to skiplist */
3644 } else if (type
== REDIS_HASH
) {
3647 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3648 o
= createHashObject();
3649 /* Too many entries? Use an hash table. */
3650 if (hashlen
> server
.hash_max_zipmap_entries
)
3651 convertToRealHash(o
);
3652 /* Load every key/value, then set it into the zipmap or hash
3653 * table, as needed. */
3657 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3658 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3659 /* If we are using a zipmap and there are too big values
3660 * the object is converted to real hash table encoding. */
3661 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3662 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3663 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3665 convertToRealHash(o
);
3668 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3669 unsigned char *zm
= o
->ptr
;
3671 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3672 val
->ptr
,sdslen(val
->ptr
),NULL
);
3677 tryObjectEncoding(key
);
3678 tryObjectEncoding(val
);
3679 dictAdd((dict
*)o
->ptr
,key
,val
);
3688 static int rdbLoad(char *filename
) {
3690 robj
*keyobj
= NULL
;
3692 int type
, retval
, rdbver
;
3693 dict
*d
= server
.db
[0].dict
;
3694 redisDb
*db
= server
.db
+0;
3696 time_t expiretime
= -1, now
= time(NULL
);
3697 long long loadedkeys
= 0;
3699 fp
= fopen(filename
,"r");
3700 if (!fp
) return REDIS_ERR
;
3701 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3703 if (memcmp(buf
,"REDIS",5) != 0) {
3705 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3708 rdbver
= atoi(buf
+5);
3711 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3718 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3719 if (type
== REDIS_EXPIRETIME
) {
3720 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3721 /* We read the time so we need to read the object type again */
3722 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3724 if (type
== REDIS_EOF
) break;
3725 /* Handle SELECT DB opcode as a special case */
3726 if (type
== REDIS_SELECTDB
) {
3727 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3729 if (dbid
>= (unsigned)server
.dbnum
) {
3730 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3733 db
= server
.db
+dbid
;
3738 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3740 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3741 /* Add the new object in the hash table */
3742 retval
= dictAdd(d
,keyobj
,o
);
3743 if (retval
== DICT_ERR
) {
3744 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3747 /* Set the expire time if needed */
3748 if (expiretime
!= -1) {
3749 setExpire(db
,keyobj
,expiretime
);
3750 /* Delete this key if already expired */
3751 if (expiretime
< now
) deleteKey(db
,keyobj
);
3755 /* Handle swapping while loading big datasets when VM is on */
3757 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3758 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3759 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3766 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3767 if (keyobj
) decrRefCount(keyobj
);
3768 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3770 return REDIS_ERR
; /* Just to avoid warning */
3773 /*================================== Commands =============================== */
3775 static void authCommand(redisClient
*c
) {
3776 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3777 c
->authenticated
= 1;
3778 addReply(c
,shared
.ok
);
3780 c
->authenticated
= 0;
3781 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3785 static void pingCommand(redisClient
*c
) {
3786 addReply(c
,shared
.pong
);
3789 static void echoCommand(redisClient
*c
) {
3790 addReplyBulk(c
,c
->argv
[1]);
3793 /*=================================== Strings =============================== */
3795 static void setGenericCommand(redisClient
*c
, int nx
) {
3798 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3799 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3800 if (retval
== DICT_ERR
) {
3802 /* If the key is about a swapped value, we want a new key object
3803 * to overwrite the old. So we delete the old key in the database.
3804 * This will also make sure that swap pages about the old object
3805 * will be marked as free. */
3806 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3807 incrRefCount(c
->argv
[1]);
3808 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3809 incrRefCount(c
->argv
[2]);
3811 addReply(c
,shared
.czero
);
3815 incrRefCount(c
->argv
[1]);
3816 incrRefCount(c
->argv
[2]);
3819 removeExpire(c
->db
,c
->argv
[1]);
3820 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3823 static void setCommand(redisClient
*c
) {
3824 setGenericCommand(c
,0);
3827 static void setnxCommand(redisClient
*c
) {
3828 setGenericCommand(c
,1);
3831 static int getGenericCommand(redisClient
*c
) {
3834 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3837 if (o
->type
!= REDIS_STRING
) {
3838 addReply(c
,shared
.wrongtypeerr
);
3846 static void getCommand(redisClient
*c
) {
3847 getGenericCommand(c
);
3850 static void getsetCommand(redisClient
*c
) {
3851 if (getGenericCommand(c
) == REDIS_ERR
) return;
3852 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3853 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3855 incrRefCount(c
->argv
[1]);
3857 incrRefCount(c
->argv
[2]);
3859 removeExpire(c
->db
,c
->argv
[1]);
3862 static void mgetCommand(redisClient
*c
) {
3865 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3866 for (j
= 1; j
< c
->argc
; j
++) {
3867 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3869 addReply(c
,shared
.nullbulk
);
3871 if (o
->type
!= REDIS_STRING
) {
3872 addReply(c
,shared
.nullbulk
);
3880 static void msetGenericCommand(redisClient
*c
, int nx
) {
3881 int j
, busykeys
= 0;
3883 if ((c
->argc
% 2) == 0) {
3884 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3887 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3888 * set nothing at all if at least one already key exists. */
3890 for (j
= 1; j
< c
->argc
; j
+= 2) {
3891 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3897 addReply(c
, shared
.czero
);
3901 for (j
= 1; j
< c
->argc
; j
+= 2) {
3904 tryObjectEncoding(c
->argv
[j
+1]);
3905 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3906 if (retval
== DICT_ERR
) {
3907 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3908 incrRefCount(c
->argv
[j
+1]);
3910 incrRefCount(c
->argv
[j
]);
3911 incrRefCount(c
->argv
[j
+1]);
3913 removeExpire(c
->db
,c
->argv
[j
]);
3915 server
.dirty
+= (c
->argc
-1)/2;
3916 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3919 static void msetCommand(redisClient
*c
) {
3920 msetGenericCommand(c
,0);
3923 static void msetnxCommand(redisClient
*c
) {
3924 msetGenericCommand(c
,1);
3927 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3932 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3936 if (o
->type
!= REDIS_STRING
) {
3941 if (o
->encoding
== REDIS_ENCODING_RAW
)
3942 value
= strtoll(o
->ptr
, &eptr
, 10);
3943 else if (o
->encoding
== REDIS_ENCODING_INT
)
3944 value
= (long)o
->ptr
;
3946 redisAssert(1 != 1);
3951 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3952 tryObjectEncoding(o
);
3953 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3954 if (retval
== DICT_ERR
) {
3955 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3956 removeExpire(c
->db
,c
->argv
[1]);
3958 incrRefCount(c
->argv
[1]);
3961 addReply(c
,shared
.colon
);
3963 addReply(c
,shared
.crlf
);
3966 static void incrCommand(redisClient
*c
) {
3967 incrDecrCommand(c
,1);
3970 static void decrCommand(redisClient
*c
) {
3971 incrDecrCommand(c
,-1);
3974 static void incrbyCommand(redisClient
*c
) {
3975 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3976 incrDecrCommand(c
,incr
);
3979 static void decrbyCommand(redisClient
*c
) {
3980 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3981 incrDecrCommand(c
,-incr
);
3984 static void appendCommand(redisClient
*c
) {
3989 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3991 /* Create the key */
3992 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3993 incrRefCount(c
->argv
[1]);
3994 incrRefCount(c
->argv
[2]);
3995 totlen
= stringObjectLen(c
->argv
[2]);
3999 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4002 o
= dictGetEntryVal(de
);
4003 if (o
->type
!= REDIS_STRING
) {
4004 addReply(c
,shared
.wrongtypeerr
);
4007 /* If the object is specially encoded or shared we have to make
4009 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4010 robj
*decoded
= getDecodedObject(o
);
4012 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4013 decrRefCount(decoded
);
4014 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4017 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4018 o
->ptr
= sdscatlen(o
->ptr
,
4019 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4021 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4022 (unsigned long) c
->argv
[2]->ptr
);
4024 totlen
= sdslen(o
->ptr
);
4027 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4030 static void substrCommand(redisClient
*c
) {
4032 long start
= atoi(c
->argv
[2]->ptr
);
4033 long end
= atoi(c
->argv
[3]->ptr
);
4034 size_t rangelen
, strlen
;
4037 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4038 checkType(c
,o
,REDIS_STRING
)) return;
4040 o
= getDecodedObject(o
);
4041 strlen
= sdslen(o
->ptr
);
4043 /* convert negative indexes */
4044 if (start
< 0) start
= strlen
+start
;
4045 if (end
< 0) end
= strlen
+end
;
4046 if (start
< 0) start
= 0;
4047 if (end
< 0) end
= 0;
4049 /* indexes sanity checks */
4050 if (start
> end
|| (size_t)start
>= strlen
) {
4051 /* Out of range start or start > end result in null reply */
4052 addReply(c
,shared
.nullbulk
);
4056 if ((size_t)end
>= strlen
) end
= strlen
-1;
4057 rangelen
= (end
-start
)+1;
4059 /* Return the result */
4060 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4061 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4062 addReplySds(c
,range
);
4063 addReply(c
,shared
.crlf
);
4067 /* ========================= Type agnostic commands ========================= */
4069 static void delCommand(redisClient
*c
) {
4072 for (j
= 1; j
< c
->argc
; j
++) {
4073 if (deleteKey(c
->db
,c
->argv
[j
])) {
4078 addReplyLong(c
,deleted
);
4081 static void existsCommand(redisClient
*c
) {
4082 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4085 static void selectCommand(redisClient
*c
) {
4086 int id
= atoi(c
->argv
[1]->ptr
);
4088 if (selectDb(c
,id
) == REDIS_ERR
) {
4089 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4091 addReply(c
,shared
.ok
);
4095 static void randomkeyCommand(redisClient
*c
) {
4099 de
= dictGetRandomKey(c
->db
->dict
);
4100 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4103 addReply(c
,shared
.plus
);
4104 addReply(c
,shared
.crlf
);
4106 addReply(c
,shared
.plus
);
4107 addReply(c
,dictGetEntryKey(de
));
4108 addReply(c
,shared
.crlf
);
4112 static void keysCommand(redisClient
*c
) {
4115 sds pattern
= c
->argv
[1]->ptr
;
4116 int plen
= sdslen(pattern
);
4117 unsigned long numkeys
= 0;
4118 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4120 di
= dictGetIterator(c
->db
->dict
);
4122 decrRefCount(lenobj
);
4123 while((de
= dictNext(di
)) != NULL
) {
4124 robj
*keyobj
= dictGetEntryKey(de
);
4126 sds key
= keyobj
->ptr
;
4127 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4128 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4129 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4130 addReplyBulk(c
,keyobj
);
4135 dictReleaseIterator(di
);
4136 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4139 static void dbsizeCommand(redisClient
*c
) {
4141 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4144 static void lastsaveCommand(redisClient
*c
) {
4146 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4149 static void typeCommand(redisClient
*c
) {
4153 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4158 case REDIS_STRING
: type
= "+string"; break;
4159 case REDIS_LIST
: type
= "+list"; break;
4160 case REDIS_SET
: type
= "+set"; break;
4161 case REDIS_ZSET
: type
= "+zset"; break;
4162 case REDIS_HASH
: type
= "+hash"; break;
4163 default: type
= "+unknown"; break;
4166 addReplySds(c
,sdsnew(type
));
4167 addReply(c
,shared
.crlf
);
4170 static void saveCommand(redisClient
*c
) {
4171 if (server
.bgsavechildpid
!= -1) {
4172 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4175 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4176 addReply(c
,shared
.ok
);
4178 addReply(c
,shared
.err
);
4182 static void bgsaveCommand(redisClient
*c
) {
4183 if (server
.bgsavechildpid
!= -1) {
4184 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4187 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4188 char *status
= "+Background saving started\r\n";
4189 addReplySds(c
,sdsnew(status
));
4191 addReply(c
,shared
.err
);
4195 static void shutdownCommand(redisClient
*c
) {
4196 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4197 /* Kill the saving child if there is a background saving in progress.
4198 We want to avoid race conditions, for instance our saving child may
4199 overwrite the synchronous saving did by SHUTDOWN. */
4200 if (server
.bgsavechildpid
!= -1) {
4201 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4202 kill(server
.bgsavechildpid
,SIGKILL
);
4203 rdbRemoveTempFile(server
.bgsavechildpid
);
4205 if (server
.appendonly
) {
4206 /* Append only file: fsync() the AOF and exit */
4207 fsync(server
.appendfd
);
4208 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4211 /* Snapshotting. Perform a SYNC SAVE and exit */
4212 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4213 if (server
.daemonize
)
4214 unlink(server
.pidfile
);
4215 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4216 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4217 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4220 /* Ooops.. error saving! The best we can do is to continue
4221 * operating. Note that if there was a background saving process,
4222 * in the next cron() Redis will be notified that the background
4223 * saving aborted, handling special stuff like slaves pending for
4224 * synchronization... */
4225 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4227 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4232 static void renameGenericCommand(redisClient
*c
, int nx
) {
4235 /* To use the same key as src and dst is probably an error */
4236 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4237 addReply(c
,shared
.sameobjecterr
);
4241 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4245 deleteIfVolatile(c
->db
,c
->argv
[2]);
4246 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4249 addReply(c
,shared
.czero
);
4252 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4254 incrRefCount(c
->argv
[2]);
4256 deleteKey(c
->db
,c
->argv
[1]);
4258 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4261 static void renameCommand(redisClient
*c
) {
4262 renameGenericCommand(c
,0);
4265 static void renamenxCommand(redisClient
*c
) {
4266 renameGenericCommand(c
,1);
4269 static void moveCommand(redisClient
*c
) {
4274 /* Obtain source and target DB pointers */
4277 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4278 addReply(c
,shared
.outofrangeerr
);
4282 selectDb(c
,srcid
); /* Back to the source DB */
4284 /* If the user is moving using as target the same
4285 * DB as the source DB it is probably an error. */
4287 addReply(c
,shared
.sameobjecterr
);
4291 /* Check if the element exists and get a reference */
4292 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4294 addReply(c
,shared
.czero
);
4298 /* Try to add the element to the target DB */
4299 deleteIfVolatile(dst
,c
->argv
[1]);
4300 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4301 addReply(c
,shared
.czero
);
4304 incrRefCount(c
->argv
[1]);
4307 /* OK! key moved, free the entry in the source DB */
4308 deleteKey(src
,c
->argv
[1]);
4310 addReply(c
,shared
.cone
);
/* =================================== Lists ================================ */
4314 static void pushGenericCommand(redisClient
*c
, int where
) {
4318 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4320 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4321 addReply(c
,shared
.cone
);
4324 lobj
= createListObject();
4326 if (where
== REDIS_HEAD
) {
4327 listAddNodeHead(list
,c
->argv
[2]);
4329 listAddNodeTail(list
,c
->argv
[2]);
4331 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4332 incrRefCount(c
->argv
[1]);
4333 incrRefCount(c
->argv
[2]);
4335 if (lobj
->type
!= REDIS_LIST
) {
4336 addReply(c
,shared
.wrongtypeerr
);
4339 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4340 addReply(c
,shared
.cone
);
4344 if (where
== REDIS_HEAD
) {
4345 listAddNodeHead(list
,c
->argv
[2]);
4347 listAddNodeTail(list
,c
->argv
[2]);
4349 incrRefCount(c
->argv
[2]);
4352 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4355 static void lpushCommand(redisClient
*c
) {
4356 pushGenericCommand(c
,REDIS_HEAD
);
4359 static void rpushCommand(redisClient
*c
) {
4360 pushGenericCommand(c
,REDIS_TAIL
);
4363 static void llenCommand(redisClient
*c
) {
4367 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4368 checkType(c
,o
,REDIS_LIST
)) return;
4371 addReplyUlong(c
,listLength(l
));
4374 static void lindexCommand(redisClient
*c
) {
4376 int index
= atoi(c
->argv
[2]->ptr
);
4380 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4381 checkType(c
,o
,REDIS_LIST
)) return;
4384 ln
= listIndex(list
, index
);
4386 addReply(c
,shared
.nullbulk
);
4388 robj
*ele
= listNodeValue(ln
);
4389 addReplyBulk(c
,ele
);
4393 static void lsetCommand(redisClient
*c
) {
4395 int index
= atoi(c
->argv
[2]->ptr
);
4399 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4400 checkType(c
,o
,REDIS_LIST
)) return;
4403 ln
= listIndex(list
, index
);
4405 addReply(c
,shared
.outofrangeerr
);
4407 robj
*ele
= listNodeValue(ln
);
4410 listNodeValue(ln
) = c
->argv
[3];
4411 incrRefCount(c
->argv
[3]);
4412 addReply(c
,shared
.ok
);
4417 static void popGenericCommand(redisClient
*c
, int where
) {
4422 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4423 checkType(c
,o
,REDIS_LIST
)) return;
4426 if (where
== REDIS_HEAD
)
4427 ln
= listFirst(list
);
4429 ln
= listLast(list
);
4432 addReply(c
,shared
.nullbulk
);
4434 robj
*ele
= listNodeValue(ln
);
4435 addReplyBulk(c
,ele
);
4436 listDelNode(list
,ln
);
4437 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4442 static void lpopCommand(redisClient
*c
) {
4443 popGenericCommand(c
,REDIS_HEAD
);
4446 static void rpopCommand(redisClient
*c
) {
4447 popGenericCommand(c
,REDIS_TAIL
);
4450 static void lrangeCommand(redisClient
*c
) {
4452 int start
= atoi(c
->argv
[2]->ptr
);
4453 int end
= atoi(c
->argv
[3]->ptr
);
4460 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4461 checkType(c
,o
,REDIS_LIST
)) return;
4463 llen
= listLength(list
);
4465 /* convert negative indexes */
4466 if (start
< 0) start
= llen
+start
;
4467 if (end
< 0) end
= llen
+end
;
4468 if (start
< 0) start
= 0;
4469 if (end
< 0) end
= 0;
4471 /* indexes sanity checks */
4472 if (start
> end
|| start
>= llen
) {
4473 /* Out of range start or start > end result in empty list */
4474 addReply(c
,shared
.emptymultibulk
);
4477 if (end
>= llen
) end
= llen
-1;
4478 rangelen
= (end
-start
)+1;
4480 /* Return the result in form of a multi-bulk reply */
4481 ln
= listIndex(list
, start
);
4482 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4483 for (j
= 0; j
< rangelen
; j
++) {
4484 ele
= listNodeValue(ln
);
4485 addReplyBulk(c
,ele
);
4490 static void ltrimCommand(redisClient
*c
) {
4492 int start
= atoi(c
->argv
[2]->ptr
);
4493 int end
= atoi(c
->argv
[3]->ptr
);
4495 int j
, ltrim
, rtrim
;
4499 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4500 checkType(c
,o
,REDIS_LIST
)) return;
4502 llen
= listLength(list
);
4504 /* convert negative indexes */
4505 if (start
< 0) start
= llen
+start
;
4506 if (end
< 0) end
= llen
+end
;
4507 if (start
< 0) start
= 0;
4508 if (end
< 0) end
= 0;
4510 /* indexes sanity checks */
4511 if (start
> end
|| start
>= llen
) {
4512 /* Out of range start or start > end result in empty list */
4516 if (end
>= llen
) end
= llen
-1;
4521 /* Remove list elements to perform the trim */
4522 for (j
= 0; j
< ltrim
; j
++) {
4523 ln
= listFirst(list
);
4524 listDelNode(list
,ln
);
4526 for (j
= 0; j
< rtrim
; j
++) {
4527 ln
= listLast(list
);
4528 listDelNode(list
,ln
);
4530 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4532 addReply(c
,shared
.ok
);
4535 static void lremCommand(redisClient
*c
) {
4538 listNode
*ln
, *next
;
4539 int toremove
= atoi(c
->argv
[2]->ptr
);
4543 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4544 checkType(c
,o
,REDIS_LIST
)) return;
4548 toremove
= -toremove
;
4551 ln
= fromtail
? list
->tail
: list
->head
;
4553 robj
*ele
= listNodeValue(ln
);
4555 next
= fromtail
? ln
->prev
: ln
->next
;
4556 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4557 listDelNode(list
,ln
);
4560 if (toremove
&& removed
== toremove
) break;
4564 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4565 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4568 /* This is the semantic of this command:
4569 * RPOPLPUSH srclist dstlist:
4570 * IF LLEN(srclist) > 0
4571 * element = RPOP srclist
4572 * LPUSH dstlist element
4579 * The idea is to be able to get an element from a list in a reliable way
4580 * since the element is not just returned but pushed against another list
4581 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4583 static void rpoplpushcommand(redisClient
*c
) {
4588 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4589 checkType(c
,sobj
,REDIS_LIST
)) return;
4590 srclist
= sobj
->ptr
;
4591 ln
= listLast(srclist
);
4594 addReply(c
,shared
.nullbulk
);
4596 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4597 robj
*ele
= listNodeValue(ln
);
4600 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4601 addReply(c
,shared
.wrongtypeerr
);
4605 /* Add the element to the target list (unless it's directly
4606 * passed to some BLPOP-ing client */
4607 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4609 /* Create the list if the key does not exist */
4610 dobj
= createListObject();
4611 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4612 incrRefCount(c
->argv
[2]);
4614 dstlist
= dobj
->ptr
;
4615 listAddNodeHead(dstlist
,ele
);
4619 /* Send the element to the client as reply as well */
4620 addReplyBulk(c
,ele
);
4622 /* Finally remove the element from the source list */
4623 listDelNode(srclist
,ln
);
4624 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
/* ==================================== Sets ================================ */
4631 static void saddCommand(redisClient
*c
) {
4634 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4636 set
= createSetObject();
4637 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4638 incrRefCount(c
->argv
[1]);
4640 if (set
->type
!= REDIS_SET
) {
4641 addReply(c
,shared
.wrongtypeerr
);
4645 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4646 incrRefCount(c
->argv
[2]);
4648 addReply(c
,shared
.cone
);
4650 addReply(c
,shared
.czero
);
4654 static void sremCommand(redisClient
*c
) {
4657 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4658 checkType(c
,set
,REDIS_SET
)) return;
4660 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4662 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4663 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4664 addReply(c
,shared
.cone
);
4666 addReply(c
,shared
.czero
);
4670 static void smoveCommand(redisClient
*c
) {
4671 robj
*srcset
, *dstset
;
4673 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4674 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4676 /* If the source key does not exist return 0, if it's of the wrong type
4678 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4679 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4682 /* Error if the destination key is not a set as well */
4683 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4684 addReply(c
,shared
.wrongtypeerr
);
4687 /* Remove the element from the source set */
4688 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4689 /* Key not found in the src set! return zero */
4690 addReply(c
,shared
.czero
);
4693 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4694 deleteKey(c
->db
,c
->argv
[1]);
4696 /* Add the element to the destination set */
4698 dstset
= createSetObject();
4699 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4700 incrRefCount(c
->argv
[2]);
4702 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4703 incrRefCount(c
->argv
[3]);
4704 addReply(c
,shared
.cone
);
4707 static void sismemberCommand(redisClient
*c
) {
4710 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4711 checkType(c
,set
,REDIS_SET
)) return;
4713 if (dictFind(set
->ptr
,c
->argv
[2]))
4714 addReply(c
,shared
.cone
);
4716 addReply(c
,shared
.czero
);
4719 static void scardCommand(redisClient
*c
) {
4723 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4724 checkType(c
,o
,REDIS_SET
)) return;
4727 addReplyUlong(c
,dictSize(s
));
4730 static void spopCommand(redisClient
*c
) {
4734 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4735 checkType(c
,set
,REDIS_SET
)) return;
4737 de
= dictGetRandomKey(set
->ptr
);
4739 addReply(c
,shared
.nullbulk
);
4741 robj
*ele
= dictGetEntryKey(de
);
4743 addReplyBulk(c
,ele
);
4744 dictDelete(set
->ptr
,ele
);
4745 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4746 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4751 static void srandmemberCommand(redisClient
*c
) {
4755 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4756 checkType(c
,set
,REDIS_SET
)) return;
4758 de
= dictGetRandomKey(set
->ptr
);
4760 addReply(c
,shared
.nullbulk
);
4762 robj
*ele
= dictGetEntryKey(de
);
4764 addReplyBulk(c
,ele
);
4768 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4769 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4771 return dictSize(*d1
)-dictSize(*d2
);
4774 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4775 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4778 robj
*lenobj
= NULL
, *dstset
= NULL
;
4779 unsigned long j
, cardinality
= 0;
4781 for (j
= 0; j
< setsnum
; j
++) {
4785 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4786 lookupKeyRead(c
->db
,setskeys
[j
]);
4790 if (deleteKey(c
->db
,dstkey
))
4792 addReply(c
,shared
.czero
);
4794 addReply(c
,shared
.nullmultibulk
);
4798 if (setobj
->type
!= REDIS_SET
) {
4800 addReply(c
,shared
.wrongtypeerr
);
4803 dv
[j
] = setobj
->ptr
;
4805 /* Sort sets from the smallest to largest, this will improve our
4806 * algorithm's performace */
4807 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4809 /* The first thing we should output is the total number of elements...
4810 * since this is a multi-bulk write, but at this stage we don't know
4811 * the intersection set size, so we use a trick, append an empty object
4812 * to the output list and save the pointer to later modify it with the
4815 lenobj
= createObject(REDIS_STRING
,NULL
);
4817 decrRefCount(lenobj
);
4819 /* If we have a target key where to store the resulting set
4820 * create this key with an empty set inside */
4821 dstset
= createSetObject();
4824 /* Iterate all the elements of the first (smallest) set, and test
4825 * the element against all the other sets, if at least one set does
4826 * not include the element it is discarded */
4827 di
= dictGetIterator(dv
[0]);
4829 while((de
= dictNext(di
)) != NULL
) {
4832 for (j
= 1; j
< setsnum
; j
++)
4833 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4835 continue; /* at least one set does not contain the member */
4836 ele
= dictGetEntryKey(de
);
4838 addReplyBulk(c
,ele
);
4841 dictAdd(dstset
->ptr
,ele
,NULL
);
4845 dictReleaseIterator(di
);
4848 /* Store the resulting set into the target, if the intersection
4849 * is not an empty set. */
4850 deleteKey(c
->db
,dstkey
);
4851 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4852 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4853 incrRefCount(dstkey
);
4854 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4856 decrRefCount(dstset
);
4857 addReply(c
,shared
.czero
);
4861 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4866 static void sinterCommand(redisClient
*c
) {
4867 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4870 static void sinterstoreCommand(redisClient
*c
) {
4871 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Set operation selectors; sunionDiffGenericCommand() takes one of them
 * as its 'op' argument. */
#define REDIS_OP_UNION 0
#define REDIS_OP_DIFF 1
#define REDIS_OP_INTER 2
4878 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4879 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4882 robj
*dstset
= NULL
;
4883 int j
, cardinality
= 0;
4885 for (j
= 0; j
< setsnum
; j
++) {
4889 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4890 lookupKeyRead(c
->db
,setskeys
[j
]);
4895 if (setobj
->type
!= REDIS_SET
) {
4897 addReply(c
,shared
.wrongtypeerr
);
4900 dv
[j
] = setobj
->ptr
;
4903 /* We need a temp set object to store our union. If the dstkey
4904 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4905 * this set object will be the resulting object to set into the target key*/
4906 dstset
= createSetObject();
4908 /* Iterate all the elements of all the sets, add every element a single
4909 * time to the result set */
4910 for (j
= 0; j
< setsnum
; j
++) {
4911 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4912 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4914 di
= dictGetIterator(dv
[j
]);
4916 while((de
= dictNext(di
)) != NULL
) {
4919 /* dictAdd will not add the same element multiple times */
4920 ele
= dictGetEntryKey(de
);
4921 if (op
== REDIS_OP_UNION
|| j
== 0) {
4922 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4926 } else if (op
== REDIS_OP_DIFF
) {
4927 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4932 dictReleaseIterator(di
);
4934 /* result set is empty? Exit asap. */
4935 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4938 /* Output the content of the resulting set, if not in STORE mode */
4940 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4941 di
= dictGetIterator(dstset
->ptr
);
4942 while((de
= dictNext(di
)) != NULL
) {
4945 ele
= dictGetEntryKey(de
);
4946 addReplyBulk(c
,ele
);
4948 dictReleaseIterator(di
);
4949 decrRefCount(dstset
);
4951 /* If we have a target key where to store the resulting set
4952 * create this key with the result set inside */
4953 deleteKey(c
->db
,dstkey
);
4954 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4955 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4956 incrRefCount(dstkey
);
4957 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4959 decrRefCount(dstset
);
4960 addReply(c
,shared
.czero
);
4967 static void sunionCommand(redisClient
*c
) {
4968 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4971 static void sunionstoreCommand(redisClient
*c
) {
4972 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4975 static void sdiffCommand(redisClient
*c
) {
4976 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4979 static void sdiffstoreCommand(redisClient
*c
) {
4980 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
/* ==================================== ZSets =============================== */

/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */

/* This skiplist implementation is almost a C translation of the original
 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
 * Alternative to Balanced Trees", modified in three ways:
 * a) this implementation allows for repeated values.
 * b) the comparison is not just by key (our 'score') but by satellite data.
 * c) there is a back pointer, so it's a doubly linked list with the back
 * pointers being only at "level 1". This allows traversing the list
 * from tail to head, useful for ZREVRANGE. */
5002 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5003 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5005 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5007 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5013 static zskiplist
*zslCreate(void) {
5017 zsl
= zmalloc(sizeof(*zsl
));
5020 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5021 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5022 zsl
->header
->forward
[j
] = NULL
;
5024 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5025 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5026 zsl
->header
->span
[j
] = 0;
5028 zsl
->header
->backward
= NULL
;
5033 static void zslFreeNode(zskiplistNode
*node
) {
5034 decrRefCount(node
->obj
);
5035 zfree(node
->forward
);
5040 static void zslFree(zskiplist
*zsl
) {
5041 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5043 zfree(zsl
->header
->forward
);
5044 zfree(zsl
->header
->span
);
5047 next
= node
->forward
[0];
5054 static int zslRandomLevel(void) {
5056 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5061 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5062 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5063 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5067 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5068 /* store rank that is crossed to reach the insert position */
5069 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5071 while (x
->forward
[i
] &&
5072 (x
->forward
[i
]->score
< score
||
5073 (x
->forward
[i
]->score
== score
&&
5074 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5075 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5080 /* we assume the key is not already inside, since we allow duplicated
5081 * scores, and the re-insertion of score and redis object should never
5082 * happpen since the caller of zslInsert() should test in the hash table
5083 * if the element is already inside or not. */
5084 level
= zslRandomLevel();
5085 if (level
> zsl
->level
) {
5086 for (i
= zsl
->level
; i
< level
; i
++) {
5088 update
[i
] = zsl
->header
;
5089 update
[i
]->span
[i
-1] = zsl
->length
;
5093 x
= zslCreateNode(level
,score
,obj
);
5094 for (i
= 0; i
< level
; i
++) {
5095 x
->forward
[i
] = update
[i
]->forward
[i
];
5096 update
[i
]->forward
[i
] = x
;
5098 /* update span covered by update[i] as x is inserted here */
5100 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5101 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5105 /* increment span for untouched levels */
5106 for (i
= level
; i
< zsl
->level
; i
++) {
5107 update
[i
]->span
[i
-1]++;
5110 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5112 x
->forward
[0]->backward
= x
;
5118 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5119 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5121 for (i
= 0; i
< zsl
->level
; i
++) {
5122 if (update
[i
]->forward
[i
] == x
) {
5124 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5126 update
[i
]->forward
[i
] = x
->forward
[i
];
5128 /* invariant: i > 0, because update[0]->forward[0]
5129 * is always equal to x */
5130 update
[i
]->span
[i
-1] -= 1;
5133 if (x
->forward
[0]) {
5134 x
->forward
[0]->backward
= x
->backward
;
5136 zsl
->tail
= x
->backward
;
5138 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5143 /* Delete an element with matching score/object from the skiplist. */
5144 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5145 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5149 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5150 while (x
->forward
[i
] &&
5151 (x
->forward
[i
]->score
< score
||
5152 (x
->forward
[i
]->score
== score
&&
5153 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5157 /* We may have multiple elements with the same score, what we need
5158 * is to find the element with both the right score and object. */
5160 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5161 zslDeleteNode(zsl
, x
, update
);
5165 return 0; /* not found */
5167 return 0; /* not found */
5170 /* Delete all the elements with score between min and max from the skiplist.
5171 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
5172 * Note that this function takes the reference to the hash table view of the
5173 * sorted set, in order to remove the elements from the hash table too. */
5174 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5175 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5176 unsigned long removed
= 0;
5180 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5181 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5185 /* We may have multiple elements with the same score, what we need
5186 * is to find the element with both the right score and object. */
5188 while (x
&& x
->score
<= max
) {
5189 zskiplistNode
*next
= x
->forward
[0];
5190 zslDeleteNode(zsl
, x
, update
);
5191 dictDelete(dict
,x
->obj
);
5196 return removed
; /* not found */
5199 /* Delete all the elements with rank between start and end from the skiplist.
5200 * Start and end are inclusive. Note that start and end need to be 1-based */
5201 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5202 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5203 unsigned long traversed
= 0, removed
= 0;
5207 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5208 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5209 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5217 while (x
&& traversed
<= end
) {
5218 zskiplistNode
*next
= x
->forward
[0];
5219 zslDeleteNode(zsl
, x
, update
);
5220 dictDelete(dict
,x
->obj
);
5229 /* Find the first node having a score equal or greater than the specified one.
5230 * Returns NULL if there is no match. */
5231 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5236 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5237 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5240 /* We may have multiple elements with the same score, what we need
5241 * is to find the element with both the right score and object. */
5242 return x
->forward
[0];
5245 /* Find the rank for an element by both score and key.
5246 * Returns 0 when the element cannot be found, rank otherwise.
5247 * Note that the rank is 1-based due to the span of zsl->header to the
5249 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5251 unsigned long rank
= 0;
5255 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5256 while (x
->forward
[i
] &&
5257 (x
->forward
[i
]->score
< score
||
5258 (x
->forward
[i
]->score
== score
&&
5259 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5260 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5264 /* x might be equal to zsl->header, so test if obj is non-NULL */
5265 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5272 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5273 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5275 unsigned long traversed
= 0;
5279 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5280 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5282 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5285 if (traversed
== rank
) {
/* The actual Z-commands implementations */
5294 /* This generic command implements both ZADD and ZINCRBY.
5295 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5296 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5297 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5302 zsetobj
= lookupKeyWrite(c
->db
,key
);
5303 if (zsetobj
== NULL
) {
5304 zsetobj
= createZsetObject();
5305 dictAdd(c
->db
->dict
,key
,zsetobj
);
5308 if (zsetobj
->type
!= REDIS_ZSET
) {
5309 addReply(c
,shared
.wrongtypeerr
);
5315 /* Ok now since we implement both ZADD and ZINCRBY here the code
5316 * needs to handle the two different conditions. It's all about setting
5317 * '*score', that is, the new score to set, to the right value. */
5318 score
= zmalloc(sizeof(double));
5322 /* Read the old score. If the element was not present starts from 0 */
5323 de
= dictFind(zs
->dict
,ele
);
5325 double *oldscore
= dictGetEntryVal(de
);
5326 *score
= *oldscore
+ scoreval
;
5334 /* What follows is a simple remove and re-insert operation that is common
5335 * to both ZADD and ZINCRBY... */
5336 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5337 /* case 1: New element */
5338 incrRefCount(ele
); /* added to hash */
5339 zslInsert(zs
->zsl
,*score
,ele
);
5340 incrRefCount(ele
); /* added to skiplist */
5343 addReplyDouble(c
,*score
);
5345 addReply(c
,shared
.cone
);
5350 /* case 2: Score update operation */
5351 de
= dictFind(zs
->dict
,ele
);
5352 redisAssert(de
!= NULL
);
5353 oldscore
= dictGetEntryVal(de
);
5354 if (*score
!= *oldscore
) {
5357 /* Remove and insert the element in the skip list with new score */
5358 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5359 redisAssert(deleted
!= 0);
5360 zslInsert(zs
->zsl
,*score
,ele
);
5362 /* Update the score in the hash table */
5363 dictReplace(zs
->dict
,ele
,score
);
5369 addReplyDouble(c
,*score
);
5371 addReply(c
,shared
.czero
);
5375 static void zaddCommand(redisClient
*c
) {
5378 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5379 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5382 static void zincrbyCommand(redisClient
*c
) {
5385 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5386 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5389 static void zremCommand(redisClient
*c
) {
5396 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5397 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5400 de
= dictFind(zs
->dict
,c
->argv
[2]);
5402 addReply(c
,shared
.czero
);
5405 /* Delete from the skiplist */
5406 oldscore
= dictGetEntryVal(de
);
5407 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5408 redisAssert(deleted
!= 0);
5410 /* Delete from the hash table */
5411 dictDelete(zs
->dict
,c
->argv
[2]);
5412 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5413 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5415 addReply(c
,shared
.cone
);
5418 static void zremrangebyscoreCommand(redisClient
*c
) {
5419 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5420 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5425 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5426 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5429 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5430 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5431 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5432 server
.dirty
+= deleted
;
5433 addReplyLong(c
,deleted
);
5436 static void zremrangebyrankCommand(redisClient
*c
) {
5437 int start
= atoi(c
->argv
[2]->ptr
);
5438 int end
= atoi(c
->argv
[3]->ptr
);
5444 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5445 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5447 llen
= zs
->zsl
->length
;
5449 /* convert negative indexes */
5450 if (start
< 0) start
= llen
+start
;
5451 if (end
< 0) end
= llen
+end
;
5452 if (start
< 0) start
= 0;
5453 if (end
< 0) end
= 0;
5455 /* indexes sanity checks */
5456 if (start
> end
|| start
>= llen
) {
5457 addReply(c
,shared
.czero
);
5460 if (end
>= llen
) end
= llen
-1;
5462 /* increment start and end because zsl*Rank functions
5463 * use 1-based rank */
5464 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5465 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5466 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5467 server
.dirty
+= deleted
;
5468 addReplyLong(c
, deleted
);
5476 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5477 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5478 unsigned long size1
, size2
;
5479 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5480 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5481 return size1
- size2
;
5484 #define REDIS_AGGR_SUM 1
5485 #define REDIS_AGGR_MIN 2
5486 #define REDIS_AGGR_MAX 3
5488 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5489 if (aggregate
== REDIS_AGGR_SUM
) {
5490 *target
= *target
+ val
;
5491 } else if (aggregate
== REDIS_AGGR_MIN
) {
5492 *target
= val
< *target
? val
: *target
;
5493 } else if (aggregate
== REDIS_AGGR_MAX
) {
5494 *target
= val
> *target
? val
: *target
;
5497 redisAssert(0 != 0);
5501 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5503 int aggregate
= REDIS_AGGR_SUM
;
5510 /* expect zsetnum input keys to be given */
5511 zsetnum
= atoi(c
->argv
[2]->ptr
);
5513 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5517 /* test if the expected number of keys would overflow */
5518 if (3+zsetnum
> c
->argc
) {
5519 addReply(c
,shared
.syntaxerr
);
5523 /* read keys to be used for input */
5524 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5525 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5526 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5530 if (zsetobj
->type
!= REDIS_ZSET
) {
5532 addReply(c
,shared
.wrongtypeerr
);
5535 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5538 /* default all weights to 1 */
5539 src
[i
].weight
= 1.0;
5542 /* parse optional extra arguments */
5544 int remaining
= c
->argc
- j
;
5547 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5549 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5550 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5552 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5554 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5555 aggregate
= REDIS_AGGR_SUM
;
5556 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5557 aggregate
= REDIS_AGGR_MIN
;
5558 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5559 aggregate
= REDIS_AGGR_MAX
;
5562 addReply(c
,shared
.syntaxerr
);
5568 addReply(c
,shared
.syntaxerr
);
5574 /* sort sets from the smallest to largest, this will improve our
5575 * algorithm's performance */
5576 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5578 dstobj
= createZsetObject();
5579 dstzset
= dstobj
->ptr
;
5581 if (op
== REDIS_OP_INTER
) {
5582 /* skip going over all entries if the smallest zset is NULL or empty */
5583 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5584 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5585 * from small to large, all src[i > 0].dict are non-empty too */
5586 di
= dictGetIterator(src
[0].dict
);
5587 while((de
= dictNext(di
)) != NULL
) {
5588 double *score
= zmalloc(sizeof(double)), value
;
5589 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5591 for (j
= 1; j
< zsetnum
; j
++) {
5592 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5594 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5595 zunionInterAggregate(score
, value
, aggregate
);
5601 /* skip entry when not present in every source dict */
5605 robj
*o
= dictGetEntryKey(de
);
5606 dictAdd(dstzset
->dict
,o
,score
);
5607 incrRefCount(o
); /* added to dictionary */
5608 zslInsert(dstzset
->zsl
,*score
,o
);
5609 incrRefCount(o
); /* added to skiplist */
5612 dictReleaseIterator(di
);
5614 } else if (op
== REDIS_OP_UNION
) {
5615 for (i
= 0; i
< zsetnum
; i
++) {
5616 if (!src
[i
].dict
) continue;
5618 di
= dictGetIterator(src
[i
].dict
);
5619 while((de
= dictNext(di
)) != NULL
) {
5620 /* skip key when already processed */
5621 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5623 double *score
= zmalloc(sizeof(double)), value
;
5624 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5626 /* because the zsets are sorted by size, its only possible
5627 * for sets at larger indices to hold this entry */
5628 for (j
= (i
+1); j
< zsetnum
; j
++) {
5629 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5631 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5632 zunionInterAggregate(score
, value
, aggregate
);
5636 robj
*o
= dictGetEntryKey(de
);
5637 dictAdd(dstzset
->dict
,o
,score
);
5638 incrRefCount(o
); /* added to dictionary */
5639 zslInsert(dstzset
->zsl
,*score
,o
);
5640 incrRefCount(o
); /* added to skiplist */
5642 dictReleaseIterator(di
);
5645 /* unknown operator */
5646 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5649 deleteKey(c
->db
,dstkey
);
5650 if (dstzset
->zsl
->length
) {
5651 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5652 incrRefCount(dstkey
);
5653 addReplyLong(c
, dstzset
->zsl
->length
);
5656 decrRefCount(dstzset
);
5657 addReply(c
, shared
.czero
);
5662 static void zunionCommand(redisClient
*c
) {
5663 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5666 static void zinterCommand(redisClient
*c
) {
5667 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5670 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5672 int start
= atoi(c
->argv
[2]->ptr
);
5673 int end
= atoi(c
->argv
[3]->ptr
);
5682 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5684 } else if (c
->argc
>= 5) {
5685 addReply(c
,shared
.syntaxerr
);
5689 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5690 checkType(c
,o
,REDIS_ZSET
)) return;
5695 /* convert negative indexes */
5696 if (start
< 0) start
= llen
+start
;
5697 if (end
< 0) end
= llen
+end
;
5698 if (start
< 0) start
= 0;
5699 if (end
< 0) end
= 0;
5701 /* indexes sanity checks */
5702 if (start
> end
|| start
>= llen
) {
5703 /* Out of range start or start > end result in empty list */
5704 addReply(c
,shared
.emptymultibulk
);
5707 if (end
>= llen
) end
= llen
-1;
5708 rangelen
= (end
-start
)+1;
5710 /* check if starting point is trivial, before searching
5711 * the element in log(N) time */
5713 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5716 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5719 /* Return the result in form of a multi-bulk reply */
5720 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5721 withscores
? (rangelen
*2) : rangelen
));
5722 for (j
= 0; j
< rangelen
; j
++) {
5724 addReplyBulk(c
,ele
);
5726 addReplyDouble(c
,ln
->score
);
5727 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5731 static void zrangeCommand(redisClient
*c
) {
5732 zrangeGenericCommand(c
,0);
5735 static void zrevrangeCommand(redisClient
*c
) {
5736 zrangeGenericCommand(c
,1);
5739 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5740 * If justcount is non-zero, just the count is returned. */
5741 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5744 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5745 int offset
= 0, limit
= -1;
5749 /* Parse the min-max interval. If one of the values is prefixed
5750 * by the "(" character, it's considered "open". For instance
5751 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5752 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5753 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5754 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5757 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5759 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5760 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5763 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5766 /* Parse "WITHSCORES": note that if the command was called with
5767 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5768 * enter the following paths to parse WITHSCORES and LIMIT. */
5769 if (c
->argc
== 5 || c
->argc
== 8) {
5770 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5775 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5779 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5784 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5785 addReply(c
,shared
.syntaxerr
);
5787 } else if (c
->argc
== (7 + withscores
)) {
5788 offset
= atoi(c
->argv
[5]->ptr
);
5789 limit
= atoi(c
->argv
[6]->ptr
);
5790 if (offset
< 0) offset
= 0;
5793 /* Ok, lookup the key and get the range */
5794 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5796 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5798 if (o
->type
!= REDIS_ZSET
) {
5799 addReply(c
,shared
.wrongtypeerr
);
5801 zset
*zsetobj
= o
->ptr
;
5802 zskiplist
*zsl
= zsetobj
->zsl
;
5804 robj
*ele
, *lenobj
= NULL
;
5805 unsigned long rangelen
= 0;
5807 /* Get the first node with the score >= min, or with
5808 * score > min if 'minex' is true. */
5809 ln
= zslFirstWithScore(zsl
,min
);
5810 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5813 /* No element matching the speciifed interval */
5814 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5818 /* We don't know in advance how many matching elements there
5819 * are in the list, so we push this object that will represent
5820 * the multi-bulk length in the output buffer, and will "fix"
5823 lenobj
= createObject(REDIS_STRING
,NULL
);
5825 decrRefCount(lenobj
);
5828 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5831 ln
= ln
->forward
[0];
5834 if (limit
== 0) break;
5837 addReplyBulk(c
,ele
);
5839 addReplyDouble(c
,ln
->score
);
5841 ln
= ln
->forward
[0];
5843 if (limit
> 0) limit
--;
5846 addReplyLong(c
,(long)rangelen
);
5848 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5849 withscores
? (rangelen
*2) : rangelen
);
5855 static void zrangebyscoreCommand(redisClient
*c
) {
5856 genericZrangebyscoreCommand(c
,0);
5859 static void zcountCommand(redisClient
*c
) {
5860 genericZrangebyscoreCommand(c
,1);
5863 static void zcardCommand(redisClient
*c
) {
5867 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5868 checkType(c
,o
,REDIS_ZSET
)) return;
5871 addReplyUlong(c
,zs
->zsl
->length
);
5874 static void zscoreCommand(redisClient
*c
) {
5879 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5880 checkType(c
,o
,REDIS_ZSET
)) return;
5883 de
= dictFind(zs
->dict
,c
->argv
[2]);
5885 addReply(c
,shared
.nullbulk
);
5887 double *score
= dictGetEntryVal(de
);
5889 addReplyDouble(c
,*score
);
5893 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5901 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5902 checkType(c
,o
,REDIS_ZSET
)) return;
5906 de
= dictFind(zs
->dict
,c
->argv
[2]);
5908 addReply(c
,shared
.nullbulk
);
5912 score
= dictGetEntryVal(de
);
5913 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5916 addReplyLong(c
, zsl
->length
- rank
);
5918 addReplyLong(c
, rank
-1);
5921 addReply(c
,shared
.nullbulk
);
5925 static void zrankCommand(redisClient
*c
) {
5926 zrankGenericCommand(c
, 0);
5929 static void zrevrankCommand(redisClient
*c
) {
5930 zrankGenericCommand(c
, 1);
5933 /* =================================== Hashes =============================== */
5934 static void hsetCommand(redisClient
*c
) {
5936 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5939 o
= createHashObject();
5940 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5941 incrRefCount(c
->argv
[1]);
5943 if (o
->type
!= REDIS_HASH
) {
5944 addReply(c
,shared
.wrongtypeerr
);
5948 /* We want to convert the zipmap into an hash table right now if the
5949 * entry to be added is too big. Note that we check if the object
5950 * is integer encoded before to try fetching the length in the test below.
5951 * This is because integers are small, but currently stringObjectLen()
5952 * performs a slow conversion: not worth it. */
5953 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5954 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5955 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5956 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5957 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5959 convertToRealHash(o
);
5962 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5963 unsigned char *zm
= o
->ptr
;
5964 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5966 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5967 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5968 decrRefCount(valobj
);
5971 /* And here there is the second check for hash conversion...
5972 * we want to do it only if the operation was not just an update as
5973 * zipmapLen() is O(N). */
5974 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5975 convertToRealHash(o
);
5977 tryObjectEncoding(c
->argv
[2]);
5978 /* note that c->argv[3] is already encoded, as the latest arg
5979 * of a bulk command is always integer encoded if possible. */
5980 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5981 incrRefCount(c
->argv
[2]);
5985 incrRefCount(c
->argv
[3]);
5988 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5991 static void hincrbyCommand(redisClient
*c
) {
5993 long long value
= 0, incr
= 0;
5994 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5997 o
= createHashObject();
5998 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5999 incrRefCount(c
->argv
[1]);
6001 if (o
->type
!= REDIS_HASH
) {
6002 addReply(c
,shared
.wrongtypeerr
);
6007 robj
*o_incr
= getDecodedObject(c
->argv
[3]);
6008 incr
= strtoll(o_incr
->ptr
, NULL
, 10);
6009 decrRefCount(o_incr
);
6011 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6012 unsigned char *zm
= o
->ptr
;
6013 unsigned char *zval
;
6016 /* Find value if already present in hash */
6017 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6019 /* strtoll needs the char* to have a trailing \0, but
6020 * the zipmap doesn't include them. */
6021 sds szval
= sdsnewlen(zval
, zvlen
);
6022 value
= strtoll(szval
,NULL
,10);
6027 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6028 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6029 (unsigned char*)svalue
,sdslen(svalue
),&update
);
6033 /* Check if the zipmap needs to be converted
6034 * if this was not an update. */
6035 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6036 convertToRealHash(o
);
6041 /* Find value if already present in hash */
6042 de
= dictFind(o
->ptr
,c
->argv
[2]);
6044 hval
= dictGetEntryVal(de
);
6045 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6046 value
= strtoll(hval
->ptr
,NULL
,10);
6047 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6048 value
= (long)hval
->ptr
;
6050 redisAssert(1 != 1);
6054 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6055 tryObjectEncoding(hval
);
6056 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6057 incrRefCount(c
->argv
[2]);
6062 addReplyLong(c
, value
);
6065 static void hgetCommand(redisClient
*c
) {
6068 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6069 checkType(c
,o
,REDIS_HASH
)) return;
6071 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6072 unsigned char *zm
= o
->ptr
;
6077 field
= getDecodedObject(c
->argv
[2]);
6078 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6079 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6080 addReplySds(c
,sdsnewlen(val
,vlen
));
6081 addReply(c
,shared
.crlf
);
6082 decrRefCount(field
);
6085 addReply(c
,shared
.nullbulk
);
6086 decrRefCount(field
);
6090 struct dictEntry
*de
;
6092 de
= dictFind(o
->ptr
,c
->argv
[2]);
6094 addReply(c
,shared
.nullbulk
);
6096 robj
*e
= dictGetEntryVal(de
);
6103 static void hdelCommand(redisClient
*c
) {
6107 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6108 checkType(c
,o
,REDIS_HASH
)) return;
6110 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6111 robj
*field
= getDecodedObject(c
->argv
[2]);
6113 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6114 (unsigned char*) field
->ptr
,
6115 sdslen(field
->ptr
), &deleted
);
6116 decrRefCount(field
);
6117 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6118 deleteKey(c
->db
,c
->argv
[1]);
6120 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6121 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6122 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6124 if (deleted
) server
.dirty
++;
6125 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6128 static void hlenCommand(redisClient
*c
) {
6132 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6133 checkType(c
,o
,REDIS_HASH
)) return;
6135 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6136 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6137 addReplyUlong(c
,len
);
6140 #define REDIS_GETALL_KEYS 1
6141 #define REDIS_GETALL_VALS 2
6142 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6144 unsigned long count
= 0;
6146 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6147 || checkType(c
,o
,REDIS_HASH
)) return;
6149 lenobj
= createObject(REDIS_STRING
,NULL
);
6151 decrRefCount(lenobj
);
6153 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6154 unsigned char *p
= zipmapRewind(o
->ptr
);
6155 unsigned char *field
, *val
;
6156 unsigned int flen
, vlen
;
6158 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6161 if (flags
& REDIS_GETALL_KEYS
) {
6162 aux
= createStringObject((char*)field
,flen
);
6163 addReplyBulk(c
,aux
);
6167 if (flags
& REDIS_GETALL_VALS
) {
6168 aux
= createStringObject((char*)val
,vlen
);
6169 addReplyBulk(c
,aux
);
6175 dictIterator
*di
= dictGetIterator(o
->ptr
);
6178 while((de
= dictNext(di
)) != NULL
) {
6179 robj
*fieldobj
= dictGetEntryKey(de
);
6180 robj
*valobj
= dictGetEntryVal(de
);
6182 if (flags
& REDIS_GETALL_KEYS
) {
6183 addReplyBulk(c
,fieldobj
);
6186 if (flags
& REDIS_GETALL_VALS
) {
6187 addReplyBulk(c
,valobj
);
6191 dictReleaseIterator(di
);
6193 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6196 static void hkeysCommand(redisClient
*c
) {
6197 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6200 static void hvalsCommand(redisClient
*c
) {
6201 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6204 static void hgetallCommand(redisClient
*c
) {
6205 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6208 static void hexistsCommand(redisClient
*c
) {
6212 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6213 checkType(c
,o
,REDIS_HASH
)) return;
6215 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6217 unsigned char *zm
= o
->ptr
;
6219 field
= getDecodedObject(c
->argv
[2]);
6220 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6221 decrRefCount(field
);
6223 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6225 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6228 static void convertToRealHash(robj
*o
) {
6229 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6230 unsigned int klen
, vlen
;
6231 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6233 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6234 p
= zipmapRewind(zm
);
6235 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6236 robj
*keyobj
, *valobj
;
6238 keyobj
= createStringObject((char*)key
,klen
);
6239 valobj
= createStringObject((char*)val
,vlen
);
6240 tryObjectEncoding(keyobj
);
6241 tryObjectEncoding(valobj
);
6242 dictAdd(dict
,keyobj
,valobj
);
6244 o
->encoding
= REDIS_ENCODING_HT
;
6249 /* ========================= Non type-specific commands ==================== */
6251 static void flushdbCommand(redisClient
*c
) {
6252 server
.dirty
+= dictSize(c
->db
->dict
);
6253 dictEmpty(c
->db
->dict
);
6254 dictEmpty(c
->db
->expires
);
6255 addReply(c
,shared
.ok
);
6258 static void flushallCommand(redisClient
*c
) {
6259 server
.dirty
+= emptyDb();
6260 addReply(c
,shared
.ok
);
6261 if (server
.bgsavechildpid
!= -1) {
6262 kill(server
.bgsavechildpid
,SIGKILL
);
6263 rdbRemoveTempFile(server
.bgsavechildpid
);
6265 rdbSave(server
.dbfilename
);
6269 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6270 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6272 so
->pattern
= pattern
;
6276 /* Return the value associated to the key with a name obtained
6277 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6278 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6282 int prefixlen
, sublen
, postfixlen
;
6283 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6287 char buf
[REDIS_SORTKEY_MAX
+1];
6290 /* If the pattern is "#" return the substitution object itself in order
6291 * to implement the "SORT ... GET #" feature. */
6292 spat
= pattern
->ptr
;
6293 if (spat
[0] == '#' && spat
[1] == '\0') {
6297 /* The substitution object may be specially encoded. If so we create
6298 * a decoded object on the fly. Otherwise getDecodedObject will just
6299 * increment the ref count, that we'll decrement later. */
6300 subst
= getDecodedObject(subst
);
6303 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6304 p
= strchr(spat
,'*');
6306 decrRefCount(subst
);
6311 sublen
= sdslen(ssub
);
6312 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6313 memcpy(keyname
.buf
,spat
,prefixlen
);
6314 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6315 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6316 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6317 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6319 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6320 decrRefCount(subst
);
6322 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6323 return lookupKeyRead(db
,&keyobj
);
6326 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6327 * the additional parameter is not standard but a BSD-specific we have to
6328 * pass sorting parameters via the global 'server' structure */
6329 static int sortCompare(const void *s1
, const void *s2
) {
6330 const redisSortObject
*so1
= s1
, *so2
= s2
;
6333 if (!server
.sort_alpha
) {
6334 /* Numeric sorting. Here it's trivial as we precomputed scores */
6335 if (so1
->u
.score
> so2
->u
.score
) {
6337 } else if (so1
->u
.score
< so2
->u
.score
) {
6343 /* Alphanumeric sorting */
6344 if (server
.sort_bypattern
) {
6345 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6346 /* At least one compare object is NULL */
6347 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6349 else if (so1
->u
.cmpobj
== NULL
)
6354 /* We have both the objects, use strcoll */
6355 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6358 /* Compare elements directly */
6361 dec1
= getDecodedObject(so1
->obj
);
6362 dec2
= getDecodedObject(so2
->obj
);
6363 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6368 return server
.sort_desc
? -cmp
: cmp
;
6371 /* The SORT command is the most complex command in Redis. Warning: this code
6372 * is optimized for speed and a bit less for readability */
6373 static void sortCommand(redisClient
*c
) {
6376 int desc
= 0, alpha
= 0;
6377 int limit_start
= 0, limit_count
= -1, start
, end
;
6378 int j
, dontsort
= 0, vectorlen
;
6379 int getop
= 0; /* GET operation counter */
6380 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6381 redisSortObject
*vector
; /* Resulting vector to sort */
6383 /* Lookup the key to sort. It must be of the right types */
6384 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6385 if (sortval
== NULL
) {
6386 addReply(c
,shared
.nullmultibulk
);
6389 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6390 sortval
->type
!= REDIS_ZSET
)
6392 addReply(c
,shared
.wrongtypeerr
);
6396 /* Create a list of operations to perform for every sorted element.
6397 * Operations can be GET/DEL/INCR/DECR */
6398 operations
= listCreate();
6399 listSetFreeMethod(operations
,zfree
);
6402 /* Now we need to protect sortval incrementing its count, in the future
6403 * SORT may have options able to overwrite/delete keys during the sorting
6404 * and the sorted key itself may get destroied */
6405 incrRefCount(sortval
);
6407 /* The SORT command has an SQL-alike syntax, parse it */
6408 while(j
< c
->argc
) {
6409 int leftargs
= c
->argc
-j
-1;
6410 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6412 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6414 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6416 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6417 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6418 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6420 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6421 storekey
= c
->argv
[j
+1];
6423 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6424 sortby
= c
->argv
[j
+1];
6425 /* If the BY pattern does not contain '*', i.e. it is constant,
6426 * we don't need to sort nor to lookup the weight keys. */
6427 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6429 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6430 listAddNodeTail(operations
,createSortOperation(
6431 REDIS_SORT_GET
,c
->argv
[j
+1]));
6435 decrRefCount(sortval
);
6436 listRelease(operations
);
6437 addReply(c
,shared
.syntaxerr
);
6443 /* Load the sorting vector with all the objects to sort */
6444 switch(sortval
->type
) {
6445 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6446 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6447 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6448 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6450 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6453 if (sortval
->type
== REDIS_LIST
) {
6454 list
*list
= sortval
->ptr
;
6458 listRewind(list
,&li
);
6459 while((ln
= listNext(&li
))) {
6460 robj
*ele
= ln
->value
;
6461 vector
[j
].obj
= ele
;
6462 vector
[j
].u
.score
= 0;
6463 vector
[j
].u
.cmpobj
= NULL
;
6471 if (sortval
->type
== REDIS_SET
) {
6474 zset
*zs
= sortval
->ptr
;
6478 di
= dictGetIterator(set
);
6479 while((setele
= dictNext(di
)) != NULL
) {
6480 vector
[j
].obj
= dictGetEntryKey(setele
);
6481 vector
[j
].u
.score
= 0;
6482 vector
[j
].u
.cmpobj
= NULL
;
6485 dictReleaseIterator(di
);
6487 redisAssert(j
== vectorlen
);
6489 /* Now it's time to load the right scores in the sorting vector */
6490 if (dontsort
== 0) {
6491 for (j
= 0; j
< vectorlen
; j
++) {
6495 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6496 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6498 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6500 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6501 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6503 /* Don't need to decode the object if it's
6504 * integer-encoded (the only encoding supported) so
6505 * far. We can just cast it */
6506 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6507 vector
[j
].u
.score
= (long)byval
->ptr
;
6509 redisAssert(1 != 1);
6514 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6515 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6517 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6518 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6520 redisAssert(1 != 1);
6527 /* We are ready to sort the vector... perform a bit of sanity check
6528 * on the LIMIT option too. We'll use a partial version of quicksort. */
6529 start
= (limit_start
< 0) ? 0 : limit_start
;
6530 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6531 if (start
>= vectorlen
) {
6532 start
= vectorlen
-1;
6535 if (end
>= vectorlen
) end
= vectorlen
-1;
6537 if (dontsort
== 0) {
6538 server
.sort_desc
= desc
;
6539 server
.sort_alpha
= alpha
;
6540 server
.sort_bypattern
= sortby
? 1 : 0;
6541 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6542 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6544 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6547 /* Send command output to the output buffer, performing the specified
6548 * GET/DEL/INCR/DECR operations if any. */
6549 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6550 if (storekey
== NULL
) {
6551 /* STORE option not specified, sent the sorting result to client */
6552 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6553 for (j
= start
; j
<= end
; j
++) {
6557 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6558 listRewind(operations
,&li
);
6559 while((ln
= listNext(&li
))) {
6560 redisSortOperation
*sop
= ln
->value
;
6561 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6564 if (sop
->type
== REDIS_SORT_GET
) {
6565 if (!val
|| val
->type
!= REDIS_STRING
) {
6566 addReply(c
,shared
.nullbulk
);
6568 addReplyBulk(c
,val
);
6571 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6576 robj
*listObject
= createListObject();
6577 list
*listPtr
= (list
*) listObject
->ptr
;
6579 /* STORE option specified, set the sorting result as a List object */
6580 for (j
= start
; j
<= end
; j
++) {
6585 listAddNodeTail(listPtr
,vector
[j
].obj
);
6586 incrRefCount(vector
[j
].obj
);
6588 listRewind(operations
,&li
);
6589 while((ln
= listNext(&li
))) {
6590 redisSortOperation
*sop
= ln
->value
;
6591 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6594 if (sop
->type
== REDIS_SORT_GET
) {
6595 if (!val
|| val
->type
!= REDIS_STRING
) {
6596 listAddNodeTail(listPtr
,createStringObject("",0));
6598 listAddNodeTail(listPtr
,val
);
6602 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6606 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6607 incrRefCount(storekey
);
6609 /* Note: we add 1 because the DB is dirty anyway since even if the
6610 * SORT result is empty a new key is set and maybe the old content
6612 server
.dirty
+= 1+outputlen
;
6613 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6617 decrRefCount(sortval
);
6618 listRelease(operations
);
6619 for (j
= 0; j
< vectorlen
; j
++) {
6620 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6621 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the output buffer and must be provided by the caller; the longest
 * string written here is the 20-digit byte count plus "B" and the
 * terminator, so 32 bytes are always enough.
 *
 * Values of 1 TiB or more used to leave 's' untouched (the caller would
 * then print an uninitialized buffer): they are now rendered with the T
 * and P suffixes, falling back to the plain byte count above that. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Larger than any unit handled above: print the raw byte count */
        sprintf(s,"%lluB",n);
    }
}
6647 /* Create the string returned by the INFO command. This is decoupled
6648 * by the INFO command itself as we need to report the same information
6649 * on memory corruption problems. */
/* Build the INFO report as an sds string.
 *
 * The report is assembled with sdscatprintf() in up to four parts:
 *  - the general section (version, process, clients, memory, persistence
 *    and statistics fields), always present;
 *  - the replication fields, appended only when server.masterhost is set;
 *  - the vm_* fields, appended only when server.vm_enabled is set;
 *  - one "dbN:keys=...,expires=..." line for every database that has at
 *    least one key or at least one expire.
 * The human readable memory figure is produced by bytesToHuman(). */
6650 static sds
genRedisInfoString(void) {
6652 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6656 bytesToHuman(hmem
,zmalloc_used_memory());
6657 info
= sdscatprintf(sdsempty(),
6658 "redis_version:%s\r\n"
6660 "multiplexing_api:%s\r\n"
6661 "process_id:%ld\r\n"
6662 "uptime_in_seconds:%ld\r\n"
6663 "uptime_in_days:%ld\r\n"
6664 "connected_clients:%d\r\n"
6665 "connected_slaves:%d\r\n"
6666 "blocked_clients:%d\r\n"
6667 "used_memory:%zu\r\n"
6668 "used_memory_human:%s\r\n"
6669 "changes_since_last_save:%lld\r\n"
6670 "bgsave_in_progress:%d\r\n"
6671 "last_save_time:%ld\r\n"
6672 "bgrewriteaof_in_progress:%d\r\n"
6673 "total_connections_received:%lld\r\n"
6674 "total_commands_processed:%lld\r\n"
6675 "expired_keys:%lld\r\n"
6676 "hash_max_zipmap_entries:%ld\r\n"
6677 "hash_max_zipmap_value:%ld\r\n"
6678 "pubsub_classes:%ld\r\n"
6682 (sizeof(long) == 8) ? "64" : "32",
6687 listLength(server
.clients
)-listLength(server
.slaves
),
6688 listLength(server
.slaves
),
6689 server
.blpop_blocked_clients
,
6690 zmalloc_used_memory(),
6693 server
.bgsavechildpid
!= -1,
6695 server
.bgrewritechildpid
!= -1,
6696 server
.stat_numconnections
,
6697 server
.stat_numcommands
,
6698 server
.stat_expiredkeys
,
6699 server
.hash_max_zipmap_entries
,
6700 server
.hash_max_zipmap_value
,
6701 dictSize(server
.pubsub_classes
),
6702 server
.vm_enabled
!= 0,
6703 server
.masterhost
== NULL
? "master" : "slave"
6705 if (server
.masterhost
) {
6706 info
= sdscatprintf(info
,
6707 "master_host:%s\r\n"
6708 "master_port:%d\r\n"
6709 "master_link_status:%s\r\n"
6710 "master_last_io_seconds_ago:%d\r\n"
6713 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6715 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6718 if (server
.vm_enabled
) {
6720 info
= sdscatprintf(info
,
6721 "vm_conf_max_memory:%llu\r\n"
6722 "vm_conf_page_size:%llu\r\n"
6723 "vm_conf_pages:%llu\r\n"
6724 "vm_stats_used_pages:%llu\r\n"
6725 "vm_stats_swapped_objects:%llu\r\n"
6726 "vm_stats_swappin_count:%llu\r\n"
6727 "vm_stats_swappout_count:%llu\r\n"
6728 "vm_stats_io_newjobs_len:%lu\r\n"
6729 "vm_stats_io_processing_len:%lu\r\n"
6730 "vm_stats_io_processed_len:%lu\r\n"
6731 "vm_stats_io_active_threads:%lu\r\n"
6732 "vm_stats_blocked_clients:%lu\r\n"
6733 ,(unsigned long long) server
.vm_max_memory
,
6734 (unsigned long long) server
.vm_page_size
,
6735 (unsigned long long) server
.vm_pages
,
6736 (unsigned long long) server
.vm_stats_used_pages
,
6737 (unsigned long long) server
.vm_stats_swapped_objects
,
6738 (unsigned long long) server
.vm_stats_swapins
,
6739 (unsigned long long) server
.vm_stats_swapouts
,
6740 (unsigned long) listLength(server
.io_newjobs
),
6741 (unsigned long) listLength(server
.io_processing
),
6742 (unsigned long) listLength(server
.io_processed
),
6743 (unsigned long) server
.io_active_threads
,
6744 (unsigned long) server
.vm_blocked_clients
6748 for (j
= 0; j
< server
.dbnum
; j
++) {
6749 long long keys
, vkeys
;
6751 keys
= dictSize(server
.db
[j
].dict
);
6752 vkeys
= dictSize(server
.db
[j
].expires
);
6753 if (keys
|| vkeys
) {
6754 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6761 static void infoCommand(redisClient
*c
) {
6762 sds info
= genRedisInfoString();
6763 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6764 (unsigned long)sdslen(info
)));
6765 addReplySds(c
,info
);
6766 addReply(c
,shared
.crlf
);
/* MONITOR command: flag the client as both REDIS_SLAVE and REDIS_MONITOR,
 * append it to server.monitors and send the shared "ok" reply. A client
 * that already has the REDIS_SLAVE flag set is left untouched and gets no
 * reply at all. */
6769 static void monitorCommand(redisClient
*c
) {
6770 /* ignore MONITOR if already slave or in monitor mode */
6771 if (c
->flags
& REDIS_SLAVE
) return;
6773 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6775 listAddNodeTail(server
.monitors
,c
);
6776 addReply(c
,shared
.ok
);
6779 /* ================================= Expire ================================= */
6780 static int removeExpire(redisDb
*db
, robj
*key
) {
6781 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Try to register 'when' as the expire time of 'key' by adding the pair
 * to db->expires; the time is stored directly in the entry value pointer
 * (no allocation for the value). dictAdd() returning DICT_ERR selects the
 * failure branch.
 * NOTE(review): the bodies of both branches are not visible in this
 * excerpt; the caller in this file treats a non-zero return as
 * "expire set" -- confirm against the full source. */
6788 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6789 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6797 /* Return the expire time of the specified key, or -1 if no expire
6798 * is associated with this key (i.e. the key is non volatile) */
6799 static time_t getExpire(redisDb
*db
, robj
*key
) {
6802 /* No expire? return ASAP */
6803 if (dictSize(db
->expires
) == 0 ||
6804 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6806 return (time_t) dictGetEntryVal(de
);
6809 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6813 /* No expire? return ASAP */
6814 if (dictSize(db
->expires
) == 0 ||
6815 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6817 /* Lookup the expire */
6818 when
= (time_t) dictGetEntryVal(de
);
6819 if (time(NULL
) <= when
) return 0;
6821 /* Delete the key */
6822 dictDelete(db
->expires
,key
);
6823 server
.stat_expiredkeys
++;
6824 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' if it has an expire set, regardless of whether the expire
 * time has been reached. When db->expires has an entry for the key, the
 * expired keys counter is incremented and the key is removed from both
 * db->expires and db->dict.
 * Returns 0 if the key has no expire; otherwise 1 when the key was
 * removed from db->dict and 0 when it was not found there. */
6827 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6830 /* No expire? return ASAP */
6831 if (dictSize(db
->expires
) == 0 ||
6832 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6834 /* Delete the key */
6836 server
.stat_expiredkeys
++;
6837 dictDelete(db
->expires
,key
);
6838 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared implementation of EXPIRE and EXPIREAT. 'seconds' is a time to
 * live relative to the current time. The key is looked up in c->db->dict;
 * when an expire is set, the absolute time time(NULL)+seconds is handed
 * to setExpire(). Every path replies with one of the shared czero / cone
 * objects.
 * NOTE(review): the conditions guarding each branch are elided from this
 * excerpt. One visible branch deletes the key (marking the dataset dirty)
 * instead of setting an expire -- presumably the negative 'seconds'
 * case; confirm against the full source. */
6841 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6844 de
= dictFind(c
->db
->dict
,key
);
6846 addReply(c
,shared
.czero
);
6850 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6851 addReply(c
, shared
.cone
);
6854 time_t when
= time(NULL
)+seconds
;
6855 if (setExpire(c
->db
,key
,when
)) {
6856 addReply(c
,shared
.cone
);
6859 addReply(c
,shared
.czero
);
6865 static void expireCommand(redisClient
*c
) {
6866 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6869 static void expireatCommand(redisClient
*c
) {
6870 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
/* TTL command: reply with an integer (":<n>") computed as the expire time
 * of c->argv[1], as returned by getExpire(), minus the current time. A
 * negative difference is reported as -1.
 * NOTE(review): the declarations and the guard around the subtraction are
 * elided from this excerpt. */
6873 static void ttlCommand(redisClient
*c
) {
6877 expire
= getExpire(c
->db
,c
->argv
[1]);
6879 ttl
= (int) (expire
-time(NULL
));
6880 if (ttl
< 0) ttl
= -1;
6882 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6885 /* ================================ MULTI/EXEC ============================== */
6887 /* Client state initialization for MULTI/EXEC */
6888 static void initClientMultiState(redisClient
*c
) {
6889 c
->mstate
.commands
= NULL
;
6890 c
->mstate
.count
= 0;
6893 /* Release all the resources associated with MULTI/EXEC state */
/* Walk the c->mstate.count commands queued in c->mstate.commands, drop
 * one reference from every argument of each queued command, then free the
 * commands array itself with zfree(). The count/commands fields are not
 * reset by the visible code; the callers in this file pair the call with
 * initClientMultiState(). */
6894 static void freeClientMultiState(redisClient
*c
) {
6897 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6899 multiCmd
*mc
= c
->mstate
.commands
+j
;
6901 for (i
= 0; i
< mc
->argc
; i
++)
6902 decrRefCount(mc
->argv
[i
]);
6905 zfree(c
->mstate
.commands
);
6908 /* Add a new command into the MULTI commands queue */
/* Grow c->mstate.commands by one slot with zrealloc() and fill the new
 * slot (the one at index c->mstate.count) with a private copy of the
 * client's current argument vector: a fresh array of c->argc pointers is
 * allocated, the pointers are copied with memcpy() and every copied
 * object gets its reference count incremented.
 * NOTE(review): the lines storing 'cmd' and updating the counter are
 * elided from this excerpt. */
6909 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6913 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6914 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6915 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6918 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6919 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6920 for (j
= 0; j
< c
->argc
; j
++)
6921 incrRefCount(mc
->argv
[j
]);
6925 static void multiCommand(redisClient
*c
) {
6926 c
->flags
|= REDIS_MULTI
;
6927 addReply(c
,shared
.ok
);
6930 static void discardCommand(redisClient
*c
) {
6931 if (!(c
->flags
& REDIS_MULTI
)) {
6932 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6936 freeClientMultiState(c
);
6937 initClientMultiState(c
);
6938 c
->flags
&= (~REDIS_MULTI
);
6939 addReply(c
,shared
.ok
);
6942 static void execCommand(redisClient
*c
) {
6947 if (!(c
->flags
& REDIS_MULTI
)) {
6948 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6952 orig_argv
= c
->argv
;
6953 orig_argc
= c
->argc
;
6954 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6955 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6956 c
->argc
= c
->mstate
.commands
[j
].argc
;
6957 c
->argv
= c
->mstate
.commands
[j
].argv
;
6958 call(c
,c
->mstate
.commands
[j
].cmd
);
6960 c
->argv
= orig_argv
;
6961 c
->argc
= orig_argc
;
6962 freeClientMultiState(c
);
6963 initClientMultiState(c
);
6964 c
->flags
&= (~REDIS_MULTI
);
6967 /* =========================== Blocking Operations ========================= */
6969 /* Currently Redis blocking operations support is limited to list POP ops,
6970 * so the current implementation is not fully generic, but it is also not
6971 * completely specific so it will not require a rewrite to support new
6972 * kind of blocking operations in the future.
6974 * Still it's important to note that list blocking operations can be already
6975 * used as a notification mechanism in order to implement other blocking
6976 * operations at application level, so there must be a very strong evidence
6977 * of usefulness and generality before new blocking operations are implemented.
6979 * This is how the current blocking POP works, we use BLPOP as example:
6980 * - If the user calls BLPOP and the key exists and contains a non empty list
6981 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
6982 * if there is not to block.
6983 * - If instead BLPOP is called and the key does not exists or the list is
6984 * empty we need to block. In order to do so we remove the notification for
6985 * new data to read in the client socket (so that we'll not serve new
6986 * requests if the blocking request is not served). Also we put the client
6987 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
6988 * blocking for this keys.
6989 * - If a PUSH operation against a key with blocked clients waiting is
6990 * performed, we serve the first in the list: basically instead to push
6991 * the new element inside the list we return it to the (first / oldest)
6992 * blocking client, unblock the client, and remove it from the list.
6994 * The above comment and the source code should be enough in order to understand
6995 * the implementation and modify / fix it later.
6998 /* Set a client in blocking mode for the specified key, with the specified
/* Block client 'c' on the 'numkeys' keys in 'keys'.
 *
 * The keys are recorded in the newly allocated c->blockingkeys array
 * (each one gets an extra reference) and 'timeout' is stored as is in
 * c->blockingto. For every key the client is appended to the list of
 * waiting clients kept in c->db->blockingkeys; when the key has no entry
 * yet one is added (the key gets one more reference for the dictionary).
 * Finally the client is flagged REDIS_BLOCKED and
 * server.blpop_blocked_clients is incremented. */
7000 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7005 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7006 c
->blockingkeysnum
= numkeys
;
7007 c
->blockingto
= timeout
;
7008 for (j
= 0; j
< numkeys
; j
++) {
7009 /* Add the key in the client structure, to map clients -> keys */
7010 c
->blockingkeys
[j
] = keys
[j
];
7011 incrRefCount(keys
[j
]);
7013 /* And in the other "side", to map keys -> clients */
7014 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7018 /* For every key we take a list of clients blocked for it */
7020 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7021 incrRefCount(keys
[j
]);
7022 assert(retval
== DICT_OK
);
7024 l
= dictGetEntryVal(de
);
7026 listAddNodeTail(l
,c
);
7028 /* Mark the client as a blocked client */
7029 c
->flags
|= REDIS_BLOCKED
;
7030 server
.blpop_blocked_clients
++;
7033 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
/* Undo what blockForKeys() did for client 'c'.
 *
 * For every key in c->blockingkeys the client is removed from the list of
 * waiting clients stored in c->db->blockingkeys (the dictionary entry is
 * deleted once its list is empty) and the reference held on the key is
 * dropped. Then the key array is freed, REDIS_BLOCKED is cleared and
 * server.blpop_blocked_clients is decremented. If the client has data
 * pending in its query buffer it is processed right away. */
7034 static void unblockClientWaitingData(redisClient
*c
) {
7039 assert(c
->blockingkeys
!= NULL
);
7040 /* The client may wait for multiple keys, so unblock it for every key. */
7041 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7042 /* Remove this client from the list of clients waiting for this key. */
7043 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7045 l
= dictGetEntryVal(de
);
7046 listDelNode(l
,listSearchKey(l
,c
));
7047 /* If the list is empty we need to remove it to avoid wasting memory */
7048 if (listLength(l
) == 0)
7049 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7050 decrRefCount(c
->blockingkeys
[j
]);
7052 /* Cleanup the client structure */
7053 zfree(c
->blockingkeys
);
7054 c
->blockingkeys
= NULL
;
7055 c
->flags
&= (~REDIS_BLOCKED
);
7056 server
.blpop_blocked_clients
--;
7057 /* We want to process data if there is some command waiting
7058 * in the input buffer. Note that this is safe even if
7059 * unblockClientWaitingData() gets called from freeClient() because
7060 * freeClient() will be smart enough to call this function
7061 * *after* c->querybuf was set to NULL. */
7062 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7065 /* This should be called from any function PUSHing into lists.
7066 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7067 * 'ele' is the element pushed.
7069 * If the function returns 0 there was no client waiting for a list push
7072 * If the function returns 1 there was a client waiting for a list push
7073 * against this key, the element was passed to this client thus it's not
7074 * needed to actually add it to the list and the caller should return asap. */
/* Look up 'key' in c->db->blockingkeys and return 0 at once when no entry
 * exists. Otherwise a receiver is taken from the list of waiting clients,
 * it is sent a two element multi bulk reply (the key, then 'ele') and it
 * is unblocked with unblockClientWaitingData().
 * NOTE(review): the line selecting the list node and the final return
 * are elided from this excerpt. */
7075 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7076 struct dictEntry
*de
;
7077 redisClient
*receiver
;
7081 de
= dictFind(c
->db
->blockingkeys
,key
);
7082 if (de
== NULL
) return 0;
7083 l
= dictGetEntryVal(de
);
7086 receiver
= ln
->value
;
7088 addReplySds(receiver
,sdsnew("*2\r\n"));
7089 addReplyBulk(receiver
,key
);
7090 addReplyBulk(receiver
,ele
);
7091 unblockClientWaitingData(receiver
);
7095 /* Blocking RPOP/LPOP */
/* Implementation shared by BLPOP and BRPOP; 'where' is what the wrappers
 * pass (REDIS_HEAD or REDIS_TAIL).
 *
 * The keys are c->argv[1] .. c->argv[argc-2], the last argument is the
 * timeout. Each key is looked up in turn: a value that is not a list
 * produces the shared wrong type error; a non empty list is served by
 * temporarily rewriting the client's argument vector and running
 * popGenericCommand() after emitting the multi bulk header and the key
 * name, then restoring the original argc/argv.
 * When blocking is needed the timeout is parsed with strtol(); a value
 * greater than zero is turned into an absolute time by adding time(NULL)
 * and the client is blocked on all the keys with blockForKeys(). */
7096 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7101 for (j
= 1; j
< c
->argc
-1; j
++) {
7102 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7104 if (o
->type
!= REDIS_LIST
) {
7105 addReply(c
,shared
.wrongtypeerr
);
7108 list
*list
= o
->ptr
;
7109 if (listLength(list
) != 0) {
7110 /* If the list contains elements fall back to the usual
7111 * non-blocking POP operation */
7112 robj
*argv
[2], **orig_argv
;
7115 /* We need to alter the command arguments before to call
7116 * popGenericCommand() as the command takes a single key. */
7117 orig_argv
= c
->argv
;
7118 orig_argc
= c
->argc
;
7119 argv
[1] = c
->argv
[j
];
7123 /* Also the return value is different, we need to output
7124 * the multi bulk reply header and the key name. The
7125 * "real" command will add the last element (the value)
7126 * for us. If this souds like an hack to you it's just
7127 * because it is... */
7128 addReplySds(c
,sdsnew("*2\r\n"));
7129 addReplyBulk(c
,argv
[1]);
7130 popGenericCommand(c
,where
);
7132 /* Fix the client structure with the original stuff */
7133 c
->argv
= orig_argv
;
7134 c
->argc
= orig_argc
;
7140 /* If the list is empty or the key does not exists we must block */
7141 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7142 if (timeout
> 0) timeout
+= time(NULL
);
7143 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7146 static void blpopCommand(redisClient
*c
) {
7147 blockingPopGenericCommand(c
,REDIS_HEAD
);
7150 static void brpopCommand(redisClient
*c
) {
7151 blockingPopGenericCommand(c
,REDIS_TAIL
);
7154 /* =============================== Replication ============================= */
/* Synchronous write helper used by replication. 'ret' starts out as the
 * requested 'size' and 'start' as the current time. The descriptor is
 * polled with aeWait() for at most 1000 milliseconds at a time and
 * written with write(); a failing write() makes the function return -1.
 * The seconds elapsed since 'start' are compared against 'timeout'.
 * NOTE(review): the loop around the visible statements, the handling of
 * partial writes and the timeout branch are elided from this excerpt. */
7156 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7157 ssize_t nwritten
, ret
= size
;
7158 time_t start
= time(NULL
);
7162 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7163 nwritten
= write(fd
,ptr
,size
);
7164 if (nwritten
== -1) return -1;
7168 if ((time(NULL
)-start
) > timeout
) {
/* Synchronous read helper used by replication, the counterpart of
 * syncWrite(). 'totread' starts at zero and 'start' at the current time.
 * The descriptor is polled with aeWait() for at most 1000 milliseconds at
 * a time and read with read(); a failing read() makes the function return
 * -1. The seconds elapsed since 'start' are compared against 'timeout'.
 * NOTE(review): the loop around the visible statements, the handling of
 * short reads and the timeout branch are elided from this excerpt. */
7176 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7177 ssize_t nread
, totread
= 0;
7178 time_t start
= time(NULL
);
7182 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7183 nread
= read(fd
,ptr
,size
);
7184 if (nread
== -1) return -1;
7189 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr', one byte at a time through
 * syncRead(); -1 is returned as soon as one of those reads fails. When at
 * least one byte was stored and the byte right before the current
 * position is '\r', it is overwritten with a NUL terminator.
 * NOTE(review): the loop, the end of line detection and the normal return
 * value are elided from this excerpt. */
7197 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7204 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7207 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* SYNC command: register the client as a slave to be fed with a dump of
 * the dataset.
 *
 * The command is ignored when the client already has REDIS_SLAVE set and
 * refused with an error when the client has replies still queued. Then:
 *  - if a background save is in progress and another slave is already in
 *    REDIS_REPL_WAIT_BGSAVE_END state, that slave's reply list is
 *    duplicated into this client, which enters the same state;
 *  - if a background save is in progress but no such slave exists, the
 *    client enters REDIS_REPL_WAIT_BGSAVE_START;
 *  - otherwise a background save is started (an error is replied if that
 *    fails) and the client enters REDIS_REPL_WAIT_BGSAVE_END.
 * Finally the client is flagged REDIS_SLAVE and appended to
 * server.slaves. */
7218 static void syncCommand(redisClient
*c
) {
7219 /* ignore SYNC if already slave or in monitor mode */
7220 if (c
->flags
& REDIS_SLAVE
) return;
7222 /* SYNC can't be issued when the server has pending data to send to
7223 * the client about already issued commands. We need a fresh reply
7224 * buffer registering the differences between the BGSAVE and the current
7225 * dataset, so that we can copy to other slaves if needed. */
7226 if (listLength(c
->reply
) != 0) {
7227 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7231 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7232 /* Here we need to check if there is a background saving operation
7233 * in progress, or if it is required to start one */
7234 if (server
.bgsavechildpid
!= -1) {
7235 /* Ok a background save is in progress. Let's check if it is a good
7236 * one for replication, i.e. if there is another slave that is
7237 * registering differences since the server forked to save */
7242 listRewind(server
.slaves
,&li
);
7243 while((ln
= listNext(&li
))) {
7245 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7248 /* Perfect, the server is already registering differences for
7249 * another slave. Set the right state, and copy the buffer. */
7250 listRelease(c
->reply
);
7251 c
->reply
= listDup(slave
->reply
);
7252 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7253 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7255 /* No way, we need to wait for the next BGSAVE in order to
7256 * register differences */
7257 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7258 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7261 /* Ok we don't have a BGSAVE in progress, let's start one */
7262 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7263 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7264 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7265 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7268 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7271 c
->flags
|= REDIS_SLAVE
;
7273 listAddNodeTail(server
.slaves
,c
);
/* Writable event handler that streams the database dump file to a slave;
 * 'privdata' is the slave client, 'fd' its socket, 'mask' is unused.
 *
 * When slave->repldboff is still zero the bulk length header is written
 * first. Each invocation then seeks slave->repldbfd to the current
 * offset, reads up to REDIS_IOBUF_LEN bytes and writes them to 'fd',
 * advancing repldboff by the number of bytes actually written. Once the
 * offset reaches slave->repldbsize the dump descriptor is closed, the
 * slave becomes REDIS_REPL_ONLINE and the writable handler is replaced by
 * sendReplyToClient.
 * NOTE(review): the error paths (what happens after the logged read and
 * write errors) are elided from this excerpt. */
7277 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7278 redisClient
*slave
= privdata
;
7280 REDIS_NOTUSED(mask
);
7281 char buf
[REDIS_IOBUF_LEN
];
7282 ssize_t nwritten
, buflen
;
7284 if (slave
->repldboff
== 0) {
7285 /* Write the bulk write count before to transfer the DB. In theory here
7286 * we don't know how much room there is in the output buffer of the
7287 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7288 * operations) will never be smaller than the few bytes we need. */
7291 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7293 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7301 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7302 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7304 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7305 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7309 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7310 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7315 slave
->repldboff
+= nwritten
;
7316 if (slave
->repldboff
== slave
->repldbsize
) {
7317 close(slave
->repldbfd
);
7318 slave
->repldbfd
= -1;
7319 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7320 slave
->replstate
= REDIS_REPL_ONLINE
;
7321 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7322 sendReplyToClient
, slave
) == AE_ERR
) {
7326 addReplySds(slave
,sdsempty());
7327 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7331 /* This function is called at the end of every backgrond saving.
7332 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7333 * otherwise REDIS_ERR is passed to the function.
7335 * The goal of this function is to handle slaves waiting for a successful
7336 * background saving in order to perform non-blocking synchronization. */
/* Walk server.slaves after a background save has terminated.
 *
 * Slaves in REDIS_REPL_WAIT_BGSAVE_START are moved to
 * REDIS_REPL_WAIT_BGSAVE_END. For slaves already in
 * REDIS_REPL_WAIT_BGSAVE_END: if 'bgsaveerr' is not REDIS_OK the failure
 * is logged; otherwise server.dbfilename is opened and stat-ed, the
 * transfer offset and size are initialised, the slave enters
 * REDIS_REPL_SEND_BULK and sendBulkToSlave is installed as its writable
 * handler.
 * A new background save may then be started with rdbSaveBackground(); if
 * that fails the slaves list is walked again looking for slaves in
 * REDIS_REPL_WAIT_BGSAVE_START.
 * NOTE(review): the use of 'startbgsave' and the actions taken on the
 * failure paths are elided from this excerpt. */
7337 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7339 int startbgsave
= 0;
7342 listRewind(server
.slaves
,&li
);
7343 while((ln
= listNext(&li
))) {
7344 redisClient
*slave
= ln
->value
;
7346 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7348 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7349 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7350 struct redis_stat buf
;
7352 if (bgsaveerr
!= REDIS_OK
) {
7354 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7357 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7358 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7360 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7363 slave
->repldboff
= 0;
7364 slave
->repldbsize
= buf
.st_size
;
7365 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7366 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7367 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7374 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7377 listRewind(server
.slaves
,&li
);
7378 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7379 while((ln
= listNext(&li
))) {
7380 redisClient
*slave
= ln
->value
;
7382 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
/* Perform the initial synchronization with the master, blocking.
 *
 * Visible steps, in order: connect to server.masterhost:masterport; if
 * server.masterauth is set send "AUTH <password>" and require a reply
 * starting with '+'; send SYNC; read the bulk length line (it must start
 * with '$'); create a uniquely named temp file (O_EXCL) and copy the
 * payload from the socket into it in chunks of at most 1024 bytes; rename
 * the temp file to server.dbfilename; load it with rdbLoad(); finally
 * wrap the socket into a client flagged REDIS_MASTER and already
 * authenticated, and set server.replstate to REDIS_REPL_CONNECTED.
 * Each failure is logged with redisLog().
 * NOTE(review): the cleanup and return statements of the error paths are
 * elided from this excerpt.
 * NOTE(review): the AUTH command is formatted with snprintf() into a 1024
 * byte buffer, but the length passed to syncWrite() is computed as
 * strlen(server.masterauth)+7; with a password longer than 1017 bytes the
 * write would read past the end of 'authcmd'. */
7389 static int syncWithMaster(void) {
7390 char buf
[1024], tmpfile
[256], authcmd
[1024];
7392 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7393 int dfd
, maxtries
= 5;
7396 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7401 /* AUTH with the master if required. */
7402 if(server
.masterauth
) {
7403 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7404 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7406 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7410 /* Read the AUTH result. */
7411 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7413 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7417 if (buf
[0] != '+') {
7419 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7424 /* Issue the SYNC command */
7425 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7427 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7431 /* Read the bulk write count */
7432 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7434 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7438 if (buf
[0] != '$') {
7440 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7443 dumpsize
= strtol(buf
+1,NULL
,10);
7444 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7445 /* Read the bulk write data on a temp file */
7447 snprintf(tmpfile
,256,
7448 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7449 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7450 if (dfd
!= -1) break;
7455 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7459 int nread
, nwritten
;
7461 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7463 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7469 nwritten
= write(dfd
,buf
,nread
);
7470 if (nwritten
== -1) {
7471 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7479 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7480 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7486 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7487 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7491 server
.master
= createClient(fd
);
7492 server
.master
->flags
|= REDIS_MASTER
;
7493 server
.master
->authenticated
= 1;
7494 server
.replstate
= REDIS_REPL_CONNECTED
;
7498 static void slaveofCommand(redisClient
*c
) {
7499 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7500 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7501 if (server
.masterhost
) {
7502 sdsfree(server
.masterhost
);
7503 server
.masterhost
= NULL
;
7504 if (server
.master
) freeClient(server
.master
);
7505 server
.replstate
= REDIS_REPL_NONE
;
7506 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7509 sdsfree(server
.masterhost
);
7510 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7511 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7512 if (server
.master
) freeClient(server
.master
);
7513 server
.replstate
= REDIS_REPL_CONNECT
;
7514 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7515 server
.masterhost
, server
.masterport
);
7517 addReply(c
,shared
.ok
);
7520 /* ============================ Maxmemory directive ======================== */
7522 /* Try to free one object form the pre-allocated objects free list.
7523 * This is useful under low mem conditions as by default we take 1 million
7524 * free objects allocated. On success REDIS_OK is returned, otherwise
/* Take one object off server.objfreelist, if the list is not empty: the
 * head node is fetched, its value is read into 'o' and the node is
 * deleted from the list. When server.vm_enabled is set the whole
 * operation runs with server.obj_freelist_mutex held; the mutex is
 * released on both the "list not empty" and the "list empty" paths.
 * NOTE(review): the release of 'o' and the return statements are elided
 * from this excerpt; the caller in this file compares the result with
 * REDIS_OK. */
7526 static int tryFreeOneObjectFromFreelist(void) {
7529 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7530 if (listLength(server
.objfreelist
)) {
7531 listNode
*head
= listFirst(server
.objfreelist
);
7532 o
= listNodeValue(head
);
7533 listDelNode(server
.objfreelist
,head
);
7534 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7538 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7543 /* This function gets called when 'maxmemory' is set on the config file to limit
7544 * the max memory used by the server, and we are out of memory.
7545 * This function will try to, in order:
7547 * - Free objects from the free list
7548 * - Try to remove keys with an EXPIRE set
7550 * It is not possible to free enough memory to reach used-memory < maxmemory
7551 * the server will start refusing commands that will enlarge even more the
/* While server.maxmemory is set and zmalloc_used_memory() exceeds it, try
 * to release memory.
 *
 * Each round first tries tryFreeOneObjectFromFreelist(); when that
 * succeeds the loop simply re-checks the memory usage. Otherwise every
 * database that has keys with an expire is visited: three random entries
 * are sampled from its expires table, the one with the smallest expire
 * time is remembered as 'minkey' and then deleted with deleteKey(). The
 * function returns as soon as a round ends with 'freed' still zero.
 * NOTE(review): the bookkeeping of 'minttl' and 'freed' is elided from
 * this excerpt. */
7554 static void freeMemoryIfNeeded(void) {
7555 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7556 int j
, k
, freed
= 0;
7558 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7559 for (j
= 0; j
< server
.dbnum
; j
++) {
7561 robj
*minkey
= NULL
;
7562 struct dictEntry
*de
;
7564 if (dictSize(server
.db
[j
].expires
)) {
7566 /* From a sample of three keys drop the one nearest to
7567 * the natural expire */
7568 for (k
= 0; k
< 3; k
++) {
7571 de
= dictGetRandomKey(server
.db
[j
].expires
);
7572 t
= (time_t) dictGetEntryVal(de
);
7573 if (minttl
== -1 || t
< minttl
) {
7574 minkey
= dictGetEntryKey(de
);
7578 deleteKey(server
.db
+j
,minkey
);
7581 if (!freed
) return; /* nothing to free... */
7585 /* ============================== Append Only file ========================== */
/* Append the command 'cmd', with arguments argv[0..argc-1], executed
 * against database 'dictid', to the append only file.
 *
 * The command is serialised in the multi bulk protocol into a temporary
 * sds buffer. A SELECT command is emitted first whenever 'dictid' differs
 * from server.appendseldb (which is then updated). EXPIRE is rewritten as
 * EXPIREAT with the absolute time time(NULL)+seconds, using temporary
 * objects that are released after serialisation. Arguments are decoded
 * with getDecodedObject() before being emitted.
 * The buffer is written to server.appendfd with a single write(); a
 * failed or short write is logged as a reason to exit. While a background
 * rewrite is in progress the same bytes are also appended to
 * server.bgrewritebuf. fsync() is issued when the policy is
 * APPENDFSYNC_ALWAYS, or APPENDFSYNC_EVERYSEC with more than one second
 * elapsed since server.lastfsync, which is then updated.
 * NOTE(review): several lines (declarations, the switch of 'argv' to the
 * temporary vector, the release of decoded objects and of 'buf') are
 * elided from this excerpt. */
7587 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7588 sds buf
= sdsempty();
7594 /* The DB this command was targetting is not the same as the last command
7595 * we appendend. To issue a SELECT command is needed. */
7596 if (dictid
!= server
.appendseldb
) {
7599 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7600 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7601 (unsigned long)strlen(seldb
),seldb
);
7602 server
.appendseldb
= dictid
;
7605 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7606 * EXPIREs into EXPIREATs calls */
7607 if (cmd
->proc
== expireCommand
) {
7610 tmpargv
[0] = createStringObject("EXPIREAT",8);
7611 tmpargv
[1] = argv
[1];
7612 incrRefCount(argv
[1]);
7613 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7614 tmpargv
[2] = createObject(REDIS_STRING
,
7615 sdscatprintf(sdsempty(),"%ld",when
));
7619 /* Append the actual command */
7620 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7621 for (j
= 0; j
< argc
; j
++) {
7624 o
= getDecodedObject(o
);
7625 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7626 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7627 buf
= sdscatlen(buf
,"\r\n",2);
7631 /* Free the objects from the modified argv for EXPIREAT */
7632 if (cmd
->proc
== expireCommand
) {
7633 for (j
= 0; j
< 3; j
++)
7634 decrRefCount(argv
[j
]);
7637 /* We want to perform a single write. This should be guaranteed atomic
7638 * at least if the filesystem we are writing is a real physical one.
7639 * While this will save us against the server being killed I don't think
7640 * there is much to do about the whole server stopping for power problems
7642 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7643 if (nwritten
!= (signed)sdslen(buf
)) {
7644 /* Ooops, we are in troubles. The best thing to do for now is
7645 * to simply exit instead to give the illusion that everything is
7646 * working as expected. */
7647 if (nwritten
== -1) {
7648 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7650 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7654 /* If a background append only file rewriting is in progress we want to
7655 * accumulate the differences between the child DB and the current one
7656 * in a buffer, so that when the child process will do its work we
7657 * can append the differences to the new append only file. */
7658 if (server
.bgrewritechildpid
!= -1)
7659 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7663 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7664 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7665 now
-server
.lastfsync
> 1))
7667 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7668 server
.lastfsync
= now
;
7672 /* In Redis commands are always executed in the context of a client, so in
7673 * order to load the append only file we need to create a fake client. */
/* Allocate a redisClient with zmalloc() and initialise the fields needed
 * to run commands without a real connection: an empty query buffer, the
 * replication state REDIS_REPL_WAIT_BGSAVE_START and an empty reply list
 * whose free method is decrRefCount and whose dup method is
 * dupClientReplyValue.
 * NOTE(review): the initialisation of the remaining fields and the return
 * statement are elided from this excerpt. */
7674 static struct redisClient
*createFakeClient(void) {
7675 struct redisClient
*c
= zmalloc(sizeof(*c
));
7679 c
->querybuf
= sdsempty();
7683 /* We set the fake client as a slave waiting for the synchronization
7684 * so that Redis will not try to send replies to this client. */
7685 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7686 c
->reply
= listCreate();
7687 listSetFreeMethod(c
->reply
,decrRefCount
);
7688 listSetDupMethod(c
->reply
,dupClientReplyValue
);
/* Release the resources of a client obtained from createFakeClient():
 * the query buffer and the reply list.
 * NOTE(review): the rest of the function is elided from this excerpt;
 * presumably the structure itself is freed as well -- confirm. */
7692 static void freeFakeClient(struct redisClient
*c
) {
7693 sdsfree(c
->querybuf
);
7694 listRelease(c
->reply
);
7698 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
7699 * error (the append only file is zero-length) REDIS_ERR is returned. On
7700 * fatal error an error message is logged and the program exits. */
/* Load the dataset by replaying the commands stored in 'filename'.
 *
 * Every command is read in the multi bulk format: a line starting with
 * '*', then for each argument a line starting with '$' carrying the
 * length, the payload and a trailing CRLF. The command is looked up by
 * name (an unknown name is logged), its arguments optionally go through
 * object sharing / encoding, and it is executed through a fake client
 * whose reply list is emptied after each command. With VM enabled, every
 * 5000 loaded commands objects are swapped out while the used memory
 * exceeds server.vm_max_memory.
 * Read errors and format errors jump to the 'readerr' / 'fmterr' labels,
 * where a message is logged.
 * NOTE(review): fileno(fp) is evaluated in the zero-length check before
 * the "can't open" failure is handled, so a failed fopen() reaches
 * fileno() with a NULL stream; the check should be guarded by fp != NULL.
 * NOTE(review): declarations, the return statements and the statements
 * following each log message are elided from this excerpt. */
7701 int loadAppendOnlyFile(char *filename
) {
7702 struct redisClient
*fakeClient
;
7703 FILE *fp
= fopen(filename
,"r");
7704 struct redis_stat sb
;
7705 unsigned long long loadedkeys
= 0;
7707 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7711 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7715 fakeClient
= createFakeClient();
7722 struct redisCommand
*cmd
;
7724 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7730 if (buf
[0] != '*') goto fmterr
;
7732 argv
= zmalloc(sizeof(robj
*)*argc
);
7733 for (j
= 0; j
< argc
; j
++) {
7734 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7735 if (buf
[0] != '$') goto fmterr
;
7736 len
= strtol(buf
+1,NULL
,10);
7737 argsds
= sdsnewlen(NULL
,len
);
7738 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7739 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7740 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7743 /* Command lookup */
7744 cmd
= lookupCommand(argv
[0]->ptr
);
7746 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7749 /* Try object sharing and encoding */
7750 if (server
.shareobjects
) {
7752 for(j
= 1; j
< argc
; j
++)
7753 argv
[j
] = tryObjectSharing(argv
[j
]);
7755 if (cmd
->flags
& REDIS_CMD_BULK
)
7756 tryObjectEncoding(argv
[argc
-1]);
7757 /* Run the command in the context of a fake client */
7758 fakeClient
->argc
= argc
;
7759 fakeClient
->argv
= argv
;
7760 cmd
->proc(fakeClient
);
7761 /* Discard the reply objects list from the fake client */
7762 while(listLength(fakeClient
->reply
))
7763 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7764 /* Clean up, ready for the next command */
7765 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7767 /* Handle swapping while loading big datasets when VM is on */
7769 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7770 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7771 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7776 freeFakeClient(fakeClient
);
7781 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7783 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7787 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7791 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7792 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7796 /* Avoid the incr/decr ref count business if possible to help
7797 * copy-on-write (we are often in a child process when this function
7799 * Also makes sure that key objects don't get incrRefCount-ed when VM
7801 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7802 obj
= getDecodedObject(obj
);
7805 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7806 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7807 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7809 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7810 if (decrrc
) decrRefCount(obj
);
7813 if (decrrc
) decrRefCount(obj
);
7817 /* Write a binary-safe string into a file in the bulk format
7818 * $<count>\r\n<payload>\r\n */
7819 static int fwriteBulkString(FILE *fp
, char *s
, unsigned long len
) {
7822 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(unsigned long)len
);
7823 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7824 if (len
&& fwrite(s
,len
,1,fp
) == 0) return 0;
7825 if (fwrite("\r\n",2,1,fp
) == 0) return 0;
7829 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
7830 static int fwriteBulkDouble(FILE *fp
, double d
) {
7831 char buf
[128], dbuf
[128];
7833 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
7834 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
7835 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7836 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
7840 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
7841 static int fwriteBulkLong(FILE *fp
, long l
) {
7842 char buf
[128], lbuf
[128];
7844 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
7845 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
7846 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7847 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
7851 /* Write a sequence of commands able to fully rebuild the dataset into
7852 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7853 static int rewriteAppendOnlyFile(char *filename
) {
7854 dictIterator
*di
= NULL
;
7859 time_t now
= time(NULL
);
7861 /* Note that we have to use a different temp name here compared to the
7862 * one used by rewriteAppendOnlyFileBackground() function. */
7863 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7864 fp
= fopen(tmpfile
,"w");
7866 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7869 for (j
= 0; j
< server
.dbnum
; j
++) {
7870 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7871 redisDb
*db
= server
.db
+j
;
7873 if (dictSize(d
) == 0) continue;
7874 di
= dictGetIterator(d
);
7880 /* SELECT the new DB */
7881 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7882 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7884 /* Iterate this DB writing every entry */
7885 while((de
= dictNext(di
)) != NULL
) {
7890 key
= dictGetEntryKey(de
);
7891 /* If the value for this key is swapped, load a preview in memory.
7892 * We use a "swapped" flag to remember whether we need to free the
7893 * value object afterwards, instead of just incrementing the ref count
7894 * anyway, in order to avoid copy-on-write of pages if we are forked() */
7895 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7896 key
->storage
== REDIS_VM_SWAPPING
) {
7897 o
= dictGetEntryVal(de
);
7900 o
= vmPreviewObject(key
);
7903 expiretime
= getExpire(db
,key
);
7905 /* Save the key and associated value */
7906 if (o
->type
== REDIS_STRING
) {
7907 /* Emit a SET command */
7908 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7909 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7911 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7912 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7913 } else if (o
->type
== REDIS_LIST
) {
7914 /* Emit the RPUSHes needed to rebuild the list */
7915 list
*list
= o
->ptr
;
7919 listRewind(list
,&li
);
7920 while((ln
= listNext(&li
))) {
7921 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7922 robj
*eleobj
= listNodeValue(ln
);
7924 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7925 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7926 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7928 } else if (o
->type
== REDIS_SET
) {
7929 /* Emit the SADDs needed to rebuild the set */
7931 dictIterator
*di
= dictGetIterator(set
);
7934 while((de
= dictNext(di
)) != NULL
) {
7935 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7936 robj
*eleobj
= dictGetEntryKey(de
);
7938 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7939 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7940 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7942 dictReleaseIterator(di
);
7943 } else if (o
->type
== REDIS_ZSET
) {
7944 /* Emit the ZADDs needed to rebuild the sorted set */
7946 dictIterator
*di
= dictGetIterator(zs
->dict
);
7949 while((de
= dictNext(di
)) != NULL
) {
7950 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7951 robj
*eleobj
= dictGetEntryKey(de
);
7952 double *score
= dictGetEntryVal(de
);
7954 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7955 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7956 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7957 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7959 dictReleaseIterator(di
);
7960 } else if (o
->type
== REDIS_HASH
) {
7961 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7963 /* Emit the HSETs needed to rebuild the hash */
7964 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7965 unsigned char *p
= zipmapRewind(o
->ptr
);
7966 unsigned char *field
, *val
;
7967 unsigned int flen
, vlen
;
7969 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7970 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7971 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7972 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7974 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7978 dictIterator
*di
= dictGetIterator(o
->ptr
);
7981 while((de
= dictNext(di
)) != NULL
) {
7982 robj
*field
= dictGetEntryKey(de
);
7983 robj
*val
= dictGetEntryVal(de
);
7985 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7986 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7987 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7988 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7990 dictReleaseIterator(di
);
7995 /* Save the expire time */
7996 if (expiretime
!= -1) {
7997 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7998 /* If this key is already expired skip it */
7999 if (expiretime
< now
) continue;
8000 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8001 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8002 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8004 if (swapped
) decrRefCount(o
);
8006 dictReleaseIterator(di
);
8009 /* Make sure data will not remain on the OS's output buffers */
8014 /* Use RENAME to make sure the append only file is changed atomically only
8015 * if the generated file is ok. */
8016 if (rename(tmpfile
,filename
) == -1) {
8017 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8021 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8027 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8028 if (di
) dictReleaseIterator(di
);
8032 /* This is how rewriting of the append only file in background works:
8034 * 1) The user calls BGREWRITEAOF
8035 * 2) Redis calls this function, which forks():
8036 * 2a) the child rewrites the append only file in a temp file.
8037 * 2b) the parent accumulates differences in server.bgrewritebuf.
8038 * 3) When the child has finished '2a' it exits.
8039 * 4) The parent will trap the exit code and, if it's OK, will append the
8040 * data accumulated in server.bgrewritebuf to the temp file, and
8041 * finally will rename(2) the temp file to the actual file name.
8042 * Then the new file is reopened as the new append only file. Profit!
8044 static int rewriteAppendOnlyFileBackground(void) {
8047 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8048 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8049 if ((childpid
= fork()) == 0) {
8053 if (server
.vm_enabled
) vmReopenSwapFile();
8055 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8056 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8063 if (childpid
== -1) {
8064 redisLog(REDIS_WARNING
,
8065 "Can't rewrite append only file in background: fork: %s",
8069 redisLog(REDIS_NOTICE
,
8070 "Background append only file rewriting started by pid %d",childpid
);
8071 server
.bgrewritechildpid
= childpid
;
8072 /* We set appendseldb to -1 in order to force the next call to the
8073 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8074 * accumulated by the parent into server.bgrewritebuf will start
8075 * with a SELECT statement and it will be safe to merge. */
8076 server
.appendseldb
= -1;
8079 return REDIS_OK
; /* unreached */
8082 static void bgrewriteaofCommand(redisClient
*c
) {
8083 if (server
.bgrewritechildpid
!= -1) {
8084 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8087 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8088 char *status
= "+Background append only file rewriting started\r\n";
8089 addReplySds(c
,sdsnew(status
));
8091 addReply(c
,shared
.err
);
8095 static void aofRemoveTempFile(pid_t childpid
) {
8098 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8102 /* Virtual Memory is composed mainly of two subsystems:
8103 * - Blocking Virtual Memory
8104 * - Threaded Virtual Memory I/O
8105 * The two parts are not fully decoupled, but functions are split among two
8106 * different sections of the source code (delimited by comments) in order to
8107 * make it clearer which functionality belongs to the blocking VM and which to
8108 * the threaded (non-blocking) VM.
8112 * Redis VM is a blocking VM (one that blocks reading swapped values from
8113 * disk into memory when a value swapped out is needed in memory) that is made
8114 * unblocking by trying to examine the command argument vector in order to
8115 * load in background values that will likely be needed in order to exec
8116 * the command. The command is executed only once all the relevant keys
8117 * are loaded into memory.
8119 * This is basically almost as simple as a blocking VM, but almost as parallel
8120 * as a fully non-blocking VM.
8123 /* =================== Virtual Memory - Blocking Side ====================== */
8125 /* substitute the first occurrence of '%p' with the process pid in the
8126 * swap file name. */
8127 static void expandVmSwapFilename(void) {
8128 char *p
= strstr(server
.vm_swap_file
,"%p");
8134 new = sdscat(new,server
.vm_swap_file
);
8135 new = sdscatprintf(new,"%ld",(long) getpid());
8136 new = sdscat(new,p
+2);
8137 zfree(server
.vm_swap_file
);
8138 server
.vm_swap_file
= new;
8141 static void vmInit(void) {
8146 if (server
.vm_max_threads
!= 0)
8147 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8149 expandVmSwapFilename();
8150 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8151 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8152 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8154 if (server
.vm_fp
== NULL
) {
8155 redisLog(REDIS_WARNING
,
8156 "Impossible to open the swap file: %s. Exiting.",
8160 server
.vm_fd
= fileno(server
.vm_fp
);
8161 server
.vm_next_page
= 0;
8162 server
.vm_near_pages
= 0;
8163 server
.vm_stats_used_pages
= 0;
8164 server
.vm_stats_swapped_objects
= 0;
8165 server
.vm_stats_swapouts
= 0;
8166 server
.vm_stats_swapins
= 0;
8167 totsize
= server
.vm_pages
*server
.vm_page_size
;
8168 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8169 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8170 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8174 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8176 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8177 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8178 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8179 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8181 /* Initialize threaded I/O (used by Virtual Memory) */
8182 server
.io_newjobs
= listCreate();
8183 server
.io_processing
= listCreate();
8184 server
.io_processed
= listCreate();
8185 server
.io_ready_clients
= listCreate();
8186 pthread_mutex_init(&server
.io_mutex
,NULL
);
8187 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8188 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8189 server
.io_active_threads
= 0;
8190 if (pipe(pipefds
) == -1) {
8191 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8195 server
.io_ready_pipe_read
= pipefds
[0];
8196 server
.io_ready_pipe_write
= pipefds
[1];
8197 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8198 /* LZF requires a lot of stack */
8199 pthread_attr_init(&server
.io_threads_attr
);
8200 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8201 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8202 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8203 /* Listen for events in the threaded I/O pipe */
8204 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8205 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8206 oom("creating file event");
8209 /* Mark the page as used */
8210 static void vmMarkPageUsed(off_t page
) {
8211 off_t byte
= page
/8;
8213 redisAssert(vmFreePage(page
) == 1);
8214 server
.vm_bitmap
[byte
] |= 1<<bit
;
8217 /* Mark N contiguous pages as used, with 'page' being the first. */
8218 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8221 for (j
= 0; j
< count
; j
++)
8222 vmMarkPageUsed(page
+j
);
8223 server
.vm_stats_used_pages
+= count
;
8224 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8225 (long long)count
, (long long)page
);
8228 /* Mark the page as free */
8229 static void vmMarkPageFree(off_t page
) {
8230 off_t byte
= page
/8;
8232 redisAssert(vmFreePage(page
) == 0);
8233 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8236 /* Mark N contiguous pages as free, with 'page' being the first. */
8237 static void vmMarkPagesFree(off_t page
, off_t count
) {
8240 for (j
= 0; j
< count
; j
++)
8241 vmMarkPageFree(page
+j
);
8242 server
.vm_stats_used_pages
-= count
;
8243 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8244 (long long)count
, (long long)page
);
8247 /* Test if the page is free */
8248 static int vmFreePage(off_t page
) {
8249 off_t byte
= page
/8;
8251 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8254 /* Find N contiguous free pages storing the first page of the cluster in *first.
8255 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8256 * REDIS_ERR is returned.
8258 * This function uses a simple algorithm: we try to allocate
8259 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8260 * again from the start of the swap file searching for free spaces.
8262 * If it looks pretty clear that there are no free pages near our offset
8263 * we try to find less populated places doing a forward jump of
8264 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8265 * without hurry, and then we jump again and so forth...
8267 * This function can be improved using a free list to avoid guessing
8268 * too much, since we could collect data about freed pages.
8270 * note: I implemented this function just after watching an episode of
8271 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8273 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8274 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8276 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8277 server
.vm_near_pages
= 0;
8278 server
.vm_next_page
= 0;
8280 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8281 base
= server
.vm_next_page
;
8283 while(offset
< server
.vm_pages
) {
8284 off_t
this = base
+offset
;
8286 /* If we overflow, restart from page zero */
8287 if (this >= server
.vm_pages
) {
8288 this -= server
.vm_pages
;
8290 /* Just overflowed, what we found on tail is no longer
8291 * interesting, as it's no longer contiguous. */
8295 if (vmFreePage(this)) {
8296 /* This is a free page */
8298 /* Already got N free pages? Return to the caller, with success */
8300 *first
= this-(n
-1);
8301 server
.vm_next_page
= this+1;
8302 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8306 /* The current one is not a free page */
8310 /* Fast-forward if the current page is not free and we already
8311 * searched enough near this place. */
8313 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8314 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8316 /* Note that even if we rewind after the jump, we don't need
8317 * to make sure numfree is set to zero as we only jump *if* it
8318 * is set to zero. */
8320 /* Otherwise just check the next page */
8327 /* Write the specified object at the specified page of the swap file */
8328 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8329 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8330 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8331 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8332 redisLog(REDIS_WARNING
,
8333 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8337 rdbSaveObject(server
.vm_fp
,o
);
8338 fflush(server
.vm_fp
);
8339 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8343 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8344 * needed to later retrieve the object into the key object.
8345 * If we can't find enough contiguous empty pages to swap the object on disk
8346 * REDIS_ERR is returned. */
8347 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8348 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8351 assert(key
->storage
== REDIS_VM_MEMORY
);
8352 assert(key
->refcount
== 1);
8353 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8354 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8355 key
->vm
.page
= page
;
8356 key
->vm
.usedpages
= pages
;
8357 key
->storage
= REDIS_VM_SWAPPED
;
8358 key
->vtype
= val
->type
;
8359 decrRefCount(val
); /* Deallocate the object from memory. */
8360 vmMarkPagesUsed(page
,pages
);
8361 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8362 (unsigned char*) key
->ptr
,
8363 (unsigned long long) page
, (unsigned long long) pages
);
8364 server
.vm_stats_swapped_objects
++;
8365 server
.vm_stats_swapouts
++;
8369 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8372 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8373 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8374 redisLog(REDIS_WARNING
,
8375 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8379 o
= rdbLoadObject(type
,server
.vm_fp
);
8381 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8384 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8388 /* Load the value object relative to the 'key' object from swap to memory.
8389 * The newly allocated object is returned.
8391 * If preview is true the unserialized object is returned to the caller but
8392 * no changes are made to the key object, nor are the pages marked as freed */
8393 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8396 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8397 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8399 key
->storage
= REDIS_VM_MEMORY
;
8400 key
->vm
.atime
= server
.unixtime
;
8401 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8402 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8403 (unsigned char*) key
->ptr
);
8404 server
.vm_stats_swapped_objects
--;
8406 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8407 (unsigned char*) key
->ptr
);
8409 server
.vm_stats_swapins
++;
8413 /* Plain object loading, from swap to memory */
8414 static robj
*vmLoadObject(robj
*key
) {
8415 /* If we are loading the object in background, stop it, we
8416 * need to load this object synchronously ASAP. */
8417 if (key
->storage
== REDIS_VM_LOADING
)
8418 vmCancelThreadedIOJob(key
);
8419 return vmGenericLoadObject(key
,0);
8422 /* Just load the value from disk, without modifying the key.
8423 * This is useful when we want to perform some operation on the value
8424 * without really bringing it from swap to memory, like while saving the
8425 * dataset or rewriting the append only log. */
8426 static robj
*vmPreviewObject(robj
*key
) {
8427 return vmGenericLoadObject(key
,1);
8430 /* How good a candidate is this object for swapping?
8431 * The better a candidate it is, the greater the returned value.
8433 * Currently we try to perform a fast estimation of the object size in
8434 * memory, and combine it with aging information.
8436 * Basically swappability = idle-time * log(estimated size)
8438 * Bigger objects are preferred over smaller objects, but not
8439 * proportionally, this is why we use the logarithm. This algorithm is
8440 * just a first try and will probably be tuned later. */
8441 static double computeObjectSwappability(robj
*o
) {
8442 time_t age
= server
.unixtime
- o
->vm
.atime
;
8446 struct dictEntry
*de
;
8449 if (age
<= 0) return 0;
8452 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8455 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8460 listNode
*ln
= listFirst(l
);
8462 asize
= sizeof(list
);
8464 robj
*ele
= ln
->value
;
8467 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8468 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8470 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8475 z
= (o
->type
== REDIS_ZSET
);
8476 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8478 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8479 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8484 de
= dictGetRandomKey(d
);
8485 ele
= dictGetEntryKey(de
);
8486 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8487 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8489 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8490 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8494 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8495 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8496 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8497 unsigned int klen
, vlen
;
8498 unsigned char *key
, *val
;
8500 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8504 asize
= len
*(klen
+vlen
+3);
8505 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8507 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8512 de
= dictGetRandomKey(d
);
8513 ele
= dictGetEntryKey(de
);
8514 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8515 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8517 ele
= dictGetEntryVal(de
);
8518 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8519 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8521 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8526 return (double)age
*log(1+asize
);
8529 /* Try to swap an object that's a good candidate for swapping.
8530 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8531 * to swap any object at all.
8533 * If 'usethreads' is true, Redis will try to swap the object in background
8534 * using I/O threads. */
8535 static int vmSwapOneObject(int usethreads
) {
8537 struct dictEntry
*best
= NULL
;
8538 double best_swappability
= 0;
8539 redisDb
*best_db
= NULL
;
8542 for (j
= 0; j
< server
.dbnum
; j
++) {
8543 redisDb
*db
= server
.db
+j
;
8544 /* Why maxtries is set to 100?
8545 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8546 * are swappable objects */
8549 if (dictSize(db
->dict
) == 0) continue;
8550 for (i
= 0; i
< 5; i
++) {
8552 double swappability
;
8554 if (maxtries
) maxtries
--;
8555 de
= dictGetRandomKey(db
->dict
);
8556 key
= dictGetEntryKey(de
);
8557 val
= dictGetEntryVal(de
);
8558 /* Only swap objects that are currently in memory.
8560 * Also don't swap shared objects if threaded VM is on, as we
8561 * try to ensure that the main thread does not touch the
8562 * object while the I/O thread is using it, but we can't
8563 * control other keys without adding additional mutex. */
8564 if (key
->storage
!= REDIS_VM_MEMORY
||
8565 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8566 if (maxtries
) i
--; /* don't count this try */
8569 swappability
= computeObjectSwappability(val
);
8570 if (!best
|| swappability
> best_swappability
) {
8572 best_swappability
= swappability
;
8577 if (best
== NULL
) return REDIS_ERR
;
8578 key
= dictGetEntryKey(best
);
8579 val
= dictGetEntryVal(best
);
8581 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8582 key
->ptr
, best_swappability
);
8584 /* Unshare the key if needed */
8585 if (key
->refcount
> 1) {
8586 robj
*newkey
= dupStringObject(key
);
8588 key
= dictGetEntryKey(best
) = newkey
;
8592 vmSwapObjectThreaded(key
,val
,best_db
);
8595 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8596 dictGetEntryVal(best
) = NULL
;
8604 static int vmSwapOneObjectBlocking() {
8605 return vmSwapOneObject(0);
8608 static int vmSwapOneObjectThreaded() {
8609 return vmSwapOneObject(1);
8612 /* Return true if it's safe to swap out objects at a given moment.
8613 * Basically we don't want to swap objects out while there is a BGSAVE
8614 * or a BGREWRITEAOF running in the background. */
8615 static int vmCanSwapOut(void) {
8616 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8619 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8620 * and was deleted. Otherwise 0 is returned. */
8621 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8625 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8626 foundkey
= dictGetEntryKey(de
);
8627 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8632 /* =================== Virtual Memory - Threaded I/O ======================= */
8634 static void freeIOJob(iojob
*j
) {
8635 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8636 j
->type
== REDIS_IOJOB_DO_SWAP
||
8637 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8638 decrRefCount(j
->val
);
8639 decrRefCount(j
->key
);
8643 /* Every time a thread finishes a job, it writes a byte into the write side
8644 * of a Unix pipe in order to "awake" the main thread, and this function
8646 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8650 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8652 REDIS_NOTUSED(mask
);
8653 REDIS_NOTUSED(privdata
);
8655 /* For every byte we read in the read side of the pipe, there is one
8656 * I/O job completed to process. */
8657 while((retval
= read(fd
,buf
,1)) == 1) {
8661 struct dictEntry
*de
;
8663 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8665 /* Get the processed element (the oldest one) */
8667 assert(listLength(server
.io_processed
) != 0);
8668 if (toprocess
== -1) {
8669 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8670 if (toprocess
<= 0) toprocess
= 1;
8672 ln
= listFirst(server
.io_processed
);
8674 listDelNode(server
.io_processed
,ln
);
8676 /* If this job is marked as canceled, just ignore it */
8681 /* Post process it in the main thread, as there are things we
8682 * can do just here to avoid race conditions and/or invasive locks */
8683 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8684 de
= dictFind(j
->db
->dict
,j
->key
);
8686 key
= dictGetEntryKey(de
);
8687 if (j
->type
== REDIS_IOJOB_LOAD
) {
8690 /* Key loaded, bring it at home */
8691 key
->storage
= REDIS_VM_MEMORY
;
8692 key
->vm
.atime
= server
.unixtime
;
8693 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8694 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8695 (unsigned char*) key
->ptr
);
8696 server
.vm_stats_swapped_objects
--;
8697 server
.vm_stats_swapins
++;
8698 dictGetEntryVal(de
) = j
->val
;
8699 incrRefCount(j
->val
);
8702 /* Handle clients waiting for this key to be loaded. */
8703 handleClientsBlockedOnSwappedKey(db
,key
);
8704 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8705 /* Now we know the amount of pages required to swap this object.
8706 * Let's find some space for it, and queue this task again
8707 * rebranded as REDIS_IOJOB_DO_SWAP. */
8708 if (!vmCanSwapOut() ||
8709 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8711 /* Ooops... no space or we can't swap as there is
8712 * a fork()ed Redis trying to save stuff on disk. */
8714 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8716 /* Note that we need to mark this pages as used now,
8717 * if the job will be canceled, we'll mark them as freed
8719 vmMarkPagesUsed(j
->page
,j
->pages
);
8720 j
->type
= REDIS_IOJOB_DO_SWAP
;
8725 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8728 /* Key swapped. We can finally free some memory. */
8729 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8730 printf("key->storage: %d\n",key
->storage
);
8731 printf("key->name: %s\n",(char*)key
->ptr
);
8732 printf("key->refcount: %d\n",key
->refcount
);
8733 printf("val: %p\n",(void*)j
->val
);
8734 printf("val->type: %d\n",j
->val
->type
);
8735 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8737 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8738 val
= dictGetEntryVal(de
);
8739 key
->vm
.page
= j
->page
;
8740 key
->vm
.usedpages
= j
->pages
;
8741 key
->storage
= REDIS_VM_SWAPPED
;
8742 key
->vtype
= j
->val
->type
;
8743 decrRefCount(val
); /* Deallocate the object from memory. */
8744 dictGetEntryVal(de
) = NULL
;
8745 redisLog(REDIS_DEBUG
,
8746 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8747 (unsigned char*) key
->ptr
,
8748 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8749 server
.vm_stats_swapped_objects
++;
8750 server
.vm_stats_swapouts
++;
8752 /* Put a few more swap requests in queue if we are still
8754 if (trytoswap
&& vmCanSwapOut() &&
8755 zmalloc_used_memory() > server
.vm_max_memory
)
8760 more
= listLength(server
.io_newjobs
) <
8761 (unsigned) server
.vm_max_threads
;
8763 /* Don't waste CPU time if swappable objects are rare. */
8764 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8772 if (processed
== toprocess
) return;
8774 if (retval
< 0 && errno
!= EAGAIN
) {
8775 redisLog(REDIS_WARNING
,
8776 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8781 static void lockThreadedIO(void) {
8782 pthread_mutex_lock(&server
.io_mutex
);
8785 static void unlockThreadedIO(void) {
8786 pthread_mutex_unlock(&server
.io_mutex
);
8789 /* Remove the specified object from the threaded I/O queue if still not
8790 * processed, otherwise make sure to flag it as canceled. */
8791 static void vmCancelThreadedIOJob(robj
*o
) {
8793 server
.io_newjobs
, /* 0 */
8794 server
.io_processing
, /* 1 */
8795 server
.io_processed
/* 2 */
8799 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8802 /* Search for a matching key in one of the queues */
8803 for (i
= 0; i
< 3; i
++) {
8807 listRewind(lists
[i
],&li
);
8808 while ((ln
= listNext(&li
)) != NULL
) {
8809 iojob
*job
= ln
->value
;
8811 if (job
->canceled
) continue; /* Skip this, already canceled. */
8812 if (compareStringObjects(job
->key
,o
) == 0) {
8813 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8814 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8815 /* Mark the pages as free since the swap didn't happen
8816 * or happened but is now discarded. */
8817 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8818 vmMarkPagesFree(job
->page
,job
->pages
);
8819 /* Cancel the job. It depends on the list the job is
8822 case 0: /* io_newjobs */
8823 /* If the job has not been processed yet, the best thing to do
8824 * is to remove it from the queue altogether. */
8826 listDelNode(lists
[i
],ln
);
8828 case 1: /* io_processing */
8829 /* Oh Shi- the thread is messing with the Job:
8831 * Probably it's accessing the object if this is a
8832 * PREPARE_SWAP or DO_SWAP job.
8833 * If it's a LOAD job it may be reading from disk and
8834 * if we don't wait for the job to terminate before we
8835 * cancel it, maybe in a few microseconds data can be
8836 * corrupted in this pages. So the short story is:
8838 * Better to wait for the job to move into the
8839 * next queue (processed)... */
8841 /* We try again and again until the job is completed. */
8843 /* But let's wait some time for the I/O thread
8844 * to finish with this job. After all this condition
8845 * should be very rare. */
8848 case 2: /* io_processed */
8849 /* The job was already processed, that's easy...
8850 * just mark it as canceled so that we'll ignore it
8851 * when processing completed jobs. */
8855 /* Finally we have to adjust the storage type of the object
8856 * in order to "UNDO" the operation. */
8857 if (o
->storage
== REDIS_VM_LOADING
)
8858 o
->storage
= REDIS_VM_SWAPPED
;
8859 else if (o
->storage
== REDIS_VM_SWAPPING
)
8860 o
->storage
= REDIS_VM_MEMORY
;
8867 assert(1 != 1); /* We should never reach this */
8870 static void *IOThreadEntryPoint(void *arg
) {
8875 pthread_detach(pthread_self());
8877 /* Get a new job to process */
8879 if (listLength(server
.io_newjobs
) == 0) {
8880 /* No new jobs in queue, exit. */
8881 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8882 (long) pthread_self());
8883 server
.io_active_threads
--;
8887 ln
= listFirst(server
.io_newjobs
);
8889 listDelNode(server
.io_newjobs
,ln
);
8890 /* Add the job in the processing queue */
8891 j
->thread
= pthread_self();
8892 listAddNodeTail(server
.io_processing
,j
);
8893 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8895 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8896 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8898 /* Process the Job */
8899 if (j
->type
== REDIS_IOJOB_LOAD
) {
8900 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8901 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8902 FILE *fp
= fopen("/dev/null","w+");
8903 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8905 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8906 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8910 /* Done: insert the job into the processed queue */
8911 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8912 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8914 listDelNode(server
.io_processing
,ln
);
8915 listAddNodeTail(server
.io_processed
,j
);
8918 /* Signal the main thread there is new stuff to process */
8919 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8921 return NULL
; /* never reached */
8924 static void spawnIOThread(void) {
8926 sigset_t mask
, omask
;
8930 sigaddset(&mask
,SIGCHLD
);
8931 sigaddset(&mask
,SIGHUP
);
8932 sigaddset(&mask
,SIGPIPE
);
8933 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8934 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8935 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8939 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8940 server
.io_active_threads
++;
8943 /* We need to wait for the last thread to exit before we are able to
8944 * fork() in order to BGSAVE or BGREWRITEAOF. */
8945 static void waitEmptyIOJobsQueue(void) {
8947 int io_processed_len
;
8950 if (listLength(server
.io_newjobs
) == 0 &&
8951 listLength(server
.io_processing
) == 0 &&
8952 server
.io_active_threads
== 0)
8957 /* While waiting for the empty jobs queue condition we post-process some
8958 * finished jobs, as I/O threads may be hanging trying to write against
8959 * the io_ready_pipe_write FD but there are so many pending jobs that
8961 io_processed_len
= listLength(server
.io_processed
);
8963 if (io_processed_len
) {
8964 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8965 usleep(1000); /* 1 millisecond */
8967 usleep(10000); /* 10 milliseconds */
8972 static void vmReopenSwapFile(void) {
8973 /* Note: we don't close the old one as we are in the child process
8974 * and don't want to mess at all with the original file object. */
8975 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8976 if (server
.vm_fp
== NULL
) {
8977 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8978 server
.vm_swap_file
);
8981 server
.vm_fd
= fileno(server
.vm_fp
);
8984 /* This function must be called while with threaded IO locked */
8985 static void queueIOJob(iojob
*j
) {
8986 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8987 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8988 listAddNodeTail(server
.io_newjobs
,j
);
8989 if (server
.io_active_threads
< server
.vm_max_threads
)
8993 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8996 assert(key
->storage
== REDIS_VM_MEMORY
);
8997 assert(key
->refcount
== 1);
8999 j
= zmalloc(sizeof(*j
));
9000 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9002 j
->key
= dupStringObject(key
);
9006 j
->thread
= (pthread_t
) -1;
9007 key
->storage
= REDIS_VM_SWAPPING
;
9015 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9017 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9018 * If there is not already a job loading the key, it is created.
9019 * The key is added to the io_keys list in the client structure, and also
9020 * in the hash table mapping swapped keys to waiting clients, that is,
9021 * c->db->io_keys. */
9022 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9023 struct dictEntry
*de
;
9027 /* If the key does not exist or is already in RAM we don't need to
9028 * block the client at all. */
9029 de
= dictFind(c
->db
->dict
,key
);
9030 if (de
== NULL
) return 0;
9031 o
= dictGetEntryKey(de
);
9032 if (o
->storage
== REDIS_VM_MEMORY
) {
9034 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9035 /* We were swapping the key, undo it! */
9036 vmCancelThreadedIOJob(o
);
9040 /* OK: the key is either swapped, or being loaded just now. */
9042 /* Add the key to the list of keys this client is waiting for.
9043 * This maps clients to keys they are waiting for. */
9044 listAddNodeTail(c
->io_keys
,key
);
9047 /* Add the client to the swapped keys => clients waiting map. */
9048 de
= dictFind(c
->db
->io_keys
,key
);
9052 /* For every key we take a list of clients blocked for it */
9054 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9056 assert(retval
== DICT_OK
);
9058 l
= dictGetEntryVal(de
);
9060 listAddNodeTail(l
,c
);
9062 /* Are we already loading the key from disk? If not create a job */
9063 if (o
->storage
== REDIS_VM_SWAPPED
) {
9066 o
->storage
= REDIS_VM_LOADING
;
9067 j
= zmalloc(sizeof(*j
));
9068 j
->type
= REDIS_IOJOB_LOAD
;
9070 j
->key
= dupStringObject(key
);
9071 j
->key
->vtype
= o
->vtype
;
9072 j
->page
= o
->vm
.page
;
9075 j
->thread
= (pthread_t
) -1;
9083 /* Preload keys needed for the ZUNION and ZINTER commands. */
9084 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9086 num
= atoi(c
->argv
[2]->ptr
);
9087 for (i
= 0; i
< num
; i
++) {
9088 waitForSwappedKey(c
,c
->argv
[3+i
]);
9092 /* Is this client attempting to run a command against swapped keys?
9093 * If so, block it ASAP, load the keys in background, then resume it.
9095 * The important idea about this function is that it can fail! If keys will
9096 * still be swapped when the client is resumed, these key lookups will
9097 * just block loading keys from disk. In practical terms this should only
9098 * happen with SORT BY command or if there is a bug in this function.
9100 * Return 1 if the client is marked as blocked, 0 if the client can
9101 * continue as the keys it is going to access appear to be in memory. */
9102 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9105 if (cmd
->vm_preload_proc
!= NULL
) {
9106 cmd
->vm_preload_proc(c
);
9108 if (cmd
->vm_firstkey
== 0) return 0;
9109 last
= cmd
->vm_lastkey
;
9110 if (last
< 0) last
= c
->argc
+last
;
9111 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9112 waitForSwappedKey(c
,c
->argv
[j
]);
9115 /* If the client was blocked for at least one key, mark it as blocked. */
9116 if (listLength(c
->io_keys
)) {
9117 c
->flags
|= REDIS_IO_WAIT
;
9118 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9119 server
.vm_blocked_clients
++;
9126 /* Remove the 'key' from the list of blocked keys for a given client.
9128 * The function returns 1 when there are no longer blocking keys after
9129 * the current one was removed (and the client can be unblocked). */
9130 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9134 struct dictEntry
*de
;
9136 /* Remove the key from the list of keys this client is waiting for. */
9137 listRewind(c
->io_keys
,&li
);
9138 while ((ln
= listNext(&li
)) != NULL
) {
9139 if (compareStringObjects(ln
->value
,key
) == 0) {
9140 listDelNode(c
->io_keys
,ln
);
9146 /* Remove the client from the key => waiting clients map. */
9147 de
= dictFind(c
->db
->io_keys
,key
);
9149 l
= dictGetEntryVal(de
);
9150 ln
= listSearchKey(l
,c
);
9153 if (listLength(l
) == 0)
9154 dictDelete(c
->db
->io_keys
,key
);
9156 return listLength(c
->io_keys
) == 0;
9159 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9160 struct dictEntry
*de
;
9165 de
= dictFind(db
->io_keys
,key
);
9168 l
= dictGetEntryVal(de
);
9169 len
= listLength(l
);
9170 /* Note: we can't use something like while(listLength(l)) as the list
9171 * can be freed by the calling function when we remove the last element. */
9174 redisClient
*c
= ln
->value
;
9176 if (dontWaitForSwappedKey(c
,key
)) {
9177 /* Put the client in the list of clients ready to go as we
9178 * loaded all the keys about it. */
9179 listAddNodeTail(server
.io_ready_clients
,c
);
9184 /* =========================== Remote Configuration ========================= */
9186 static void configSetCommand(redisClient
*c
) {
9187 robj
*o
= getDecodedObject(c
->argv
[3]);
9188 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9189 zfree(server
.dbfilename
);
9190 server
.dbfilename
= zstrdup(o
->ptr
);
9191 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9192 zfree(server
.requirepass
);
9193 server
.requirepass
= zstrdup(o
->ptr
);
9194 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9195 zfree(server
.masterauth
);
9196 server
.masterauth
= zstrdup(o
->ptr
);
9197 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9198 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9200 addReplySds(c
,sdscatprintf(sdsempty(),
9201 "-ERR not supported CONFIG parameter %s\r\n",
9202 (char*)c
->argv
[2]->ptr
));
9207 addReply(c
,shared
.ok
);
9210 static void configGetCommand(redisClient
*c
) {
9211 robj
*o
= getDecodedObject(c
->argv
[2]);
9212 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9213 char *pattern
= o
->ptr
;
9217 decrRefCount(lenobj
);
9219 if (stringmatch(pattern
,"dbfilename",0)) {
9220 addReplyBulkCString(c
,"dbfilename");
9221 addReplyBulkCString(c
,server
.dbfilename
);
9224 if (stringmatch(pattern
,"requirepass",0)) {
9225 addReplyBulkCString(c
,"requirepass");
9226 addReplyBulkCString(c
,server
.requirepass
);
9229 if (stringmatch(pattern
,"masterauth",0)) {
9230 addReplyBulkCString(c
,"masterauth");
9231 addReplyBulkCString(c
,server
.masterauth
);
9234 if (stringmatch(pattern
,"maxmemory",0)) {
9237 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9238 addReplyBulkCString(c
,"maxmemory");
9239 addReplyBulkCString(c
,buf
);
9243 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9246 static void configCommand(redisClient
*c
) {
9247 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9248 if (c
->argc
!= 4) goto badarity
;
9249 configSetCommand(c
);
9250 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9251 if (c
->argc
!= 3) goto badarity
;
9252 configGetCommand(c
);
9253 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9254 if (c
->argc
!= 2) goto badarity
;
9255 server
.stat_numcommands
= 0;
9256 server
.stat_numconnections
= 0;
9257 server
.stat_expiredkeys
= 0;
9258 server
.stat_starttime
= time(NULL
);
9259 addReply(c
,shared
.ok
);
9261 addReplySds(c
,sdscatprintf(sdsempty(),
9262 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9267 addReplySds(c
,sdscatprintf(sdsempty(),
9268 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9269 (char*) c
->argv
[1]->ptr
));
9272 /* =========================== Pubsub implementation ======================== */
9274 /* Subscribe a client to a class. Returns 1 if the operation succeeded, or
9275 * 0 if the client was already subscribed to that class. */
9276 static int pubsubSubscribe(redisClient
*c
, robj
*class) {
9277 struct dictEntry
*de
;
9278 list
*clients
= NULL
;
9281 /* Add the class to the client -> classes hash table */
9282 if (dictAdd(c
->pubsub_classes
,class,NULL
) == DICT_OK
) {
9284 incrRefCount(class);
9285 /* Add the client to the class -> list of clients hash table */
9286 de
= dictFind(server
.pubsub_classes
,class);
9288 clients
= listCreate();
9289 dictAdd(server
.pubsub_classes
,class,clients
);
9290 incrRefCount(class);
9292 clients
= dictGetEntryVal(de
);
9294 listAddNodeTail(clients
,c
);
9296 /* Notify the client */
9297 addReply(c
,shared
.mbulk3
);
9298 addReply(c
,shared
.subscribebulk
);
9299 addReplyBulk(c
,class);
9300 addReplyLong(c
,dictSize(c
->pubsub_classes
));
9304 /* Unsubscribe a client from a class. Returns 1 if the operation succeeded, or
9305 * 0 if the client was not subscribed to the specified class. */
9306 static int pubsubUnsubscribe(redisClient
*c
, robj
*class, int notify
) {
9307 struct dictEntry
*de
;
9312 /* Remove the class from the client -> classes hash table */
9313 incrRefCount(class); /* class may be just a pointer to the same object
9314 we have in the hash tables. Protect it... */
9315 if (dictDelete(c
->pubsub_classes
,class) == DICT_OK
) {
9317 /* Remove the client from the class -> clients list hash table */
9318 de
= dictFind(server
.pubsub_classes
,class);
9320 clients
= dictGetEntryVal(de
);
9321 ln
= listSearchKey(clients
,c
);
9323 listDelNode(clients
,ln
);
9324 if (listLength(clients
) == 0) {
9325 /* Free the list and the associated hash entry entirely if this was
9326 * the last client, so that it will be possible to abuse
9327 * Redis PUBSUB creating millions of classes. */
9328 dictDelete(server
.pubsub_classes
,class);
9331 /* Notify the client */
9333 addReply(c
,shared
.mbulk3
);
9334 addReply(c
,shared
.unsubscribebulk
);
9335 addReplyBulk(c
,class);
9336 addReplyLong(c
,dictSize(c
->pubsub_classes
));
9338 decrRefCount(class); /* it is finally safe to release it */
9342 /* Unsubscribe from all the classes. Return the number of classes the
9343 * client was subscribed to. */
9344 static int pubsubUnsubscribeAll(redisClient
*c
, int notify
) {
9345 dictIterator
*di
= dictGetIterator(c
->pubsub_classes
);
9349 while((de
= dictNext(di
)) != NULL
) {
9350 robj
*class = dictGetEntryKey(de
);
9352 count
+= pubsubUnsubscribe(c
,class,notify
);
9354 dictReleaseIterator(di
);
9358 /* Publish a message */
9359 static int pubsubPublishMessage(robj
*class, robj
*message
) {
9361 struct dictEntry
*de
;
9363 de
= dictFind(server
.pubsub_classes
,class);
9365 list
*list
= dictGetEntryVal(de
);
9369 listRewind(list
,&li
);
9370 while ((ln
= listNext(&li
)) != NULL
) {
9371 redisClient
*c
= ln
->value
;
9373 addReply(c
,shared
.mbulk3
);
9374 addReply(c
,shared
.messagebulk
);
9375 addReplyBulk(c
,class);
9376 addReplyBulk(c
,message
);
9383 static void subscribeCommand(redisClient
*c
) {
9386 for (j
= 1; j
< c
->argc
; j
++)
9387 pubsubSubscribe(c
,c
->argv
[j
]);
9390 static void unsubscribeCommand(redisClient
*c
) {
9392 pubsubUnsubscribeAll(c
,1);
9397 for (j
= 1; j
< c
->argc
; j
++)
9398 pubsubUnsubscribe(c
,c
->argv
[j
],1);
9402 static void publishCommand(redisClient
*c
) {
9403 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9404 addReplyLong(c
,receivers
);
9407 /* ================================= Debugging ============================== */
9409 static void debugCommand(redisClient
*c
) {
9410 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9412 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9413 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9414 addReply(c
,shared
.err
);
9418 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9419 addReply(c
,shared
.err
);
9422 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9423 addReply(c
,shared
.ok
);
9424 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9426 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9427 addReply(c
,shared
.err
);
9430 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9431 addReply(c
,shared
.ok
);
9432 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9433 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9437 addReply(c
,shared
.nokeyerr
);
9440 key
= dictGetEntryKey(de
);
9441 val
= dictGetEntryVal(de
);
9442 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9443 key
->storage
== REDIS_VM_SWAPPING
)) {
9447 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9448 strenc
= strencoding
[val
->encoding
];
9450 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9453 addReplySds(c
,sdscatprintf(sdsempty(),
9454 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9455 "encoding:%s serializedlength:%lld\r\n",
9456 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9457 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9459 addReplySds(c
,sdscatprintf(sdsempty(),
9460 "+Key at:%p refcount:%d, value swapped at: page %llu "
9461 "using %llu pages\r\n",
9462 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9463 (unsigned long long) key
->vm
.usedpages
));
9465 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9466 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9469 if (!server
.vm_enabled
) {
9470 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9474 addReply(c
,shared
.nokeyerr
);
9477 key
= dictGetEntryKey(de
);
9478 val
= dictGetEntryVal(de
);
9479 /* If the key is shared we want to create a copy */
9480 if (key
->refcount
> 1) {
9481 robj
*newkey
= dupStringObject(key
);
9483 key
= dictGetEntryKey(de
) = newkey
;
9486 if (key
->storage
!= REDIS_VM_MEMORY
) {
9487 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9488 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9489 dictGetEntryVal(de
) = NULL
;
9490 addReply(c
,shared
.ok
);
9492 addReply(c
,shared
.err
);
9495 addReplySds(c
,sdsnew(
9496 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
9500 static void _redisAssert(char *estr
, char *file
, int line
) {
9501 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9502 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9503 #ifdef HAVE_BACKTRACE
9504 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9509 /* =================================== Main! ================================ */
9512 int linuxOvercommitMemoryValue(void) {
9513 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9517 if (fgets(buf
,64,fp
) == NULL
) {
9526 void linuxOvercommitMemoryWarning(void) {
9527 if (linuxOvercommitMemoryValue() == 0) {
9528 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9531 #endif /* __linux__ */
9533 static void daemonize(void) {
9537 if (fork() != 0) exit(0); /* parent exits */
9538 setsid(); /* create a new session */
9540 /* Every output goes to /dev/null. If Redis is daemonized but
9541 * the 'logfile' is set to 'stdout' in the configuration file
9542 * it will not log at all. */
9543 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9544 dup2(fd
, STDIN_FILENO
);
9545 dup2(fd
, STDOUT_FILENO
);
9546 dup2(fd
, STDERR_FILENO
);
9547 if (fd
> STDERR_FILENO
) close(fd
);
9549 /* Try to write the pid file */
9550 fp
= fopen(server
.pidfile
,"w");
9552 fprintf(fp
,"%d\n",getpid());
9557 static void version() {
9558 printf("Redis server version %s\n", REDIS_VERSION
);
9562 static void usage() {
9563 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9564 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9568 int main(int argc
, char **argv
) {
9573 if (strcmp(argv
[1], "-v") == 0 ||
9574 strcmp(argv
[1], "--version") == 0) version();
9575 if (strcmp(argv
[1], "--help") == 0) usage();
9576 resetServerSaveParams();
9577 loadServerConfig(argv
[1]);
9578 } else if ((argc
> 2)) {
9581 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9583 if (server
.daemonize
) daemonize();
9585 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9587 linuxOvercommitMemoryWarning();
9590 if (server
.appendonly
) {
9591 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9592 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9594 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9595 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9597 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9598 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9600 aeDeleteEventLoop(server
.el
);
9604 /* ============================= Backtrace support ========================= */
9606 #ifdef HAVE_BACKTRACE
9607 static char *findFuncName(void *pointer
, unsigned long *offset
);
9609 static void *getMcontextEip(ucontext_t
*uc
) {
9610 #if defined(__FreeBSD__)
9611 return (void*) uc
->uc_mcontext
.mc_eip
;
9612 #elif defined(__dietlibc__)
9613 return (void*) uc
->uc_mcontext
.eip
;
9614 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9616 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9618 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9620 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9621 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9622 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9624 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9626 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9627 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9628 #elif defined(__ia64__) /* Linux IA64 */
9629 return (void*) uc
->uc_mcontext
.sc_ip
;
9635 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9637 char **messages
= NULL
;
9638 int i
, trace_size
= 0;
9639 unsigned long offset
=0;
9640 ucontext_t
*uc
= (ucontext_t
*) secret
;
9642 REDIS_NOTUSED(info
);
9644 redisLog(REDIS_WARNING
,
9645 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9646 infostring
= genRedisInfoString();
9647 redisLog(REDIS_WARNING
, "%s",infostring
);
9648 /* It's not safe to sdsfree() the returned string under memory
9649 * corruption conditions. Let it leak as we are going to abort */
9651 trace_size
= backtrace(trace
, 100);
9652 /* overwrite sigaction with caller's address */
9653 if (getMcontextEip(uc
) != NULL
) {
9654 trace
[1] = getMcontextEip(uc
);
9656 messages
= backtrace_symbols(trace
, trace_size
);
9658 for (i
=1; i
<trace_size
; ++i
) {
9659 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9661 p
= strchr(messages
[i
],'+');
9662 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9663 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9665 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9668 /* free(messages); Don't call free() with possibly corrupted memory. */
9672 static void setupSigSegvAction(void) {
9673 struct sigaction act
;
9675 sigemptyset (&act
.sa_mask
);
9676 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9677 * is used. Otherwise, sa_handler is used */
9678 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9679 act
.sa_sigaction
= segvHandler
;
9680 sigaction (SIGSEGV
, &act
, NULL
);
9681 sigaction (SIGBUS
, &act
, NULL
);
9682 sigaction (SIGFPE
, &act
, NULL
);
9683 sigaction (SIGILL
, &act
, NULL
);
9684 sigaction (SIGBUS
, &act
, NULL
);
9688 #include "staticsymbols.h"
9689 /* This function tries to convert a pointer into a function name. It's used in
9690 * order to provide a backtrace under segmentation fault that's able to
9691 * display functions declared as static (otherwise the backtrace is useless). */
9692 static char *findFuncName(void *pointer
, unsigned long *offset
){
9694 unsigned long off
, minoff
= 0;
9696 /* Try to match against the Symbol with the smallest offset */
9697 for (i
=0; symsTable
[i
].pointer
; i
++) {
9698 unsigned long lp
= (unsigned long) pointer
;
9700 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9701 off
=lp
-symsTable
[i
].pointer
;
9702 if (ret
< 0 || off
< minoff
) {
9708 if (ret
== -1) return NULL
;
9710 return symsTable
[ret
].name
;
9712 #else /* HAVE_BACKTRACE */
9713 static void setupSigSegvAction(void) {
9715 #endif /* HAVE_BACKTRACE */