2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_STRING 0
122 /* Object encodings. Some kinds of objects, like Strings and Hashes, can be
123 * internally represented in multiple ways. The 'encoding' field of the object
124 * is set to one of these values for this object. */
125 #define REDIS_ENCODING_RAW 0 /* Raw representation */
126 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
127 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
128 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
130 static char* strencoding
[] = {
131 "raw", "int", "zipmap", "hashtable"
134 /* Object types only used for dumping to disk */
135 #define REDIS_EXPIRETIME 253
136 #define REDIS_SELECTDB 254
137 #define REDIS_EOF 255
139 /* Defines related to the dump file format. To store 32 bits lengths for short
140 * keys requires a lot of space, so we check the most significant 2 bits of
141 * the first byte to interpret the length:
143 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
144 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
145 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
146 * 11|000000 this means: specially encoded object will follow. The six bits
147 * number specify the kind of object that follows.
148 * See the REDIS_RDB_ENC_* defines.
150 * Lengths up to 63 are stored using a single byte, most DB keys, and many
151 * values, will fit inside. */
152 #define REDIS_RDB_6BITLEN 0
153 #define REDIS_RDB_14BITLEN 1
154 #define REDIS_RDB_32BITLEN 2
155 #define REDIS_RDB_ENCVAL 3
156 #define REDIS_RDB_LENERR UINT_MAX
158 /* When a length of a string object stored on disk has the first two bits
159 * set, the remaining six bits specify a special encoding for the object
160 * according to the following defines: */
161 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
162 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
163 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
164 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
166 /* Virtual memory object->where field. */
167 #define REDIS_VM_MEMORY 0 /* The object is on memory */
168 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
169 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
170 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
172 /* Virtual memory static configuration stuff.
173 * Check vmFindContiguousPages() to know more about these magic numbers. */
174 #define REDIS_VM_MAX_NEAR_PAGES 65536
175 #define REDIS_VM_MAX_RANDOM_JUMP 4096
176 #define REDIS_VM_MAX_THREADS 32
177 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
178 /* The following is the *percentage* of completed I/O jobs to process when the
179 * handler is called. While Virtual Memory I/O operations are performed by
180 * threads, these operations must be processed by the main thread when completed
181 * in order to take effect. */
182 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
185 #define REDIS_SLAVE 1 /* This client is a slave server */
186 #define REDIS_MASTER 2 /* This client is a master server */
187 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
188 #define REDIS_MULTI 8 /* This client is in a MULTI context */
189 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
190 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
192 /* Slave replication state - slave side */
193 #define REDIS_REPL_NONE 0 /* No active replication */
194 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
195 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
197 /* Slave replication state - from the point of view of master
198 * Note that in SEND_BULK and ONLINE state the slave receives new updates
199 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
200 * to start the next background saving in order to send updates to it. */
201 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
202 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
203 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
204 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
206 /* List related stuff */
210 /* Sort operations */
211 #define REDIS_SORT_GET 0
212 #define REDIS_SORT_ASC 1
213 #define REDIS_SORT_DESC 2
214 #define REDIS_SORTKEY_MAX 1024
217 #define REDIS_DEBUG 0
218 #define REDIS_VERBOSE 1
219 #define REDIS_NOTICE 2
220 #define REDIS_WARNING 3
222 /* Anti-warning macro... */
223 #define REDIS_NOTUSED(V) ((void) V)
225 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
226 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
228 /* Append only defines */
229 #define APPENDFSYNC_NO 0
230 #define APPENDFSYNC_ALWAYS 1
231 #define APPENDFSYNC_EVERYSEC 2
233 /* Hashes related defaults */
234 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
235 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
237 /* We can print the stacktrace, so our assert is defined this way:
 * when the expression _e is false, redisAssert() hands the stringified
 * expression, the source file name and the line number to _redisAssert()
 * and then terminates the process with _exit(1). When _e is true the
 * macro evaluates to a no-op. */
238 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Reporting half of redisAssert(): receives the text of the failed
 * expression (estr) together with the file and line of the assertion. */
239 static void _redisAssert(char *estr
, char *file
, int line
);
241 /*================================= Data types ============================== */
243 /* A redis object, that is a type able to hold a string / list / set */
245 /* The VM object structure */
246 struct redisObjectVM
{
247 off_t page
; /* the page at witch the object is stored on disk */
248 off_t usedpages
; /* number of pages used on disk */
249 time_t atime
; /* Last access time */
252 /* The actual Redis Object */
253 typedef struct redisObject
{
256 unsigned char encoding
;
257 unsigned char storage
; /* If this object is a key, where is the value?
258 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
259 unsigned char vtype
; /* If this object is a key, and value is swapped out,
260 * this is the type of the swapped out object. */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
266 struct redisObjectVM vm
;
269 /* Macro used to initialize a Redis object allocated on the stack.
270 * Note that this macro is kept near the structure definition to make sure
271 * we'll update it when the structure is changed, to avoid bugs like
272 * bug #85 introduced exactly in this way. */
273 #define initStaticStringObject(_var,_ptr) do { \
275 _var.type = REDIS_STRING; \
276 _var.encoding = REDIS_ENCODING_RAW; \
278 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
281 typedef struct redisDb
{
282 dict
*dict
; /* The keyspace for this DB */
283 dict
*expires
; /* Timeout of keys with a timeout set */
284 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
285 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
289 /* Client MULTI/EXEC state */
290 typedef struct multiCmd
{
293 struct redisCommand
*cmd
;
296 typedef struct multiState
{
297 multiCmd
*commands
; /* Array of MULTI commands */
298 int count
; /* Total number of MULTI commands */
301 /* With multiplexing we need to keep per-client state.
302 * Clients are kept in a linked list. */
303 typedef struct redisClient
{
308 robj
**argv
, **mbargv
;
310 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
311 int multibulk
; /* multi bulk command format active */
314 time_t lastinteraction
; /* time of the last interaction, used for timeout */
315 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
316 int slaveseldb
; /* slave selected db, if this client is a slave */
317 int authenticated
; /* when requirepass is non-NULL */
318 int replstate
; /* replication state if this is a slave */
319 int repldbfd
; /* replication DB file descriptor */
320 long repldboff
; /* replication DB file offset */
321 off_t repldbsize
; /* replication DB file size */
322 multiState mstate
; /* MULTI/EXEC state */
323 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
324 * operation such as BLPOP. Otherwise NULL. */
325 int blockingkeysnum
; /* Number of blocking keys */
326 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
327 * is >= blockingto then the operation timed out. */
328 list
*io_keys
; /* Keys this client is waiting to be loaded from the
329 * swap file in order to continue. */
330 dict
*pubsub_classes
; /* Classes a client is interested in (SUBSCRIBE) */
338 /* Global server state structure */
343 dict
*sharingpool
; /* Poll used for object sharing */
344 unsigned int sharingpoolsize
;
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last save succeeede */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to take this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_classes
; /* Associate classes to list of subscribed clients */
445 typedef void redisCommandProc(redisClient
*c
);
446 struct redisCommand
{
448 redisCommandProc
*proc
;
451 /* Use a function to determine which keys need to be loaded
452 * in the background prior to executing this command. Takes precedence
453 * over vm_firstkey and others, ignored when NULL */
454 redisCommandProc
*vm_preload_proc
;
455 /* What keys should be loaded in background when calling this command? */
456 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
457 int vm_lastkey
; /* THe last argument that's a key */
458 int vm_keystep
; /* The step between first and last key */
461 struct redisFunctionSym
{
463 unsigned long pointer
;
466 typedef struct _redisSortObject
{
474 typedef struct _redisSortOperation
{
477 } redisSortOperation
;
479 /* ZSETs use a specialized version of Skiplists */
481 typedef struct zskiplistNode
{
482 struct zskiplistNode
**forward
;
483 struct zskiplistNode
*backward
;
489 typedef struct zskiplist
{
490 struct zskiplistNode
*header
, *tail
;
491 unsigned long length
;
495 typedef struct zset
{
500 /* Our shared "common" objects */
502 struct sharedObjectsStruct
{
503 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
504 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
505 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
506 *outofrangeerr
, *plus
,
507 *select0
, *select1
, *select2
, *select3
, *select4
,
508 *select5
, *select6
, *select7
, *select8
, *select9
,
509 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
;
512 /* Global vars that are actually used as constants. The following double
513 * values are used for double on-disk serialization, and are initialized
514 * at runtime to avoid strange compiler optimizations. */
516 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
518 /* VM threaded I/O request message */
519 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
520 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
521 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
522 typedef struct iojob
{
523 int type
; /* Request type, REDIS_IOJOB_* */
524 redisDb
*db
;/* Redis database */
525 robj
*key
; /* This I/O request is about swapping this key */
526 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
527 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
528 off_t page
; /* Swap page where to read/write the object */
529 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
530 int canceled
; /* True if this command was canceled by blocking side of VM */
531 pthread_t thread
; /* ID of the thread processing this entry */
534 /*================================ Prototypes =============================== */
536 static void freeStringObject(robj
*o
);
537 static void freeListObject(robj
*o
);
538 static void freeSetObject(robj
*o
);
539 static void decrRefCount(void *o
);
540 static robj
*createObject(int type
, void *ptr
);
541 static void freeClient(redisClient
*c
);
542 static int rdbLoad(char *filename
);
543 static void addReply(redisClient
*c
, robj
*obj
);
544 static void addReplySds(redisClient
*c
, sds s
);
545 static void incrRefCount(robj
*o
);
546 static int rdbSaveBackground(char *filename
);
547 static robj
*createStringObject(char *ptr
, size_t len
);
548 static robj
*dupStringObject(robj
*o
);
549 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
550 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
551 static int syncWithMaster(void);
552 static robj
*tryObjectSharing(robj
*o
);
553 static int tryObjectEncoding(robj
*o
);
554 static robj
*getDecodedObject(robj
*o
);
555 static int removeExpire(redisDb
*db
, robj
*key
);
556 static int expireIfNeeded(redisDb
*db
, robj
*key
);
557 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
558 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
559 static int deleteKey(redisDb
*db
, robj
*key
);
560 static time_t getExpire(redisDb
*db
, robj
*key
);
561 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
562 static void updateSlavesWaitingBgsave(int bgsaveerr
);
563 static void freeMemoryIfNeeded(void);
564 static int processCommand(redisClient
*c
);
565 static void setupSigSegvAction(void);
566 static void rdbRemoveTempFile(pid_t childpid
);
567 static void aofRemoveTempFile(pid_t childpid
);
568 static size_t stringObjectLen(robj
*o
);
569 static void processInputBuffer(redisClient
*c
);
570 static zskiplist
*zslCreate(void);
571 static void zslFree(zskiplist
*zsl
);
572 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
573 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
574 static void initClientMultiState(redisClient
*c
);
575 static void freeClientMultiState(redisClient
*c
);
576 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
577 static void unblockClientWaitingData(redisClient
*c
);
578 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
579 static void vmInit(void);
580 static void vmMarkPagesFree(off_t page
, off_t count
);
581 static robj
*vmLoadObject(robj
*key
);
582 static robj
*vmPreviewObject(robj
*key
);
583 static int vmSwapOneObjectBlocking(void);
584 static int vmSwapOneObjectThreaded(void);
585 static int vmCanSwapOut(void);
586 static int tryFreeOneObjectFromFreelist(void);
587 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
588 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
589 static void vmCancelThreadedIOJob(robj
*o
);
590 static void lockThreadedIO(void);
591 static void unlockThreadedIO(void);
592 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
593 static void freeIOJob(iojob
*j
);
594 static void queueIOJob(iojob
*j
);
595 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
596 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
597 static void waitEmptyIOJobsQueue(void);
598 static void vmReopenSwapFile(void);
599 static int vmFreePage(off_t page
);
600 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
601 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
602 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
603 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
604 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
605 static struct redisCommand
*lookupCommand(char *name
);
606 static void call(redisClient
*c
, struct redisCommand
*cmd
);
607 static void resetClient(redisClient
*c
);
608 static void convertToRealHash(robj
*o
);
609 static int pubsubUnsubscribeAll(redisClient
*c
, int notify
);
612 static void authCommand(redisClient
*c
);
613 static void pingCommand(redisClient
*c
);
614 static void echoCommand(redisClient
*c
);
615 static void setCommand(redisClient
*c
);
616 static void setnxCommand(redisClient
*c
);
617 static void getCommand(redisClient
*c
);
618 static void delCommand(redisClient
*c
);
619 static void existsCommand(redisClient
*c
);
620 static void incrCommand(redisClient
*c
);
621 static void decrCommand(redisClient
*c
);
622 static void incrbyCommand(redisClient
*c
);
623 static void decrbyCommand(redisClient
*c
);
624 static void selectCommand(redisClient
*c
);
625 static void randomkeyCommand(redisClient
*c
);
626 static void keysCommand(redisClient
*c
);
627 static void dbsizeCommand(redisClient
*c
);
628 static void lastsaveCommand(redisClient
*c
);
629 static void saveCommand(redisClient
*c
);
630 static void bgsaveCommand(redisClient
*c
);
631 static void bgrewriteaofCommand(redisClient
*c
);
632 static void shutdownCommand(redisClient
*c
);
633 static void moveCommand(redisClient
*c
);
634 static void renameCommand(redisClient
*c
);
635 static void renamenxCommand(redisClient
*c
);
636 static void lpushCommand(redisClient
*c
);
637 static void rpushCommand(redisClient
*c
);
638 static void lpopCommand(redisClient
*c
);
639 static void rpopCommand(redisClient
*c
);
640 static void llenCommand(redisClient
*c
);
641 static void lindexCommand(redisClient
*c
);
642 static void lrangeCommand(redisClient
*c
);
643 static void ltrimCommand(redisClient
*c
);
644 static void typeCommand(redisClient
*c
);
645 static void lsetCommand(redisClient
*c
);
646 static void saddCommand(redisClient
*c
);
647 static void sremCommand(redisClient
*c
);
648 static void smoveCommand(redisClient
*c
);
649 static void sismemberCommand(redisClient
*c
);
650 static void scardCommand(redisClient
*c
);
651 static void spopCommand(redisClient
*c
);
652 static void srandmemberCommand(redisClient
*c
);
653 static void sinterCommand(redisClient
*c
);
654 static void sinterstoreCommand(redisClient
*c
);
655 static void sunionCommand(redisClient
*c
);
656 static void sunionstoreCommand(redisClient
*c
);
657 static void sdiffCommand(redisClient
*c
);
658 static void sdiffstoreCommand(redisClient
*c
);
659 static void syncCommand(redisClient
*c
);
660 static void flushdbCommand(redisClient
*c
);
661 static void flushallCommand(redisClient
*c
);
662 static void sortCommand(redisClient
*c
);
663 static void lremCommand(redisClient
*c
);
/* NOTE(review): unlike the other command handlers declared here, this one is
 * spelled with a lowercase 'c' ("command"). cmdTable refers to it by this
 * exact name, so a rename must update the table entry and the definition
 * together. */
664 static void rpoplpushcommand(redisClient
*c
);
665 static void infoCommand(redisClient
*c
);
666 static void mgetCommand(redisClient
*c
);
667 static void monitorCommand(redisClient
*c
);
668 static void expireCommand(redisClient
*c
);
669 static void expireatCommand(redisClient
*c
);
670 static void getsetCommand(redisClient
*c
);
671 static void ttlCommand(redisClient
*c
);
672 static void slaveofCommand(redisClient
*c
);
673 static void debugCommand(redisClient
*c
);
674 static void msetCommand(redisClient
*c
);
675 static void msetnxCommand(redisClient
*c
);
676 static void zaddCommand(redisClient
*c
);
677 static void zincrbyCommand(redisClient
*c
);
678 static void zrangeCommand(redisClient
*c
);
679 static void zrangebyscoreCommand(redisClient
*c
);
680 static void zcountCommand(redisClient
*c
);
681 static void zrevrangeCommand(redisClient
*c
);
682 static void zcardCommand(redisClient
*c
);
683 static void zremCommand(redisClient
*c
);
684 static void zscoreCommand(redisClient
*c
);
685 static void zremrangebyscoreCommand(redisClient
*c
);
686 static void multiCommand(redisClient
*c
);
687 static void execCommand(redisClient
*c
);
688 static void discardCommand(redisClient
*c
);
689 static void blpopCommand(redisClient
*c
);
690 static void brpopCommand(redisClient
*c
);
691 static void appendCommand(redisClient
*c
);
692 static void substrCommand(redisClient
*c
);
693 static void zrankCommand(redisClient
*c
);
694 static void zrevrankCommand(redisClient
*c
);
695 static void hsetCommand(redisClient
*c
);
696 static void hgetCommand(redisClient
*c
);
697 static void hdelCommand(redisClient
*c
);
698 static void hlenCommand(redisClient
*c
);
699 static void zremrangebyrankCommand(redisClient
*c
);
700 static void zunionCommand(redisClient
*c
);
701 static void zinterCommand(redisClient
*c
);
702 static void hkeysCommand(redisClient
*c
);
703 static void hvalsCommand(redisClient
*c
);
704 static void hgetallCommand(redisClient
*c
);
705 static void hexistsCommand(redisClient
*c
);
706 static void configCommand(redisClient
*c
);
707 static void hincrbyCommand(redisClient
*c
);
708 static void subscribeCommand(redisClient
*c
);
709 static void unsubscribeCommand(redisClient
*c
);
710 static void publishCommand(redisClient
*c
);
712 /*================================= Globals ================================= */
715 static struct redisServer server
; /* server global state */
716 static struct redisCommand cmdTable
[] = {
717 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
718 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
719 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
720 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
721 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
722 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
723 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
724 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
725 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
726 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
727 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
728 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
729 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
730 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
731 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
732 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
733 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
734 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
735 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
736 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
738 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
739 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
740 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
742 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
743 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
744 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
748 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
749 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
750 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
751 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
752 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
753 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
755 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
760 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
761 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
767 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
768 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
769 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
772 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
773 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
778 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
780 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
782 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
783 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
785 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
789 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
790 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
791 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
792 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
793 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
794 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
795 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
796 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
797 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
798 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
799 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
800 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"exec",execCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
803 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
804 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
808 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
814 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"publish",publishCommand
,3,REDIS_CMD_BULK
,NULL
,0,0,0},
817 {NULL
,NULL
,0,0,NULL
,0,0,0}
820 /*============================ Utility functions ============================ */
822 /* Glob-style pattern matching. */
823 static int stringmatchlen(const char *pattern
, int patternLen
,
824 const char *string
, int stringLen
, int nocase
)
829 while (pattern
[1] == '*') {
834 return 1; /* match */
836 if (stringmatchlen(pattern
+1, patternLen
-1,
837 string
, stringLen
, nocase
))
838 return 1; /* match */
842 return 0; /* no match */
846 return 0; /* no match */
856 not = pattern
[0] == '^';
863 if (pattern
[0] == '\\') {
866 if (pattern
[0] == string
[0])
868 } else if (pattern
[0] == ']') {
870 } else if (patternLen
== 0) {
874 } else if (pattern
[1] == '-' && patternLen
>= 3) {
875 int start
= pattern
[0];
876 int end
= pattern
[2];
884 start
= tolower(start
);
890 if (c
>= start
&& c
<= end
)
894 if (pattern
[0] == string
[0])
897 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
907 return 0; /* no match */
913 if (patternLen
>= 2) {
920 if (pattern
[0] != string
[0])
921 return 0; /* no match */
923 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
924 return 0; /* no match */
932 if (stringLen
== 0) {
933 while(*pattern
== '*') {
940 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs.
 * Both 'pattern' and 'string' are measured with strlen() and forwarded,
 * together with the 'nocase' flag; the return value is whatever
 * stringmatchlen() returns (1 for a match, 0 for no match). */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int patternLen = strlen(pattern);
    int stringLen = strlen(string);

    return stringmatchlen(pattern, patternLen, string, stringLen, nocase);
}
949 static void redisLog(int level
, const char *fmt
, ...) {
953 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
957 if (level
>= server
.verbosity
) {
963 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
964 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
965 vfprintf(fp
, fmt
, ap
);
971 if (server
.logfile
) fclose(fp
);
974 /*====================== Hash table type implementation ==================== */
976 /* This is a hash table type that uses the SDS dynamic strings library as
977 * keys and redis objects as values (objects can hold SDS strings,
980 static void dictVanillaFree(void *privdata
, void *val
)
982 DICT_NOTUSED(privdata
);
986 static void dictListDestructor(void *privdata
, void *val
)
988 DICT_NOTUSED(privdata
);
989 listRelease((list
*)val
);
992 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
996 DICT_NOTUSED(privdata
);
998 l1
= sdslen((sds
)key1
);
999 l2
= sdslen((sds
)key2
);
1000 if (l1
!= l2
) return 0;
1001 return memcmp(key1
, key2
, l1
) == 0;
1004 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1006 DICT_NOTUSED(privdata
);
1008 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1012 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1015 const robj
*o1
= key1
, *o2
= key2
;
1016 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1019 static unsigned int dictObjHash(const void *key
) {
1020 const robj
*o
= key
;
1021 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1024 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1027 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1030 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1031 o2
->encoding
== REDIS_ENCODING_INT
&&
1032 o1
->ptr
== o2
->ptr
) return 1;
1034 o1
= getDecodedObject(o1
);
1035 o2
= getDecodedObject(o2
);
1036 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1042 static unsigned int dictEncObjHash(const void *key
) {
1043 robj
*o
= (robj
*) key
;
1045 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1046 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1048 if (o
->encoding
== REDIS_ENCODING_INT
) {
1052 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1053 return dictGenHashFunction((unsigned char*)buf
, len
);
1057 o
= getDecodedObject(o
);
1058 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1065 /* Sets type and expires */
1066 static dictType setDictType
= {
1067 dictEncObjHash
, /* hash function */
1070 dictEncObjKeyCompare
, /* key compare */
1071 dictRedisObjectDestructor
, /* key destructor */
1072 NULL
/* val destructor */
1075 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1076 static dictType zsetDictType
= {
1077 dictEncObjHash
, /* hash function */
1080 dictEncObjKeyCompare
, /* key compare */
1081 dictRedisObjectDestructor
, /* key destructor */
1082 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1086 static dictType dbDictType
= {
1087 dictObjHash
, /* hash function */
1090 dictObjKeyCompare
, /* key compare */
1091 dictRedisObjectDestructor
, /* key destructor */
1092 dictRedisObjectDestructor
/* val destructor */
1096 static dictType keyptrDictType
= {
1097 dictObjHash
, /* hash function */
1100 dictObjKeyCompare
, /* key compare */
1101 dictRedisObjectDestructor
, /* key destructor */
1102 NULL
/* val destructor */
1105 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1106 static dictType hashDictType
= {
1107 dictEncObjHash
, /* hash function */
1110 dictEncObjKeyCompare
, /* key compare */
1111 dictRedisObjectDestructor
, /* key destructor */
1112 dictRedisObjectDestructor
/* val destructor */
1115 /* Keylist hash table type has unencoded redis objects as keys and
1116 * lists as values. It's used for blocking operations (BLPOP) and to
1117 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1118 static dictType keylistDictType
= {
1119 dictObjHash
, /* hash function */
1122 dictObjKeyCompare
, /* key compare */
1123 dictRedisObjectDestructor
, /* key destructor */
1124 dictListDestructor
/* val destructor */
1127 static void version();
1129 /* ========================= Random utility functions ======================= */
1131 /* Redis generally does not try to recover from out of memory conditions
1132 * when allocating objects or strings, it is not clear if it will be possible
1133 * to report this condition to the client since the networking layer itself
1134 * is based on heap allocation for send buffers, so we simply abort.
1135 * At least the code will be simpler to read... */
1136 static void oom(const char *msg
) {
1137 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1142 /* ====================== Redis server networking stuff ===================== */
1143 static void closeTimedoutClients(void) {
1146 time_t now
= time(NULL
);
1149 listRewind(server
.clients
,&li
);
1150 while ((ln
= listNext(&li
)) != NULL
) {
1151 c
= listNodeValue(ln
);
1152 if (server
.maxidletime
&&
1153 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1154 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1155 (now
- c
->lastinteraction
> server
.maxidletime
))
1157 redisLog(REDIS_VERBOSE
,"Closing idle client");
1159 } else if (c
->flags
& REDIS_BLOCKED
) {
1160 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1161 addReply(c
,shared
.nullmultibulk
);
1162 unblockClientWaitingData(c
);
1168 static int htNeedsResize(dict
*dict
) {
1169 long long size
, used
;
1171 size
= dictSlots(dict
);
1172 used
= dictSize(dict
);
1173 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1174 (used
*100/size
< REDIS_HT_MINFILL
));
1177 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1178 * we resize the hash table to save memory */
1179 static void tryResizeHashTables(void) {
1182 for (j
= 0; j
< server
.dbnum
; j
++) {
1183 if (htNeedsResize(server
.db
[j
].dict
)) {
1184 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1185 dictResize(server
.db
[j
].dict
);
1186 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1188 if (htNeedsResize(server
.db
[j
].expires
))
1189 dictResize(server
.db
[j
].expires
);
1193 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1194 void backgroundSaveDoneHandler(int statloc
) {
1195 int exitcode
= WEXITSTATUS(statloc
);
1196 int bysignal
= WIFSIGNALED(statloc
);
1198 if (!bysignal
&& exitcode
== 0) {
1199 redisLog(REDIS_NOTICE
,
1200 "Background saving terminated with success");
1202 server
.lastsave
= time(NULL
);
1203 } else if (!bysignal
&& exitcode
!= 0) {
1204 redisLog(REDIS_WARNING
, "Background saving error");
1206 redisLog(REDIS_WARNING
,
1207 "Background saving terminated by signal");
1208 rdbRemoveTempFile(server
.bgsavechildpid
);
1210 server
.bgsavechildpid
= -1;
1211 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1212 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1213 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1216 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1218 void backgroundRewriteDoneHandler(int statloc
) {
1219 int exitcode
= WEXITSTATUS(statloc
);
1220 int bysignal
= WIFSIGNALED(statloc
);
1222 if (!bysignal
&& exitcode
== 0) {
1226 redisLog(REDIS_NOTICE
,
1227 "Background append only file rewriting terminated with success");
1228 /* Now it's time to flush the differences accumulated by the parent */
1229 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1230 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1232 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1235 /* Flush our data... */
1236 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1237 (signed) sdslen(server
.bgrewritebuf
)) {
1238 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1242 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1243 /* Now our work is to rename the temp file into the stable file. And
1244 * switch the file descriptor used by the server for append only. */
1245 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1246 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1250 /* Mission completed... almost */
1251 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1252 if (server
.appendfd
!= -1) {
1253 /* If append only is actually enabled... */
1254 close(server
.appendfd
);
1255 server
.appendfd
= fd
;
1257 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1258 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1260 /* If append only is disabled we just generate a dump in this
1261 * format. Why not? */
1264 } else if (!bysignal
&& exitcode
!= 0) {
1265 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1267 redisLog(REDIS_WARNING
,
1268 "Background append only file rewriting terminated by signal");
1271 sdsfree(server
.bgrewritebuf
);
1272 server
.bgrewritebuf
= sdsempty();
1273 aofRemoveTempFile(server
.bgrewritechildpid
);
1274 server
.bgrewritechildpid
= -1;
1277 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1278 int j
, loops
= server
.cronloops
++;
1279 REDIS_NOTUSED(eventLoop
);
1281 REDIS_NOTUSED(clientData
);
1283 /* We take a cached value of the unix time in the global state because
1284 * with virtual memory and aging we need to store the current time
1285 * in objects at every object access, and accuracy is not needed.
1286 * To access a global var is faster than calling time(NULL) */
1287 server
.unixtime
= time(NULL
);
1289 /* Show some info about non-empty databases */
1290 for (j
= 0; j
< server
.dbnum
; j
++) {
1291 long long size
, used
, vkeys
;
1293 size
= dictSlots(server
.db
[j
].dict
);
1294 used
= dictSize(server
.db
[j
].dict
);
1295 vkeys
= dictSize(server
.db
[j
].expires
);
1296 if (!(loops
% 50) && (used
|| vkeys
)) {
1297 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1298 /* dictPrintStats(server.dict); */
1302 /* We don't want to resize the hash tables while a background saving
1303 * is in progress: the saving child is created using fork() that is
1304 * implemented with a copy-on-write semantic in most modern systems, so
1305 * if we resize the HT while there is the saving child at work actually
1306 * a lot of memory movements in the parent will cause a lot of pages
1308 if (server
.bgsavechildpid
== -1 && !(loops
% 10)) tryResizeHashTables();
1310 /* Show information about connected clients */
1311 if (!(loops
% 50)) {
1312 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
1313 listLength(server
.clients
)-listLength(server
.slaves
),
1314 listLength(server
.slaves
),
1315 zmalloc_used_memory(),
1316 dictSize(server
.sharingpool
));
1319 /* Close connections of timedout clients */
1320 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1321 closeTimedoutClients();
1323 /* Check if a background saving or AOF rewrite in progress terminated */
1324 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1328 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1329 if (pid
== server
.bgsavechildpid
) {
1330 backgroundSaveDoneHandler(statloc
);
1332 backgroundRewriteDoneHandler(statloc
);
1336 /* If there is not a background saving in progress check if
1337 * we have to save now */
1338 time_t now
= time(NULL
);
1339 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1340 struct saveparam
*sp
= server
.saveparams
+j
;
1342 if (server
.dirty
>= sp
->changes
&&
1343 now
-server
.lastsave
> sp
->seconds
) {
1344 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1345 sp
->changes
, sp
->seconds
);
1346 rdbSaveBackground(server
.dbfilename
);
1352 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1353 * will use few CPU cycles if there are few expiring keys, otherwise
1354 * it will get more aggressive to avoid that too much memory is used by
1355 * keys that can be removed from the keyspace. */
1356 for (j
= 0; j
< server
.dbnum
; j
++) {
1358 redisDb
*db
= server
.db
+j
;
1360 /* Continue to expire if at the end of the cycle more than 25%
1361 * of the keys were expired. */
1363 long num
= dictSize(db
->expires
);
1364 time_t now
= time(NULL
);
1367 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1368 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1373 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1374 t
= (time_t) dictGetEntryVal(de
);
1376 deleteKey(db
,dictGetEntryKey(de
));
1378 server
.stat_expiredkeys
++;
1381 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1384 /* Swap a few keys on disk if we are over the memory limit and VM
1385 * is enabled. Try to free objects from the free list first. */
1386 if (vmCanSwapOut()) {
1387 while (server
.vm_enabled
&& zmalloc_used_memory() >
1388 server
.vm_max_memory
)
1392 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1393 retval
= (server
.vm_max_threads
== 0) ?
1394 vmSwapOneObjectBlocking() :
1395 vmSwapOneObjectThreaded();
1396 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1397 zmalloc_used_memory() >
1398 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1400 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1402 /* Note that when using threaded I/O we free just one object,
1403 * because anyway when the I/O thread in charge to swap this
1404 * object out will finish, the handler of completed jobs
1405 * will try to swap more objects if we are still out of memory. */
1406 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1410 /* Check if we should connect to a MASTER */
1411 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1412 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1413 if (syncWithMaster() == REDIS_OK
) {
1414 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1420 /* This function gets called every time Redis is entering the
1421 * main loop of the event driven library, that is, before to sleep
1422 * for ready file descriptors. */
1423 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1424 REDIS_NOTUSED(eventLoop
);
1426 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1430 listRewind(server
.io_ready_clients
,&li
);
1431 while((ln
= listNext(&li
))) {
1432 redisClient
*c
= ln
->value
;
1433 struct redisCommand
*cmd
;
1435 /* Resume the client. */
1436 listDelNode(server
.io_ready_clients
,ln
);
1437 c
->flags
&= (~REDIS_IO_WAIT
);
1438 server
.vm_blocked_clients
--;
1439 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1440 readQueryFromClient
, c
);
1441 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1442 assert(cmd
!= NULL
);
1445 /* There may be more data to process in the input buffer. */
1446 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1447 processInputBuffer(c
);
1452 static void createSharedObjects(void) {
1453 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1454 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1455 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1456 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1457 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1458 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1459 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1460 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1461 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1462 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1463 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1464 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1465 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1466 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1467 "-ERR no such key\r\n"));
1468 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1469 "-ERR syntax error\r\n"));
1470 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1471 "-ERR source and destination objects are the same\r\n"));
1472 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1473 "-ERR index out of range\r\n"));
1474 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1475 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1476 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1477 shared
.select0
= createStringObject("select 0\r\n",10);
1478 shared
.select1
= createStringObject("select 1\r\n",10);
1479 shared
.select2
= createStringObject("select 2\r\n",10);
1480 shared
.select3
= createStringObject("select 3\r\n",10);
1481 shared
.select4
= createStringObject("select 4\r\n",10);
1482 shared
.select5
= createStringObject("select 5\r\n",10);
1483 shared
.select6
= createStringObject("select 6\r\n",10);
1484 shared
.select7
= createStringObject("select 7\r\n",10);
1485 shared
.select8
= createStringObject("select 8\r\n",10);
1486 shared
.select9
= createStringObject("select 9\r\n",10);
1487 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1488 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1489 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1490 shared
.mbulk3
= createStringObject("*3\r\n",4);
1493 static void appendServerSaveParams(time_t seconds
, int changes
) {
1494 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1495 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1496 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1497 server
.saveparamslen
++;
1500 static void resetServerSaveParams() {
1501 zfree(server
.saveparams
);
1502 server
.saveparams
= NULL
;
1503 server
.saveparamslen
= 0;
1506 static void initServerConfig() {
1507 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1508 server
.port
= REDIS_SERVERPORT
;
1509 server
.verbosity
= REDIS_VERBOSE
;
1510 server
.maxidletime
= REDIS_MAXIDLETIME
;
1511 server
.saveparams
= NULL
;
1512 server
.logfile
= NULL
; /* NULL = log on standard output */
1513 server
.bindaddr
= NULL
;
1514 server
.glueoutputbuf
= 1;
1515 server
.daemonize
= 0;
1516 server
.appendonly
= 0;
1517 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1518 server
.lastfsync
= time(NULL
);
1519 server
.appendfd
= -1;
1520 server
.appendseldb
= -1; /* Make sure the first time will not match */
1521 server
.pidfile
= zstrdup("/var/run/redis.pid");
1522 server
.dbfilename
= zstrdup("dump.rdb");
1523 server
.appendfilename
= zstrdup("appendonly.aof");
1524 server
.requirepass
= NULL
;
1525 server
.shareobjects
= 0;
1526 server
.rdbcompression
= 1;
1527 server
.sharingpoolsize
= 1024;
1528 server
.maxclients
= 0;
1529 server
.blpop_blocked_clients
= 0;
1530 server
.maxmemory
= 0;
1531 server
.vm_enabled
= 0;
1532 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1533 server
.vm_page_size
= 256; /* 256 bytes per page */
1534 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1535 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1536 server
.vm_max_threads
= 4;
1537 server
.vm_blocked_clients
= 0;
1538 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1539 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1541 resetServerSaveParams();
1543 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1544 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1545 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1546 /* Replication related */
1548 server
.masterauth
= NULL
;
1549 server
.masterhost
= NULL
;
1550 server
.masterport
= 6379;
1551 server
.master
= NULL
;
1552 server
.replstate
= REDIS_REPL_NONE
;
1554 /* Double constants initialization */
1556 R_PosInf
= 1.0/R_Zero
;
1557 R_NegInf
= -1.0/R_Zero
;
1558 R_Nan
= R_Zero
/R_Zero
;
1561 static void initServer() {
1564 signal(SIGHUP
, SIG_IGN
);
1565 signal(SIGPIPE
, SIG_IGN
);
1566 setupSigSegvAction();
1568 server
.devnull
= fopen("/dev/null","w");
1569 if (server
.devnull
== NULL
) {
1570 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1573 server
.clients
= listCreate();
1574 server
.slaves
= listCreate();
1575 server
.monitors
= listCreate();
1576 server
.objfreelist
= listCreate();
1577 createSharedObjects();
1578 server
.el
= aeCreateEventLoop();
1579 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1580 server
.sharingpool
= dictCreate(&setDictType
,NULL
);
1581 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1582 if (server
.fd
== -1) {
1583 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1586 for (j
= 0; j
< server
.dbnum
; j
++) {
1587 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1588 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1589 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1590 if (server
.vm_enabled
)
1591 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1592 server
.db
[j
].id
= j
;
1594 server
.pubsub_classes
= dictCreate(&keylistDictType
,NULL
);
1595 server
.cronloops
= 0;
1596 server
.bgsavechildpid
= -1;
1597 server
.bgrewritechildpid
= -1;
1598 server
.bgrewritebuf
= sdsempty();
1599 server
.lastsave
= time(NULL
);
1601 server
.stat_numcommands
= 0;
1602 server
.stat_numconnections
= 0;
1603 server
.stat_expiredkeys
= 0;
1604 server
.stat_starttime
= time(NULL
);
1605 server
.unixtime
= time(NULL
);
1606 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1607 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1608 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1610 if (server
.appendonly
) {
1611 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1612 if (server
.appendfd
== -1) {
1613 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1619 if (server
.vm_enabled
) vmInit();
1622 /* Empty the whole database */
1623 static long long emptyDb() {
1625 long long removed
= 0;
1627 for (j
= 0; j
< server
.dbnum
; j
++) {
1628 removed
+= dictSize(server
.db
[j
].dict
);
1629 dictEmpty(server
.db
[j
].dict
);
1630 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes"/"no" configuration argument into an integer.
 *
 * The comparison is case insensitive. Returns 1 for "yes", 0 for "no",
 * and -1 for any other string, which is the value callers test for in
 * order to report an invalid argument. */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
1641 /* I agree, this is a very rudimentary way to load a configuration...
1642 will improve later if the config gets more complex */
1643 static void loadServerConfig(char *filename
) {
1645 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1648 char *errormsg
= "Fatal error, can't open config file '%s'";
1649 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1650 sprintf(errorbuf
, errormsg
, filename
);
1652 if (filename
[0] == '-' && filename
[1] == '\0')
1655 if ((fp
= fopen(filename
,"r")) == NULL
) {
1656 redisLog(REDIS_WARNING
, errorbuf
);
1661 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1667 line
= sdstrim(line
," \t\r\n");
1669 /* Skip comments and blank lines*/
1670 if (line
[0] == '#' || line
[0] == '\0') {
1675 /* Split into arguments */
1676 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1677 sdstolower(argv
[0]);
1679 /* Execute config directives */
1680 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1681 server
.maxidletime
= atoi(argv
[1]);
1682 if (server
.maxidletime
< 0) {
1683 err
= "Invalid timeout value"; goto loaderr
;
1685 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1686 server
.port
= atoi(argv
[1]);
1687 if (server
.port
< 1 || server
.port
> 65535) {
1688 err
= "Invalid port"; goto loaderr
;
1690 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1691 server
.bindaddr
= zstrdup(argv
[1]);
1692 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1693 int seconds
= atoi(argv
[1]);
1694 int changes
= atoi(argv
[2]);
1695 if (seconds
< 1 || changes
< 0) {
1696 err
= "Invalid save parameters"; goto loaderr
;
1698 appendServerSaveParams(seconds
,changes
);
1699 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1700 if (chdir(argv
[1]) == -1) {
1701 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1702 argv
[1], strerror(errno
));
1705 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1706 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1707 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1708 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1709 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1711 err
= "Invalid log level. Must be one of debug, notice, warning";
1714 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1717 server
.logfile
= zstrdup(argv
[1]);
1718 if (!strcasecmp(server
.logfile
,"stdout")) {
1719 zfree(server
.logfile
);
1720 server
.logfile
= NULL
;
1722 if (server
.logfile
) {
1723 /* Test if we are able to open the file. The server will not
1724 * be able to abort just for this problem later... */
1725 logfp
= fopen(server
.logfile
,"a");
1726 if (logfp
== NULL
) {
1727 err
= sdscatprintf(sdsempty(),
1728 "Can't open the log file: %s", strerror(errno
));
1733 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1734 server
.dbnum
= atoi(argv
[1]);
1735 if (server
.dbnum
< 1) {
1736 err
= "Invalid number of databases"; goto loaderr
;
1738 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1739 loadServerConfig(argv
[1]);
1740 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1741 server
.maxclients
= atoi(argv
[1]);
1742 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1743 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1744 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1745 server
.masterhost
= sdsnew(argv
[1]);
1746 server
.masterport
= atoi(argv
[2]);
1747 server
.replstate
= REDIS_REPL_CONNECT
;
1748 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1749 server
.masterauth
= zstrdup(argv
[1]);
1750 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1751 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1752 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1754 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1755 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1756 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1758 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1759 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1760 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1762 } else if (!strcasecmp(argv
[0],"shareobjectspoolsize") && argc
== 2) {
1763 server
.sharingpoolsize
= atoi(argv
[1]);
1764 if (server
.sharingpoolsize
< 1) {
1765 err
= "invalid object sharing pool size"; goto loaderr
;
1767 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1768 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1769 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1771 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1772 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1773 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1775 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1776 if (!strcasecmp(argv
[1],"no")) {
1777 server
.appendfsync
= APPENDFSYNC_NO
;
1778 } else if (!strcasecmp(argv
[1],"always")) {
1779 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1780 } else if (!strcasecmp(argv
[1],"everysec")) {
1781 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1783 err
= "argument must be 'no', 'always' or 'everysec'";
1786 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1787 server
.requirepass
= zstrdup(argv
[1]);
1788 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1789 zfree(server
.pidfile
);
1790 server
.pidfile
= zstrdup(argv
[1]);
1791 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1792 zfree(server
.dbfilename
);
1793 server
.dbfilename
= zstrdup(argv
[1]);
1794 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1795 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1796 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1798 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1799 zfree(server
.vm_swap_file
);
1800 server
.vm_swap_file
= zstrdup(argv
[1]);
1801 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1802 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1803 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1804 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1805 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1806 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1807 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1808 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1809 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1810 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1811 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1812 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1813 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1814 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1816 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1818 for (j
= 0; j
< argc
; j
++)
1823 if (fp
!= stdin
) fclose(fp
);
1827 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1828 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1829 fprintf(stderr
, ">>> '%s'\n", line
);
1830 fprintf(stderr
, "%s\n", err
);
1834 static void freeClientArgv(redisClient
*c
) {
1837 for (j
= 0; j
< c
->argc
; j
++)
1838 decrRefCount(c
->argv
[j
]);
1839 for (j
= 0; j
< c
->mbargc
; j
++)
1840 decrRefCount(c
->mbargv
[j
]);
1845 static void freeClient(redisClient
*c
) {
1848 /* Note that if the client we are freeing is blocked into a blocking
1849 * call, we have to set querybuf to NULL *before* to call
1850 * unblockClientWaitingData() to avoid processInputBuffer() will get
1851 * called. Also it is important to remove the file events after
1852 * this, because this call adds the READABLE event. */
1853 sdsfree(c
->querybuf
);
1855 if (c
->flags
& REDIS_BLOCKED
)
1856 unblockClientWaitingData(c
);
1858 /* Unsubscribe from all the pubsub classes */
1859 pubsubUnsubscribeAll(c
,0);
1860 dictRelease(c
->pubsub_classes
);
1861 /* Obvious cleanup */
1862 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1863 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1864 listRelease(c
->reply
);
1867 /* Remove from the list of clients */
1868 ln
= listSearchKey(server
.clients
,c
);
1869 redisAssert(ln
!= NULL
);
1870 listDelNode(server
.clients
,ln
);
1871 /* Remove from the list of clients waiting for swapped keys */
1872 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1873 ln
= listSearchKey(server
.io_ready_clients
,c
);
1875 listDelNode(server
.io_ready_clients
,ln
);
1876 server
.vm_blocked_clients
--;
1879 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1880 ln
= listFirst(c
->io_keys
);
1881 dontWaitForSwappedKey(c
,ln
->value
);
1883 listRelease(c
->io_keys
);
1884 /* Master/slave cleanup */
1885 if (c
->flags
& REDIS_SLAVE
) {
1886 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1888 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1889 ln
= listSearchKey(l
,c
);
1890 redisAssert(ln
!= NULL
);
1893 if (c
->flags
& REDIS_MASTER
) {
1894 server
.master
= NULL
;
1895 server
.replstate
= REDIS_REPL_CONNECT
;
1897 /* Release memory */
1900 freeClientMultiState(c
);
1904 #define GLUEREPLY_UP_TO (1024)
1905 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1907 char buf
[GLUEREPLY_UP_TO
];
1912 listRewind(c
->reply
,&li
);
1913 while((ln
= listNext(&li
))) {
1917 objlen
= sdslen(o
->ptr
);
1918 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1919 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1921 listDelNode(c
->reply
,ln
);
1923 if (copylen
== 0) return;
1927 /* Now the output buffer is empty, add the new single element */
1928 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1929 listAddNodeHead(c
->reply
,o
);
1932 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1933 redisClient
*c
= privdata
;
1934 int nwritten
= 0, totwritten
= 0, objlen
;
1937 REDIS_NOTUSED(mask
);
1939 /* Use writev() if we have enough buffers to send */
1940 if (!server
.glueoutputbuf
&&
1941 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1942 !(c
->flags
& REDIS_MASTER
))
1944 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1948 while(listLength(c
->reply
)) {
1949 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1950 glueReplyBuffersIfNeeded(c
);
1952 o
= listNodeValue(listFirst(c
->reply
));
1953 objlen
= sdslen(o
->ptr
);
1956 listDelNode(c
->reply
,listFirst(c
->reply
));
1960 if (c
->flags
& REDIS_MASTER
) {
1961 /* Don't reply to a master */
1962 nwritten
= objlen
- c
->sentlen
;
1964 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
1965 if (nwritten
<= 0) break;
1967 c
->sentlen
+= nwritten
;
1968 totwritten
+= nwritten
;
1969 /* If we fully sent the object on head go to the next one */
1970 if (c
->sentlen
== objlen
) {
1971 listDelNode(c
->reply
,listFirst(c
->reply
));
1974 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
1975 * bytes, in a single threaded server it's a good idea to serve
1976 * other clients as well, even if a very large request comes from
1977 * super fast link that is always able to accept data (in real world
1978 * scenario think about 'KEYS *' against the loopback interfae) */
1979 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
1981 if (nwritten
== -1) {
1982 if (errno
== EAGAIN
) {
1985 redisLog(REDIS_VERBOSE
,
1986 "Error writing to client: %s", strerror(errno
));
1991 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
1992 if (listLength(c
->reply
) == 0) {
1994 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1998 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2000 redisClient
*c
= privdata
;
2001 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2003 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2004 int offset
, ion
= 0;
2006 REDIS_NOTUSED(mask
);
2009 while (listLength(c
->reply
)) {
2010 offset
= c
->sentlen
;
2014 /* fill-in the iov[] array */
2015 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2016 o
= listNodeValue(node
);
2017 objlen
= sdslen(o
->ptr
);
2019 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2022 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2023 break; /* no more iovecs */
2025 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2026 iov
[ion
].iov_len
= objlen
- offset
;
2027 willwrite
+= objlen
- offset
;
2028 offset
= 0; /* just for the first item */
2035 /* write all collected blocks at once */
2036 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2037 if (errno
!= EAGAIN
) {
2038 redisLog(REDIS_VERBOSE
,
2039 "Error writing to client: %s", strerror(errno
));
2046 totwritten
+= nwritten
;
2047 offset
= c
->sentlen
;
2049 /* remove written robjs from c->reply */
2050 while (nwritten
&& listLength(c
->reply
)) {
2051 o
= listNodeValue(listFirst(c
->reply
));
2052 objlen
= sdslen(o
->ptr
);
2054 if(nwritten
>= objlen
- offset
) {
2055 listDelNode(c
->reply
, listFirst(c
->reply
));
2056 nwritten
-= objlen
- offset
;
2060 c
->sentlen
+= nwritten
;
2068 c
->lastinteraction
= time(NULL
);
2070 if (listLength(c
->reply
) == 0) {
2072 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2076 static struct redisCommand
*lookupCommand(char *name
) {
2078 while(cmdTable
[j
].name
!= NULL
) {
2079 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2085 /* resetClient prepares the client to process the next command. */
2086 static void resetClient(redisClient
*c
) {
2092 /* Call() is the core of Redis execution of a command */
2093 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2096 dirty
= server
.dirty
;
2098 if (server
.appendonly
&& server
.dirty
-dirty
)
2099 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2100 if (server
.dirty
-dirty
&& listLength(server
.slaves
))
2101 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2102 if (listLength(server
.monitors
))
2103 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2104 server
.stat_numcommands
++;
2107 /* If this function gets called we already read a whole
2108 * command, argments are in the client argv/argc fields.
2109 * processCommand() execute the command or prepare the
2110 * server for a bulk read from the client.
2112 * If 1 is returned the client is still alive and valid and
2113 * and other operations can be performed by the caller. Otherwise
2114 * if 0 is returned the client was destroied (i.e. after QUIT). */
2115 static int processCommand(redisClient
*c
) {
2116 struct redisCommand
*cmd
;
2118 /* Free some memory if needed (maxmemory setting) */
2119 if (server
.maxmemory
) freeMemoryIfNeeded();
2121 /* Handle the multi bulk command type. This is an alternative protocol
2122 * supported by Redis in order to receive commands that are composed of
2123 * multiple binary-safe "bulk" arguments. The latency of processing is
2124 * a bit higher but this allows things like multi-sets, so if this
2125 * protocol is used only for MSET and similar commands this is a big win. */
2126 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2127 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2128 if (c
->multibulk
<= 0) {
2132 decrRefCount(c
->argv
[c
->argc
-1]);
2136 } else if (c
->multibulk
) {
2137 if (c
->bulklen
== -1) {
2138 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2139 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2143 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2144 decrRefCount(c
->argv
[0]);
2145 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2147 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2152 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2156 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2157 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2161 if (c
->multibulk
== 0) {
2165 /* Here we need to swap the multi-bulk argc/argv with the
2166 * normal argc/argv of the client structure. */
2168 c
->argv
= c
->mbargv
;
2169 c
->mbargv
= auxargv
;
2172 c
->argc
= c
->mbargc
;
2173 c
->mbargc
= auxargc
;
2175 /* We need to set bulklen to something different than -1
2176 * in order for the code below to process the command without
2177 * to try to read the last argument of a bulk command as
2178 * a special argument. */
2180 /* continue below and process the command */
2187 /* -- end of multi bulk commands processing -- */
2189 /* The QUIT command is handled as a special case. Normal command
2190 * procs are unable to close the client connection safely */
2191 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2196 /* Now lookup the command and check ASAP about trivial error conditions
2197 * such wrong arity, bad command name and so forth. */
2198 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2201 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2202 (char*)c
->argv
[0]->ptr
));
2205 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2206 (c
->argc
< -cmd
->arity
)) {
2208 sdscatprintf(sdsempty(),
2209 "-ERR wrong number of arguments for '%s' command\r\n",
2213 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2214 /* This is a bulk command, we have to read the last argument yet. */
2215 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2217 decrRefCount(c
->argv
[c
->argc
-1]);
2218 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2220 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2225 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2226 /* It is possible that the bulk read is already in the
2227 * buffer. Check this condition and handle it accordingly.
2228 * This is just a fast path, alternative to call processInputBuffer().
2229 * It's a good idea since the code is small and this condition
2230 * happens most of the times. */
2231 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2232 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2234 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2236 /* Otherwise return... there is to read the last argument
2237 * from the socket. */
2241 /* Let's try to share objects on the command arguments vector */
2242 if (server
.shareobjects
) {
2244 for(j
= 1; j
< c
->argc
; j
++)
2245 c
->argv
[j
] = tryObjectSharing(c
->argv
[j
]);
2247 /* Let's try to encode the bulk object to save space. */
2248 if (cmd
->flags
& REDIS_CMD_BULK
)
2249 tryObjectEncoding(c
->argv
[c
->argc
-1]);
2251 /* Check if the user is authenticated */
2252 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2253 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2258 /* Handle the maxmemory directive */
2259 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2260 zmalloc_used_memory() > server
.maxmemory
)
2262 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2267 /* Exec the command */
2268 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2269 queueMultiCommand(c
,cmd
);
2270 addReply(c
,shared
.queued
);
2272 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2273 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2277 /* Prepare the client for the next command */
2282 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2287 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2288 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2289 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2290 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2293 if (argc
<= REDIS_STATIC_ARGS
) {
2296 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2299 lenobj
= createObject(REDIS_STRING
,
2300 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2301 lenobj
->refcount
= 0;
2302 outv
[outc
++] = lenobj
;
2303 for (j
= 0; j
< argc
; j
++) {
2304 lenobj
= createObject(REDIS_STRING
,
2305 sdscatprintf(sdsempty(),"$%lu\r\n",
2306 (unsigned long) stringObjectLen(argv
[j
])));
2307 lenobj
->refcount
= 0;
2308 outv
[outc
++] = lenobj
;
2309 outv
[outc
++] = argv
[j
];
2310 outv
[outc
++] = shared
.crlf
;
2313 /* Increment all the refcounts at start and decrement at end in order to
2314 * be sure to free objects if there is no slave in a replication state
2315 * able to be feed with commands */
2316 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2317 listRewind(slaves
,&li
);
2318 while((ln
= listNext(&li
))) {
2319 redisClient
*slave
= ln
->value
;
2321 /* Don't feed slaves that are still waiting for BGSAVE to start */
2322 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2324 /* Feed all the other slaves, MONITORs and so on */
2325 if (slave
->slaveseldb
!= dictid
) {
2329 case 0: selectcmd
= shared
.select0
; break;
2330 case 1: selectcmd
= shared
.select1
; break;
2331 case 2: selectcmd
= shared
.select2
; break;
2332 case 3: selectcmd
= shared
.select3
; break;
2333 case 4: selectcmd
= shared
.select4
; break;
2334 case 5: selectcmd
= shared
.select5
; break;
2335 case 6: selectcmd
= shared
.select6
; break;
2336 case 7: selectcmd
= shared
.select7
; break;
2337 case 8: selectcmd
= shared
.select8
; break;
2338 case 9: selectcmd
= shared
.select9
; break;
2340 selectcmd
= createObject(REDIS_STRING
,
2341 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2342 selectcmd
->refcount
= 0;
2345 addReply(slave
,selectcmd
);
2346 slave
->slaveseldb
= dictid
;
2348 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2350 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2351 if (outv
!= static_outv
) zfree(outv
);
2354 static void processInputBuffer(redisClient
*c
) {
2356 /* Before to process the input buffer, make sure the client is not
2357 * waitig for a blocking operation such as BLPOP. Note that the first
2358 * iteration the client is never blocked, otherwise the processInputBuffer
2359 * would not be called at all, but after the execution of the first commands
2360 * in the input buffer the client may be blocked, and the "goto again"
2361 * will try to reiterate. The following line will make it return asap. */
2362 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2363 if (c
->bulklen
== -1) {
2364 /* Read the first line of the query */
2365 char *p
= strchr(c
->querybuf
,'\n');
2372 query
= c
->querybuf
;
2373 c
->querybuf
= sdsempty();
2374 querylen
= 1+(p
-(query
));
2375 if (sdslen(query
) > querylen
) {
2376 /* leave data after the first line of the query in the buffer */
2377 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2379 *p
= '\0'; /* remove "\n" */
2380 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2381 sdsupdatelen(query
);
2383 /* Now we can split the query in arguments */
2384 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2387 if (c
->argv
) zfree(c
->argv
);
2388 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2390 for (j
= 0; j
< argc
; j
++) {
2391 if (sdslen(argv
[j
])) {
2392 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2400 /* Execute the command. If the client is still valid
2401 * after processCommand() return and there is something
2402 * on the query buffer try to process the next command. */
2403 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2405 /* Nothing to process, argc == 0. Just process the query
2406 * buffer if it's not empty or return to the caller */
2407 if (sdslen(c
->querybuf
)) goto again
;
2410 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2411 redisLog(REDIS_VERBOSE
, "Client protocol error");
2416 /* Bulk read handling. Note that if we are at this point
2417 the client already sent a command terminated with a newline,
2418 we are reading the bulk data that is actually the last
2419 argument of the command. */
2420 int qbl
= sdslen(c
->querybuf
);
2422 if (c
->bulklen
<= qbl
) {
2423 /* Copy everything but the final CRLF as final argument */
2424 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2426 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2427 /* Process the command. If the client is still valid after
2428 * the processing and there is more data in the buffer
2429 * try to parse it. */
2430 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2436 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2437 redisClient
*c
= (redisClient
*) privdata
;
2438 char buf
[REDIS_IOBUF_LEN
];
2441 REDIS_NOTUSED(mask
);
2443 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2445 if (errno
== EAGAIN
) {
2448 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2452 } else if (nread
== 0) {
2453 redisLog(REDIS_VERBOSE
, "Client closed connection");
2458 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2459 c
->lastinteraction
= time(NULL
);
2463 processInputBuffer(c
);
2466 static int selectDb(redisClient
*c
, int id
) {
2467 if (id
< 0 || id
>= server
.dbnum
)
2469 c
->db
= &server
.db
[id
];
2473 static void *dupClientReplyValue(void *o
) {
2474 incrRefCount((robj
*)o
);
2478 static redisClient
*createClient(int fd
) {
2479 redisClient
*c
= zmalloc(sizeof(*c
));
2481 anetNonBlock(NULL
,fd
);
2482 anetTcpNoDelay(NULL
,fd
);
2483 if (!c
) return NULL
;
2486 c
->querybuf
= sdsempty();
2495 c
->lastinteraction
= time(NULL
);
2496 c
->authenticated
= 0;
2497 c
->replstate
= REDIS_REPL_NONE
;
2498 c
->reply
= listCreate();
2499 listSetFreeMethod(c
->reply
,decrRefCount
);
2500 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2501 c
->blockingkeys
= NULL
;
2502 c
->blockingkeysnum
= 0;
2503 c
->io_keys
= listCreate();
2504 c
->pubsub_classes
= dictCreate(&setDictType
,NULL
);
2505 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2506 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2507 readQueryFromClient
, c
) == AE_ERR
) {
2511 listAddNodeTail(server
.clients
,c
);
2512 initClientMultiState(c
);
2516 static void addReply(redisClient
*c
, robj
*obj
) {
2517 if (listLength(c
->reply
) == 0 &&
2518 (c
->replstate
== REDIS_REPL_NONE
||
2519 c
->replstate
== REDIS_REPL_ONLINE
) &&
2520 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2521 sendReplyToClient
, c
) == AE_ERR
) return;
2523 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2524 obj
= dupStringObject(obj
);
2525 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2527 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2530 static void addReplySds(redisClient
*c
, sds s
) {
2531 robj
*o
= createObject(REDIS_STRING
,s
);
2536 static void addReplyDouble(redisClient
*c
, double d
) {
2539 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2540 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2541 (unsigned long) strlen(buf
),buf
));
2544 static void addReplyLong(redisClient
*c
, long l
) {
2549 addReply(c
,shared
.czero
);
2551 } else if (l
== 1) {
2552 addReply(c
,shared
.cone
);
2555 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2556 addReplySds(c
,sdsnewlen(buf
,len
));
2559 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2564 addReply(c
,shared
.czero
);
2566 } else if (ul
== 1) {
2567 addReply(c
,shared
.cone
);
2570 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2571 addReplySds(c
,sdsnewlen(buf
,len
));
2574 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2577 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2578 len
= sdslen(obj
->ptr
);
2580 long n
= (long)obj
->ptr
;
2582 /* Compute how many bytes will take this integer as a radix 10 string */
2588 while((n
= n
/10) != 0) {
2592 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2595 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2596 addReplyBulkLen(c
,obj
);
2598 addReply(c
,shared
.crlf
);
2601 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2602 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2604 addReply(c
,shared
.nullbulk
);
2606 robj
*o
= createStringObject(s
,strlen(s
));
2612 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2617 REDIS_NOTUSED(mask
);
2618 REDIS_NOTUSED(privdata
);
2620 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2621 if (cfd
== AE_ERR
) {
2622 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2625 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2626 if ((c
= createClient(cfd
)) == NULL
) {
2627 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2628 close(cfd
); /* May be already closed, just ingore errors */
2631 /* If maxclient directive is set and this is one client more... close the
2632 * connection. Note that we create the client instead to check before
2633 * for this condition, since now the socket is already set in nonblocking
2634 * mode and we can send an error for free using the Kernel I/O */
2635 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2636 char *err
= "-ERR max number of clients reached\r\n";
2638 /* That's a best effort error message, don't check write errors */
2639 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2640 /* Nothing to do, Just to avoid the warning... */
2645 server
.stat_numconnections
++;
2648 /* ======================= Redis objects implementation ===================== */
2650 static robj
*createObject(int type
, void *ptr
) {
2653 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2654 if (listLength(server
.objfreelist
)) {
2655 listNode
*head
= listFirst(server
.objfreelist
);
2656 o
= listNodeValue(head
);
2657 listDelNode(server
.objfreelist
,head
);
2658 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2660 if (server
.vm_enabled
) {
2661 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2662 o
= zmalloc(sizeof(*o
));
2664 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2668 o
->encoding
= REDIS_ENCODING_RAW
;
2671 if (server
.vm_enabled
) {
2672 /* Note that this code may run in the context of an I/O thread
2673 * and accessing to server.unixtime in theory is an error
2674 * (no locks). But in practice this is safe, and even if we read
2675 * garbage Redis will not fail, as it's just a statistical info */
2676 o
->vm
.atime
= server
.unixtime
;
2677 o
->storage
= REDIS_VM_MEMORY
;
2682 static robj
*createStringObject(char *ptr
, size_t len
) {
2683 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2686 static robj
*dupStringObject(robj
*o
) {
2687 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2688 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2691 static robj
*createListObject(void) {
2692 list
*l
= listCreate();
2694 listSetFreeMethod(l
,decrRefCount
);
2695 return createObject(REDIS_LIST
,l
);
2698 static robj
*createSetObject(void) {
2699 dict
*d
= dictCreate(&setDictType
,NULL
);
2700 return createObject(REDIS_SET
,d
);
2703 static robj
*createHashObject(void) {
2704 /* All the Hashes start as zipmaps. Will be automatically converted
2705 * into hash tables if there are enough elements or big elements
2707 unsigned char *zm
= zipmapNew();
2708 robj
*o
= createObject(REDIS_HASH
,zm
);
2709 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2713 static robj
*createZsetObject(void) {
2714 zset
*zs
= zmalloc(sizeof(*zs
));
2716 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2717 zs
->zsl
= zslCreate();
2718 return createObject(REDIS_ZSET
,zs
);
2721 static void freeStringObject(robj
*o
) {
2722 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2727 static void freeListObject(robj
*o
) {
2728 listRelease((list
*) o
->ptr
);
2731 static void freeSetObject(robj
*o
) {
2732 dictRelease((dict
*) o
->ptr
);
2735 static void freeZsetObject(robj
*o
) {
2738 dictRelease(zs
->dict
);
2743 static void freeHashObject(robj
*o
) {
2744 switch (o
->encoding
) {
2745 case REDIS_ENCODING_HT
:
2746 dictRelease((dict
*) o
->ptr
);
2748 case REDIS_ENCODING_ZIPMAP
:
2757 static void incrRefCount(robj
*o
) {
2758 redisAssert(!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
);
2762 static void decrRefCount(void *obj
) {
2765 /* Object is a key of a swapped out value, or in the process of being
2767 if (server
.vm_enabled
&&
2768 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2770 if (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
) {
2771 redisAssert(o
->refcount
== 1);
2773 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2774 redisAssert(o
->type
== REDIS_STRING
);
2775 freeStringObject(o
);
2776 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2777 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2778 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2779 !listAddNodeHead(server
.objfreelist
,o
))
2781 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2782 server
.vm_stats_swapped_objects
--;
2785 /* Object is in memory, or in the process of being swapped out. */
2786 if (--(o
->refcount
) == 0) {
2787 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2788 vmCancelThreadedIOJob(obj
);
2790 case REDIS_STRING
: freeStringObject(o
); break;
2791 case REDIS_LIST
: freeListObject(o
); break;
2792 case REDIS_SET
: freeSetObject(o
); break;
2793 case REDIS_ZSET
: freeZsetObject(o
); break;
2794 case REDIS_HASH
: freeHashObject(o
); break;
2795 default: redisAssert(0); break;
2797 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2798 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2799 !listAddNodeHead(server
.objfreelist
,o
))
2801 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2805 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2806 dictEntry
*de
= dictFind(db
->dict
,key
);
2808 robj
*key
= dictGetEntryKey(de
);
2809 robj
*val
= dictGetEntryVal(de
);
2811 if (server
.vm_enabled
) {
2812 if (key
->storage
== REDIS_VM_MEMORY
||
2813 key
->storage
== REDIS_VM_SWAPPING
)
2815 /* If we were swapping the object out, stop it, this key
2817 if (key
->storage
== REDIS_VM_SWAPPING
)
2818 vmCancelThreadedIOJob(key
);
2819 /* Update the access time of the key for the aging algorithm. */
2820 key
->vm
.atime
= server
.unixtime
;
2822 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2824 /* Our value was swapped on disk. Bring it at home. */
2825 redisAssert(val
== NULL
);
2826 val
= vmLoadObject(key
);
2827 dictGetEntryVal(de
) = val
;
2829 /* Clients blocked by the VM subsystem may be waiting for
2831 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2840 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2841 expireIfNeeded(db
,key
);
2842 return lookupKey(db
,key
);
2845 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2846 deleteIfVolatile(db
,key
);
2847 return lookupKey(db
,key
);
2850 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2851 robj
*o
= lookupKeyRead(c
->db
, key
);
2852 if (!o
) addReply(c
,reply
);
2856 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2857 robj
*o
= lookupKeyWrite(c
->db
, key
);
2858 if (!o
) addReply(c
,reply
);
2862 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2863 if (o
->type
!= type
) {
2864 addReply(c
,shared
.wrongtypeerr
);
2870 static int deleteKey(redisDb
*db
, robj
*key
) {
2873 /* We need to protect key from destruction: after the first dictDelete()
2874 * it may happen that 'key' is no longer valid if we don't increment
2875 * it's count. This may happen when we get the object reference directly
2876 * from the hash table with dictRandomKey() or dict iterators */
2878 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2879 retval
= dictDelete(db
->dict
,key
);
2882 return retval
== DICT_OK
;
2885 /* Try to share an object against the shared objects pool */
2886 static robj
*tryObjectSharing(robj
*o
) {
2887 struct dictEntry
*de
;
2890 if (o
== NULL
|| server
.shareobjects
== 0) return o
;
2892 redisAssert(o
->type
== REDIS_STRING
);
2893 de
= dictFind(server
.sharingpool
,o
);
2895 robj
*shared
= dictGetEntryKey(de
);
2897 c
= ((unsigned long) dictGetEntryVal(de
))+1;
2898 dictGetEntryVal(de
) = (void*) c
;
2899 incrRefCount(shared
);
2903 /* Here we are using a stream algorihtm: Every time an object is
2904 * shared we increment its count, everytime there is a miss we
2905 * recrement the counter of a random object. If this object reaches
2906 * zero we remove the object and put the current object instead. */
2907 if (dictSize(server
.sharingpool
) >=
2908 server
.sharingpoolsize
) {
2909 de
= dictGetRandomKey(server
.sharingpool
);
2910 redisAssert(de
!= NULL
);
2911 c
= ((unsigned long) dictGetEntryVal(de
))-1;
2912 dictGetEntryVal(de
) = (void*) c
;
2914 dictDelete(server
.sharingpool
,de
->key
);
2917 c
= 0; /* If the pool is empty we want to add this object */
2922 retval
= dictAdd(server
.sharingpool
,o
,(void*)1);
2923 redisAssert(retval
== DICT_OK
);
2930 /* Check if the nul-terminated string 's' can be represented by a long
2931 * (that is, is a number that fits into long without any other space or
2932 * character before or after the digits).
2934 * If so, the function returns REDIS_OK and *longval is set to the value
2935 * of the number. Otherwise REDIS_ERR is returned */
2936 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2937 char buf
[32], *endptr
;
2941 value
= strtol(s
, &endptr
, 10);
2942 if (endptr
[0] != '\0') return REDIS_ERR
;
2943 slen
= snprintf(buf
,32,"%ld",value
);
2945 /* If the number converted back into a string is not identical
2946 * then it's not possible to encode the string as integer */
2947 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2948 if (longval
) *longval
= value
;
2952 /* Try to encode a string object in order to save space */
2953 static int tryObjectEncoding(robj
*o
) {
2957 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2958 return REDIS_ERR
; /* Already encoded */
2960 /* It's not save to encode shared objects: shared objects can be shared
2961 * everywhere in the "object space" of Redis. Encoded objects can only
2962 * appear as "values" (and not, for instance, as keys) */
2963 if (o
->refcount
> 1) return REDIS_ERR
;
2965 /* Currently we try to encode only strings */
2966 redisAssert(o
->type
== REDIS_STRING
);
2968 /* Check if we can represent this string as a long integer */
2969 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return REDIS_ERR
;
2971 /* Ok, this object can be encoded */
2972 o
->encoding
= REDIS_ENCODING_INT
;
2974 o
->ptr
= (void*) value
;
2978 /* Get a decoded version of an encoded object (returned as a new object).
2979 * If the object is already raw-encoded just increment the ref count. */
2980 static robj
*getDecodedObject(robj
*o
) {
2983 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2987 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
2990 snprintf(buf
,32,"%ld",(long)o
->ptr
);
2991 dec
= createStringObject(buf
,strlen(buf
));
2994 redisAssert(1 != 1);
2998 /* Compare two string objects via strcmp() or alike.
2999 * Note that the objects may be integer-encoded. In such a case we
3000 * use snprintf() to get a string representation of the numbers on the stack
3001 * and compare the strings, it's much faster than calling getDecodedObject().
3003 * Important note: if objects are not integer encoded, but binary-safe strings,
3004 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3006 static int compareStringObjects(robj
*a
, robj
*b
) {
3007 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3008 char bufa
[128], bufb
[128], *astr
, *bstr
;
3011 if (a
== b
) return 0;
3012 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3013 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3019 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3020 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3026 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3029 static size_t stringObjectLen(robj
*o
) {
3030 redisAssert(o
->type
== REDIS_STRING
);
3031 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3032 return sdslen(o
->ptr
);
3036 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3040 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the write failed. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    if (fwrite(&type,1,1,fp) == 0) return -1;
    return 0;
}
/* Write 't' to 'fp' as a 4 byte integer, in host byte order (the value is
 * truncated to int32_t before being written).
 * Returns 0 on success, -1 if the write failed. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;
    if (fwrite(&t32,4,1,fp) == 0) return -1;
    return 0;
}
3053 /* check rdbLoadLen() comments for more info */
3054 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3055 unsigned char buf
[2];
3058 /* Save a 6 bit len */
3059 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3060 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3061 } else if (len
< (1<<14)) {
3062 /* Save a 14 bit len */
3063 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3065 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3067 /* Save a 32 bit len */
3068 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3069 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3071 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3076 /* String objects in the form "2391" "-100" without any space and with a
3077 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3078 * encoded as integers to save space */
3079 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3081 char *endptr
, buf
[32];
3083 /* Check if it's possible to encode this value as a number */
3084 value
= strtoll(s
, &endptr
, 10);
3085 if (endptr
[0] != '\0') return 0;
3086 snprintf(buf
,32,"%lld",value
);
3088 /* If the number converted back into a string is not identical
3089 * then it's not possible to encode the string as integer */
3090 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3092 /* Finally check if it fits in our ranges */
3093 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3094 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3095 enc
[1] = value
&0xFF;
3097 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3098 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3099 enc
[1] = value
&0xFF;
3100 enc
[2] = (value
>>8)&0xFF;
3102 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3103 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3104 enc
[1] = value
&0xFF;
3105 enc
[2] = (value
>>8)&0xFF;
3106 enc
[3] = (value
>>16)&0xFF;
3107 enc
[4] = (value
>>24)&0xFF;
3114 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3115 size_t comprlen
, outlen
;
3119 /* We require at least four bytes compression for this to be worth it */
3120 if (len
<= 4) return 0;
3122 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3123 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3124 if (comprlen
== 0) {
3128 /* Data compressed! Let's save it on disk */
3129 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3130 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3131 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3132 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3133 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3142 /* Save a string objet as [len][data] on disk. If the object is a string
3143 * representation of an integer value we try to safe it in a special form */
3144 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3147 /* Try integer encoding */
3149 unsigned char buf
[5];
3150 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3151 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3156 /* Try LZF compression - under 20 bytes it's unable to compress even
3157 * aaaaaaaaaaaaaaaaaa so skip it */
3158 if (server
.rdbcompression
&& len
> 20) {
3161 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3162 if (retval
== -1) return -1;
3163 if (retval
> 0) return 0;
3164 /* retval == 0 means data can't be compressed, save the old way */
3167 /* Store verbatim */
3168 if (rdbSaveLen(fp
,len
) == -1) return -1;
3169 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3173 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3174 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3177 /* Avoid incr/decr ref count business when possible.
3178 * This plays well with copy-on-write given that we are probably
3179 * in a child process (BGSAVE). Also this makes sure key objects
3180 * of swapped objects are not incRefCount-ed (an assert does not allow
3181 * this in order to avoid bugs) */
3182 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3183 obj
= getDecodedObject(obj
);
3184 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3187 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Finite values are printed with "%.17g". Returns 0 on success, -1 on
 * write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = (unsigned char) strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3219 /* Save a Redis object. */
3220 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3221 if (o
->type
== REDIS_STRING
) {
3222 /* Save a string value */
3223 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3224 } else if (o
->type
== REDIS_LIST
) {
3225 /* Save a list value */
3226 list
*list
= o
->ptr
;
3230 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3231 listRewind(list
,&li
);
3232 while((ln
= listNext(&li
))) {
3233 robj
*eleobj
= listNodeValue(ln
);
3235 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3237 } else if (o
->type
== REDIS_SET
) {
3238 /* Save a set value */
3240 dictIterator
*di
= dictGetIterator(set
);
3243 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3244 while((de
= dictNext(di
)) != NULL
) {
3245 robj
*eleobj
= dictGetEntryKey(de
);
3247 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3249 dictReleaseIterator(di
);
3250 } else if (o
->type
== REDIS_ZSET
) {
3251 /* Save a set value */
3253 dictIterator
*di
= dictGetIterator(zs
->dict
);
3256 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3257 while((de
= dictNext(di
)) != NULL
) {
3258 robj
*eleobj
= dictGetEntryKey(de
);
3259 double *score
= dictGetEntryVal(de
);
3261 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3262 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3264 dictReleaseIterator(di
);
3265 } else if (o
->type
== REDIS_HASH
) {
3266 /* Save a hash value */
3267 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3268 unsigned char *p
= zipmapRewind(o
->ptr
);
3269 unsigned int count
= zipmapLen(o
->ptr
);
3270 unsigned char *key
, *val
;
3271 unsigned int klen
, vlen
;
3273 if (rdbSaveLen(fp
,count
) == -1) return -1;
3274 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3275 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3276 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3279 dictIterator
*di
= dictGetIterator(o
->ptr
);
3282 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3283 while((de
= dictNext(di
)) != NULL
) {
3284 robj
*key
= dictGetEntryKey(de
);
3285 robj
*val
= dictGetEntryVal(de
);
3287 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3288 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3290 dictReleaseIterator(di
);
3298 /* Return the length the object will have on disk if saved with
3299 * the rdbSaveObject() function. Currently we use a trick to get
3300 * this length with very little changes to the code. In the future
3301 * we could switch to a faster solution. */
3302 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3303 if (fp
== NULL
) fp
= server
.devnull
;
3305 assert(rdbSaveObject(fp
,o
) != 1);
3309 /* Return the number of pages required to save this object in the swap file */
3310 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3311 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3313 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3316 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3317 static int rdbSave(char *filename
) {
3318 dictIterator
*di
= NULL
;
3323 time_t now
= time(NULL
);
3325 /* Wait for I/O therads to terminate, just in case this is a
3326 * foreground-saving, to avoid seeking the swap file descriptor at the
3328 if (server
.vm_enabled
)
3329 waitEmptyIOJobsQueue();
3331 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3332 fp
= fopen(tmpfile
,"w");
3334 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3337 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3338 for (j
= 0; j
< server
.dbnum
; j
++) {
3339 redisDb
*db
= server
.db
+j
;
3341 if (dictSize(d
) == 0) continue;
3342 di
= dictGetIterator(d
);
3348 /* Write the SELECT DB opcode */
3349 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3350 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3352 /* Iterate this DB writing every entry */
3353 while((de
= dictNext(di
)) != NULL
) {
3354 robj
*key
= dictGetEntryKey(de
);
3355 robj
*o
= dictGetEntryVal(de
);
3356 time_t expiretime
= getExpire(db
,key
);
3358 /* Save the expire time */
3359 if (expiretime
!= -1) {
3360 /* If this key is already expired skip it */
3361 if (expiretime
< now
) continue;
3362 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3363 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3365 /* Save the key and associated value. This requires special
3366 * handling if the value is swapped out. */
3367 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3368 key
->storage
== REDIS_VM_SWAPPING
) {
3369 /* Save type, key, value */
3370 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3371 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3372 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3374 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3376 /* Get a preview of the object in memory */
3377 po
= vmPreviewObject(key
);
3378 /* Save type, key, value */
3379 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3380 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3381 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3382 /* Remove the loaded object from memory */
3386 dictReleaseIterator(di
);
3389 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3391 /* Make sure data will not remain on the OS's output buffers */
3396 /* Use RENAME to make sure the DB file is changed atomically only
3397 * if the generate DB file is ok. */
3398 if (rename(tmpfile
,filename
) == -1) {
3399 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3403 redisLog(REDIS_NOTICE
,"DB saved on disk");
3405 server
.lastsave
= time(NULL
);
3411 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3412 if (di
) dictReleaseIterator(di
);
3416 static int rdbSaveBackground(char *filename
) {
3419 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3420 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3421 if ((childpid
= fork()) == 0) {
3423 if (server
.vm_enabled
) vmReopenSwapFile();
3425 if (rdbSave(filename
) == REDIS_OK
) {
3432 if (childpid
== -1) {
3433 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3437 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3438 server
.bgsavechildpid
= childpid
;
3441 return REDIS_OK
; /* unreached */
/* Remove the temporary dump file "temp-<childpid>.rdb", i.e. the name
 * rdbSave() uses when run by the process with that pid. */
static void rdbRemoveTempFile(pid_t childpid) {
    char tmpfile[256];

    snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
    unlink(tmpfile);
}
/* Read a single type byte from 'fp'.
 * Returns the byte as a non negative int, or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;
    if (fread(&type,1,1,fp) == 0) return -1;
    return type;
}
/* Read a 4 byte integer (host byte order) from 'fp' and return it as a
 * time_t. Returns -1 if the value could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;
    if (fread(&t32,4,1,fp) == 0) return -1;
    return (time_t) t32;
}
3463 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3464 * of this file for a description of how this are stored on disk.
3466 * isencoded is set to 1 if the readed length is not actually a length but
3467 * an "encoding type", check the above comments for more info */
3468 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3469 unsigned char buf
[2];
3473 if (isencoded
) *isencoded
= 0;
3474 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3475 type
= (buf
[0]&0xC0)>>6;
3476 if (type
== REDIS_RDB_6BITLEN
) {
3477 /* Read a 6 bit len */
3479 } else if (type
== REDIS_RDB_ENCVAL
) {
3480 /* Read a 6 bit len encoding type */
3481 if (isencoded
) *isencoded
= 1;
3483 } else if (type
== REDIS_RDB_14BITLEN
) {
3484 /* Read a 14 bit len */
3485 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3486 return ((buf
[0]&0x3F)<<8)|buf
[1];
3488 /* Read a 32 bit len */
3489 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3494 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3495 unsigned char enc
[4];
3498 if (enctype
== REDIS_RDB_ENC_INT8
) {
3499 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3500 val
= (signed char)enc
[0];
3501 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3503 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3504 v
= enc
[0]|(enc
[1]<<8);
3506 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3508 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3509 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3512 val
= 0; /* anti-warning */
3515 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3518 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3519 unsigned int len
, clen
;
3520 unsigned char *c
= NULL
;
3523 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3524 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3525 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3526 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3527 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3528 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3530 return createObject(REDIS_STRING
,val
);
3537 static robj
*rdbLoadStringObject(FILE*fp
) {
3542 len
= rdbLoadLen(fp
,&isencoded
);
3545 case REDIS_RDB_ENC_INT8
:
3546 case REDIS_RDB_ENC_INT16
:
3547 case REDIS_RDB_ENC_INT32
:
3548 return tryObjectSharing(rdbLoadIntegerObject(fp
,len
));
3549 case REDIS_RDB_ENC_LZF
:
3550 return tryObjectSharing(rdbLoadLzfStringObject(fp
));
3556 if (len
== REDIS_RDB_LENERR
) return NULL
;
3557 val
= sdsnewlen(NULL
,len
);
3558 if (len
&& fread(val
,len
,1,fp
) == 0) {
3562 return tryObjectSharing(createObject(REDIS_STRING
,val
));
3565 /* For information about double serialization check rdbSaveDoubleValue() */
3566 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3570 if (fread(&len
,1,1,fp
) == 0) return -1;
3572 case 255: *val
= R_NegInf
; return 0;
3573 case 254: *val
= R_PosInf
; return 0;
3574 case 253: *val
= R_Nan
; return 0;
3576 if (fread(buf
,len
,1,fp
) == 0) return -1;
3578 sscanf(buf
, "%lg", val
);
3583 /* Load a Redis object of the specified type from the specified file.
3584 * On success a newly allocated object is returned, otherwise NULL. */
3585 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3588 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3589 if (type
== REDIS_STRING
) {
3590 /* Read string value */
3591 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3592 tryObjectEncoding(o
);
3593 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3594 /* Read list/set value */
3597 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3598 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3599 /* It's faster to expand the dict to the right size asap in order
3600 * to avoid rehashing */
3601 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3602 dictExpand(o
->ptr
,listlen
);
3603 /* Load every single element of the list/set */
3607 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3608 tryObjectEncoding(ele
);
3609 if (type
== REDIS_LIST
) {
3610 listAddNodeTail((list
*)o
->ptr
,ele
);
3612 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3615 } else if (type
== REDIS_ZSET
) {
3616 /* Read list/set value */
3620 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3621 o
= createZsetObject();
3623 /* Load every single element of the list/set */
3626 double *score
= zmalloc(sizeof(double));
3628 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3629 tryObjectEncoding(ele
);
3630 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3631 dictAdd(zs
->dict
,ele
,score
);
3632 zslInsert(zs
->zsl
,*score
,ele
);
3633 incrRefCount(ele
); /* added to skiplist */
3635 } else if (type
== REDIS_HASH
) {
3638 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3639 o
= createHashObject();
3640 /* Too many entries? Use an hash table. */
3641 if (hashlen
> server
.hash_max_zipmap_entries
)
3642 convertToRealHash(o
);
3643 /* Load every key/value, then set it into the zipmap or hash
3644 * table, as needed. */
3648 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3649 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3650 /* If we are using a zipmap and there are too big values
3651 * the object is converted to real hash table encoding. */
3652 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3653 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3654 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3656 convertToRealHash(o
);
3659 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3660 unsigned char *zm
= o
->ptr
;
3662 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3663 val
->ptr
,sdslen(val
->ptr
),NULL
);
3668 tryObjectEncoding(key
);
3669 tryObjectEncoding(val
);
3670 dictAdd((dict
*)o
->ptr
,key
,val
);
3679 static int rdbLoad(char *filename
) {
3681 robj
*keyobj
= NULL
;
3683 int type
, retval
, rdbver
;
3684 dict
*d
= server
.db
[0].dict
;
3685 redisDb
*db
= server
.db
+0;
3687 time_t expiretime
= -1, now
= time(NULL
);
3688 long long loadedkeys
= 0;
3690 fp
= fopen(filename
,"r");
3691 if (!fp
) return REDIS_ERR
;
3692 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3694 if (memcmp(buf
,"REDIS",5) != 0) {
3696 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3699 rdbver
= atoi(buf
+5);
3702 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3709 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3710 if (type
== REDIS_EXPIRETIME
) {
3711 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3712 /* We read the time so we need to read the object type again */
3713 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3715 if (type
== REDIS_EOF
) break;
3716 /* Handle SELECT DB opcode as a special case */
3717 if (type
== REDIS_SELECTDB
) {
3718 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3720 if (dbid
>= (unsigned)server
.dbnum
) {
3721 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3724 db
= server
.db
+dbid
;
3729 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3731 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3732 /* Add the new object in the hash table */
3733 retval
= dictAdd(d
,keyobj
,o
);
3734 if (retval
== DICT_ERR
) {
3735 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3738 /* Set the expire time if needed */
3739 if (expiretime
!= -1) {
3740 setExpire(db
,keyobj
,expiretime
);
3741 /* Delete this key if already expired */
3742 if (expiretime
< now
) deleteKey(db
,keyobj
);
3746 /* Handle swapping while loading big datasets when VM is on */
3748 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3749 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3750 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3757 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3758 if (keyobj
) decrRefCount(keyobj
);
3759 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3761 return REDIS_ERR
; /* Just to avoid warning */
3764 /*================================== Commands =============================== */
3766 static void authCommand(redisClient
*c
) {
3767 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3768 c
->authenticated
= 1;
3769 addReply(c
,shared
.ok
);
3771 c
->authenticated
= 0;
3772 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3776 static void pingCommand(redisClient
*c
) {
3777 addReply(c
,shared
.pong
);
3780 static void echoCommand(redisClient
*c
) {
3781 addReplyBulk(c
,c
->argv
[1]);
3784 /*=================================== Strings =============================== */
3786 static void setGenericCommand(redisClient
*c
, int nx
) {
3789 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3790 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3791 if (retval
== DICT_ERR
) {
3793 /* If the key is about a swapped value, we want a new key object
3794 * to overwrite the old. So we delete the old key in the database.
3795 * This will also make sure that swap pages about the old object
3796 * will be marked as free. */
3797 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3798 incrRefCount(c
->argv
[1]);
3799 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3800 incrRefCount(c
->argv
[2]);
3802 addReply(c
,shared
.czero
);
3806 incrRefCount(c
->argv
[1]);
3807 incrRefCount(c
->argv
[2]);
3810 removeExpire(c
->db
,c
->argv
[1]);
3811 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3814 static void setCommand(redisClient
*c
) {
3815 setGenericCommand(c
,0);
3818 static void setnxCommand(redisClient
*c
) {
3819 setGenericCommand(c
,1);
3822 static int getGenericCommand(redisClient
*c
) {
3825 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3828 if (o
->type
!= REDIS_STRING
) {
3829 addReply(c
,shared
.wrongtypeerr
);
3837 static void getCommand(redisClient
*c
) {
3838 getGenericCommand(c
);
3841 static void getsetCommand(redisClient
*c
) {
3842 if (getGenericCommand(c
) == REDIS_ERR
) return;
3843 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3844 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3846 incrRefCount(c
->argv
[1]);
3848 incrRefCount(c
->argv
[2]);
3850 removeExpire(c
->db
,c
->argv
[1]);
3853 static void mgetCommand(redisClient
*c
) {
3856 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3857 for (j
= 1; j
< c
->argc
; j
++) {
3858 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3860 addReply(c
,shared
.nullbulk
);
3862 if (o
->type
!= REDIS_STRING
) {
3863 addReply(c
,shared
.nullbulk
);
3871 static void msetGenericCommand(redisClient
*c
, int nx
) {
3872 int j
, busykeys
= 0;
3874 if ((c
->argc
% 2) == 0) {
3875 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3878 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3879 * set nothing at all if at least one already key exists. */
3881 for (j
= 1; j
< c
->argc
; j
+= 2) {
3882 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3888 addReply(c
, shared
.czero
);
3892 for (j
= 1; j
< c
->argc
; j
+= 2) {
3895 tryObjectEncoding(c
->argv
[j
+1]);
3896 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3897 if (retval
== DICT_ERR
) {
3898 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3899 incrRefCount(c
->argv
[j
+1]);
3901 incrRefCount(c
->argv
[j
]);
3902 incrRefCount(c
->argv
[j
+1]);
3904 removeExpire(c
->db
,c
->argv
[j
]);
3906 server
.dirty
+= (c
->argc
-1)/2;
3907 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3910 static void msetCommand(redisClient
*c
) {
3911 msetGenericCommand(c
,0);
3914 static void msetnxCommand(redisClient
*c
) {
3915 msetGenericCommand(c
,1);
3918 static void incrDecrCommand(redisClient
*c
, long long incr
) {
3923 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3927 if (o
->type
!= REDIS_STRING
) {
3932 if (o
->encoding
== REDIS_ENCODING_RAW
)
3933 value
= strtoll(o
->ptr
, &eptr
, 10);
3934 else if (o
->encoding
== REDIS_ENCODING_INT
)
3935 value
= (long)o
->ptr
;
3937 redisAssert(1 != 1);
3942 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
3943 tryObjectEncoding(o
);
3944 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
3945 if (retval
== DICT_ERR
) {
3946 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
3947 removeExpire(c
->db
,c
->argv
[1]);
3949 incrRefCount(c
->argv
[1]);
3952 addReply(c
,shared
.colon
);
3954 addReply(c
,shared
.crlf
);
3957 static void incrCommand(redisClient
*c
) {
3958 incrDecrCommand(c
,1);
3961 static void decrCommand(redisClient
*c
) {
3962 incrDecrCommand(c
,-1);
3965 static void incrbyCommand(redisClient
*c
) {
3966 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3967 incrDecrCommand(c
,incr
);
3970 static void decrbyCommand(redisClient
*c
) {
3971 long long incr
= strtoll(c
->argv
[2]->ptr
, NULL
, 10);
3972 incrDecrCommand(c
,-incr
);
3975 static void appendCommand(redisClient
*c
) {
3980 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
3982 /* Create the key */
3983 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3984 incrRefCount(c
->argv
[1]);
3985 incrRefCount(c
->argv
[2]);
3986 totlen
= stringObjectLen(c
->argv
[2]);
3990 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
3993 o
= dictGetEntryVal(de
);
3994 if (o
->type
!= REDIS_STRING
) {
3995 addReply(c
,shared
.wrongtypeerr
);
3998 /* If the object is specially encoded or shared we have to make
4000 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4001 robj
*decoded
= getDecodedObject(o
);
4003 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4004 decrRefCount(decoded
);
4005 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4008 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4009 o
->ptr
= sdscatlen(o
->ptr
,
4010 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4012 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4013 (unsigned long) c
->argv
[2]->ptr
);
4015 totlen
= sdslen(o
->ptr
);
4018 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4021 static void substrCommand(redisClient
*c
) {
4023 long start
= atoi(c
->argv
[2]->ptr
);
4024 long end
= atoi(c
->argv
[3]->ptr
);
4025 size_t rangelen
, strlen
;
4028 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4029 checkType(c
,o
,REDIS_STRING
)) return;
4031 o
= getDecodedObject(o
);
4032 strlen
= sdslen(o
->ptr
);
4034 /* convert negative indexes */
4035 if (start
< 0) start
= strlen
+start
;
4036 if (end
< 0) end
= strlen
+end
;
4037 if (start
< 0) start
= 0;
4038 if (end
< 0) end
= 0;
4040 /* indexes sanity checks */
4041 if (start
> end
|| (size_t)start
>= strlen
) {
4042 /* Out of range start or start > end result in null reply */
4043 addReply(c
,shared
.nullbulk
);
4047 if ((size_t)end
>= strlen
) end
= strlen
-1;
4048 rangelen
= (end
-start
)+1;
4050 /* Return the result */
4051 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4052 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4053 addReplySds(c
,range
);
4054 addReply(c
,shared
.crlf
);
4058 /* ========================= Type agnostic commands ========================= */
4060 static void delCommand(redisClient
*c
) {
4063 for (j
= 1; j
< c
->argc
; j
++) {
4064 if (deleteKey(c
->db
,c
->argv
[j
])) {
4069 addReplyLong(c
,deleted
);
4072 static void existsCommand(redisClient
*c
) {
4073 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4076 static void selectCommand(redisClient
*c
) {
4077 int id
= atoi(c
->argv
[1]->ptr
);
4079 if (selectDb(c
,id
) == REDIS_ERR
) {
4080 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4082 addReply(c
,shared
.ok
);
4086 static void randomkeyCommand(redisClient
*c
) {
4090 de
= dictGetRandomKey(c
->db
->dict
);
4091 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4094 addReply(c
,shared
.plus
);
4095 addReply(c
,shared
.crlf
);
4097 addReply(c
,shared
.plus
);
4098 addReply(c
,dictGetEntryKey(de
));
4099 addReply(c
,shared
.crlf
);
4103 static void keysCommand(redisClient
*c
) {
4106 sds pattern
= c
->argv
[1]->ptr
;
4107 int plen
= sdslen(pattern
);
4108 unsigned long numkeys
= 0;
4109 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4111 di
= dictGetIterator(c
->db
->dict
);
4113 decrRefCount(lenobj
);
4114 while((de
= dictNext(di
)) != NULL
) {
4115 robj
*keyobj
= dictGetEntryKey(de
);
4117 sds key
= keyobj
->ptr
;
4118 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4119 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4120 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4121 addReplyBulk(c
,keyobj
);
4126 dictReleaseIterator(di
);
4127 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4130 static void dbsizeCommand(redisClient
*c
) {
4132 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4135 static void lastsaveCommand(redisClient
*c
) {
4137 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4140 static void typeCommand(redisClient
*c
) {
4144 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4149 case REDIS_STRING
: type
= "+string"; break;
4150 case REDIS_LIST
: type
= "+list"; break;
4151 case REDIS_SET
: type
= "+set"; break;
4152 case REDIS_ZSET
: type
= "+zset"; break;
4153 case REDIS_HASH
: type
= "+hash"; break;
4154 default: type
= "+unknown"; break;
4157 addReplySds(c
,sdsnew(type
));
4158 addReply(c
,shared
.crlf
);
4161 static void saveCommand(redisClient
*c
) {
4162 if (server
.bgsavechildpid
!= -1) {
4163 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4166 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4167 addReply(c
,shared
.ok
);
4169 addReply(c
,shared
.err
);
4173 static void bgsaveCommand(redisClient
*c
) {
4174 if (server
.bgsavechildpid
!= -1) {
4175 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4178 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4179 char *status
= "+Background saving started\r\n";
4180 addReplySds(c
,sdsnew(status
));
4182 addReply(c
,shared
.err
);
4186 static void shutdownCommand(redisClient
*c
) {
4187 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
    /* Kill the saving child if there is a background saving in progress.
       We want to avoid race conditions, for instance our saving child may
       overwrite the synchronous save done by SHUTDOWN. */
4191 if (server
.bgsavechildpid
!= -1) {
4192 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4193 kill(server
.bgsavechildpid
,SIGKILL
);
4194 rdbRemoveTempFile(server
.bgsavechildpid
);
4196 if (server
.appendonly
) {
4197 /* Append only file: fsync() the AOF and exit */
4198 fsync(server
.appendfd
);
4199 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4202 /* Snapshotting. Perform a SYNC SAVE and exit */
4203 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4204 if (server
.daemonize
)
4205 unlink(server
.pidfile
);
4206 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4207 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4208 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4211 /* Ooops.. error saving! The best we can do is to continue
4212 * operating. Note that if there was a background saving process,
4213 * in the next cron() Redis will be notified that the background
4214 * saving aborted, handling special stuff like slaves pending for
4215 * synchronization... */
4216 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4218 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4223 static void renameGenericCommand(redisClient
*c
, int nx
) {
4226 /* To use the same key as src and dst is probably an error */
4227 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4228 addReply(c
,shared
.sameobjecterr
);
4232 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4236 deleteIfVolatile(c
->db
,c
->argv
[2]);
4237 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4240 addReply(c
,shared
.czero
);
4243 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4245 incrRefCount(c
->argv
[2]);
4247 deleteKey(c
->db
,c
->argv
[1]);
4249 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4252 static void renameCommand(redisClient
*c
) {
4253 renameGenericCommand(c
,0);
4256 static void renamenxCommand(redisClient
*c
) {
4257 renameGenericCommand(c
,1);
4260 static void moveCommand(redisClient
*c
) {
4265 /* Obtain source and target DB pointers */
4268 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4269 addReply(c
,shared
.outofrangeerr
);
4273 selectDb(c
,srcid
); /* Back to the source DB */
4275 /* If the user is moving using as target the same
4276 * DB as the source DB it is probably an error. */
4278 addReply(c
,shared
.sameobjecterr
);
4282 /* Check if the element exists and get a reference */
4283 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4285 addReply(c
,shared
.czero
);
4289 /* Try to add the element to the target DB */
4290 deleteIfVolatile(dst
,c
->argv
[1]);
4291 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4292 addReply(c
,shared
.czero
);
4295 incrRefCount(c
->argv
[1]);
4298 /* OK! key moved, free the entry in the source DB */
4299 deleteKey(src
,c
->argv
[1]);
4301 addReply(c
,shared
.cone
);
4304 /* =================================== Lists ================================ */
4305 static void pushGenericCommand(redisClient
*c
, int where
) {
4309 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4311 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4312 addReply(c
,shared
.cone
);
4315 lobj
= createListObject();
4317 if (where
== REDIS_HEAD
) {
4318 listAddNodeHead(list
,c
->argv
[2]);
4320 listAddNodeTail(list
,c
->argv
[2]);
4322 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4323 incrRefCount(c
->argv
[1]);
4324 incrRefCount(c
->argv
[2]);
4326 if (lobj
->type
!= REDIS_LIST
) {
4327 addReply(c
,shared
.wrongtypeerr
);
4330 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4331 addReply(c
,shared
.cone
);
4335 if (where
== REDIS_HEAD
) {
4336 listAddNodeHead(list
,c
->argv
[2]);
4338 listAddNodeTail(list
,c
->argv
[2]);
4340 incrRefCount(c
->argv
[2]);
4343 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4346 static void lpushCommand(redisClient
*c
) {
4347 pushGenericCommand(c
,REDIS_HEAD
);
4350 static void rpushCommand(redisClient
*c
) {
4351 pushGenericCommand(c
,REDIS_TAIL
);
4354 static void llenCommand(redisClient
*c
) {
4358 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4359 checkType(c
,o
,REDIS_LIST
)) return;
4362 addReplyUlong(c
,listLength(l
));
4365 static void lindexCommand(redisClient
*c
) {
4367 int index
= atoi(c
->argv
[2]->ptr
);
4371 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4372 checkType(c
,o
,REDIS_LIST
)) return;
4375 ln
= listIndex(list
, index
);
4377 addReply(c
,shared
.nullbulk
);
4379 robj
*ele
= listNodeValue(ln
);
4380 addReplyBulk(c
,ele
);
4384 static void lsetCommand(redisClient
*c
) {
4386 int index
= atoi(c
->argv
[2]->ptr
);
4390 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4391 checkType(c
,o
,REDIS_LIST
)) return;
4394 ln
= listIndex(list
, index
);
4396 addReply(c
,shared
.outofrangeerr
);
4398 robj
*ele
= listNodeValue(ln
);
4401 listNodeValue(ln
) = c
->argv
[3];
4402 incrRefCount(c
->argv
[3]);
4403 addReply(c
,shared
.ok
);
4408 static void popGenericCommand(redisClient
*c
, int where
) {
4413 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4414 checkType(c
,o
,REDIS_LIST
)) return;
4417 if (where
== REDIS_HEAD
)
4418 ln
= listFirst(list
);
4420 ln
= listLast(list
);
4423 addReply(c
,shared
.nullbulk
);
4425 robj
*ele
= listNodeValue(ln
);
4426 addReplyBulk(c
,ele
);
4427 listDelNode(list
,ln
);
4428 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4433 static void lpopCommand(redisClient
*c
) {
4434 popGenericCommand(c
,REDIS_HEAD
);
4437 static void rpopCommand(redisClient
*c
) {
4438 popGenericCommand(c
,REDIS_TAIL
);
4441 static void lrangeCommand(redisClient
*c
) {
4443 int start
= atoi(c
->argv
[2]->ptr
);
4444 int end
= atoi(c
->argv
[3]->ptr
);
4451 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
4452 checkType(c
,o
,REDIS_LIST
)) return;
4454 llen
= listLength(list
);
4456 /* convert negative indexes */
4457 if (start
< 0) start
= llen
+start
;
4458 if (end
< 0) end
= llen
+end
;
4459 if (start
< 0) start
= 0;
4460 if (end
< 0) end
= 0;
4462 /* indexes sanity checks */
4463 if (start
> end
|| start
>= llen
) {
4464 /* Out of range start or start > end result in empty list */
4465 addReply(c
,shared
.emptymultibulk
);
4468 if (end
>= llen
) end
= llen
-1;
4469 rangelen
= (end
-start
)+1;
4471 /* Return the result in form of a multi-bulk reply */
4472 ln
= listIndex(list
, start
);
4473 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4474 for (j
= 0; j
< rangelen
; j
++) {
4475 ele
= listNodeValue(ln
);
4476 addReplyBulk(c
,ele
);
4481 static void ltrimCommand(redisClient
*c
) {
4483 int start
= atoi(c
->argv
[2]->ptr
);
4484 int end
= atoi(c
->argv
[3]->ptr
);
4486 int j
, ltrim
, rtrim
;
4490 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4491 checkType(c
,o
,REDIS_LIST
)) return;
4493 llen
= listLength(list
);
4495 /* convert negative indexes */
4496 if (start
< 0) start
= llen
+start
;
4497 if (end
< 0) end
= llen
+end
;
4498 if (start
< 0) start
= 0;
4499 if (end
< 0) end
= 0;
4501 /* indexes sanity checks */
4502 if (start
> end
|| start
>= llen
) {
4503 /* Out of range start or start > end result in empty list */
4507 if (end
>= llen
) end
= llen
-1;
4512 /* Remove list elements to perform the trim */
4513 for (j
= 0; j
< ltrim
; j
++) {
4514 ln
= listFirst(list
);
4515 listDelNode(list
,ln
);
4517 for (j
= 0; j
< rtrim
; j
++) {
4518 ln
= listLast(list
);
4519 listDelNode(list
,ln
);
4521 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4523 addReply(c
,shared
.ok
);
4526 static void lremCommand(redisClient
*c
) {
4529 listNode
*ln
, *next
;
4530 int toremove
= atoi(c
->argv
[2]->ptr
);
4534 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4535 checkType(c
,o
,REDIS_LIST
)) return;
4539 toremove
= -toremove
;
4542 ln
= fromtail
? list
->tail
: list
->head
;
4544 robj
*ele
= listNodeValue(ln
);
4546 next
= fromtail
? ln
->prev
: ln
->next
;
4547 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4548 listDelNode(list
,ln
);
4551 if (toremove
&& removed
== toremove
) break;
4555 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4556 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4559 /* This is the semantic of this command:
4560 * RPOPLPUSH srclist dstlist:
4561 * IF LLEN(srclist) > 0
4562 * element = RPOP srclist
4563 * LPUSH dstlist element
4570 * The idea is to be able to get an element from a list in a reliable way
4571 * since the element is not just returned but pushed against another list
4572 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4574 static void rpoplpushcommand(redisClient
*c
) {
4579 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4580 checkType(c
,sobj
,REDIS_LIST
)) return;
4581 srclist
= sobj
->ptr
;
4582 ln
= listLast(srclist
);
4585 addReply(c
,shared
.nullbulk
);
4587 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4588 robj
*ele
= listNodeValue(ln
);
4591 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4592 addReply(c
,shared
.wrongtypeerr
);
4596 /* Add the element to the target list (unless it's directly
4597 * passed to some BLPOP-ing client */
4598 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4600 /* Create the list if the key does not exist */
4601 dobj
= createListObject();
4602 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4603 incrRefCount(c
->argv
[2]);
4605 dstlist
= dobj
->ptr
;
4606 listAddNodeHead(dstlist
,ele
);
4610 /* Send the element to the client as reply as well */
4611 addReplyBulk(c
,ele
);
4613 /* Finally remove the element from the source list */
4614 listDelNode(srclist
,ln
);
4615 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4620 /* ==================================== Sets ================================ */
4622 static void saddCommand(redisClient
*c
) {
4625 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4627 set
= createSetObject();
4628 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4629 incrRefCount(c
->argv
[1]);
4631 if (set
->type
!= REDIS_SET
) {
4632 addReply(c
,shared
.wrongtypeerr
);
4636 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4637 incrRefCount(c
->argv
[2]);
4639 addReply(c
,shared
.cone
);
4641 addReply(c
,shared
.czero
);
4645 static void sremCommand(redisClient
*c
) {
4648 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4649 checkType(c
,set
,REDIS_SET
)) return;
4651 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4653 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4654 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4655 addReply(c
,shared
.cone
);
4657 addReply(c
,shared
.czero
);
4661 static void smoveCommand(redisClient
*c
) {
4662 robj
*srcset
, *dstset
;
4664 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4665 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4667 /* If the source key does not exist return 0, if it's of the wrong type
4669 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4670 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4673 /* Error if the destination key is not a set as well */
4674 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4675 addReply(c
,shared
.wrongtypeerr
);
4678 /* Remove the element from the source set */
4679 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4680 /* Key not found in the src set! return zero */
4681 addReply(c
,shared
.czero
);
4684 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4685 deleteKey(c
->db
,c
->argv
[1]);
4687 /* Add the element to the destination set */
4689 dstset
= createSetObject();
4690 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4691 incrRefCount(c
->argv
[2]);
4693 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4694 incrRefCount(c
->argv
[3]);
4695 addReply(c
,shared
.cone
);
4698 static void sismemberCommand(redisClient
*c
) {
4701 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4702 checkType(c
,set
,REDIS_SET
)) return;
4704 if (dictFind(set
->ptr
,c
->argv
[2]))
4705 addReply(c
,shared
.cone
);
4707 addReply(c
,shared
.czero
);
4710 static void scardCommand(redisClient
*c
) {
4714 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4715 checkType(c
,o
,REDIS_SET
)) return;
4718 addReplyUlong(c
,dictSize(s
));
4721 static void spopCommand(redisClient
*c
) {
4725 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4726 checkType(c
,set
,REDIS_SET
)) return;
4728 de
= dictGetRandomKey(set
->ptr
);
4730 addReply(c
,shared
.nullbulk
);
4732 robj
*ele
= dictGetEntryKey(de
);
4734 addReplyBulk(c
,ele
);
4735 dictDelete(set
->ptr
,ele
);
4736 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4737 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4742 static void srandmemberCommand(redisClient
*c
) {
4746 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4747 checkType(c
,set
,REDIS_SET
)) return;
4749 de
= dictGetRandomKey(set
->ptr
);
4751 addReply(c
,shared
.nullbulk
);
4753 robj
*ele
= dictGetEntryKey(de
);
4755 addReplyBulk(c
,ele
);
4759 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4760 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4762 return dictSize(*d1
)-dictSize(*d2
);
4765 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4766 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4769 robj
*lenobj
= NULL
, *dstset
= NULL
;
4770 unsigned long j
, cardinality
= 0;
4772 for (j
= 0; j
< setsnum
; j
++) {
4776 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4777 lookupKeyRead(c
->db
,setskeys
[j
]);
4781 if (deleteKey(c
->db
,dstkey
))
4783 addReply(c
,shared
.czero
);
4785 addReply(c
,shared
.nullmultibulk
);
4789 if (setobj
->type
!= REDIS_SET
) {
4791 addReply(c
,shared
.wrongtypeerr
);
4794 dv
[j
] = setobj
->ptr
;
    /* Sort sets from the smallest to largest, this will improve our
     * algorithm's performance */
4798 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4800 /* The first thing we should output is the total number of elements...
4801 * since this is a multi-bulk write, but at this stage we don't know
4802 * the intersection set size, so we use a trick, append an empty object
4803 * to the output list and save the pointer to later modify it with the
4806 lenobj
= createObject(REDIS_STRING
,NULL
);
4808 decrRefCount(lenobj
);
4810 /* If we have a target key where to store the resulting set
4811 * create this key with an empty set inside */
4812 dstset
= createSetObject();
4815 /* Iterate all the elements of the first (smallest) set, and test
4816 * the element against all the other sets, if at least one set does
4817 * not include the element it is discarded */
4818 di
= dictGetIterator(dv
[0]);
4820 while((de
= dictNext(di
)) != NULL
) {
4823 for (j
= 1; j
< setsnum
; j
++)
4824 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4826 continue; /* at least one set does not contain the member */
4827 ele
= dictGetEntryKey(de
);
4829 addReplyBulk(c
,ele
);
4832 dictAdd(dstset
->ptr
,ele
,NULL
);
4836 dictReleaseIterator(di
);
4839 /* Store the resulting set into the target, if the intersection
4840 * is not an empty set. */
4841 deleteKey(c
->db
,dstkey
);
4842 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4843 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4844 incrRefCount(dstkey
);
4845 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4847 decrRefCount(dstset
);
4848 addReply(c
,shared
.czero
);
4852 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4857 static void sinterCommand(redisClient
*c
) {
4858 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4861 static void sinterstoreCommand(redisClient
*c
) {
4862 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4865 #define REDIS_OP_UNION 0
4866 #define REDIS_OP_DIFF 1
4867 #define REDIS_OP_INTER 2
4869 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4870 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4873 robj
*dstset
= NULL
;
4874 int j
, cardinality
= 0;
4876 for (j
= 0; j
< setsnum
; j
++) {
4880 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4881 lookupKeyRead(c
->db
,setskeys
[j
]);
4886 if (setobj
->type
!= REDIS_SET
) {
4888 addReply(c
,shared
.wrongtypeerr
);
4891 dv
[j
] = setobj
->ptr
;
4894 /* We need a temp set object to store our union. If the dstkey
4895 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4896 * this set object will be the resulting object to set into the target key*/
4897 dstset
= createSetObject();
4899 /* Iterate all the elements of all the sets, add every element a single
4900 * time to the result set */
4901 for (j
= 0; j
< setsnum
; j
++) {
4902 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4903 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4905 di
= dictGetIterator(dv
[j
]);
4907 while((de
= dictNext(di
)) != NULL
) {
4910 /* dictAdd will not add the same element multiple times */
4911 ele
= dictGetEntryKey(de
);
4912 if (op
== REDIS_OP_UNION
|| j
== 0) {
4913 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
4917 } else if (op
== REDIS_OP_DIFF
) {
4918 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
4923 dictReleaseIterator(di
);
4925 /* result set is empty? Exit asap. */
4926 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
4929 /* Output the content of the resulting set, if not in STORE mode */
4931 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
4932 di
= dictGetIterator(dstset
->ptr
);
4933 while((de
= dictNext(di
)) != NULL
) {
4936 ele
= dictGetEntryKey(de
);
4937 addReplyBulk(c
,ele
);
4939 dictReleaseIterator(di
);
4940 decrRefCount(dstset
);
4942 /* If we have a target key where to store the resulting set
4943 * create this key with the result set inside */
4944 deleteKey(c
->db
,dstkey
);
4945 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4946 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4947 incrRefCount(dstkey
);
4948 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4950 decrRefCount(dstset
);
4951 addReply(c
,shared
.czero
);
4958 static void sunionCommand(redisClient
*c
) {
4959 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
4962 static void sunionstoreCommand(redisClient
*c
) {
4963 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
4966 static void sdiffCommand(redisClient
*c
) {
4967 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
4970 static void sdiffstoreCommand(redisClient
*c
) {
4971 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
4974 /* ==================================== ZSets =============================== */
4976 /* ZSETs are ordered sets using two data structures to hold the same elements
4977 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4980 * The elements are added to an hash table mapping Redis objects to scores.
4981 * At the same time the elements are added to a skip list mapping scores
4982 * to Redis objects (so objects are sorted by scores in this "view"). */
4984 /* This skiplist implementation is almost a C translation of the original
4985 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4986 * Alternative to Balanced Trees", modified in three ways:
4987 * a) this implementation allows for repeated values.
4988 * b) the comparison is not just by key (our 'score') but by satellite data.
4989 * c) there is a back pointer, so it's a doubly linked list with the back
4990 * pointers being only at "level 1". This allows to traverse the list
4991 * from tail to head, useful for ZREVRANGE. */
4993 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
4994 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
4996 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
4998 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5004 static zskiplist
*zslCreate(void) {
5008 zsl
= zmalloc(sizeof(*zsl
));
5011 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5012 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5013 zsl
->header
->forward
[j
] = NULL
;
5015 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5016 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5017 zsl
->header
->span
[j
] = 0;
5019 zsl
->header
->backward
= NULL
;
5024 static void zslFreeNode(zskiplistNode
*node
) {
5025 decrRefCount(node
->obj
);
5026 zfree(node
->forward
);
5031 static void zslFree(zskiplist
*zsl
) {
5032 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5034 zfree(zsl
->header
->forward
);
5035 zfree(zsl
->header
->span
);
5038 next
= node
->forward
[0];
5045 static int zslRandomLevel(void) {
5047 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5052 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5053 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5054 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5058 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5059 /* store rank that is crossed to reach the insert position */
5060 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5062 while (x
->forward
[i
] &&
5063 (x
->forward
[i
]->score
< score
||
5064 (x
->forward
[i
]->score
== score
&&
5065 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5066 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
    /* we assume the key is not already inside, since we allow duplicated
     * scores, and the re-insertion of score and redis object should never
     * happen since the caller of zslInsert() should test in the hash table
     * if the element is already inside or not. */
5075 level
= zslRandomLevel();
5076 if (level
> zsl
->level
) {
5077 for (i
= zsl
->level
; i
< level
; i
++) {
5079 update
[i
] = zsl
->header
;
5080 update
[i
]->span
[i
-1] = zsl
->length
;
5084 x
= zslCreateNode(level
,score
,obj
);
5085 for (i
= 0; i
< level
; i
++) {
5086 x
->forward
[i
] = update
[i
]->forward
[i
];
5087 update
[i
]->forward
[i
] = x
;
5089 /* update span covered by update[i] as x is inserted here */
5091 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5092 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5096 /* increment span for untouched levels */
5097 for (i
= level
; i
< zsl
->level
; i
++) {
5098 update
[i
]->span
[i
-1]++;
5101 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5103 x
->forward
[0]->backward
= x
;
5109 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5110 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5112 for (i
= 0; i
< zsl
->level
; i
++) {
5113 if (update
[i
]->forward
[i
] == x
) {
5115 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5117 update
[i
]->forward
[i
] = x
->forward
[i
];
5119 /* invariant: i > 0, because update[0]->forward[0]
5120 * is always equal to x */
5121 update
[i
]->span
[i
-1] -= 1;
5124 if (x
->forward
[0]) {
5125 x
->forward
[0]->backward
= x
->backward
;
5127 zsl
->tail
= x
->backward
;
5129 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5134 /* Delete an element with matching score/object from the skiplist. */
5135 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5136 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5140 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5141 while (x
->forward
[i
] &&
5142 (x
->forward
[i
]->score
< score
||
5143 (x
->forward
[i
]->score
== score
&&
5144 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5148 /* We may have multiple elements with the same score, what we need
5149 * is to find the element with both the right score and object. */
5151 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5152 zslDeleteNode(zsl
, x
, update
);
5156 return 0; /* not found */
5158 return 0; /* not found */
/* Delete all the elements with score between min and max from the skiplist.
 * Min and max are inclusive, so every element with min <= score <= max is
 * deleted. Note that this function takes the reference to the hash table
 * view of the sorted set, in order to remove the elements from the hash
 * table too. */
5165 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5166 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5167 unsigned long removed
= 0;
5171 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5172 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5176 /* We may have multiple elements with the same score, what we need
5177 * is to find the element with both the right score and object. */
5179 while (x
&& x
->score
<= max
) {
5180 zskiplistNode
*next
= x
->forward
[0];
5181 zslDeleteNode(zsl
, x
, update
);
5182 dictDelete(dict
,x
->obj
);
5187 return removed
; /* not found */
5190 /* Delete all the elements with rank between start and end from the skiplist.
5191 * Start and end are inclusive. Note that start and end need to be 1-based */
5192 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5193 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5194 unsigned long traversed
= 0, removed
= 0;
5198 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5199 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5200 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5208 while (x
&& traversed
<= end
) {
5209 zskiplistNode
*next
= x
->forward
[0];
5210 zslDeleteNode(zsl
, x
, update
);
5211 dictDelete(dict
,x
->obj
);
5220 /* Find the first node having a score equal or greater than the specified one.
5221 * Returns NULL if there is no match. */
5222 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5227 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5228 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5231 /* We may have multiple elements with the same score, what we need
5232 * is to find the element with both the right score and object. */
5233 return x
->forward
[0];
5236 /* Find the rank for an element by both score and key.
5237 * Returns 0 when the element cannot be found, rank otherwise.
5238 * Note that the rank is 1-based due to the span of zsl->header to the
5240 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5242 unsigned long rank
= 0;
5246 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5247 while (x
->forward
[i
] &&
5248 (x
->forward
[i
]->score
< score
||
5249 (x
->forward
[i
]->score
== score
&&
5250 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5251 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5255 /* x might be equal to zsl->header, so test if obj is non-NULL */
5256 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5263 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5264 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5266 unsigned long traversed
= 0;
5270 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5271 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5273 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5276 if (traversed
== rank
) {
5283 /* The actual Z-commands implementations */
5285 /* This generic command implements both ZADD and ZINCRBY.
5286 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5287 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5288 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5293 zsetobj
= lookupKeyWrite(c
->db
,key
);
5294 if (zsetobj
== NULL
) {
5295 zsetobj
= createZsetObject();
5296 dictAdd(c
->db
->dict
,key
,zsetobj
);
5299 if (zsetobj
->type
!= REDIS_ZSET
) {
5300 addReply(c
,shared
.wrongtypeerr
);
5306 /* Ok now since we implement both ZADD and ZINCRBY here the code
5307 * needs to handle the two different conditions. It's all about setting
5308 * '*score', that is, the new score to set, to the right value. */
5309 score
= zmalloc(sizeof(double));
5313 /* Read the old score. If the element was not present starts from 0 */
5314 de
= dictFind(zs
->dict
,ele
);
5316 double *oldscore
= dictGetEntryVal(de
);
5317 *score
= *oldscore
+ scoreval
;
5325 /* What follows is a simple remove and re-insert operation that is common
5326 * to both ZADD and ZINCRBY... */
5327 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5328 /* case 1: New element */
5329 incrRefCount(ele
); /* added to hash */
5330 zslInsert(zs
->zsl
,*score
,ele
);
5331 incrRefCount(ele
); /* added to skiplist */
5334 addReplyDouble(c
,*score
);
5336 addReply(c
,shared
.cone
);
5341 /* case 2: Score update operation */
5342 de
= dictFind(zs
->dict
,ele
);
5343 redisAssert(de
!= NULL
);
5344 oldscore
= dictGetEntryVal(de
);
5345 if (*score
!= *oldscore
) {
5348 /* Remove and insert the element in the skip list with new score */
5349 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5350 redisAssert(deleted
!= 0);
5351 zslInsert(zs
->zsl
,*score
,ele
);
5353 /* Update the score in the hash table */
5354 dictReplace(zs
->dict
,ele
,score
);
5360 addReplyDouble(c
,*score
);
5362 addReply(c
,shared
.czero
);
5366 static void zaddCommand(redisClient
*c
) {
5369 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5370 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5373 static void zincrbyCommand(redisClient
*c
) {
5376 scoreval
= strtod(c
->argv
[2]->ptr
,NULL
);
5377 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5380 static void zremCommand(redisClient
*c
) {
5387 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5388 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5391 de
= dictFind(zs
->dict
,c
->argv
[2]);
5393 addReply(c
,shared
.czero
);
5396 /* Delete from the skiplist */
5397 oldscore
= dictGetEntryVal(de
);
5398 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5399 redisAssert(deleted
!= 0);
5401 /* Delete from the hash table */
5402 dictDelete(zs
->dict
,c
->argv
[2]);
5403 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5404 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5406 addReply(c
,shared
.cone
);
5409 static void zremrangebyscoreCommand(redisClient
*c
) {
5410 double min
= strtod(c
->argv
[2]->ptr
,NULL
);
5411 double max
= strtod(c
->argv
[3]->ptr
,NULL
);
5416 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5417 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5420 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5421 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5422 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5423 server
.dirty
+= deleted
;
5424 addReplyLong(c
,deleted
);
5427 static void zremrangebyrankCommand(redisClient
*c
) {
5428 int start
= atoi(c
->argv
[2]->ptr
);
5429 int end
= atoi(c
->argv
[3]->ptr
);
5435 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5436 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5438 llen
= zs
->zsl
->length
;
5440 /* convert negative indexes */
5441 if (start
< 0) start
= llen
+start
;
5442 if (end
< 0) end
= llen
+end
;
5443 if (start
< 0) start
= 0;
5444 if (end
< 0) end
= 0;
5446 /* indexes sanity checks */
5447 if (start
> end
|| start
>= llen
) {
5448 addReply(c
,shared
.czero
);
5451 if (end
>= llen
) end
= llen
-1;
5453 /* increment start and end because zsl*Rank functions
5454 * use 1-based rank */
5455 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5456 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5457 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5458 server
.dirty
+= deleted
;
5459 addReplyLong(c
, deleted
);
5467 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5468 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5469 unsigned long size1
, size2
;
5470 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5471 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5472 return size1
- size2
;
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3

/* Combine 'val' into '*target' according to 'aggregate':
 *   REDIS_AGGR_SUM  -> *target becomes *target + val
 *   REDIS_AGGR_MIN  -> *target becomes the smaller of the two
 *   REDIS_AGGR_MAX  -> *target becomes the larger of the two
 * Any other mode trips an assertion. */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    switch (aggregate) {
    case REDIS_AGGR_SUM:
        *target += val;
        break;
    case REDIS_AGGR_MIN:
        if (val < *target) *target = val;
        break;
    case REDIS_AGGR_MAX:
        if (val > *target) *target = val;
        break;
    default:
        /* safety net */
        redisAssert(0 != 0);
    }
}
5492 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5494 int aggregate
= REDIS_AGGR_SUM
;
5501 /* expect zsetnum input keys to be given */
5502 zsetnum
= atoi(c
->argv
[2]->ptr
);
5504 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5508 /* test if the expected number of keys would overflow */
5509 if (3+zsetnum
> c
->argc
) {
5510 addReply(c
,shared
.syntaxerr
);
5514 /* read keys to be used for input */
5515 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5516 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5517 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5521 if (zsetobj
->type
!= REDIS_ZSET
) {
5523 addReply(c
,shared
.wrongtypeerr
);
5526 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5529 /* default all weights to 1 */
5530 src
[i
].weight
= 1.0;
5533 /* parse optional extra arguments */
5535 int remaining
= c
->argc
- j
;
5538 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5540 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5541 src
[i
].weight
= strtod(c
->argv
[j
]->ptr
, NULL
);
5543 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5545 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5546 aggregate
= REDIS_AGGR_SUM
;
5547 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5548 aggregate
= REDIS_AGGR_MIN
;
5549 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5550 aggregate
= REDIS_AGGR_MAX
;
5553 addReply(c
,shared
.syntaxerr
);
5559 addReply(c
,shared
.syntaxerr
);
5565 /* sort sets from the smallest to largest, this will improve our
5566 * algorithm's performance */
5567 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5569 dstobj
= createZsetObject();
5570 dstzset
= dstobj
->ptr
;
5572 if (op
== REDIS_OP_INTER
) {
5573 /* skip going over all entries if the smallest zset is NULL or empty */
5574 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5575 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5576 * from small to large, all src[i > 0].dict are non-empty too */
5577 di
= dictGetIterator(src
[0].dict
);
5578 while((de
= dictNext(di
)) != NULL
) {
5579 double *score
= zmalloc(sizeof(double)), value
;
5580 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5582 for (j
= 1; j
< zsetnum
; j
++) {
5583 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5585 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5586 zunionInterAggregate(score
, value
, aggregate
);
5592 /* skip entry when not present in every source dict */
5596 robj
*o
= dictGetEntryKey(de
);
5597 dictAdd(dstzset
->dict
,o
,score
);
5598 incrRefCount(o
); /* added to dictionary */
5599 zslInsert(dstzset
->zsl
,*score
,o
);
5600 incrRefCount(o
); /* added to skiplist */
5603 dictReleaseIterator(di
);
5605 } else if (op
== REDIS_OP_UNION
) {
5606 for (i
= 0; i
< zsetnum
; i
++) {
5607 if (!src
[i
].dict
) continue;
5609 di
= dictGetIterator(src
[i
].dict
);
5610 while((de
= dictNext(di
)) != NULL
) {
5611 /* skip key when already processed */
5612 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5614 double *score
= zmalloc(sizeof(double)), value
;
5615 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5617 /* because the zsets are sorted by size, its only possible
5618 * for sets at larger indices to hold this entry */
5619 for (j
= (i
+1); j
< zsetnum
; j
++) {
5620 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5622 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5623 zunionInterAggregate(score
, value
, aggregate
);
5627 robj
*o
= dictGetEntryKey(de
);
5628 dictAdd(dstzset
->dict
,o
,score
);
5629 incrRefCount(o
); /* added to dictionary */
5630 zslInsert(dstzset
->zsl
,*score
,o
);
5631 incrRefCount(o
); /* added to skiplist */
5633 dictReleaseIterator(di
);
5636 /* unknown operator */
5637 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5640 deleteKey(c
->db
,dstkey
);
5641 if (dstzset
->zsl
->length
) {
5642 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5643 incrRefCount(dstkey
);
5644 addReplyLong(c
, dstzset
->zsl
->length
);
5647 decrRefCount(dstzset
);
5648 addReply(c
, shared
.czero
);
5653 static void zunionCommand(redisClient
*c
) {
5654 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5657 static void zinterCommand(redisClient
*c
) {
5658 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5661 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5663 int start
= atoi(c
->argv
[2]->ptr
);
5664 int end
= atoi(c
->argv
[3]->ptr
);
5673 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5675 } else if (c
->argc
>= 5) {
5676 addReply(c
,shared
.syntaxerr
);
5680 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
||
5681 checkType(c
,o
,REDIS_ZSET
)) return;
5686 /* convert negative indexes */
5687 if (start
< 0) start
= llen
+start
;
5688 if (end
< 0) end
= llen
+end
;
5689 if (start
< 0) start
= 0;
5690 if (end
< 0) end
= 0;
5692 /* indexes sanity checks */
5693 if (start
> end
|| start
>= llen
) {
5694 /* Out of range start or start > end result in empty list */
5695 addReply(c
,shared
.emptymultibulk
);
5698 if (end
>= llen
) end
= llen
-1;
5699 rangelen
= (end
-start
)+1;
5701 /* check if starting point is trivial, before searching
5702 * the element in log(N) time */
5704 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5707 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5710 /* Return the result in form of a multi-bulk reply */
5711 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5712 withscores
? (rangelen
*2) : rangelen
));
5713 for (j
= 0; j
< rangelen
; j
++) {
5715 addReplyBulk(c
,ele
);
5717 addReplyDouble(c
,ln
->score
);
5718 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5722 static void zrangeCommand(redisClient
*c
) {
5723 zrangeGenericCommand(c
,0);
5726 static void zrevrangeCommand(redisClient
*c
) {
5727 zrangeGenericCommand(c
,1);
5730 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5731 * If justcount is non-zero, just the count is returned. */
5732 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5735 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5736 int offset
= 0, limit
= -1;
5740 /* Parse the min-max interval. If one of the values is prefixed
5741 * by the "(" character, it's considered "open". For instance
5742 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5743 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5744 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5745 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5748 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5750 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5751 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5754 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5757 /* Parse "WITHSCORES": note that if the command was called with
5758 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5759 * enter the following paths to parse WITHSCORES and LIMIT. */
5760 if (c
->argc
== 5 || c
->argc
== 8) {
5761 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5766 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5770 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5775 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5776 addReply(c
,shared
.syntaxerr
);
5778 } else if (c
->argc
== (7 + withscores
)) {
5779 offset
= atoi(c
->argv
[5]->ptr
);
5780 limit
= atoi(c
->argv
[6]->ptr
);
5781 if (offset
< 0) offset
= 0;
5784 /* Ok, lookup the key and get the range */
5785 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5787 addReply(c
,justcount
? shared
.czero
: shared
.nullmultibulk
);
5789 if (o
->type
!= REDIS_ZSET
) {
5790 addReply(c
,shared
.wrongtypeerr
);
5792 zset
*zsetobj
= o
->ptr
;
5793 zskiplist
*zsl
= zsetobj
->zsl
;
5795 robj
*ele
, *lenobj
= NULL
;
5796 unsigned long rangelen
= 0;
5798 /* Get the first node with the score >= min, or with
5799 * score > min if 'minex' is true. */
5800 ln
= zslFirstWithScore(zsl
,min
);
5801 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5804 /* No element matching the speciifed interval */
5805 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5809 /* We don't know in advance how many matching elements there
5810 * are in the list, so we push this object that will represent
5811 * the multi-bulk length in the output buffer, and will "fix"
5814 lenobj
= createObject(REDIS_STRING
,NULL
);
5816 decrRefCount(lenobj
);
5819 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5822 ln
= ln
->forward
[0];
5825 if (limit
== 0) break;
5828 addReplyBulk(c
,ele
);
5830 addReplyDouble(c
,ln
->score
);
5832 ln
= ln
->forward
[0];
5834 if (limit
> 0) limit
--;
5837 addReplyLong(c
,(long)rangelen
);
5839 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5840 withscores
? (rangelen
*2) : rangelen
);
5846 static void zrangebyscoreCommand(redisClient
*c
) {
5847 genericZrangebyscoreCommand(c
,0);
5850 static void zcountCommand(redisClient
*c
) {
5851 genericZrangebyscoreCommand(c
,1);
5854 static void zcardCommand(redisClient
*c
) {
5858 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5859 checkType(c
,o
,REDIS_ZSET
)) return;
5862 addReplyUlong(c
,zs
->zsl
->length
);
5865 static void zscoreCommand(redisClient
*c
) {
5870 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5871 checkType(c
,o
,REDIS_ZSET
)) return;
5874 de
= dictFind(zs
->dict
,c
->argv
[2]);
5876 addReply(c
,shared
.nullbulk
);
5878 double *score
= dictGetEntryVal(de
);
5880 addReplyDouble(c
,*score
);
5884 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5892 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5893 checkType(c
,o
,REDIS_ZSET
)) return;
5897 de
= dictFind(zs
->dict
,c
->argv
[2]);
5899 addReply(c
,shared
.nullbulk
);
5903 score
= dictGetEntryVal(de
);
5904 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
5907 addReplyLong(c
, zsl
->length
- rank
);
5909 addReplyLong(c
, rank
-1);
5912 addReply(c
,shared
.nullbulk
);
5916 static void zrankCommand(redisClient
*c
) {
5917 zrankGenericCommand(c
, 0);
5920 static void zrevrankCommand(redisClient
*c
) {
5921 zrankGenericCommand(c
, 1);
5924 /* =================================== Hashes =============================== */
5925 static void hsetCommand(redisClient
*c
) {
5927 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5930 o
= createHashObject();
5931 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5932 incrRefCount(c
->argv
[1]);
5934 if (o
->type
!= REDIS_HASH
) {
5935 addReply(c
,shared
.wrongtypeerr
);
5939 /* We want to convert the zipmap into an hash table right now if the
5940 * entry to be added is too big. Note that we check if the object
5941 * is integer encoded before to try fetching the length in the test below.
5942 * This is because integers are small, but currently stringObjectLen()
5943 * performs a slow conversion: not worth it. */
5944 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
5945 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
5946 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
5947 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
5948 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
5950 convertToRealHash(o
);
5953 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
5954 unsigned char *zm
= o
->ptr
;
5955 robj
*valobj
= getDecodedObject(c
->argv
[3]);
5957 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
5958 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
5959 decrRefCount(valobj
);
5962 /* And here there is the second check for hash conversion...
5963 * we want to do it only if the operation was not just an update as
5964 * zipmapLen() is O(N). */
5965 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
5966 convertToRealHash(o
);
5968 tryObjectEncoding(c
->argv
[2]);
5969 /* note that c->argv[3] is already encoded, as the latest arg
5970 * of a bulk command is always integer encoded if possible. */
5971 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
5972 incrRefCount(c
->argv
[2]);
5976 incrRefCount(c
->argv
[3]);
5979 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
5982 static void hincrbyCommand(redisClient
*c
) {
5984 long long value
= 0, incr
= 0;
5985 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5988 o
= createHashObject();
5989 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
5990 incrRefCount(c
->argv
[1]);
5992 if (o
->type
!= REDIS_HASH
) {
5993 addReply(c
,shared
.wrongtypeerr
);
5998 robj
*o_incr
= getDecodedObject(c
->argv
[3]);
5999 incr
= strtoll(o_incr
->ptr
, NULL
, 10);
6000 decrRefCount(o_incr
);
6002 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6003 unsigned char *zm
= o
->ptr
;
6004 unsigned char *zval
;
6007 /* Find value if already present in hash */
6008 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6010 /* strtoll needs the char* to have a trailing \0, but
6011 * the zipmap doesn't include them. */
6012 sds szval
= sdsnewlen(zval
, zvlen
);
6013 value
= strtoll(szval
,NULL
,10);
6018 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6019 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6020 (unsigned char*)svalue
,sdslen(svalue
),&update
);
6024 /* Check if the zipmap needs to be converted
6025 * if this was not an update. */
6026 if (!update
&& zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6027 convertToRealHash(o
);
6032 /* Find value if already present in hash */
6033 de
= dictFind(o
->ptr
,c
->argv
[2]);
6035 hval
= dictGetEntryVal(de
);
6036 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6037 value
= strtoll(hval
->ptr
,NULL
,10);
6038 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6039 value
= (long)hval
->ptr
;
6041 redisAssert(1 != 1);
6045 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6046 tryObjectEncoding(hval
);
6047 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6048 incrRefCount(c
->argv
[2]);
6053 addReplyLong(c
, value
);
6056 static void hgetCommand(redisClient
*c
) {
6059 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6060 checkType(c
,o
,REDIS_HASH
)) return;
6062 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6063 unsigned char *zm
= o
->ptr
;
6068 field
= getDecodedObject(c
->argv
[2]);
6069 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6070 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6071 addReplySds(c
,sdsnewlen(val
,vlen
));
6072 addReply(c
,shared
.crlf
);
6073 decrRefCount(field
);
6076 addReply(c
,shared
.nullbulk
);
6077 decrRefCount(field
);
6081 struct dictEntry
*de
;
6083 de
= dictFind(o
->ptr
,c
->argv
[2]);
6085 addReply(c
,shared
.nullbulk
);
6087 robj
*e
= dictGetEntryVal(de
);
6094 static void hdelCommand(redisClient
*c
) {
6098 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6099 checkType(c
,o
,REDIS_HASH
)) return;
6101 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6102 robj
*field
= getDecodedObject(c
->argv
[2]);
6104 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6105 (unsigned char*) field
->ptr
,
6106 sdslen(field
->ptr
), &deleted
);
6107 decrRefCount(field
);
6108 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6109 deleteKey(c
->db
,c
->argv
[1]);
6111 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6112 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6113 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6115 if (deleted
) server
.dirty
++;
6116 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6119 static void hlenCommand(redisClient
*c
) {
6123 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6124 checkType(c
,o
,REDIS_HASH
)) return;
6126 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6127 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6128 addReplyUlong(c
,len
);
6131 #define REDIS_GETALL_KEYS 1
6132 #define REDIS_GETALL_VALS 2
6133 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6135 unsigned long count
= 0;
6137 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullmultibulk
)) == NULL
6138 || checkType(c
,o
,REDIS_HASH
)) return;
6140 lenobj
= createObject(REDIS_STRING
,NULL
);
6142 decrRefCount(lenobj
);
6144 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6145 unsigned char *p
= zipmapRewind(o
->ptr
);
6146 unsigned char *field
, *val
;
6147 unsigned int flen
, vlen
;
6149 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6152 if (flags
& REDIS_GETALL_KEYS
) {
6153 aux
= createStringObject((char*)field
,flen
);
6154 addReplyBulk(c
,aux
);
6158 if (flags
& REDIS_GETALL_VALS
) {
6159 aux
= createStringObject((char*)val
,vlen
);
6160 addReplyBulk(c
,aux
);
6166 dictIterator
*di
= dictGetIterator(o
->ptr
);
6169 while((de
= dictNext(di
)) != NULL
) {
6170 robj
*fieldobj
= dictGetEntryKey(de
);
6171 robj
*valobj
= dictGetEntryVal(de
);
6173 if (flags
& REDIS_GETALL_KEYS
) {
6174 addReplyBulk(c
,fieldobj
);
6177 if (flags
& REDIS_GETALL_VALS
) {
6178 addReplyBulk(c
,valobj
);
6182 dictReleaseIterator(di
);
6184 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6187 static void hkeysCommand(redisClient
*c
) {
6188 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6191 static void hvalsCommand(redisClient
*c
) {
6192 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6195 static void hgetallCommand(redisClient
*c
) {
6196 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6199 static void hexistsCommand(redisClient
*c
) {
6203 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6204 checkType(c
,o
,REDIS_HASH
)) return;
6206 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6208 unsigned char *zm
= o
->ptr
;
6210 field
= getDecodedObject(c
->argv
[2]);
6211 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6212 decrRefCount(field
);
6214 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6216 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6219 static void convertToRealHash(robj
*o
) {
6220 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6221 unsigned int klen
, vlen
;
6222 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6224 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6225 p
= zipmapRewind(zm
);
6226 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6227 robj
*keyobj
, *valobj
;
6229 keyobj
= createStringObject((char*)key
,klen
);
6230 valobj
= createStringObject((char*)val
,vlen
);
6231 tryObjectEncoding(keyobj
);
6232 tryObjectEncoding(valobj
);
6233 dictAdd(dict
,keyobj
,valobj
);
6235 o
->encoding
= REDIS_ENCODING_HT
;
6240 /* ========================= Non type-specific commands ==================== */
6242 static void flushdbCommand(redisClient
*c
) {
6243 server
.dirty
+= dictSize(c
->db
->dict
);
6244 dictEmpty(c
->db
->dict
);
6245 dictEmpty(c
->db
->expires
);
6246 addReply(c
,shared
.ok
);
6249 static void flushallCommand(redisClient
*c
) {
6250 server
.dirty
+= emptyDb();
6251 addReply(c
,shared
.ok
);
6252 if (server
.bgsavechildpid
!= -1) {
6253 kill(server
.bgsavechildpid
,SIGKILL
);
6254 rdbRemoveTempFile(server
.bgsavechildpid
);
6256 rdbSave(server
.dbfilename
);
6260 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6261 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6263 so
->pattern
= pattern
;
6267 /* Return the value associated to the key with a name obtained
6268 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6269 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6273 int prefixlen
, sublen
, postfixlen
;
6274 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6278 char buf
[REDIS_SORTKEY_MAX
+1];
6281 /* If the pattern is "#" return the substitution object itself in order
6282 * to implement the "SORT ... GET #" feature. */
6283 spat
= pattern
->ptr
;
6284 if (spat
[0] == '#' && spat
[1] == '\0') {
6288 /* The substitution object may be specially encoded. If so we create
6289 * a decoded object on the fly. Otherwise getDecodedObject will just
6290 * increment the ref count, that we'll decrement later. */
6291 subst
= getDecodedObject(subst
);
6294 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6295 p
= strchr(spat
,'*');
6297 decrRefCount(subst
);
6302 sublen
= sdslen(ssub
);
6303 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6304 memcpy(keyname
.buf
,spat
,prefixlen
);
6305 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6306 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6307 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6308 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6310 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6311 decrRefCount(subst
);
6313 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6314 return lookupKeyRead(db
,&keyobj
);
6317 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6318 * the additional parameter is not standard but a BSD-specific we have to
6319 * pass sorting parameters via the global 'server' structure */
6320 static int sortCompare(const void *s1
, const void *s2
) {
6321 const redisSortObject
*so1
= s1
, *so2
= s2
;
6324 if (!server
.sort_alpha
) {
6325 /* Numeric sorting. Here it's trivial as we precomputed scores */
6326 if (so1
->u
.score
> so2
->u
.score
) {
6328 } else if (so1
->u
.score
< so2
->u
.score
) {
6334 /* Alphanumeric sorting */
6335 if (server
.sort_bypattern
) {
6336 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6337 /* At least one compare object is NULL */
6338 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6340 else if (so1
->u
.cmpobj
== NULL
)
6345 /* We have both the objects, use strcoll */
6346 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6349 /* Compare elements directly */
6352 dec1
= getDecodedObject(so1
->obj
);
6353 dec2
= getDecodedObject(so2
->obj
);
6354 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6359 return server
.sort_desc
? -cmp
: cmp
;
6362 /* The SORT command is the most complex command in Redis. Warning: this code
6363 * is optimized for speed and a bit less for readability */
6364 static void sortCommand(redisClient
*c
) {
6367 int desc
= 0, alpha
= 0;
6368 int limit_start
= 0, limit_count
= -1, start
, end
;
6369 int j
, dontsort
= 0, vectorlen
;
6370 int getop
= 0; /* GET operation counter */
6371 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6372 redisSortObject
*vector
; /* Resulting vector to sort */
6374 /* Lookup the key to sort. It must be of the right types */
6375 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6376 if (sortval
== NULL
) {
6377 addReply(c
,shared
.nullmultibulk
);
6380 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6381 sortval
->type
!= REDIS_ZSET
)
6383 addReply(c
,shared
.wrongtypeerr
);
6387 /* Create a list of operations to perform for every sorted element.
6388 * Operations can be GET/DEL/INCR/DECR */
6389 operations
= listCreate();
6390 listSetFreeMethod(operations
,zfree
);
6393 /* Now we need to protect sortval incrementing its count, in the future
6394 * SORT may have options able to overwrite/delete keys during the sorting
6395 * and the sorted key itself may get destroied */
6396 incrRefCount(sortval
);
6398 /* The SORT command has an SQL-alike syntax, parse it */
6399 while(j
< c
->argc
) {
6400 int leftargs
= c
->argc
-j
-1;
6401 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6403 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6405 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6407 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6408 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6409 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6411 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6412 storekey
= c
->argv
[j
+1];
6414 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6415 sortby
= c
->argv
[j
+1];
6416 /* If the BY pattern does not contain '*', i.e. it is constant,
6417 * we don't need to sort nor to lookup the weight keys. */
6418 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6420 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6421 listAddNodeTail(operations
,createSortOperation(
6422 REDIS_SORT_GET
,c
->argv
[j
+1]));
6426 decrRefCount(sortval
);
6427 listRelease(operations
);
6428 addReply(c
,shared
.syntaxerr
);
6434 /* Load the sorting vector with all the objects to sort */
6435 switch(sortval
->type
) {
6436 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6437 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6438 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6439 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6441 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6444 if (sortval
->type
== REDIS_LIST
) {
6445 list
*list
= sortval
->ptr
;
6449 listRewind(list
,&li
);
6450 while((ln
= listNext(&li
))) {
6451 robj
*ele
= ln
->value
;
6452 vector
[j
].obj
= ele
;
6453 vector
[j
].u
.score
= 0;
6454 vector
[j
].u
.cmpobj
= NULL
;
6462 if (sortval
->type
== REDIS_SET
) {
6465 zset
*zs
= sortval
->ptr
;
6469 di
= dictGetIterator(set
);
6470 while((setele
= dictNext(di
)) != NULL
) {
6471 vector
[j
].obj
= dictGetEntryKey(setele
);
6472 vector
[j
].u
.score
= 0;
6473 vector
[j
].u
.cmpobj
= NULL
;
6476 dictReleaseIterator(di
);
6478 redisAssert(j
== vectorlen
);
6480 /* Now it's time to load the right scores in the sorting vector */
6481 if (dontsort
== 0) {
6482 for (j
= 0; j
< vectorlen
; j
++) {
6486 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6487 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6489 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6491 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6492 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6494 /* Don't need to decode the object if it's
6495 * integer-encoded (the only encoding supported) so
6496 * far. We can just cast it */
6497 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6498 vector
[j
].u
.score
= (long)byval
->ptr
;
6500 redisAssert(1 != 1);
6505 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6506 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6508 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6509 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6511 redisAssert(1 != 1);
6518 /* We are ready to sort the vector... perform a bit of sanity check
6519 * on the LIMIT option too. We'll use a partial version of quicksort. */
6520 start
= (limit_start
< 0) ? 0 : limit_start
;
6521 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6522 if (start
>= vectorlen
) {
6523 start
= vectorlen
-1;
6526 if (end
>= vectorlen
) end
= vectorlen
-1;
6528 if (dontsort
== 0) {
6529 server
.sort_desc
= desc
;
6530 server
.sort_alpha
= alpha
;
6531 server
.sort_bypattern
= sortby
? 1 : 0;
6532 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6533 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6535 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6538 /* Send command output to the output buffer, performing the specified
6539 * GET/DEL/INCR/DECR operations if any. */
6540 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6541 if (storekey
== NULL
) {
6542 /* STORE option not specified, sent the sorting result to client */
6543 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6544 for (j
= start
; j
<= end
; j
++) {
6548 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6549 listRewind(operations
,&li
);
6550 while((ln
= listNext(&li
))) {
6551 redisSortOperation
*sop
= ln
->value
;
6552 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6555 if (sop
->type
== REDIS_SORT_GET
) {
6556 if (!val
|| val
->type
!= REDIS_STRING
) {
6557 addReply(c
,shared
.nullbulk
);
6559 addReplyBulk(c
,val
);
6562 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6567 robj
*listObject
= createListObject();
6568 list
*listPtr
= (list
*) listObject
->ptr
;
6570 /* STORE option specified, set the sorting result as a List object */
6571 for (j
= start
; j
<= end
; j
++) {
6576 listAddNodeTail(listPtr
,vector
[j
].obj
);
6577 incrRefCount(vector
[j
].obj
);
6579 listRewind(operations
,&li
);
6580 while((ln
= listNext(&li
))) {
6581 redisSortOperation
*sop
= ln
->value
;
6582 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6585 if (sop
->type
== REDIS_SORT_GET
) {
6586 if (!val
|| val
->type
!= REDIS_STRING
) {
6587 listAddNodeTail(listPtr
,createStringObject("",0));
6589 listAddNodeTail(listPtr
,val
);
6593 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6597 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6598 incrRefCount(storekey
);
6600 /* Note: we add 1 because the DB is dirty anyway since even if the
6601 * SORT result is empty a new key is set and maybe the old content
6603 server
.dirty
+= 1+outputlen
;
6604 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6608 decrRefCount(sortval
);
6609 listRelease(operations
);
6610 for (j
= 0; j
< vectorlen
; j
++) {
6611 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6612 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' receives the NUL terminated result and must have room for at least
 * 32 bytes. Values below 1024 are printed as an exact byte count; larger
 * ones use two decimals and the K, M, G, T or P suffix (powers of 1024).
 * Anything beyond the petabyte range falls back to the exact byte count, so
 * 's' is always written. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too large for any suffix: print the raw byte count */
        sprintf(s,"%lluB",n);
    }
}
6638 /* Create the string returned by the INFO command. This is decoupled
6639 * by the INFO command itself as we need to report the same information
6640 * on memory corruption problems. */
6641 static sds
genRedisInfoString(void) {
6643 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6647 bytesToHuman(hmem
,zmalloc_used_memory());
6648 info
= sdscatprintf(sdsempty(),
6649 "redis_version:%s\r\n"
6651 "multiplexing_api:%s\r\n"
6652 "process_id:%ld\r\n"
6653 "uptime_in_seconds:%ld\r\n"
6654 "uptime_in_days:%ld\r\n"
6655 "connected_clients:%d\r\n"
6656 "connected_slaves:%d\r\n"
6657 "blocked_clients:%d\r\n"
6658 "used_memory:%zu\r\n"
6659 "used_memory_human:%s\r\n"
6660 "changes_since_last_save:%lld\r\n"
6661 "bgsave_in_progress:%d\r\n"
6662 "last_save_time:%ld\r\n"
6663 "bgrewriteaof_in_progress:%d\r\n"
6664 "total_connections_received:%lld\r\n"
6665 "total_commands_processed:%lld\r\n"
6666 "expired_keys:%lld\r\n"
6667 "hash_max_zipmap_entries:%ld\r\n"
6668 "hash_max_zipmap_value:%ld\r\n"
6669 "pubsub_classes:%ld\r\n"
6673 (sizeof(long) == 8) ? "64" : "32",
6678 listLength(server
.clients
)-listLength(server
.slaves
),
6679 listLength(server
.slaves
),
6680 server
.blpop_blocked_clients
,
6681 zmalloc_used_memory(),
6684 server
.bgsavechildpid
!= -1,
6686 server
.bgrewritechildpid
!= -1,
6687 server
.stat_numconnections
,
6688 server
.stat_numcommands
,
6689 server
.stat_expiredkeys
,
6690 server
.hash_max_zipmap_entries
,
6691 server
.hash_max_zipmap_value
,
6692 dictSize(server
.pubsub_classes
),
6693 server
.vm_enabled
!= 0,
6694 server
.masterhost
== NULL
? "master" : "slave"
6696 if (server
.masterhost
) {
6697 info
= sdscatprintf(info
,
6698 "master_host:%s\r\n"
6699 "master_port:%d\r\n"
6700 "master_link_status:%s\r\n"
6701 "master_last_io_seconds_ago:%d\r\n"
6704 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6706 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6709 if (server
.vm_enabled
) {
6711 info
= sdscatprintf(info
,
6712 "vm_conf_max_memory:%llu\r\n"
6713 "vm_conf_page_size:%llu\r\n"
6714 "vm_conf_pages:%llu\r\n"
6715 "vm_stats_used_pages:%llu\r\n"
6716 "vm_stats_swapped_objects:%llu\r\n"
6717 "vm_stats_swappin_count:%llu\r\n"
6718 "vm_stats_swappout_count:%llu\r\n"
6719 "vm_stats_io_newjobs_len:%lu\r\n"
6720 "vm_stats_io_processing_len:%lu\r\n"
6721 "vm_stats_io_processed_len:%lu\r\n"
6722 "vm_stats_io_active_threads:%lu\r\n"
6723 "vm_stats_blocked_clients:%lu\r\n"
6724 ,(unsigned long long) server
.vm_max_memory
,
6725 (unsigned long long) server
.vm_page_size
,
6726 (unsigned long long) server
.vm_pages
,
6727 (unsigned long long) server
.vm_stats_used_pages
,
6728 (unsigned long long) server
.vm_stats_swapped_objects
,
6729 (unsigned long long) server
.vm_stats_swapins
,
6730 (unsigned long long) server
.vm_stats_swapouts
,
6731 (unsigned long) listLength(server
.io_newjobs
),
6732 (unsigned long) listLength(server
.io_processing
),
6733 (unsigned long) listLength(server
.io_processed
),
6734 (unsigned long) server
.io_active_threads
,
6735 (unsigned long) server
.vm_blocked_clients
6739 for (j
= 0; j
< server
.dbnum
; j
++) {
6740 long long keys
, vkeys
;
6742 keys
= dictSize(server
.db
[j
].dict
);
6743 vkeys
= dictSize(server
.db
[j
].expires
);
6744 if (keys
|| vkeys
) {
6745 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6752 static void infoCommand(redisClient
*c
) {
6753 sds info
= genRedisInfoString();
6754 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6755 (unsigned long)sdslen(info
)));
6756 addReplySds(c
,info
);
6757 addReply(c
,shared
.crlf
);
6760 static void monitorCommand(redisClient
*c
) {
6761 /* ignore MONITOR if aleady slave or in monitor mode */
6762 if (c
->flags
& REDIS_SLAVE
) return;
6764 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6766 listAddNodeTail(server
.monitors
,c
);
6767 addReply(c
,shared
.ok
);
6770 /* ================================= Expire ================================= */
6771 static int removeExpire(redisDb
*db
, robj
*key
) {
6772 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6779 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6780 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6788 /* Return the expire time of the specified key, or -1 if no expire
6789 * is associated with this key (i.e. the key is non volatile) */
6790 static time_t getExpire(redisDb
*db
, robj
*key
) {
6793 /* No expire? return ASAP */
6794 if (dictSize(db
->expires
) == 0 ||
6795 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
6797 return (time_t) dictGetEntryVal(de
);
6800 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
6804 /* No expire? return ASAP */
6805 if (dictSize(db
->expires
) == 0 ||
6806 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6808 /* Lookup the expire */
6809 when
= (time_t) dictGetEntryVal(de
);
6810 if (time(NULL
) <= when
) return 0;
6812 /* Delete the key */
6813 dictDelete(db
->expires
,key
);
6814 server
.stat_expiredkeys
++;
6815 return dictDelete(db
->dict
,key
) == DICT_OK
;
6818 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
6821 /* No expire? return ASAP */
6822 if (dictSize(db
->expires
) == 0 ||
6823 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
6825 /* Delete the key */
6827 server
.stat_expiredkeys
++;
6828 dictDelete(db
->expires
,key
);
6829 return dictDelete(db
->dict
,key
) == DICT_OK
;
6832 static void expireGenericCommand(redisClient
*c
, robj
*key
, time_t seconds
) {
6835 de
= dictFind(c
->db
->dict
,key
);
6837 addReply(c
,shared
.czero
);
6841 if (deleteKey(c
->db
,key
)) server
.dirty
++;
6842 addReply(c
, shared
.cone
);
6845 time_t when
= time(NULL
)+seconds
;
6846 if (setExpire(c
->db
,key
,when
)) {
6847 addReply(c
,shared
.cone
);
6850 addReply(c
,shared
.czero
);
6856 static void expireCommand(redisClient
*c
) {
6857 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10));
6860 static void expireatCommand(redisClient
*c
) {
6861 expireGenericCommand(c
,c
->argv
[1],strtol(c
->argv
[2]->ptr
,NULL
,10)-time(NULL
));
6864 static void ttlCommand(redisClient
*c
) {
6868 expire
= getExpire(c
->db
,c
->argv
[1]);
6870 ttl
= (int) (expire
-time(NULL
));
6871 if (ttl
< 0) ttl
= -1;
6873 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
6876 /* ================================ MULTI/EXEC ============================== */
6878 /* Client state initialization for MULTI/EXEC */
6879 static void initClientMultiState(redisClient
*c
) {
6880 c
->mstate
.commands
= NULL
;
6881 c
->mstate
.count
= 0;
6884 /* Release all the resources associated with MULTI/EXEC state */
6885 static void freeClientMultiState(redisClient
*c
) {
6888 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6890 multiCmd
*mc
= c
->mstate
.commands
+j
;
6892 for (i
= 0; i
< mc
->argc
; i
++)
6893 decrRefCount(mc
->argv
[i
]);
6896 zfree(c
->mstate
.commands
);
6899 /* Add a new command into the MULTI commands queue */
6900 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
6904 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
6905 sizeof(multiCmd
)*(c
->mstate
.count
+1));
6906 mc
= c
->mstate
.commands
+c
->mstate
.count
;
6909 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
6910 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
6911 for (j
= 0; j
< c
->argc
; j
++)
6912 incrRefCount(mc
->argv
[j
]);
6916 static void multiCommand(redisClient
*c
) {
6917 c
->flags
|= REDIS_MULTI
;
6918 addReply(c
,shared
.ok
);
6921 static void discardCommand(redisClient
*c
) {
6922 if (!(c
->flags
& REDIS_MULTI
)) {
6923 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
6927 freeClientMultiState(c
);
6928 initClientMultiState(c
);
6929 c
->flags
&= (~REDIS_MULTI
);
6930 addReply(c
,shared
.ok
);
6933 static void execCommand(redisClient
*c
) {
6938 if (!(c
->flags
& REDIS_MULTI
)) {
6939 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
6943 orig_argv
= c
->argv
;
6944 orig_argc
= c
->argc
;
6945 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
6946 for (j
= 0; j
< c
->mstate
.count
; j
++) {
6947 c
->argc
= c
->mstate
.commands
[j
].argc
;
6948 c
->argv
= c
->mstate
.commands
[j
].argv
;
6949 call(c
,c
->mstate
.commands
[j
].cmd
);
6951 c
->argv
= orig_argv
;
6952 c
->argc
= orig_argc
;
6953 freeClientMultiState(c
);
6954 initClientMultiState(c
);
6955 c
->flags
&= (~REDIS_MULTI
);
6958 /* =========================== Blocking Operations ========================= */
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, using BLPOP as an example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
6989 /* Set a client in blocking mode for the specified key, with the specified
6991 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
6996 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
6997 c
->blockingkeysnum
= numkeys
;
6998 c
->blockingto
= timeout
;
6999 for (j
= 0; j
< numkeys
; j
++) {
7000 /* Add the key in the client structure, to map clients -> keys */
7001 c
->blockingkeys
[j
] = keys
[j
];
7002 incrRefCount(keys
[j
]);
7004 /* And in the other "side", to map keys -> clients */
7005 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7009 /* For every key we take a list of clients blocked for it */
7011 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7012 incrRefCount(keys
[j
]);
7013 assert(retval
== DICT_OK
);
7015 l
= dictGetEntryVal(de
);
7017 listAddNodeTail(l
,c
);
7019 /* Mark the client as a blocked client */
7020 c
->flags
|= REDIS_BLOCKED
;
7021 server
.blpop_blocked_clients
++;
7024 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7025 static void unblockClientWaitingData(redisClient
*c
) {
7030 assert(c
->blockingkeys
!= NULL
);
7031 /* The client may wait for multiple keys, so unblock it for every key. */
7032 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7033 /* Remove this client from the list of clients waiting for this key. */
7034 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7036 l
= dictGetEntryVal(de
);
7037 listDelNode(l
,listSearchKey(l
,c
));
7038 /* If the list is empty we need to remove it to avoid wasting memory */
7039 if (listLength(l
) == 0)
7040 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7041 decrRefCount(c
->blockingkeys
[j
]);
7043 /* Cleanup the client structure */
7044 zfree(c
->blockingkeys
);
7045 c
->blockingkeys
= NULL
;
7046 c
->flags
&= (~REDIS_BLOCKED
);
7047 server
.blpop_blocked_clients
--;
7048 /* We want to process data if there is some command waiting
7049 * in the input buffer. Note that this is safe even if
7050 * unblockClientWaitingData() gets called from freeClient() because
7051 * freeClient() will be smart enough to call this function
7052 * *after* c->querybuf was set to NULL. */
7053 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7056 /* This should be called from any function PUSHing into lists.
7057 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7058 * 'ele' is the element pushed.
7060 * If the function returns 0 there was no client waiting for a list push
7063 * If the function returns 1 there was a client waiting for a list push
7064 * against this key, the element was passed to this client thus it's not
7065 * needed to actually add it to the list and the caller should return asap. */
7066 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7067 struct dictEntry
*de
;
7068 redisClient
*receiver
;
7072 de
= dictFind(c
->db
->blockingkeys
,key
);
7073 if (de
== NULL
) return 0;
7074 l
= dictGetEntryVal(de
);
7077 receiver
= ln
->value
;
7079 addReplySds(receiver
,sdsnew("*2\r\n"));
7080 addReplyBulk(receiver
,key
);
7081 addReplyBulk(receiver
,ele
);
7082 unblockClientWaitingData(receiver
);
7086 /* Blocking RPOP/LPOP */
7087 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7092 for (j
= 1; j
< c
->argc
-1; j
++) {
7093 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7095 if (o
->type
!= REDIS_LIST
) {
7096 addReply(c
,shared
.wrongtypeerr
);
7099 list
*list
= o
->ptr
;
7100 if (listLength(list
) != 0) {
7101 /* If the list contains elements fall back to the usual
7102 * non-blocking POP operation */
7103 robj
*argv
[2], **orig_argv
;
7106 /* We need to alter the command arguments before to call
7107 * popGenericCommand() as the command takes a single key. */
7108 orig_argv
= c
->argv
;
7109 orig_argc
= c
->argc
;
7110 argv
[1] = c
->argv
[j
];
7114 /* Also the return value is different, we need to output
7115 * the multi bulk reply header and the key name. The
7116 * "real" command will add the last element (the value)
7117 * for us. If this souds like an hack to you it's just
7118 * because it is... */
7119 addReplySds(c
,sdsnew("*2\r\n"));
7120 addReplyBulk(c
,argv
[1]);
7121 popGenericCommand(c
,where
);
7123 /* Fix the client structure with the original stuff */
7124 c
->argv
= orig_argv
;
7125 c
->argc
= orig_argc
;
7131 /* If the list is empty or the key does not exists we must block */
7132 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7133 if (timeout
> 0) timeout
+= time(NULL
);
7134 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7137 static void blpopCommand(redisClient
*c
) {
7138 blockingPopGenericCommand(c
,REDIS_HEAD
);
7141 static void brpopCommand(redisClient
*c
) {
7142 blockingPopGenericCommand(c
,REDIS_TAIL
);
7145 /* =============================== Replication ============================= */
7147 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7148 ssize_t nwritten
, ret
= size
;
7149 time_t start
= time(NULL
);
7153 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7154 nwritten
= write(fd
,ptr
,size
);
7155 if (nwritten
== -1) return -1;
7159 if ((time(NULL
)-start
) > timeout
) {
7167 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7168 ssize_t nread
, totread
= 0;
7169 time_t start
= time(NULL
);
7173 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7174 nread
= read(fd
,ptr
,size
);
7175 if (nread
== -1) return -1;
7180 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr', one byte at a time via syncRead(), with
 * 'timeout' applied to every single byte read.
 *
 * 'size' is the size of the buffer at 'ptr'. At most size-1 characters are
 * stored and the buffer is always null terminated (when size >= 1). Reading
 * stops at the first '\n'; the newline is not stored and a '\r' right before
 * it is stripped.
 *
 * Returns the number of characters stored before the newline (the stripped
 * '\r', if any, is still counted), or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    *ptr = '\0';
    size--; /* Reserve room for the terminator. */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* One slot consumed: without this the loop never stops on a line
         * longer than the buffer and writes past its end. */
        size--;
    }
    return nread;
}
7209 static void syncCommand(redisClient
*c
) {
7210 /* ignore SYNC if aleady slave or in monitor mode */
7211 if (c
->flags
& REDIS_SLAVE
) return;
7213 /* SYNC can't be issued when the server has pending data to send to
7214 * the client about already issued commands. We need a fresh reply
7215 * buffer registering the differences between the BGSAVE and the current
7216 * dataset, so that we can copy to other slaves if needed. */
7217 if (listLength(c
->reply
) != 0) {
7218 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7222 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7223 /* Here we need to check if there is a background saving operation
7224 * in progress, or if it is required to start one */
7225 if (server
.bgsavechildpid
!= -1) {
7226 /* Ok a background save is in progress. Let's check if it is a good
7227 * one for replication, i.e. if there is another slave that is
7228 * registering differences since the server forked to save */
7233 listRewind(server
.slaves
,&li
);
7234 while((ln
= listNext(&li
))) {
7236 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7239 /* Perfect, the server is already registering differences for
7240 * another slave. Set the right state, and copy the buffer. */
7241 listRelease(c
->reply
);
7242 c
->reply
= listDup(slave
->reply
);
7243 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7244 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7246 /* No way, we need to wait for the next BGSAVE in order to
7247 * register differences */
7248 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7249 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7252 /* Ok we don't have a BGSAVE in progress, let's start one */
7253 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7254 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7255 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7256 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7259 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7262 c
->flags
|= REDIS_SLAVE
;
7264 listAddNodeTail(server
.slaves
,c
);
7268 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7269 redisClient
*slave
= privdata
;
7271 REDIS_NOTUSED(mask
);
7272 char buf
[REDIS_IOBUF_LEN
];
7273 ssize_t nwritten
, buflen
;
7275 if (slave
->repldboff
== 0) {
7276 /* Write the bulk write count before to transfer the DB. In theory here
7277 * we don't know how much room there is in the output buffer of the
7278 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7279 * operations) will never be smaller than the few bytes we need. */
7282 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7284 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7292 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7293 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7295 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7296 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7300 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7301 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7306 slave
->repldboff
+= nwritten
;
7307 if (slave
->repldboff
== slave
->repldbsize
) {
7308 close(slave
->repldbfd
);
7309 slave
->repldbfd
= -1;
7310 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7311 slave
->replstate
= REDIS_REPL_ONLINE
;
7312 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7313 sendReplyToClient
, slave
) == AE_ERR
) {
7317 addReplySds(slave
,sdsempty());
7318 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7322 /* This function is called at the end of every backgrond saving.
7323 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7324 * otherwise REDIS_ERR is passed to the function.
7326 * The goal of this function is to handle slaves waiting for a successful
7327 * background saving in order to perform non-blocking synchronization. */
7328 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7330 int startbgsave
= 0;
7333 listRewind(server
.slaves
,&li
);
7334 while((ln
= listNext(&li
))) {
7335 redisClient
*slave
= ln
->value
;
7337 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7339 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7340 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7341 struct redis_stat buf
;
7343 if (bgsaveerr
!= REDIS_OK
) {
7345 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7348 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7349 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7351 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7354 slave
->repldboff
= 0;
7355 slave
->repldbsize
= buf
.st_size
;
7356 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7357 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7358 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7365 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7368 listRewind(server
.slaves
,&li
);
7369 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7370 while((ln
= listNext(&li
))) {
7371 redisClient
*slave
= ln
->value
;
7373 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7380 static int syncWithMaster(void) {
7381 char buf
[1024], tmpfile
[256], authcmd
[1024];
7383 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7384 int dfd
, maxtries
= 5;
7387 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7392 /* AUTH with the master if required. */
7393 if(server
.masterauth
) {
7394 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7395 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7397 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7401 /* Read the AUTH result. */
7402 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7404 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7408 if (buf
[0] != '+') {
7410 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7415 /* Issue the SYNC command */
7416 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7418 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7422 /* Read the bulk write count */
7423 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7425 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7429 if (buf
[0] != '$') {
7431 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7434 dumpsize
= strtol(buf
+1,NULL
,10);
7435 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7436 /* Read the bulk write data on a temp file */
7438 snprintf(tmpfile
,256,
7439 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7440 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7441 if (dfd
!= -1) break;
7446 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7450 int nread
, nwritten
;
7452 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7454 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7460 nwritten
= write(dfd
,buf
,nread
);
7461 if (nwritten
== -1) {
7462 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7470 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7471 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7477 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7478 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7482 server
.master
= createClient(fd
);
7483 server
.master
->flags
|= REDIS_MASTER
;
7484 server
.master
->authenticated
= 1;
7485 server
.replstate
= REDIS_REPL_CONNECTED
;
7489 static void slaveofCommand(redisClient
*c
) {
7490 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7491 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7492 if (server
.masterhost
) {
7493 sdsfree(server
.masterhost
);
7494 server
.masterhost
= NULL
;
7495 if (server
.master
) freeClient(server
.master
);
7496 server
.replstate
= REDIS_REPL_NONE
;
7497 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7500 sdsfree(server
.masterhost
);
7501 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7502 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7503 if (server
.master
) freeClient(server
.master
);
7504 server
.replstate
= REDIS_REPL_CONNECT
;
7505 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7506 server
.masterhost
, server
.masterport
);
7508 addReply(c
,shared
.ok
);
7511 /* ============================ Maxmemory directive ======================== */
7513 /* Try to free one object form the pre-allocated objects free list.
7514 * This is useful under low mem conditions as by default we take 1 million
7515 * free objects allocated. On success REDIS_OK is returned, otherwise
7517 static int tryFreeOneObjectFromFreelist(void) {
7520 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7521 if (listLength(server
.objfreelist
)) {
7522 listNode
*head
= listFirst(server
.objfreelist
);
7523 o
= listNodeValue(head
);
7524 listDelNode(server
.objfreelist
,head
);
7525 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7529 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7534 /* This function gets called when 'maxmemory' is set on the config file to limit
7535 * the max memory used by the server, and we are out of memory.
7536 * This function will try to, in order:
7538 * - Free objects from the free list
7539 * - Try to remove keys with an EXPIRE set
7541 * It is not possible to free enough memory to reach used-memory < maxmemory
7542 * the server will start refusing commands that will enlarge even more the
7545 static void freeMemoryIfNeeded(void) {
7546 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7547 int j
, k
, freed
= 0;
7549 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7550 for (j
= 0; j
< server
.dbnum
; j
++) {
7552 robj
*minkey
= NULL
;
7553 struct dictEntry
*de
;
7555 if (dictSize(server
.db
[j
].expires
)) {
7557 /* From a sample of three keys drop the one nearest to
7558 * the natural expire */
7559 for (k
= 0; k
< 3; k
++) {
7562 de
= dictGetRandomKey(server
.db
[j
].expires
);
7563 t
= (time_t) dictGetEntryVal(de
);
7564 if (minttl
== -1 || t
< minttl
) {
7565 minkey
= dictGetEntryKey(de
);
7569 deleteKey(server
.db
+j
,minkey
);
7572 if (!freed
) return; /* nothing to free... */
7576 /* ============================== Append Only file ========================== */
7578 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7579 sds buf
= sdsempty();
7585 /* The DB this command was targetting is not the same as the last command
7586 * we appendend. To issue a SELECT command is needed. */
7587 if (dictid
!= server
.appendseldb
) {
7590 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7591 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7592 (unsigned long)strlen(seldb
),seldb
);
7593 server
.appendseldb
= dictid
;
7596 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7597 * EXPIREs into EXPIREATs calls */
7598 if (cmd
->proc
== expireCommand
) {
7601 tmpargv
[0] = createStringObject("EXPIREAT",8);
7602 tmpargv
[1] = argv
[1];
7603 incrRefCount(argv
[1]);
7604 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7605 tmpargv
[2] = createObject(REDIS_STRING
,
7606 sdscatprintf(sdsempty(),"%ld",when
));
7610 /* Append the actual command */
7611 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7612 for (j
= 0; j
< argc
; j
++) {
7615 o
= getDecodedObject(o
);
7616 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7617 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7618 buf
= sdscatlen(buf
,"\r\n",2);
7622 /* Free the objects from the modified argv for EXPIREAT */
7623 if (cmd
->proc
== expireCommand
) {
7624 for (j
= 0; j
< 3; j
++)
7625 decrRefCount(argv
[j
]);
7628 /* We want to perform a single write. This should be guaranteed atomic
7629 * at least if the filesystem we are writing is a real physical one.
7630 * While this will save us against the server being killed I don't think
7631 * there is much to do about the whole server stopping for power problems
7633 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7634 if (nwritten
!= (signed)sdslen(buf
)) {
7635 /* Ooops, we are in troubles. The best thing to do for now is
7636 * to simply exit instead to give the illusion that everything is
7637 * working as expected. */
7638 if (nwritten
== -1) {
7639 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7641 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7645 /* If a background append only file rewriting is in progress we want to
7646 * accumulate the differences between the child DB and the current one
7647 * in a buffer, so that when the child process will do its work we
7648 * can append the differences to the new append only file. */
7649 if (server
.bgrewritechildpid
!= -1)
7650 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7654 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7655 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7656 now
-server
.lastfsync
> 1))
7658 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7659 server
.lastfsync
= now
;
7663 /* In Redis commands are always executed in the context of a client, so in
7664 * order to load the append only file we need to create a fake client. */
7665 static struct redisClient
*createFakeClient(void) {
7666 struct redisClient
*c
= zmalloc(sizeof(*c
));
7670 c
->querybuf
= sdsempty();
7674 /* We set the fake client as a slave waiting for the synchronization
7675 * so that Redis will not try to send replies to this client. */
7676 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7677 c
->reply
= listCreate();
7678 listSetFreeMethod(c
->reply
,decrRefCount
);
7679 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7683 static void freeFakeClient(struct redisClient
*c
) {
7684 sdsfree(c
->querybuf
);
7685 listRelease(c
->reply
);
7689 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
7690 * error (the append only file is zero-length) REDIS_ERR is returned. On
7691 * fatal error an error message is logged and the program exists. */
7692 int loadAppendOnlyFile(char *filename
) {
7693 struct redisClient
*fakeClient
;
7694 FILE *fp
= fopen(filename
,"r");
7695 struct redis_stat sb
;
7696 unsigned long long loadedkeys
= 0;
7698 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7702 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7706 fakeClient
= createFakeClient();
7713 struct redisCommand
*cmd
;
7715 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7721 if (buf
[0] != '*') goto fmterr
;
7723 argv
= zmalloc(sizeof(robj
*)*argc
);
7724 for (j
= 0; j
< argc
; j
++) {
7725 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7726 if (buf
[0] != '$') goto fmterr
;
7727 len
= strtol(buf
+1,NULL
,10);
7728 argsds
= sdsnewlen(NULL
,len
);
7729 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7730 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7731 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7734 /* Command lookup */
7735 cmd
= lookupCommand(argv
[0]->ptr
);
7737 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7740 /* Try object sharing and encoding */
7741 if (server
.shareobjects
) {
7743 for(j
= 1; j
< argc
; j
++)
7744 argv
[j
] = tryObjectSharing(argv
[j
]);
7746 if (cmd
->flags
& REDIS_CMD_BULK
)
7747 tryObjectEncoding(argv
[argc
-1]);
7748 /* Run the command in the context of a fake client */
7749 fakeClient
->argc
= argc
;
7750 fakeClient
->argv
= argv
;
7751 cmd
->proc(fakeClient
);
7752 /* Discard the reply objects list from the fake client */
7753 while(listLength(fakeClient
->reply
))
7754 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7755 /* Clean up, ready for the next command */
7756 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7758 /* Handle swapping while loading big datasets when VM is on */
7760 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7761 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7762 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7767 freeFakeClient(fakeClient
);
7772 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7774 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7778 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7782 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7783 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7787 /* Avoid the incr/decr ref count business if possible to help
7788 * copy-on-write (we are often in a child process when this function
7790 * Also makes sure that key objects don't get incrRefCount-ed when VM
7792 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
7793 obj
= getDecodedObject(obj
);
7796 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
7797 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
7798 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
7800 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
7801 if (decrrc
) decrRefCount(obj
);
7804 if (decrrc
) decrRefCount(obj
);
7808 /* Write a binary-safe string into a file in the bulk format
7809 * $<count>\r\n<payload>\r\n */
7810 static int fwriteBulkString(FILE *fp
, char *s
, unsigned long len
) {
7813 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(unsigned long)len
);
7814 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7815 if (len
&& fwrite(s
,len
,1,fp
) == 0) return 0;
7816 if (fwrite("\r\n",2,1,fp
) == 0) return 0;
7820 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
7821 static int fwriteBulkDouble(FILE *fp
, double d
) {
7822 char buf
[128], dbuf
[128];
7824 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
7825 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
7826 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7827 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
7831 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
7832 static int fwriteBulkLong(FILE *fp
, long l
) {
7833 char buf
[128], lbuf
[128];
7835 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
7836 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
7837 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
7838 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
7842 /* Write a sequence of commands able to fully rebuild the dataset into
7843 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
7844 static int rewriteAppendOnlyFile(char *filename
) {
7845 dictIterator
*di
= NULL
;
7850 time_t now
= time(NULL
);
7852 /* Note that we have to use a different temp name here compared to the
7853 * one used by rewriteAppendOnlyFileBackground() function. */
7854 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
7855 fp
= fopen(tmpfile
,"w");
7857 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
7860 for (j
= 0; j
< server
.dbnum
; j
++) {
7861 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
7862 redisDb
*db
= server
.db
+j
;
7864 if (dictSize(d
) == 0) continue;
7865 di
= dictGetIterator(d
);
7871 /* SELECT the new DB */
7872 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
7873 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
7875 /* Iterate this DB writing every entry */
7876 while((de
= dictNext(di
)) != NULL
) {
7881 key
= dictGetEntryKey(de
);
7882 /* If the value for this key is swapped, load a preview in memory.
7883 * We use a "swapped" flag to remember if we need to free the
7884 * value object, instead of just incrementing the ref count anyway,
7885 * in order to avoid copy-on-write of pages if we are forked() */
7886 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
7887 key
->storage
== REDIS_VM_SWAPPING
) {
7888 o
= dictGetEntryVal(de
);
7891 o
= vmPreviewObject(key
);
7894 expiretime
= getExpire(db
,key
);
7896 /* Save the key and associated value */
7897 if (o
->type
== REDIS_STRING
) {
7898 /* Emit a SET command */
7899 char cmd
[]="*3\r\n$3\r\nSET\r\n";
7900 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7902 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7903 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
7904 } else if (o
->type
== REDIS_LIST
) {
7905 /* Emit the RPUSHes needed to rebuild the list */
7906 list
*list
= o
->ptr
;
7910 listRewind(list
,&li
);
7911 while((ln
= listNext(&li
))) {
7912 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
7913 robj
*eleobj
= listNodeValue(ln
);
7915 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7916 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7917 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7919 } else if (o
->type
== REDIS_SET
) {
7920 /* Emit the SADDs needed to rebuild the set */
7922 dictIterator
*di
= dictGetIterator(set
);
7925 while((de
= dictNext(di
)) != NULL
) {
7926 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
7927 robj
*eleobj
= dictGetEntryKey(de
);
7929 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7930 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7931 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7933 dictReleaseIterator(di
);
7934 } else if (o
->type
== REDIS_ZSET
) {
7935 /* Emit the ZADDs needed to rebuild the sorted set */
7937 dictIterator
*di
= dictGetIterator(zs
->dict
);
7940 while((de
= dictNext(di
)) != NULL
) {
7941 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
7942 robj
*eleobj
= dictGetEntryKey(de
);
7943 double *score
= dictGetEntryVal(de
);
7945 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7946 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7947 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
7948 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
7950 dictReleaseIterator(di
);
7951 } else if (o
->type
== REDIS_HASH
) {
7952 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
7954 /* Emit the HSETs needed to rebuild the hash */
7955 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7956 unsigned char *p
= zipmapRewind(o
->ptr
);
7957 unsigned char *field
, *val
;
7958 unsigned int flen
, vlen
;
7960 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
7961 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7962 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7963 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
7965 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
7969 dictIterator
*di
= dictGetIterator(o
->ptr
);
7972 while((de
= dictNext(di
)) != NULL
) {
7973 robj
*field
= dictGetEntryKey(de
);
7974 robj
*val
= dictGetEntryVal(de
);
7976 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7977 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7978 if (fwriteBulkObject(fp
,field
) == -1) return -1;
7979 if (fwriteBulkObject(fp
,val
) == -1) return -1;
7981 dictReleaseIterator(di
);
7986 /* Save the expire time */
7987 if (expiretime
!= -1) {
7988 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
7989 /* If this key is already expired skip it */
7990 if (expiretime
< now
) continue;
7991 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
7992 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
7993 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
7995 if (swapped
) decrRefCount(o
);
7997 dictReleaseIterator(di
);
8000 /* Make sure data will not remain on the OS's output buffers */
8005 /* Use RENAME to make sure the append only file is changed atomically
8006 * only if the generated file is ok. */
8007 if (rename(tmpfile
,filename
) == -1) {
8008 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8012 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8018 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8019 if (di
) dictReleaseIterator(di
);
8023 /* This is how rewriting of the append only file in background works:
8025 * 1) The user calls BGREWRITEAOF
8026 * 2) Redis calls this function, that forks():
8027 * 2a) the child rewrites the append only file in a temp file.
8028 * 2b) the parent accumulates differences in server.bgrewritebuf.
8029 * 3) When the child finishes '2a' it exits.
8030 * 4) The parent will trap the exit code, if it's OK, will append the
8031 * data accumulated into server.bgrewritebuf into the temp file, and
8032 * finally will rename(2) the temp file to the actual file name.
8033 * Then the new file is reopened as the new append only file. Profit!
8035 static int rewriteAppendOnlyFileBackground(void) {
8038 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8039 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8040 if ((childpid
= fork()) == 0) {
8044 if (server
.vm_enabled
) vmReopenSwapFile();
8046 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8047 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8054 if (childpid
== -1) {
8055 redisLog(REDIS_WARNING
,
8056 "Can't rewrite append only file in background: fork: %s",
8060 redisLog(REDIS_NOTICE
,
8061 "Background append only file rewriting started by pid %d",childpid
);
8062 server
.bgrewritechildpid
= childpid
;
8063 /* We set appendseldb to -1 in order to force the next call to the
8064 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8065 * accumulated by the parent into server.bgrewritebuf will start
8066 * with a SELECT statement and it will be safe to merge. */
8067 server
.appendseldb
= -1;
8070 return REDIS_OK
; /* unreached */
8073 static void bgrewriteaofCommand(redisClient
*c
) {
8074 if (server
.bgrewritechildpid
!= -1) {
8075 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8078 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8079 char *status
= "+Background append only file rewriting started\r\n";
8080 addReplySds(c
,sdsnew(status
));
8082 addReply(c
,shared
.err
);
8086 static void aofRemoveTempFile(pid_t childpid
) {
8089 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8093 /* Virtual Memory is composed mainly of two subsystems:
8094 * - Blocking Virtual Memory
8095 * - Threaded Virtual Memory I/O
8096 * The two parts are not fully decoupled, but functions are split among two
8097 * different sections of the source code (delimited by comments) in order to
8098 * make more clear what functionality is about the blocking VM and what about
8099 * the threaded (not blocking) VM.
8103 * Redis VM is a blocking VM (one that blocks reading swapped values from
8104 * disk into memory when a value swapped out is needed in memory) that is made
8105 * unblocking by trying to examine the command argument vector in order to
8106 * load in background values that will likely be needed in order to exec
8107 * the command. The command is executed only once all the relevant keys
8108 * are loaded into memory.
8110 * This basically is almost as simple as a blocking VM, but almost as parallel
8111 * as a fully non-blocking VM.
8114 /* =================== Virtual Memory - Blocking Side ====================== */
8116 /* substitute the first occurrence of '%p' with the process pid in the
8117 * swap file name. */
8118 static void expandVmSwapFilename(void) {
8119 char *p
= strstr(server
.vm_swap_file
,"%p");
8125 new = sdscat(new,server
.vm_swap_file
);
8126 new = sdscatprintf(new,"%ld",(long) getpid());
8127 new = sdscat(new,p
+2);
8128 zfree(server
.vm_swap_file
);
8129 server
.vm_swap_file
= new;
8132 static void vmInit(void) {
8137 if (server
.vm_max_threads
!= 0)
8138 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8140 expandVmSwapFilename();
8141 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8142 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8143 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8145 if (server
.vm_fp
== NULL
) {
8146 redisLog(REDIS_WARNING
,
8147 "Impossible to open the swap file: %s. Exiting.",
8151 server
.vm_fd
= fileno(server
.vm_fp
);
8152 server
.vm_next_page
= 0;
8153 server
.vm_near_pages
= 0;
8154 server
.vm_stats_used_pages
= 0;
8155 server
.vm_stats_swapped_objects
= 0;
8156 server
.vm_stats_swapouts
= 0;
8157 server
.vm_stats_swapins
= 0;
8158 totsize
= server
.vm_pages
*server
.vm_page_size
;
8159 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8160 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8161 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8165 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8167 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8168 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8169 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8170 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8172 /* Initialize threaded I/O (used by Virtual Memory) */
8173 server
.io_newjobs
= listCreate();
8174 server
.io_processing
= listCreate();
8175 server
.io_processed
= listCreate();
8176 server
.io_ready_clients
= listCreate();
8177 pthread_mutex_init(&server
.io_mutex
,NULL
);
8178 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8179 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8180 server
.io_active_threads
= 0;
8181 if (pipe(pipefds
) == -1) {
8182 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8186 server
.io_ready_pipe_read
= pipefds
[0];
8187 server
.io_ready_pipe_write
= pipefds
[1];
8188 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8189 /* LZF requires a lot of stack */
8190 pthread_attr_init(&server
.io_threads_attr
);
8191 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8192 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8193 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8194 /* Listen for events in the threaded I/O pipe */
8195 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8196 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8197 oom("creating file event");
8200 /* Mark the page as used */
8201 static void vmMarkPageUsed(off_t page
) {
8202 off_t byte
= page
/8;
8204 redisAssert(vmFreePage(page
) == 1);
8205 server
.vm_bitmap
[byte
] |= 1<<bit
;
8208 /* Mark N contiguous pages as used, with 'page' being the first. */
8209 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8212 for (j
= 0; j
< count
; j
++)
8213 vmMarkPageUsed(page
+j
);
8214 server
.vm_stats_used_pages
+= count
;
8215 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8216 (long long)count
, (long long)page
);
8219 /* Mark the page as free */
8220 static void vmMarkPageFree(off_t page
) {
8221 off_t byte
= page
/8;
8223 redisAssert(vmFreePage(page
) == 0);
8224 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8227 /* Mark N contiguous pages as free, with 'page' being the first. */
8228 static void vmMarkPagesFree(off_t page
, off_t count
) {
8231 for (j
= 0; j
< count
; j
++)
8232 vmMarkPageFree(page
+j
);
8233 server
.vm_stats_used_pages
-= count
;
8234 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8235 (long long)count
, (long long)page
);
8238 /* Test if the page is free */
8239 static int vmFreePage(off_t page
) {
8240 off_t byte
= page
/8;
8242 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8245 /* Find N contiguous free pages storing the first page of the cluster in *first.
8246 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8247 * REDIS_ERR is returned.
8249 * This function uses a simple algorithm: we try to allocate
8250 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8251 * again from the start of the swap file searching for free spaces.
8253 * If it looks pretty clear that there are no free pages near our offset
8254 * we try to find less populated places doing a forward jump of
8255 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8256 * without hurry, and then we jump again and so forth...
8258 * This function can be improved using a free list to avoid guessing
8259 * too much, since we could collect data about freed pages.
8261 * note: I implemented this function just after watching an episode of
8262 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8264 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8265 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8267 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8268 server
.vm_near_pages
= 0;
8269 server
.vm_next_page
= 0;
8271 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8272 base
= server
.vm_next_page
;
8274 while(offset
< server
.vm_pages
) {
8275 off_t
this = base
+offset
;
8277 /* If we overflow, restart from page zero */
8278 if (this >= server
.vm_pages
) {
8279 this -= server
.vm_pages
;
8281 /* Just overflowed, what we found on tail is no longer
8282 * interesting, as it's no longer contiguous. */
8286 if (vmFreePage(this)) {
8287 /* This is a free page */
8289 /* Already got N free pages? Return to the caller, with success */
8291 *first
= this-(n
-1);
8292 server
.vm_next_page
= this+1;
8293 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8297 /* The current one is not a free page */
8301 /* Fast-forward if the current page is not free and we already
8302 * searched enough near this place. */
8304 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8305 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8307 /* Note that even if we rewind after the jump, we don't need
8308 * to make sure numfree is set to zero as we only jump *if* it
8309 * is set to zero. */
8311 /* Otherwise just check the next page */
8318 /* Write the specified object at the specified page of the swap file */
8319 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8320 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8321 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8322 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8323 redisLog(REDIS_WARNING
,
8324 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8328 rdbSaveObject(server
.vm_fp
,o
);
8329 fflush(server
.vm_fp
);
8330 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8334 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8335 * needed to later retrieve the object into the key object.
8336 * If we can't find enough contiguous empty pages to swap the object on disk
8337 * REDIS_ERR is returned. */
8338 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8339 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8342 assert(key
->storage
== REDIS_VM_MEMORY
);
8343 assert(key
->refcount
== 1);
8344 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8345 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8346 key
->vm
.page
= page
;
8347 key
->vm
.usedpages
= pages
;
8348 key
->storage
= REDIS_VM_SWAPPED
;
8349 key
->vtype
= val
->type
;
8350 decrRefCount(val
); /* Deallocate the object from memory. */
8351 vmMarkPagesUsed(page
,pages
);
8352 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8353 (unsigned char*) key
->ptr
,
8354 (unsigned long long) page
, (unsigned long long) pages
);
8355 server
.vm_stats_swapped_objects
++;
8356 server
.vm_stats_swapouts
++;
8360 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8363 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8364 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8365 redisLog(REDIS_WARNING
,
8366 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8370 o
= rdbLoadObject(type
,server
.vm_fp
);
8372 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8375 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8379 /* Load the value object relative to the 'key' object from swap to memory.
8380 * The newly allocated object is returned.
8382 * If preview is true the unserialized object is returned to the caller but
8383 * no changes are made to the key object, nor are the pages marked as freed */
8384 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8387 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8388 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8390 key
->storage
= REDIS_VM_MEMORY
;
8391 key
->vm
.atime
= server
.unixtime
;
8392 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8393 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8394 (unsigned char*) key
->ptr
);
8395 server
.vm_stats_swapped_objects
--;
8397 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8398 (unsigned char*) key
->ptr
);
8400 server
.vm_stats_swapins
++;
8404 /* Plain object loading, from swap to memory */
8405 static robj
*vmLoadObject(robj
*key
) {
8406 /* If we are loading the object in background, stop it, we
8407 * need to load this object synchronously ASAP. */
8408 if (key
->storage
== REDIS_VM_LOADING
)
8409 vmCancelThreadedIOJob(key
);
8410 return vmGenericLoadObject(key
,0);
8413 /* Just load the value from disk, without modifying the key.
8414 * This is useful when we want to perform some operation on the value
8415 * without really bringing it from swap to memory, like while saving the
8416 * dataset or rewriting the append only log. */
8417 static robj
*vmPreviewObject(robj
*key
) {
8418 return vmGenericLoadObject(key
,1);
8421 /* How good a candidate is this object for swapping?
8422 * The better candidate it is, the greater the returned value.
8424 * Currently we try to perform a fast estimation of the object size in
8425 * memory, and combine it with aging information.
8427 * Basically swappability = idle-time * log(estimated size)
8429 * Bigger objects are preferred over smaller objects, but not
8430 * proportionally, this is why we use the logarithm. This algorithm is
8431 * just a first try and will probably be tuned later. */
8432 static double computeObjectSwappability(robj
*o
) {
8433 time_t age
= server
.unixtime
- o
->vm
.atime
;
8437 struct dictEntry
*de
;
8440 if (age
<= 0) return 0;
8443 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8446 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8451 listNode
*ln
= listFirst(l
);
8453 asize
= sizeof(list
);
8455 robj
*ele
= ln
->value
;
8458 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8459 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8461 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8466 z
= (o
->type
== REDIS_ZSET
);
8467 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8469 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8470 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8475 de
= dictGetRandomKey(d
);
8476 ele
= dictGetEntryKey(de
);
8477 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8478 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8480 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8481 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8485 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8486 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8487 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8488 unsigned int klen
, vlen
;
8489 unsigned char *key
, *val
;
8491 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8495 asize
= len
*(klen
+vlen
+3);
8496 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8498 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8503 de
= dictGetRandomKey(d
);
8504 ele
= dictGetEntryKey(de
);
8505 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8506 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8508 ele
= dictGetEntryVal(de
);
8509 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8510 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8512 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8517 return (double)age
*log(1+asize
);
8520 /* Try to swap an object that's a good candidate for swapping.
8521 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8522 * to swap any object at all.
8524 * If 'usethreads' is true, Redis will try to swap the object in background
8525 * using I/O threads. */
8526 static int vmSwapOneObject(int usethreads
) {
8528 struct dictEntry
*best
= NULL
;
8529 double best_swappability
= 0;
8530 redisDb
*best_db
= NULL
;
8533 for (j
= 0; j
< server
.dbnum
; j
++) {
8534 redisDb
*db
= server
.db
+j
;
8535 /* Why is maxtries set to 100?
8536 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8537 * of the objects are swappable */
8540 if (dictSize(db
->dict
) == 0) continue;
8541 for (i
= 0; i
< 5; i
++) {
8543 double swappability
;
8545 if (maxtries
) maxtries
--;
8546 de
= dictGetRandomKey(db
->dict
);
8547 key
= dictGetEntryKey(de
);
8548 val
= dictGetEntryVal(de
);
8549 /* Only swap objects that are currently in memory.
8551 * Also don't swap shared objects if threaded VM is on, as we
8552 * try to ensure that the main thread does not touch the
8553 * object while the I/O thread is using it, but we can't
8554 * control other keys without adding additional mutex. */
8555 if (key
->storage
!= REDIS_VM_MEMORY
||
8556 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8557 if (maxtries
) i
--; /* don't count this try */
8560 swappability
= computeObjectSwappability(val
);
8561 if (!best
|| swappability
> best_swappability
) {
8563 best_swappability
= swappability
;
8568 if (best
== NULL
) return REDIS_ERR
;
8569 key
= dictGetEntryKey(best
);
8570 val
= dictGetEntryVal(best
);
8572 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8573 key
->ptr
, best_swappability
);
8575 /* Unshare the key if needed */
8576 if (key
->refcount
> 1) {
8577 robj
*newkey
= dupStringObject(key
);
8579 key
= dictGetEntryKey(best
) = newkey
;
8583 vmSwapObjectThreaded(key
,val
,best_db
);
8586 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8587 dictGetEntryVal(best
) = NULL
;
8595 static int vmSwapOneObjectBlocking() {
8596 return vmSwapOneObject(0);
8599 static int vmSwapOneObjectThreaded() {
8600 return vmSwapOneObject(1);
8603 /* Return true if it's safe to swap out objects in a given moment.
8604 * Basically we don't want to swap objects out while there is a BGSAVE
8605 * or a BGREWRITEAOF running in background. */
8606 static int vmCanSwapOut(void) {
8607 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8610 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8611 * and was deleted. Otherwise 0 is returned. */
8612 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8616 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8617 foundkey
= dictGetEntryKey(de
);
8618 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8623 /* =================== Virtual Memory - Threaded I/O ======================= */
8625 static void freeIOJob(iojob
*j
) {
8626 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8627 j
->type
== REDIS_IOJOB_DO_SWAP
||
8628 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8629 decrRefCount(j
->val
);
8630 decrRefCount(j
->key
);
8634 /* Every time a thread finished a Job, it writes a byte into the write side
8635 * of an unix pipe in order to "awake" the main thread, and this function
8637 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8641 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8643 REDIS_NOTUSED(mask
);
8644 REDIS_NOTUSED(privdata
);
8646 /* For every byte we read in the read side of the pipe, there is one
8647 * I/O job completed to process. */
8648 while((retval
= read(fd
,buf
,1)) == 1) {
8652 struct dictEntry
*de
;
8654 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8656 /* Get the processed element (the oldest one) */
8658 assert(listLength(server
.io_processed
) != 0);
8659 if (toprocess
== -1) {
8660 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8661 if (toprocess
<= 0) toprocess
= 1;
8663 ln
= listFirst(server
.io_processed
);
8665 listDelNode(server
.io_processed
,ln
);
8667 /* If this job is marked as canceled, just ignore it */
8672 /* Post process it in the main thread, as there are things we
8673 * can do just here to avoid race conditions and/or invasive locks */
8674 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8675 de
= dictFind(j
->db
->dict
,j
->key
);
8677 key
= dictGetEntryKey(de
);
8678 if (j
->type
== REDIS_IOJOB_LOAD
) {
8681 /* Key loaded, bring it at home */
8682 key
->storage
= REDIS_VM_MEMORY
;
8683 key
->vm
.atime
= server
.unixtime
;
8684 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8685 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8686 (unsigned char*) key
->ptr
);
8687 server
.vm_stats_swapped_objects
--;
8688 server
.vm_stats_swapins
++;
8689 dictGetEntryVal(de
) = j
->val
;
8690 incrRefCount(j
->val
);
8693 /* Handle clients waiting for this key to be loaded. */
8694 handleClientsBlockedOnSwappedKey(db
,key
);
8695 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8696 /* Now we know the amount of pages required to swap this object.
8697 * Let's find some space for it, and queue this task again
8698 * rebranded as REDIS_IOJOB_DO_SWAP. */
8699 if (!vmCanSwapOut() ||
8700 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8702 /* Ooops... no space or we can't swap as there is
8703 * a fork()ed Redis trying to save stuff on disk. */
8705 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8707 /* Note that we need to mark these pages as used now,
8708 * if the job will be canceled, we'll mark them as freed
8710 vmMarkPagesUsed(j
->page
,j
->pages
);
8711 j
->type
= REDIS_IOJOB_DO_SWAP
;
8716 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8719 /* Key swapped. We can finally free some memory. */
8720 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8721 printf("key->storage: %d\n",key
->storage
);
8722 printf("key->name: %s\n",(char*)key
->ptr
);
8723 printf("key->refcount: %d\n",key
->refcount
);
8724 printf("val: %p\n",(void*)j
->val
);
8725 printf("val->type: %d\n",j
->val
->type
);
8726 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8728 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8729 val
= dictGetEntryVal(de
);
8730 key
->vm
.page
= j
->page
;
8731 key
->vm
.usedpages
= j
->pages
;
8732 key
->storage
= REDIS_VM_SWAPPED
;
8733 key
->vtype
= j
->val
->type
;
8734 decrRefCount(val
); /* Deallocate the object from memory. */
8735 dictGetEntryVal(de
) = NULL
;
8736 redisLog(REDIS_DEBUG
,
8737 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8738 (unsigned char*) key
->ptr
,
8739 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8740 server
.vm_stats_swapped_objects
++;
8741 server
.vm_stats_swapouts
++;
8743 /* Put a few more swap requests in queue if we are still
8745 if (trytoswap
&& vmCanSwapOut() &&
8746 zmalloc_used_memory() > server
.vm_max_memory
)
8751 more
= listLength(server
.io_newjobs
) <
8752 (unsigned) server
.vm_max_threads
;
8754 /* Don't waste CPU time if swappable objects are rare. */
8755 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8763 if (processed
== toprocess
) return;
8765 if (retval
< 0 && errno
!= EAGAIN
) {
8766 redisLog(REDIS_WARNING
,
8767 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
/* Acquire server.io_mutex with pthread_mutex_lock().
 * The return value of pthread_mutex_lock() is not checked. */
8772 static void lockThreadedIO(void) {
8773 pthread_mutex_lock(&server
.io_mutex
);
/* Release server.io_mutex with pthread_mutex_unlock().
 * The return value of pthread_mutex_unlock() is not checked. */
8776 static void unlockThreadedIO(void) {
8777 pthread_mutex_unlock(&server
.io_mutex
);
8780 /* Remove the specified object from the threaded I/O queue if still not
8781 * processed, otherwise make sure to flag it as canceled. */
8782 static void vmCancelThreadedIOJob(robj
*o
) {
8784 server
.io_newjobs
, /* 0 */
8785 server
.io_processing
, /* 1 */
8786 server
.io_processed
/* 2 */
8790 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
8793 /* Search for a matching key in one of the queues */
8794 for (i
= 0; i
< 3; i
++) {
8798 listRewind(lists
[i
],&li
);
8799 while ((ln
= listNext(&li
)) != NULL
) {
8800 iojob
*job
= ln
->value
;
8802 if (job
->canceled
) continue; /* Skip this, already canceled. */
8803 if (compareStringObjects(job
->key
,o
) == 0) {
8804 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
8805 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
8806 /* Mark the pages as free since the swap didn't happened
8807 * or happened but is now discarded. */
8808 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
8809 vmMarkPagesFree(job
->page
,job
->pages
);
8810 /* Cancel the job. It depends on the list the job is
8813 case 0: /* io_newjobs */
8814 /* If the job was yet not processed the best thing to do
8815 * is to remove it from the queue at all */
8817 listDelNode(lists
[i
],ln
);
8819 case 1: /* io_processing */
8820 /* Oh Shi- the thread is messing with the Job:
8822 * Probably it's accessing the object if this is a
8823 * PREPARE_SWAP or DO_SWAP job.
8824 * If it's a LOAD job it may be reading from disk and
8825 * if we don't wait for the job to terminate before to
8826 * cancel it, maybe in a few microseconds data can be
8827 * corrupted in this pages. So the short story is:
8829 * Better to wait for the job to move into the
8830 * next queue (processed)... */
8832 /* We try again and again until the job is completed. */
8834 /* But let's wait some time for the I/O thread
8835 * to finish with this job. After all this condition
8836 * should be very rare. */
8839 case 2: /* io_processed */
8840 /* The job was already processed, that's easy...
8841 * just mark it as canceled so that we'll ignore it
8842 * when processing completed jobs. */
8846 /* Finally we have to adjust the storage type of the object
8847 * in order to "UNDO" the operaiton. */
8848 if (o
->storage
== REDIS_VM_LOADING
)
8849 o
->storage
= REDIS_VM_SWAPPED
;
8850 else if (o
->storage
== REDIS_VM_SWAPPING
)
8851 o
->storage
= REDIS_VM_MEMORY
;
8858 assert(1 != 1); /* We should never reach this */
8861 static void *IOThreadEntryPoint(void *arg
) {
8866 pthread_detach(pthread_self());
8868 /* Get a new job to process */
8870 if (listLength(server
.io_newjobs
) == 0) {
8871 /* No new jobs in queue, exit. */
8872 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
8873 (long) pthread_self());
8874 server
.io_active_threads
--;
8878 ln
= listFirst(server
.io_newjobs
);
8880 listDelNode(server
.io_newjobs
,ln
);
8881 /* Add the job in the processing queue */
8882 j
->thread
= pthread_self();
8883 listAddNodeTail(server
.io_processing
,j
);
8884 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
8886 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
8887 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
8889 /* Process the Job */
8890 if (j
->type
== REDIS_IOJOB_LOAD
) {
8891 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
8892 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8893 FILE *fp
= fopen("/dev/null","w+");
8894 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
8896 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8897 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
8901 /* Done: insert the job into the processed queue */
8902 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
8903 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
8905 listDelNode(server
.io_processing
,ln
);
8906 listAddNodeTail(server
.io_processed
,j
);
8909 /* Signal the main thread there is new stuff to process */
8910 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
8912 return NULL
; /* never reached */
8915 static void spawnIOThread(void) {
8917 sigset_t mask
, omask
;
8921 sigaddset(&mask
,SIGCHLD
);
8922 sigaddset(&mask
,SIGHUP
);
8923 sigaddset(&mask
,SIGPIPE
);
8924 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
8925 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
8926 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
8930 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
8931 server
.io_active_threads
++;
8934 /* We need to wait for the last thread to exit before we are able to
8935 * fork() in order to BGSAVE or BGREWRITEAOF. */
8936 static void waitEmptyIOJobsQueue(void) {
8938 int io_processed_len
;
8941 if (listLength(server
.io_newjobs
) == 0 &&
8942 listLength(server
.io_processing
) == 0 &&
8943 server
.io_active_threads
== 0)
8948 /* While waiting for empty jobs queue condition we post-process some
8949 * finshed job, as I/O threads may be hanging trying to write against
8950 * the io_ready_pipe_write FD but there are so much pending jobs that
8952 io_processed_len
= listLength(server
.io_processed
);
8954 if (io_processed_len
) {
8955 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
8956 usleep(1000); /* 1 millisecond */
8958 usleep(10000); /* 10 milliseconds */
/* Re-open the swap file named by server.vm_swap_file in "r+b" mode.
 * On success the new stream is stored in server.vm_fp and its descriptor
 * (from fileno()) in server.vm_fd. On failure a warning is logged. */
8963 static void vmReopenSwapFile(void) {
8964 /* Note: we don't close the old one as we are in the child process
8965 * and don't want to mess at all with the original file object. */
8966 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
8967 if (server
.vm_fp
== NULL
) {
8968 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
8969 server
.vm_swap_file
);
/* NOTE(review): the log message says "Exiting", but the statement that
 * terminates the process is not visible in this view -- confirm it exists
 * before fileno() is reached with a NULL stream. */
8972 server
.vm_fd
= fileno(server
.vm_fp
);
8975 /* This function must be called while with threaded IO locked */
/* Log the job at debug level and append it to the tail of server.io_newjobs.
 * The job's key->ptr is printed with %s, so it must be a C string.
 *
 * j: the job to enqueue; j->type and j->key->ptr are read for the log line. */
8976 static void queueIOJob(iojob
*j
) {
8977 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
8978 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
8979 listAddNodeTail(server
.io_newjobs
,j
);
/* NOTE(review): the statement guarded by this check is not visible here;
 * presumably it starts another I/O thread while fewer than
 * server.vm_max_threads are active -- confirm. */
8980 if (server
.io_active_threads
< server
.vm_max_threads
)
8984 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
8987 assert(key
->storage
== REDIS_VM_MEMORY
);
8988 assert(key
->refcount
== 1);
8990 j
= zmalloc(sizeof(*j
));
8991 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
8993 j
->key
= dupStringObject(key
);
8997 j
->thread
= (pthread_t
) -1;
8998 key
->storage
= REDIS_VM_SWAPPING
;
9006 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9008 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9009 * If there is not already a job loading the key, it is created.
9010 * The key is added to the io_keys list in the client structure, and also
9011 * in the hash table mapping swapped keys to waiting clients, that is,
9012 * server.io_waited_keys. */
9013 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9014 struct dictEntry
*de
;
9018 /* If the key does not exist or is already in RAM we don't need to
9019 * block the client at all. */
9020 de
= dictFind(c
->db
->dict
,key
);
9021 if (de
== NULL
) return 0;
9022 o
= dictGetEntryKey(de
);
9023 if (o
->storage
== REDIS_VM_MEMORY
) {
9025 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9026 /* We were swapping the key, undo it! */
9027 vmCancelThreadedIOJob(o
);
9031 /* OK: the key is either swapped, or being loaded just now. */
9033 /* Add the key to the list of keys this client is waiting for.
9034 * This maps clients to keys they are waiting for. */
9035 listAddNodeTail(c
->io_keys
,key
);
9038 /* Add the client to the swapped keys => clients waiting map. */
9039 de
= dictFind(c
->db
->io_keys
,key
);
9043 /* For every key we take a list of clients blocked for it */
9045 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9047 assert(retval
== DICT_OK
);
9049 l
= dictGetEntryVal(de
);
9051 listAddNodeTail(l
,c
);
9053 /* Are we already loading the key from disk? If not create a job */
9054 if (o
->storage
== REDIS_VM_SWAPPED
) {
9057 o
->storage
= REDIS_VM_LOADING
;
9058 j
= zmalloc(sizeof(*j
));
9059 j
->type
= REDIS_IOJOB_LOAD
;
9061 j
->key
= dupStringObject(key
);
9062 j
->key
->vtype
= o
->vtype
;
9063 j
->page
= o
->vm
.page
;
9066 j
->thread
= (pthread_t
) -1;
9074 /* Preload keys needed for the ZUNION and ZINTER commands. */
9075 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9077 num
= atoi(c
->argv
[2]->ptr
);
9078 for (i
= 0; i
< num
; i
++) {
9079 waitForSwappedKey(c
,c
->argv
[3+i
]);
9083 /* Is this client attempting to run a command against swapped keys?
9084 * If so, block it ASAP, load the keys in background, then resume it.
9086 * The important idea about this function is that it can fail! If keys will
9087 * still be swapped when the client is resumed, this key lookups will
9088 * just block loading keys from disk. In practical terms this should only
9089 * happen with SORT BY command or if there is a bug in this function.
9091 * Return 1 if the client is marked as blocked, 0 if the client can
9092 * continue as the keys it is going to access appear to be in memory. */
9093 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9096 if (cmd
->vm_preload_proc
!= NULL
) {
9097 cmd
->vm_preload_proc(c
);
9099 if (cmd
->vm_firstkey
== 0) return 0;
9100 last
= cmd
->vm_lastkey
;
9101 if (last
< 0) last
= c
->argc
+last
;
9102 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9103 waitForSwappedKey(c
,c
->argv
[j
]);
9106 /* If the client was blocked for at least one key, mark it as blocked. */
9107 if (listLength(c
->io_keys
)) {
9108 c
->flags
|= REDIS_IO_WAIT
;
9109 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9110 server
.vm_blocked_clients
++;
9117 /* Remove the 'key' from the list of blocked keys for a given client.
9119 * The function returns 1 when there are no longer blocking keys after
9120 * the current one was removed (and the client can be unblocked). */
9121 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9125 struct dictEntry
*de
;
9127 /* Remove the key from the list of keys this client is waiting for. */
9128 listRewind(c
->io_keys
,&li
);
9129 while ((ln
= listNext(&li
)) != NULL
) {
9130 if (compareStringObjects(ln
->value
,key
) == 0) {
9131 listDelNode(c
->io_keys
,ln
);
9137 /* Remove the client form the key => waiting clients map. */
9138 de
= dictFind(c
->db
->io_keys
,key
);
9140 l
= dictGetEntryVal(de
);
9141 ln
= listSearchKey(l
,c
);
9144 if (listLength(l
) == 0)
9145 dictDelete(c
->db
->io_keys
,key
);
9147 return listLength(c
->io_keys
) == 0;
9150 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9151 struct dictEntry
*de
;
9156 de
= dictFind(db
->io_keys
,key
);
9159 l
= dictGetEntryVal(de
);
9160 len
= listLength(l
);
9161 /* Note: we can't use something like while(listLength(l)) as the list
9162 * can be freed by the calling function when we remove the last element. */
9165 redisClient
*c
= ln
->value
;
9167 if (dontWaitForSwappedKey(c
,key
)) {
9168 /* Put the client in the list of clients ready to go as we
9169 * loaded all the keys about it. */
9170 listAddNodeTail(server
.io_ready_clients
,c
);
9175 /* =========================== Remote Configuration ========================= */
9177 static void configSetCommand(redisClient
*c
) {
9178 robj
*o
= getDecodedObject(c
->argv
[3]);
9179 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9180 zfree(server
.dbfilename
);
9181 server
.dbfilename
= zstrdup(o
->ptr
);
9182 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9183 zfree(server
.requirepass
);
9184 server
.requirepass
= zstrdup(o
->ptr
);
9185 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9186 zfree(server
.masterauth
);
9187 server
.masterauth
= zstrdup(o
->ptr
);
9188 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9189 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9191 addReplySds(c
,sdscatprintf(sdsempty(),
9192 "-ERR not supported CONFIG parameter %s\r\n",
9193 (char*)c
->argv
[2]->ptr
));
9198 addReply(c
,shared
.ok
);
/* CONFIG GET <pattern>.
 *
 * argv[2] is decoded and its ->ptr is used as a glob pattern. Each of the
 * parameters "dbfilename", "requirepass", "masterauth" and "maxmemory" that
 * matches the pattern (stringmatch() with its last argument set to 0) is
 * emitted as two bulk replies: the parameter name followed by its value.
 * lenobj is created with a NULL ptr and filled in at the end with the
 * multi-bulk header "*<matches*2>\r\n".
 *
 * NOTE(review): the declarations of 'buf' and 'matches', and whatever queues
 * lenobj on the client before the decrRefCount() below, are not visible in
 * this view. */
9201 static void configGetCommand(redisClient
*c
) {
9202 robj
*o
= getDecodedObject(c
->argv
[2]);
9203 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9204 char *pattern
= o
->ptr
;
9208 decrRefCount(lenobj
);
9210 if (stringmatch(pattern
,"dbfilename",0)) {
9211 addReplyBulkCString(c
,"dbfilename");
9212 addReplyBulkCString(c
,server
.dbfilename
);
9215 if (stringmatch(pattern
,"requirepass",0)) {
9216 addReplyBulkCString(c
,"requirepass");
9217 addReplyBulkCString(c
,server
.requirepass
);
9220 if (stringmatch(pattern
,"masterauth",0)) {
9221 addReplyBulkCString(c
,"masterauth");
9222 addReplyBulkCString(c
,server
.masterauth
);
9225 if (stringmatch(pattern
,"maxmemory",0)) {
/* NOTE(review): the format ends in "\n", so the maxmemory value sent to
 * the client carries a trailing newline, unlike the other values. This
 * looks unintended -- confirm before relying on the reply format. */
9228 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9229 addReplyBulkCString(c
,"maxmemory");
9230 addReplyBulkCString(c
,buf
);
/* Fill in the multi-bulk header: two elements (name, value) per match. */
9234 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9237 static void configCommand(redisClient
*c
) {
9238 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9239 if (c
->argc
!= 4) goto badarity
;
9240 configSetCommand(c
);
9241 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9242 if (c
->argc
!= 3) goto badarity
;
9243 configGetCommand(c
);
9244 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9245 if (c
->argc
!= 2) goto badarity
;
9246 server
.stat_numcommands
= 0;
9247 server
.stat_numconnections
= 0;
9248 server
.stat_expiredkeys
= 0;
9249 server
.stat_starttime
= time(NULL
);
9250 addReply(c
,shared
.ok
);
9252 addReplySds(c
,sdscatprintf(sdsempty(),
9253 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9258 addReplySds(c
,sdscatprintf(sdsempty(),
9259 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9260 (char*) c
->argv
[1]->ptr
));
9263 /* =========================== Pubsub implementation ======================== */
9265 /* Subscribe a client to a class. Returns 1 if the operation succeeded, or
9266 * 0 if the client was already subscribed to that class. */
9267 static int pubsubSubscribe(redisClient
*c
, robj
*class) {
9268 struct dictEntry
*de
;
9269 list
*clients
= NULL
;
9272 /* Add the class to the client -> classes hash table */
9273 if (dictAdd(c
->pubsub_classes
,class,NULL
) == DICT_OK
) {
9275 incrRefCount(class);
9276 /* Add the client to the class -> list of clients hash table */
9277 de
= dictFind(server
.pubsub_classes
,class);
9279 clients
= listCreate();
9280 dictAdd(server
.pubsub_classes
,class,clients
);
9281 incrRefCount(class);
9283 clients
= dictGetEntryVal(de
);
9285 listAddNodeTail(clients
,c
);
9287 /* Notify the client */
9288 addReply(c
,shared
.mbulk3
);
9289 addReply(c
,shared
.subscribebulk
);
9290 addReplyBulk(c
,class);
9291 addReplyLong(c
,dictSize(c
->pubsub_classes
));
9295 /* Unsubscribe a client from a class. Returns 1 if the operation succeeded, or
9296 * 0 if the client was not subscribed to the specified class. */
9297 static int pubsubUnsubscribe(redisClient
*c
, robj
*class, int notify
) {
9298 struct dictEntry
*de
;
9303 /* Remove the class from the client -> classes hash table */
9304 incrRefCount(class); /* class may be just a pointer to the same object
9305 we have in the hash tables. Protect it... */
9306 if (dictDelete(c
->pubsub_classes
,class) == DICT_OK
) {
9308 /* Remove the client from the class -> clients list hash table */
9309 de
= dictFind(server
.pubsub_classes
,class);
9311 clients
= dictGetEntryVal(de
);
9312 ln
= listSearchKey(clients
,c
);
9314 listDelNode(clients
,ln
);
9316 /* Notify the client */
9318 addReply(c
,shared
.mbulk3
);
9319 addReply(c
,shared
.unsubscribebulk
);
9320 addReplyBulk(c
,class);
9321 addReplyLong(c
,dictSize(c
->pubsub_classes
));
9323 decrRefCount(class); /* it is finally safe to release it */
9327 /* Unsubscribe from all the classes. Return the number of classes the
9328 * client was subscribed to. */
9329 static int pubsubUnsubscribeAll(redisClient
*c
, int notify
) {
9330 dictIterator
*di
= dictGetIterator(c
->pubsub_classes
);
9334 while((de
= dictNext(di
)) != NULL
) {
9335 robj
*class = dictGetEntryKey(de
);
9337 count
+= pubsubUnsubscribe(c
,class,notify
);
9339 dictReleaseIterator(di
);
9343 /* Publish a message */
9344 static int pubsubPublishMessage(robj
*class, robj
*message
) {
9346 struct dictEntry
*de
;
9348 de
= dictFind(server
.pubsub_classes
,class);
9350 list
*list
= dictGetEntryVal(de
);
9354 listRewind(list
,&li
);
9355 while ((ln
= listNext(&li
)) != NULL
) {
9356 redisClient
*c
= ln
->value
;
9358 addReply(c
,shared
.mbulk3
);
9359 addReply(c
,shared
.messagebulk
);
9360 addReplyBulk(c
,class);
9361 addReplyBulk(c
,message
);
9368 static void subscribeCommand(redisClient
*c
) {
9371 for (j
= 1; j
< c
->argc
; j
++)
9372 pubsubSubscribe(c
,c
->argv
[j
]);
9375 static void unsubscribeCommand(redisClient
*c
) {
9377 pubsubUnsubscribeAll(c
,1);
9382 for (j
= 1; j
< c
->argc
; j
++)
9383 pubsubUnsubscribe(c
,c
->argv
[j
],1);
9387 static void publishCommand(redisClient
*c
) {
9388 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9389 addReplyLong(c
,receivers
);
9392 /* ================================= Debugging ============================== */
9394 static void debugCommand(redisClient
*c
) {
9395 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9397 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9398 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9399 addReply(c
,shared
.err
);
9403 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9404 addReply(c
,shared
.err
);
9407 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9408 addReply(c
,shared
.ok
);
9409 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9411 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9412 addReply(c
,shared
.err
);
9415 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9416 addReply(c
,shared
.ok
);
9417 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9418 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9422 addReply(c
,shared
.nokeyerr
);
9425 key
= dictGetEntryKey(de
);
9426 val
= dictGetEntryVal(de
);
9427 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9428 key
->storage
== REDIS_VM_SWAPPING
)) {
9432 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9433 strenc
= strencoding
[val
->encoding
];
9435 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9438 addReplySds(c
,sdscatprintf(sdsempty(),
9439 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9440 "encoding:%s serializedlength:%lld\r\n",
9441 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9442 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9444 addReplySds(c
,sdscatprintf(sdsempty(),
9445 "+Key at:%p refcount:%d, value swapped at: page %llu "
9446 "using %llu pages\r\n",
9447 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9448 (unsigned long long) key
->vm
.usedpages
));
9450 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9451 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9454 if (!server
.vm_enabled
) {
9455 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9459 addReply(c
,shared
.nokeyerr
);
9462 key
= dictGetEntryKey(de
);
9463 val
= dictGetEntryVal(de
);
9464 /* If the key is shared we want to create a copy */
9465 if (key
->refcount
> 1) {
9466 robj
*newkey
= dupStringObject(key
);
9468 key
= dictGetEntryKey(de
) = newkey
;
9471 if (key
->storage
!= REDIS_VM_MEMORY
) {
9472 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9473 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9474 dictGetEntryVal(de
) = NULL
;
9475 addReply(c
,shared
.ok
);
9477 addReply(c
,shared
.err
);
9480 addReplySds(c
,sdsnew(
9481 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
9485 static void _redisAssert(char *estr
, char *file
, int line
) {
9486 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9487 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9488 #ifdef HAVE_BACKTRACE
9489 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9494 /* =================================== Main! ================================ */
9497 int linuxOvercommitMemoryValue(void) {
9498 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9502 if (fgets(buf
,64,fp
) == NULL
) {
9511 void linuxOvercommitMemoryWarning(void) {
9512 if (linuxOvercommitMemoryValue() == 0) {
9513 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9516 #endif /* __linux__ */
9518 static void daemonize(void) {
9522 if (fork() != 0) exit(0); /* parent exits */
9523 setsid(); /* create a new session */
9525 /* Every output goes to /dev/null. If Redis is daemonized but
9526 * the 'logfile' is set to 'stdout' in the configuration file
9527 * it will not log at all. */
9528 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9529 dup2(fd
, STDIN_FILENO
);
9530 dup2(fd
, STDOUT_FILENO
);
9531 dup2(fd
, STDERR_FILENO
);
9532 if (fd
> STDERR_FILENO
) close(fd
);
9534 /* Try to write the pid file */
9535 fp
= fopen(server
.pidfile
,"w");
9537 fprintf(fp
,"%d\n",getpid());
9542 static void version() {
9543 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on stderr.
 * NOTE(review): the statement that ends the process after printing is not
 * visible in this view; main() calls usage() and then carries on as if it
 * never returns -- confirm. */
9547 static void usage() {
9548 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9549 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9553 int main(int argc
, char **argv
) {
9558 if (strcmp(argv
[1], "-v") == 0 ||
9559 strcmp(argv
[1], "--version") == 0) version();
9560 if (strcmp(argv
[1], "--help") == 0) usage();
9561 resetServerSaveParams();
9562 loadServerConfig(argv
[1]);
9563 } else if ((argc
> 2)) {
9566 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9568 if (server
.daemonize
) daemonize();
9570 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9572 linuxOvercommitMemoryWarning();
9575 if (server
.appendonly
) {
9576 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9577 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9579 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9580 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9582 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9583 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9585 aeDeleteEventLoop(server
.el
);
9589 /* ============================= Backtrace support ========================= */
9591 #ifdef HAVE_BACKTRACE
9592 static char *findFuncName(void *pointer
, unsigned long *offset
);
9594 static void *getMcontextEip(ucontext_t
*uc
) {
9595 #if defined(__FreeBSD__)
9596 return (void*) uc
->uc_mcontext
.mc_eip
;
9597 #elif defined(__dietlibc__)
9598 return (void*) uc
->uc_mcontext
.eip
;
9599 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9601 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9603 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9605 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9606 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9607 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9609 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9611 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9612 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9613 #elif defined(__ia64__) /* Linux IA64 */
9614 return (void*) uc
->uc_mcontext
.sc_ip
;
9620 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9622 char **messages
= NULL
;
9623 int i
, trace_size
= 0;
9624 unsigned long offset
=0;
9625 ucontext_t
*uc
= (ucontext_t
*) secret
;
9627 REDIS_NOTUSED(info
);
9629 redisLog(REDIS_WARNING
,
9630 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9631 infostring
= genRedisInfoString();
9632 redisLog(REDIS_WARNING
, "%s",infostring
);
9633 /* It's not safe to sdsfree() the returned string under memory
9634 * corruption conditions. Let it leak as we are going to abort */
9636 trace_size
= backtrace(trace
, 100);
9637 /* overwrite sigaction with caller's address */
9638 if (getMcontextEip(uc
) != NULL
) {
9639 trace
[1] = getMcontextEip(uc
);
9641 messages
= backtrace_symbols(trace
, trace_size
);
9643 for (i
=1; i
<trace_size
; ++i
) {
9644 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9646 p
= strchr(messages
[i
],'+');
9647 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9648 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9650 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
9653 /* free(messages); Don't call free() with possibly corrupted memory. */
9657 static void setupSigSegvAction(void) {
9658 struct sigaction act
;
9660 sigemptyset (&act
.sa_mask
);
9661 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
9662 * is used. Otherwise, sa_handler is used */
9663 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
9664 act
.sa_sigaction
= segvHandler
;
9665 sigaction (SIGSEGV
, &act
, NULL
);
9666 sigaction (SIGBUS
, &act
, NULL
);
9667 sigaction (SIGFPE
, &act
, NULL
);
9668 sigaction (SIGILL
, &act
, NULL
);
9669 sigaction (SIGBUS
, &act
, NULL
);
9673 #include "staticsymbols.h"
9674 /* This function tries to convert a pointer into a function name. It's used in
9675 * order to provide a backtrace under segmentation fault that's able to
9676 * display functions declared as static (otherwise the backtrace is useless). */
9677 static char *findFuncName(void *pointer
, unsigned long *offset
){
9679 unsigned long off
, minoff
= 0;
9681 /* Try to match against the Symbol with the smallest offset */
9682 for (i
=0; symsTable
[i
].pointer
; i
++) {
9683 unsigned long lp
= (unsigned long) pointer
;
9685 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
9686 off
=lp
-symsTable
[i
].pointer
;
9687 if (ret
< 0 || off
< minoff
) {
9693 if (ret
== -1) return NULL
;
9695 return symsTable
[ret
].name
;
9697 #else /* HAVE_BACKTRACE */
9698 static void setupSigSegvAction(void) {
9700 #endif /* HAVE_BACKTRACE */