2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.10"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specify the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte, most DB keys, and many
152 * values, will fit inside. */
/* Length-type codes: each value is the two most significant bits of the first
 * length byte, as laid out in the format description above. */
153 #define REDIS_RDB_6BITLEN 0 /* 00: the length is the low 6 bits of this byte */
154 #define REDIS_RDB_14BITLEN 1 /* 01: 14 bit length, 6 bits here + 8 bits of the next byte */
155 #define REDIS_RDB_32BITLEN 2 /* 10: a full 32 bit length follows */
156 #define REDIS_RDB_ENCVAL 3 /* 11: a specially encoded object follows, see REDIS_RDB_ENC_* */
157 #define REDIS_RDB_LENERR UINT_MAX /* NOTE(review): presumably the error sentinel returned by the length reader -- confirm at the call sites */
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * intentionally unused parameter or variable does not trigger a compiler
 * warning. The argument is parenthesized so that a compound expression such
 * as REDIS_NOTUSED(a + b) is cast as a whole instead of only its first
 * operand (which would not even compile). */
#define REDIS_NOTUSED(V) ((void)(V))
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way:
 *
 * redisAssert(_e): when _e is false, passes the stringified expression plus
 *   __FILE__ and __LINE__ to _redisAssert() and then calls _exit(1).
 * redisPanic(_e):  unconditionally passes the stringified argument plus
 *   __FILE__ and __LINE__ to _redisPanic() and then calls _exit(1).
 *
 * Note that #_e stringifies the argument exactly as written, so a string
 * literal argument keeps its surrounding quotes in the reported text.
 *
 * Both replacement lists are fully parenthesized so that each macro expands
 * to one self-contained expression wherever it is used. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
244 /*================================= Data types ============================== */
246 /* A redis object, that is a type able to hold a string / list / set */
248 /* The VM object structure */
249 struct redisObjectVM
{
250 off_t page
; /* the page at which the object is stored on disk */
251 off_t usedpages
; /* number of pages used on disk */
252 time_t atime
; /* Last access time */
255 /* The actual Redis Object */
256 typedef struct redisObject
{
259 unsigned char encoding
;
260 unsigned char storage
; /* If this object is a key, where is the value?
261 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
262 unsigned char vtype
; /* If this object is a key, and value is swapped out,
263 * this is the type of the swapped out object. */
265 /* VM fields, these are only allocated if VM is active, otherwise the
266 * object allocation function will just allocate
267 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
268 * Redis without VM active will not have any overhead. */
269 struct redisObjectVM vm
;
272 /* Macro used to initialize a Redis object allocated on the stack.
273 * Note that this macro is kept near the structure definition to make sure
274 * we'll update it when the structure is changed, to avoid bugs like
275 * bug #85 introduced exactly in this way. */
276 #define initStaticStringObject(_var,_ptr) do { \
278 _var.type = REDIS_STRING; \
279 _var.encoding = REDIS_ENCODING_RAW; \
281 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
284 typedef struct redisDb
{
285 dict
*dict
; /* The keyspace for this DB */
286 dict
*expires
; /* Timeout of keys with a timeout set */
287 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
288 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
292 /* Client MULTI/EXEC state */
293 typedef struct multiCmd
{
296 struct redisCommand
*cmd
;
299 typedef struct multiState
{
300 multiCmd
*commands
; /* Array of MULTI commands */
301 int count
; /* Total number of MULTI commands */
304 /* With multiplexing we need to keep per-client state.
305 * Clients are kept in a linked list. */
306 typedef struct redisClient
{
311 robj
**argv
, **mbargv
;
313 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
314 int multibulk
; /* multi bulk command format active */
317 time_t lastinteraction
; /* time of the last interaction, used for timeout */
318 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
319 int slaveseldb
; /* slave selected db, if this client is a slave */
320 int authenticated
; /* when requirepass is non-NULL */
321 int replstate
; /* replication state if this is a slave */
322 int repldbfd
; /* replication DB file descriptor */
323 long repldboff
; /* replication DB file offset */
324 off_t repldbsize
; /* replication DB file size */
325 multiState mstate
; /* MULTI/EXEC state */
326 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
327 * operation such as BLPOP. Otherwise NULL. */
328 int blockingkeysnum
; /* Number of blocking keys */
329 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
330 * is >= blockingto then the operation timed out. */
331 list
*io_keys
; /* Keys this client is waiting to be loaded from the
332 * swap file in order to continue. */
333 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
334 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
342 /* Global server state structure */
347 long long dirty
; /* changes to DB from the last save */
349 list
*slaves
, *monitors
;
350 char neterr
[ANET_ERR_LEN
];
352 int cronloops
; /* number of times the cron function run */
353 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
354 time_t lastsave
; /* Unix time of the last successful save */
355 /* Fields used only for stats */
356 time_t stat_starttime
; /* server start time */
357 long long stat_numcommands
; /* number of processed commands */
358 long long stat_numconnections
; /* number of connections received */
359 long long stat_expiredkeys
; /* number of expired keys */
372 pid_t bgsavechildpid
;
373 pid_t bgrewritechildpid
;
374 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
375 struct saveparam
*saveparams
;
380 char *appendfilename
;
384 /* Replication related */
389 redisClient
*master
; /* client that is master for this slave */
391 unsigned int maxclients
;
392 unsigned long long maxmemory
;
393 unsigned int blpop_blocked_clients
;
394 unsigned int vm_blocked_clients
;
395 /* Sort parameters - qsort_r() is only available under BSD so we
396 * have to keep this state global, in order to pass it to sortCompare() */
400 /* Virtual memory configuration */
405 unsigned long long vm_max_memory
;
407 size_t hash_max_zipmap_entries
;
408 size_t hash_max_zipmap_value
;
409 /* Virtual memory state */
412 off_t vm_next_page
; /* Next probably empty page */
413 off_t vm_near_pages
; /* Number of pages allocated sequentially */
414 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
415 time_t unixtime
; /* Unix time sampled every second. */
416 /* Virtual memory I/O threads stuff */
417 /* An I/O thread processes an element taken from the io_newjobs queue and
418 * puts the result of the operation in the io_processed list. While the
419 * job is being processed, it's put on the io_processing queue. */
420 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
421 list
*io_processing
; /* List of VM I/O jobs being processed */
422 list
*io_processed
; /* List of VM I/O jobs already processed */
423 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
424 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
425 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
426 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
427 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
428 int io_active_threads
; /* Number of running I/O threads */
429 int vm_max_threads
; /* Max number of I/O threads running at the same time */
430 /* Our main thread is blocked on the event loop, looking for sockets ready
431 * to be read or written, so when a threaded I/O operation is ready to be
432 * processed by the main thread, the I/O thread will use a unix pipe to
433 * awake the main thread. The following are the two pipe FDs. */
434 int io_ready_pipe_read
;
435 int io_ready_pipe_write
;
436 /* Virtual memory stats */
437 unsigned long long vm_stats_used_pages
;
438 unsigned long long vm_stats_swapped_objects
;
439 unsigned long long vm_stats_swapouts
;
440 unsigned long long vm_stats_swapins
;
442 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
443 list
*pubsub_patterns
; /* A list of pubsub_patterns */
448 typedef struct pubsubPattern
{
/* Function type shared by the command callbacks: it receives the client and
 * returns nothing. struct redisCommand stores pointers to this type in its
 * 'proc' and 'vm_preload_proc' members. */
453 typedef void redisCommandProc(redisClient
*c
);
454 struct redisCommand
{
456 redisCommandProc
*proc
;
459 /* Use a function to determine which keys need to be loaded
460 * in the background prior to executing this command. Takes precedence
461 * over vm_firstkey and others, ignored when NULL */
462 redisCommandProc
*vm_preload_proc
;
463 /* What keys should be loaded in background when calling this command? */
464 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
465 int vm_lastkey
; /* The last argument that's a key */
466 int vm_keystep
; /* The step between first and last key */
469 struct redisFunctionSym
{
471 unsigned long pointer
;
474 typedef struct _redisSortObject
{
482 typedef struct _redisSortOperation
{
485 } redisSortOperation
;
487 /* ZSETs use a specialized version of Skiplists */
489 typedef struct zskiplistNode
{
490 struct zskiplistNode
**forward
;
491 struct zskiplistNode
*backward
;
497 typedef struct zskiplist
{
498 struct zskiplistNode
*header
, *tail
;
499 unsigned long length
;
503 typedef struct zset
{
508 /* Our shared "common" objects */
510 #define REDIS_SHARED_INTEGERS 10000
511 struct sharedObjectsStruct
{
512 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
513 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
514 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
515 *outofrangeerr
, *plus
,
516 *select0
, *select1
, *select2
, *select3
, *select4
,
517 *select5
, *select6
, *select7
, *select8
, *select9
,
518 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
519 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
520 *integers
[REDIS_SHARED_INTEGERS
];
523 /* Global vars that are actually used as constants. The following double
524 * values are used for double on-disk serialization, and are initialized
525 * at runtime to avoid strange compiler optimizations. */
527 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
529 /* VM threaded I/O request message */
530 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
531 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
532 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
533 typedef struct iojob
{
534 int type
; /* Request type, REDIS_IOJOB_* */
535 redisDb
*db
;/* Redis database */
536 robj
*key
; /* This I/O request is about swapping this key */
537 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
538 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
539 off_t page
; /* Swap page where to read/write the object */
540 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
541 int canceled
; /* True if this command was canceled by blocking side of VM */
542 pthread_t thread
; /* ID of the thread processing this entry */
545 /*================================ Prototypes =============================== */
547 static void freeStringObject(robj
*o
);
548 static void freeListObject(robj
*o
);
549 static void freeSetObject(robj
*o
);
550 static void decrRefCount(void *o
);
551 static robj
*createObject(int type
, void *ptr
);
552 static void freeClient(redisClient
*c
);
553 static int rdbLoad(char *filename
);
554 static void addReply(redisClient
*c
, robj
*obj
);
555 static void addReplySds(redisClient
*c
, sds s
);
556 static void incrRefCount(robj
*o
);
557 static int rdbSaveBackground(char *filename
);
558 static robj
*createStringObject(char *ptr
, size_t len
);
559 static robj
*dupStringObject(robj
*o
);
560 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
561 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
562 static int syncWithMaster(void);
563 static robj
*tryObjectEncoding(robj
*o
);
564 static robj
*getDecodedObject(robj
*o
);
565 static int removeExpire(redisDb
*db
, robj
*key
);
566 static int expireIfNeeded(redisDb
*db
, robj
*key
);
567 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
568 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
569 static int deleteKey(redisDb
*db
, robj
*key
);
570 static time_t getExpire(redisDb
*db
, robj
*key
);
571 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
572 static void updateSlavesWaitingBgsave(int bgsaveerr
);
573 static void freeMemoryIfNeeded(void);
574 static int processCommand(redisClient
*c
);
575 static void setupSigSegvAction(void);
576 static void rdbRemoveTempFile(pid_t childpid
);
577 static void aofRemoveTempFile(pid_t childpid
);
578 static size_t stringObjectLen(robj
*o
);
579 static void processInputBuffer(redisClient
*c
);
580 static zskiplist
*zslCreate(void);
581 static void zslFree(zskiplist
*zsl
);
582 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
583 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
584 static void initClientMultiState(redisClient
*c
);
585 static void freeClientMultiState(redisClient
*c
);
586 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
587 static void unblockClientWaitingData(redisClient
*c
);
588 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
589 static void vmInit(void);
590 static void vmMarkPagesFree(off_t page
, off_t count
);
591 static robj
*vmLoadObject(robj
*key
);
592 static robj
*vmPreviewObject(robj
*key
);
593 static int vmSwapOneObjectBlocking(void);
594 static int vmSwapOneObjectThreaded(void);
595 static int vmCanSwapOut(void);
596 static int tryFreeOneObjectFromFreelist(void);
597 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
598 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
599 static void vmCancelThreadedIOJob(robj
*o
);
600 static void lockThreadedIO(void);
601 static void unlockThreadedIO(void);
602 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
603 static void freeIOJob(iojob
*j
);
604 static void queueIOJob(iojob
*j
);
605 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
606 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
607 static void waitEmptyIOJobsQueue(void);
608 static void vmReopenSwapFile(void);
609 static int vmFreePage(off_t page
);
610 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
611 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
612 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
613 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
614 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
615 static struct redisCommand
*lookupCommand(char *name
);
616 static void call(redisClient
*c
, struct redisCommand
*cmd
);
617 static void resetClient(redisClient
*c
);
618 static void convertToRealHash(robj
*o
);
619 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
620 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
621 static void freePubsubPattern(void *p
);
622 static int listMatchPubsubPattern(void *a
, void *b
);
623 static int compareStringObjects(robj
*a
, robj
*b
);
625 static int rewriteAppendOnlyFileBackground(void);
627 static void authCommand(redisClient
*c
);
628 static void pingCommand(redisClient
*c
);
629 static void echoCommand(redisClient
*c
);
630 static void setCommand(redisClient
*c
);
631 static void setnxCommand(redisClient
*c
);
632 static void setexCommand(redisClient
*c
);
633 static void getCommand(redisClient
*c
);
634 static void delCommand(redisClient
*c
);
635 static void existsCommand(redisClient
*c
);
636 static void incrCommand(redisClient
*c
);
637 static void decrCommand(redisClient
*c
);
638 static void incrbyCommand(redisClient
*c
);
639 static void decrbyCommand(redisClient
*c
);
640 static void selectCommand(redisClient
*c
);
641 static void randomkeyCommand(redisClient
*c
);
642 static void keysCommand(redisClient
*c
);
643 static void dbsizeCommand(redisClient
*c
);
644 static void lastsaveCommand(redisClient
*c
);
645 static void saveCommand(redisClient
*c
);
646 static void bgsaveCommand(redisClient
*c
);
647 static void bgrewriteaofCommand(redisClient
*c
);
648 static void shutdownCommand(redisClient
*c
);
649 static void moveCommand(redisClient
*c
);
650 static void renameCommand(redisClient
*c
);
651 static void renamenxCommand(redisClient
*c
);
652 static void lpushCommand(redisClient
*c
);
653 static void rpushCommand(redisClient
*c
);
654 static void lpopCommand(redisClient
*c
);
655 static void rpopCommand(redisClient
*c
);
656 static void llenCommand(redisClient
*c
);
657 static void lindexCommand(redisClient
*c
);
658 static void lrangeCommand(redisClient
*c
);
659 static void ltrimCommand(redisClient
*c
);
660 static void typeCommand(redisClient
*c
);
661 static void lsetCommand(redisClient
*c
);
662 static void saddCommand(redisClient
*c
);
663 static void sremCommand(redisClient
*c
);
664 static void smoveCommand(redisClient
*c
);
665 static void sismemberCommand(redisClient
*c
);
666 static void scardCommand(redisClient
*c
);
667 static void spopCommand(redisClient
*c
);
668 static void srandmemberCommand(redisClient
*c
);
669 static void sinterCommand(redisClient
*c
);
670 static void sinterstoreCommand(redisClient
*c
);
671 static void sunionCommand(redisClient
*c
);
672 static void sunionstoreCommand(redisClient
*c
);
673 static void sdiffCommand(redisClient
*c
);
674 static void sdiffstoreCommand(redisClient
*c
);
675 static void syncCommand(redisClient
*c
);
676 static void flushdbCommand(redisClient
*c
);
677 static void flushallCommand(redisClient
*c
);
678 static void sortCommand(redisClient
*c
);
679 static void lremCommand(redisClient
*c
);
/* NOTE(review): this is the only handler prototype in this list spelled with
 * a lowercase "command" suffix; every sibling uses "...Command". The cmdTable
 * "rpoplpush" entry refers to this exact spelling, so a rename has to touch
 * the prototype, the definition and the table entry together. */
680 static void rpoplpushcommand(redisClient
*c
);
681 static void infoCommand(redisClient
*c
);
682 static void mgetCommand(redisClient
*c
);
683 static void monitorCommand(redisClient
*c
);
684 static void expireCommand(redisClient
*c
);
685 static void expireatCommand(redisClient
*c
);
686 static void getsetCommand(redisClient
*c
);
687 static void ttlCommand(redisClient
*c
);
688 static void slaveofCommand(redisClient
*c
);
689 static void debugCommand(redisClient
*c
);
690 static void msetCommand(redisClient
*c
);
691 static void msetnxCommand(redisClient
*c
);
692 static void zaddCommand(redisClient
*c
);
693 static void zincrbyCommand(redisClient
*c
);
694 static void zrangeCommand(redisClient
*c
);
695 static void zrangebyscoreCommand(redisClient
*c
);
696 static void zcountCommand(redisClient
*c
);
697 static void zrevrangeCommand(redisClient
*c
);
698 static void zcardCommand(redisClient
*c
);
699 static void zremCommand(redisClient
*c
);
700 static void zscoreCommand(redisClient
*c
);
701 static void zremrangebyscoreCommand(redisClient
*c
);
702 static void multiCommand(redisClient
*c
);
703 static void execCommand(redisClient
*c
);
704 static void discardCommand(redisClient
*c
);
705 static void blpopCommand(redisClient
*c
);
706 static void brpopCommand(redisClient
*c
);
707 static void appendCommand(redisClient
*c
);
708 static void substrCommand(redisClient
*c
);
709 static void zrankCommand(redisClient
*c
);
710 static void zrevrankCommand(redisClient
*c
);
711 static void hsetCommand(redisClient
*c
);
712 static void hsetnxCommand(redisClient
*c
);
713 static void hgetCommand(redisClient
*c
);
714 static void hmsetCommand(redisClient
*c
);
715 static void hmgetCommand(redisClient
*c
);
716 static void hdelCommand(redisClient
*c
);
717 static void hlenCommand(redisClient
*c
);
718 static void zremrangebyrankCommand(redisClient
*c
);
719 static void zunionCommand(redisClient
*c
);
720 static void zinterCommand(redisClient
*c
);
721 static void hkeysCommand(redisClient
*c
);
722 static void hvalsCommand(redisClient
*c
);
723 static void hgetallCommand(redisClient
*c
);
724 static void hexistsCommand(redisClient
*c
);
725 static void configCommand(redisClient
*c
);
726 static void hincrbyCommand(redisClient
*c
);
727 static void subscribeCommand(redisClient
*c
);
728 static void unsubscribeCommand(redisClient
*c
);
729 static void psubscribeCommand(redisClient
*c
);
730 static void punsubscribeCommand(redisClient
*c
);
731 static void publishCommand(redisClient
*c
);
733 /*================================= Globals ================================= */
736 static struct redisServer server
; /* server global state */
737 static struct redisCommand cmdTable
[] = {
738 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
739 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
740 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
741 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
742 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
743 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
744 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
745 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
747 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
748 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
749 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
750 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
751 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
758 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
761 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
762 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
763 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
764 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
765 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
766 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
767 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
768 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
769 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
770 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
771 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
772 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
773 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
774 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
775 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
777 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
778 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
779 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
781 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
782 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
783 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
784 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
789 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
791 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
794 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
796 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
798 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
803 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
804 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
805 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
806 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
807 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
808 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
812 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
813 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
820 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
826 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
828 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
833 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
836 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
839 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
844 {NULL
,NULL
,0,0,NULL
,0,0,0}
847 /*============================ Utility functions ============================ */
849 /* Glob-style pattern matching. */
850 static int stringmatchlen(const char *pattern
, int patternLen
,
851 const char *string
, int stringLen
, int nocase
)
856 while (pattern
[1] == '*') {
861 return 1; /* match */
863 if (stringmatchlen(pattern
+1, patternLen
-1,
864 string
, stringLen
, nocase
))
865 return 1; /* match */
869 return 0; /* no match */
873 return 0; /* no match */
883 not = pattern
[0] == '^';
890 if (pattern
[0] == '\\') {
893 if (pattern
[0] == string
[0])
895 } else if (pattern
[0] == ']') {
897 } else if (patternLen
== 0) {
901 } else if (pattern
[1] == '-' && patternLen
>= 3) {
902 int start
= pattern
[0];
903 int end
= pattern
[2];
911 start
= tolower(start
);
917 if (c
>= start
&& c
<= end
)
921 if (pattern
[0] == string
[0])
924 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
934 return 0; /* no match */
940 if (patternLen
>= 2) {
947 if (pattern
[0] != string
[0])
948 return 0; /* no match */
950 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
951 return 0; /* no match */
959 if (stringLen
== 0) {
960 while(*pattern
== '*') {
967 if (patternLen
== 0 && stringLen
== 0)
972 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
973 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
976 /* Convert a string representing an amount of memory into the number of
977 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
980 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
982 static long long memtoll(const char *p
, int *err
) {
985 long mul
; /* unit multiplier */
990 /* Search the first non digit character. */
993 while(*u
&& isdigit(*u
)) u
++;
994 if (*u
== '\0' || !strcasecmp(u
,"b")) {
996 } else if (!strcasecmp(u
,"k")) {
998 } else if (!strcasecmp(u
,"kb")) {
1000 } else if (!strcasecmp(u
,"m")) {
1002 } else if (!strcasecmp(u
,"mb")) {
1004 } else if (!strcasecmp(u
,"g")) {
1005 mul
= 1000L*1000*1000;
1006 } else if (!strcasecmp(u
,"gb")) {
1007 mul
= 1024L*1024*1024;
1013 if (digits
>= sizeof(buf
)) {
1017 memcpy(buf
,p
,digits
);
1019 val
= strtoll(buf
,NULL
,10);
1023 static void redisLog(int level
, const char *fmt
, ...) {
1027 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1031 if (level
>= server
.verbosity
) {
1037 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1038 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1039 vfprintf(fp
, fmt
, ap
);
1045 if (server
.logfile
) fclose(fp
);
1048 /*====================== Hash table type implementation ==================== */
1050 /* This is a hash table type that uses the SDS dynamic strings library as
1051 * keys and redis objects as values (objects can hold SDS strings,
1054 static void dictVanillaFree(void *privdata
, void *val
)
1056 DICT_NOTUSED(privdata
);
1060 static void dictListDestructor(void *privdata
, void *val
)
1062 DICT_NOTUSED(privdata
);
1063 listRelease((list
*)val
);
1066 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1070 DICT_NOTUSED(privdata
);
1072 l1
= sdslen((sds
)key1
);
1073 l2
= sdslen((sds
)key2
);
1074 if (l1
!= l2
) return 0;
1075 return memcmp(key1
, key2
, l1
) == 0;
1078 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1080 DICT_NOTUSED(privdata
);
1082 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1086 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1089 const robj
*o1
= key1
, *o2
= key2
;
1090 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1093 static unsigned int dictObjHash(const void *key
) {
1094 const robj
*o
= key
;
1095 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1098 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1101 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1104 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1105 o2
->encoding
== REDIS_ENCODING_INT
&&
1106 o1
->ptr
== o2
->ptr
) return 1;
1108 o1
= getDecodedObject(o1
);
1109 o2
= getDecodedObject(o2
);
1110 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1116 static unsigned int dictEncObjHash(const void *key
) {
1117 robj
*o
= (robj
*) key
;
1119 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1120 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1122 if (o
->encoding
== REDIS_ENCODING_INT
) {
1126 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1127 return dictGenHashFunction((unsigned char*)buf
, len
);
1131 o
= getDecodedObject(o
);
1132 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1139 /* Sets type and expires */
1140 static dictType setDictType
= {
1141 dictEncObjHash
, /* hash function */
1144 dictEncObjKeyCompare
, /* key compare */
1145 dictRedisObjectDestructor
, /* key destructor */
1146 NULL
/* val destructor */
1149 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1150 static dictType zsetDictType
= {
1151 dictEncObjHash
, /* hash function */
1154 dictEncObjKeyCompare
, /* key compare */
1155 dictRedisObjectDestructor
, /* key destructor */
1156 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1160 static dictType dbDictType
= {
1161 dictObjHash
, /* hash function */
1164 dictObjKeyCompare
, /* key compare */
1165 dictRedisObjectDestructor
, /* key destructor */
1166 dictRedisObjectDestructor
/* val destructor */
1170 static dictType keyptrDictType
= {
1171 dictObjHash
, /* hash function */
1174 dictObjKeyCompare
, /* key compare */
1175 dictRedisObjectDestructor
, /* key destructor */
1176 NULL
/* val destructor */
1179 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1180 static dictType hashDictType
= {
1181 dictEncObjHash
, /* hash function */
1184 dictEncObjKeyCompare
, /* key compare */
1185 dictRedisObjectDestructor
, /* key destructor */
1186 dictRedisObjectDestructor
/* val destructor */
1189 /* Keylist hash table type has unencoded redis objects as keys and
1190 * lists as values. It's used for blocking operations (BLPOP) and to
1191 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1192 static dictType keylistDictType
= {
1193 dictObjHash
, /* hash function */
1196 dictObjKeyCompare
, /* key compare */
1197 dictRedisObjectDestructor
, /* key destructor */
1198 dictListDestructor
/* val destructor */
1201 static void version();
1203 /* ========================= Random utility functions ======================= */
1205 /* Redis generally does not try to recover from out of memory conditions
1206 * when allocating objects or strings, it is not clear if it will be possible
1207 * to report this condition to the client since the networking layer itself
1208 * is based on heap allocation for send buffers, so we simply abort.
1209 * At least the code will be simpler to read... */
1210 static void oom(const char *msg
) {
1211 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1216 /* ====================== Redis server networking stuff ===================== */
1217 static void closeTimedoutClients(void) {
1220 time_t now
= time(NULL
);
1223 listRewind(server
.clients
,&li
);
1224 while ((ln
= listNext(&li
)) != NULL
) {
1225 c
= listNodeValue(ln
);
1226 if (server
.maxidletime
&&
1227 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1228 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1229 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1230 listLength(c
->pubsub_patterns
) == 0 &&
1231 (now
- c
->lastinteraction
> server
.maxidletime
))
1233 redisLog(REDIS_VERBOSE
,"Closing idle client");
1235 } else if (c
->flags
& REDIS_BLOCKED
) {
1236 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1237 addReply(c
,shared
.nullmultibulk
);
1238 unblockClientWaitingData(c
);
1244 static int htNeedsResize(dict
*dict
) {
1245 long long size
, used
;
1247 size
= dictSlots(dict
);
1248 used
= dictSize(dict
);
1249 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1250 (used
*100/size
< REDIS_HT_MINFILL
));
1253 /* If the percentage of used slots in the HT falls below REDIS_HT_MINFILL
1254 * we resize the hash table to save memory */
1255 static void tryResizeHashTables(void) {
1258 for (j
= 0; j
< server
.dbnum
; j
++) {
1259 if (htNeedsResize(server
.db
[j
].dict
))
1260 dictResize(server
.db
[j
].dict
);
1261 if (htNeedsResize(server
.db
[j
].expires
))
1262 dictResize(server
.db
[j
].expires
);
1266 /* Our hash table implementation performs rehashing incrementally while
1267 * we write/read from the hash table. Still if the server is idle, the hash
1268 * table will use two tables for a long time. So we try to use 1 millisecond
1269 * of CPU time at every serverCron() loop in order to rehash some key. */
1270 static void incrementallyRehash(void) {
1273 for (j
= 0; j
< server
.dbnum
; j
++) {
1274 if (dictIsRehashing(server
.db
[j
].dict
)) {
1275 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1276 break; /* already used our millisecond for this loop... */
1281 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1282 void backgroundSaveDoneHandler(int statloc
) {
1283 int exitcode
= WEXITSTATUS(statloc
);
1284 int bysignal
= WIFSIGNALED(statloc
);
1286 if (!bysignal
&& exitcode
== 0) {
1287 redisLog(REDIS_NOTICE
,
1288 "Background saving terminated with success");
1290 server
.lastsave
= time(NULL
);
1291 } else if (!bysignal
&& exitcode
!= 0) {
1292 redisLog(REDIS_WARNING
, "Background saving error");
1294 redisLog(REDIS_WARNING
,
1295 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1296 rdbRemoveTempFile(server
.bgsavechildpid
);
1298 server
.bgsavechildpid
= -1;
1299 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1300 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1301 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1304 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1306 void backgroundRewriteDoneHandler(int statloc
) {
1307 int exitcode
= WEXITSTATUS(statloc
);
1308 int bysignal
= WIFSIGNALED(statloc
);
1310 if (!bysignal
&& exitcode
== 0) {
1314 redisLog(REDIS_NOTICE
,
1315 "Background append only file rewriting terminated with success");
1316 /* Now it's time to flush the differences accumulated by the parent */
1317 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1318 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1320 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1323 /* Flush our data... */
1324 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1325 (signed) sdslen(server
.bgrewritebuf
)) {
1326 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1330 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1331 /* Now our work is to rename the temp file into the stable file. And
1332 * switch the file descriptor used by the server for append only. */
1333 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1334 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1338 /* Mission completed... almost */
1339 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1340 if (server
.appendfd
!= -1) {
1341 /* If append only is actually enabled... */
1342 close(server
.appendfd
);
1343 server
.appendfd
= fd
;
1345 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1346 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1348 /* If append only is disabled we just generate a dump in this
1349 * format. Why not? */
1352 } else if (!bysignal
&& exitcode
!= 0) {
1353 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1355 redisLog(REDIS_WARNING
,
1356 "Background append only file rewriting terminated by signal %d",
1360 sdsfree(server
.bgrewritebuf
);
1361 server
.bgrewritebuf
= sdsempty();
1362 aofRemoveTempFile(server
.bgrewritechildpid
);
1363 server
.bgrewritechildpid
= -1;
1366 /* This function is called once a background process of some kind terminates,
1367 * as we want to avoid resizing the hash tables when there is a child in order
1368 * to play well with copy-on-write (otherwise when a resize happens lots of
1369 * memory pages are copied). The goal of this function is to update the ability
1370 * for dict.c to resize the hash tables according to whether or not we have
1371 * running children. */
1372 static void updateDictResizePolicy(void) {
1373 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1376 dictDisableResize();
1379 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1380 int j
, loops
= server
.cronloops
++;
1381 REDIS_NOTUSED(eventLoop
);
1383 REDIS_NOTUSED(clientData
);
1385 /* We take a cached value of the unix time in the global state because
1386 * with virtual memory and aging there is a need to store the current time
1387 * in objects at every object access, and accuracy is not needed.
1388 * To access a global var is faster than calling time(NULL) */
1389 server
.unixtime
= time(NULL
);
1391 /* Show some info about non-empty databases */
1392 for (j
= 0; j
< server
.dbnum
; j
++) {
1393 long long size
, used
, vkeys
;
1395 size
= dictSlots(server
.db
[j
].dict
);
1396 used
= dictSize(server
.db
[j
].dict
);
1397 vkeys
= dictSize(server
.db
[j
].expires
);
1398 if (!(loops
% 50) && (used
|| vkeys
)) {
1399 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1400 /* dictPrintStats(server.dict); */
1404 /* We don't want to resize the hash tables while a background saving
1405 * is in progress: the saving child is created using fork() that is
1406 * implemented with a copy-on-write semantic in most modern systems, so
1407 * if we resize the HT while there is the saving child at work actually
1408 * a lot of memory movements in the parent will cause a lot of pages
1410 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1411 if (!(loops
% 10)) tryResizeHashTables();
1412 if (server
.activerehashing
) incrementallyRehash();
1415 /* Show information about connected clients */
1416 if (!(loops
% 50)) {
1417 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1418 listLength(server
.clients
)-listLength(server
.slaves
),
1419 listLength(server
.slaves
),
1420 zmalloc_used_memory());
1423 /* Close connections of timedout clients */
1424 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1425 closeTimedoutClients();
1427 /* Check if a background saving or AOF rewrite in progress terminated */
1428 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1432 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1433 if (pid
== server
.bgsavechildpid
) {
1434 backgroundSaveDoneHandler(statloc
);
1436 backgroundRewriteDoneHandler(statloc
);
1438 updateDictResizePolicy();
1441 /* If there is not a background saving in progress check if
1442 * we have to save now */
1443 time_t now
= time(NULL
);
1444 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1445 struct saveparam
*sp
= server
.saveparams
+j
;
1447 if (server
.dirty
>= sp
->changes
&&
1448 now
-server
.lastsave
> sp
->seconds
) {
1449 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1450 sp
->changes
, sp
->seconds
);
1451 rdbSaveBackground(server
.dbfilename
);
1457 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1458 * will use few CPU cycles if there are few expiring keys, otherwise
1459 * it will get more aggressive to avoid that too much memory is used by
1460 * keys that can be removed from the keyspace. */
1461 for (j
= 0; j
< server
.dbnum
; j
++) {
1463 redisDb
*db
= server
.db
+j
;
1465 /* Continue to expire if at the end of the cycle more than 25%
1466 * of the keys were expired. */
1468 long num
= dictSize(db
->expires
);
1469 time_t now
= time(NULL
);
1472 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1473 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1478 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1479 t
= (time_t) dictGetEntryVal(de
);
1481 deleteKey(db
,dictGetEntryKey(de
));
1483 server
.stat_expiredkeys
++;
1486 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1489 /* Swap a few keys on disk if we are over the memory limit and VM
1490 * is enabled. Try to free objects from the free list first. */
1491 if (vmCanSwapOut()) {
1492 while (server
.vm_enabled
&& zmalloc_used_memory() >
1493 server
.vm_max_memory
)
1497 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1498 retval
= (server
.vm_max_threads
== 0) ?
1499 vmSwapOneObjectBlocking() :
1500 vmSwapOneObjectThreaded();
1501 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1502 zmalloc_used_memory() >
1503 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1505 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1507 /* Note that when using threaded I/O we free just one object,
1508 * because when the I/O thread in charge of swapping this
1509 * object out finishes, the handler of completed jobs
1510 * will try to swap more objects if we are still out of memory. */
1511 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1515 /* Check if we should connect to a MASTER */
1516 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1517 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1518 if (syncWithMaster() == REDIS_OK
) {
1519 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1520 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1526 /* This function gets called every time Redis is entering the
1527 * main loop of the event driven library, that is, before sleeping
1528 * for ready file descriptors. */
1529 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1530 REDIS_NOTUSED(eventLoop
);
1532 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1536 listRewind(server
.io_ready_clients
,&li
);
1537 while((ln
= listNext(&li
))) {
1538 redisClient
*c
= ln
->value
;
1539 struct redisCommand
*cmd
;
1541 /* Resume the client. */
1542 listDelNode(server
.io_ready_clients
,ln
);
1543 c
->flags
&= (~REDIS_IO_WAIT
);
1544 server
.vm_blocked_clients
--;
1545 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1546 readQueryFromClient
, c
);
1547 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1548 assert(cmd
!= NULL
);
1551 /* There may be more data to process in the input buffer. */
1552 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1553 processInputBuffer(c
);
1558 static void createSharedObjects(void) {
1561 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1562 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1563 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1564 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1565 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1566 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1567 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1568 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1569 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1570 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1571 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1572 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1573 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1574 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1575 "-ERR no such key\r\n"));
1576 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1577 "-ERR syntax error\r\n"));
1578 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1579 "-ERR source and destination objects are the same\r\n"));
1580 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1581 "-ERR index out of range\r\n"));
1582 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1583 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1584 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1585 shared
.select0
= createStringObject("select 0\r\n",10);
1586 shared
.select1
= createStringObject("select 1\r\n",10);
1587 shared
.select2
= createStringObject("select 2\r\n",10);
1588 shared
.select3
= createStringObject("select 3\r\n",10);
1589 shared
.select4
= createStringObject("select 4\r\n",10);
1590 shared
.select5
= createStringObject("select 5\r\n",10);
1591 shared
.select6
= createStringObject("select 6\r\n",10);
1592 shared
.select7
= createStringObject("select 7\r\n",10);
1593 shared
.select8
= createStringObject("select 8\r\n",10);
1594 shared
.select9
= createStringObject("select 9\r\n",10);
1595 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1596 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1597 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1598 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1599 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1600 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1601 shared
.mbulk3
= createStringObject("*3\r\n",4);
1602 shared
.mbulk4
= createStringObject("*4\r\n",4);
1603 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1604 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1605 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1609 static void appendServerSaveParams(time_t seconds
, int changes
) {
1610 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1611 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1612 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1613 server
.saveparamslen
++;
1616 static void resetServerSaveParams() {
1617 zfree(server
.saveparams
);
1618 server
.saveparams
= NULL
;
1619 server
.saveparamslen
= 0;
1622 static void initServerConfig() {
1623 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1624 server
.port
= REDIS_SERVERPORT
;
1625 server
.verbosity
= REDIS_VERBOSE
;
1626 server
.maxidletime
= REDIS_MAXIDLETIME
;
1627 server
.saveparams
= NULL
;
1628 server
.logfile
= NULL
; /* NULL = log on standard output */
1629 server
.bindaddr
= NULL
;
1630 server
.glueoutputbuf
= 1;
1631 server
.daemonize
= 0;
1632 server
.appendonly
= 0;
1633 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1634 server
.lastfsync
= time(NULL
);
1635 server
.appendfd
= -1;
1636 server
.appendseldb
= -1; /* Make sure the first time will not match */
1637 server
.pidfile
= zstrdup("/var/run/redis.pid");
1638 server
.dbfilename
= zstrdup("dump.rdb");
1639 server
.appendfilename
= zstrdup("appendonly.aof");
1640 server
.requirepass
= NULL
;
1641 server
.rdbcompression
= 1;
1642 server
.activerehashing
= 1;
1643 server
.maxclients
= 0;
1644 server
.blpop_blocked_clients
= 0;
1645 server
.maxmemory
= 0;
1646 server
.vm_enabled
= 0;
1647 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1648 server
.vm_page_size
= 256; /* 256 bytes per page */
1649 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1650 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1651 server
.vm_max_threads
= 4;
1652 server
.vm_blocked_clients
= 0;
1653 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1654 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1656 resetServerSaveParams();
1658 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1659 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1660 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1661 /* Replication related */
1663 server
.masterauth
= NULL
;
1664 server
.masterhost
= NULL
;
1665 server
.masterport
= 6379;
1666 server
.master
= NULL
;
1667 server
.replstate
= REDIS_REPL_NONE
;
1669 /* Double constants initialization */
1671 R_PosInf
= 1.0/R_Zero
;
1672 R_NegInf
= -1.0/R_Zero
;
1673 R_Nan
= R_Zero
/R_Zero
;
1676 static void initServer() {
1679 signal(SIGHUP
, SIG_IGN
);
1680 signal(SIGPIPE
, SIG_IGN
);
1681 setupSigSegvAction();
1683 server
.devnull
= fopen("/dev/null","w");
1684 if (server
.devnull
== NULL
) {
1685 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1688 server
.clients
= listCreate();
1689 server
.slaves
= listCreate();
1690 server
.monitors
= listCreate();
1691 server
.objfreelist
= listCreate();
1692 createSharedObjects();
1693 server
.el
= aeCreateEventLoop();
1694 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1695 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1696 if (server
.fd
== -1) {
1697 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1700 for (j
= 0; j
< server
.dbnum
; j
++) {
1701 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1702 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1703 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1704 if (server
.vm_enabled
)
1705 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1706 server
.db
[j
].id
= j
;
1708 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1709 server
.pubsub_patterns
= listCreate();
1710 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1711 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1712 server
.cronloops
= 0;
1713 server
.bgsavechildpid
= -1;
1714 server
.bgrewritechildpid
= -1;
1715 server
.bgrewritebuf
= sdsempty();
1716 server
.lastsave
= time(NULL
);
1718 server
.stat_numcommands
= 0;
1719 server
.stat_numconnections
= 0;
1720 server
.stat_expiredkeys
= 0;
1721 server
.stat_starttime
= time(NULL
);
1722 server
.unixtime
= time(NULL
);
1723 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1724 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1725 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1727 if (server
.appendonly
) {
1728 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1729 if (server
.appendfd
== -1) {
1730 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1736 if (server
.vm_enabled
) vmInit();
1739 /* Empty the whole database */
1740 static long long emptyDb() {
1742 long long removed
= 0;
1744 for (j
= 0; j
< server
.dbnum
; j
++) {
1745 removed
+= dictSize(server
.db
[j
].dict
);
1746 dictEmpty(server
.db
[j
].dict
);
1747 dictEmpty(server
.db
[j
].expires
);
1752 static int yesnotoi(char *s
) {
1753 if (!strcasecmp(s
,"yes")) return 1;
1754 else if (!strcasecmp(s
,"no")) return 0;
1758 /* I agree, this is a very rudimentary way to load a configuration...
1759 will improve later if the config gets more complex */
1760 static void loadServerConfig(char *filename
) {
1762 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1766 if (filename
[0] == '-' && filename
[1] == '\0')
1769 if ((fp
= fopen(filename
,"r")) == NULL
) {
1770 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1775 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1781 line
= sdstrim(line
," \t\r\n");
1783 /* Skip comments and blank lines*/
1784 if (line
[0] == '#' || line
[0] == '\0') {
1789 /* Split into arguments */
1790 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1791 sdstolower(argv
[0]);
1793 /* Execute config directives */
1794 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1795 server
.maxidletime
= atoi(argv
[1]);
1796 if (server
.maxidletime
< 0) {
1797 err
= "Invalid timeout value"; goto loaderr
;
1799 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1800 server
.port
= atoi(argv
[1]);
1801 if (server
.port
< 1 || server
.port
> 65535) {
1802 err
= "Invalid port"; goto loaderr
;
1804 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1805 server
.bindaddr
= zstrdup(argv
[1]);
1806 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1807 int seconds
= atoi(argv
[1]);
1808 int changes
= atoi(argv
[2]);
1809 if (seconds
< 1 || changes
< 0) {
1810 err
= "Invalid save parameters"; goto loaderr
;
1812 appendServerSaveParams(seconds
,changes
);
1813 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1814 if (chdir(argv
[1]) == -1) {
1815 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1816 argv
[1], strerror(errno
));
1819 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1820 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1821 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1822 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1823 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1825 err
= "Invalid log level. Must be one of debug, notice, warning";
1828 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1831 server
.logfile
= zstrdup(argv
[1]);
1832 if (!strcasecmp(server
.logfile
,"stdout")) {
1833 zfree(server
.logfile
);
1834 server
.logfile
= NULL
;
1836 if (server
.logfile
) {
1837 /* Test if we are able to open the file. The server will not
1838 * be able to abort just for this problem later... */
1839 logfp
= fopen(server
.logfile
,"a");
1840 if (logfp
== NULL
) {
1841 err
= sdscatprintf(sdsempty(),
1842 "Can't open the log file: %s", strerror(errno
));
1847 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1848 server
.dbnum
= atoi(argv
[1]);
1849 if (server
.dbnum
< 1) {
1850 err
= "Invalid number of databases"; goto loaderr
;
1852 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1853 loadServerConfig(argv
[1]);
1854 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1855 server
.maxclients
= atoi(argv
[1]);
1856 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1857 server
.maxmemory
= memtoll(argv
[1],NULL
);
1858 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1859 server
.masterhost
= sdsnew(argv
[1]);
1860 server
.masterport
= atoi(argv
[2]);
1861 server
.replstate
= REDIS_REPL_CONNECT
;
1862 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1863 server
.masterauth
= zstrdup(argv
[1]);
1864 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1865 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1866 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1868 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1869 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1870 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1872 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1873 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1874 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1876 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1877 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1878 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1880 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1881 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1882 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1884 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1885 if (!strcasecmp(argv
[1],"no")) {
1886 server
.appendfsync
= APPENDFSYNC_NO
;
1887 } else if (!strcasecmp(argv
[1],"always")) {
1888 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1889 } else if (!strcasecmp(argv
[1],"everysec")) {
1890 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1892 err
= "argument must be 'no', 'always' or 'everysec'";
1895 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1896 server
.requirepass
= zstrdup(argv
[1]);
1897 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1898 zfree(server
.pidfile
);
1899 server
.pidfile
= zstrdup(argv
[1]);
1900 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1901 zfree(server
.dbfilename
);
1902 server
.dbfilename
= zstrdup(argv
[1]);
1903 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1904 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1905 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1907 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1908 zfree(server
.vm_swap_file
);
1909 server
.vm_swap_file
= zstrdup(argv
[1]);
1910 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1911 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1912 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1913 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1914 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1915 server
.vm_pages
= memtoll(argv
[1], NULL
);
1916 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1917 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1918 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1919 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1920 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1921 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1923 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1925 for (j
= 0; j
< argc
; j
++)
1930 if (fp
!= stdin
) fclose(fp
);
1934 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1935 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1936 fprintf(stderr
, ">>> '%s'\n", line
);
1937 fprintf(stderr
, "%s\n", err
);
1941 static void freeClientArgv(redisClient
*c
) {
1944 for (j
= 0; j
< c
->argc
; j
++)
1945 decrRefCount(c
->argv
[j
]);
1946 for (j
= 0; j
< c
->mbargc
; j
++)
1947 decrRefCount(c
->mbargv
[j
]);
1952 static void freeClient(redisClient
*c
) {
1955 /* Note that if the client we are freeing is blocked in a blocking
1956 * call, we have to set querybuf to NULL *before* calling
1957 * unblockClientWaitingData(), so that processInputBuffer() does not get
1958 * called. Also it is important to remove the file events after
1959 * this, because this call adds the READABLE event. */
1960 sdsfree(c
->querybuf
);
1962 if (c
->flags
& REDIS_BLOCKED
)
1963 unblockClientWaitingData(c
);
1965 /* Unsubscribe from all the pubsub channels */
1966 pubsubUnsubscribeAllChannels(c
,0);
1967 pubsubUnsubscribeAllPatterns(c
,0);
1968 dictRelease(c
->pubsub_channels
);
1969 listRelease(c
->pubsub_patterns
);
1970 /* Obvious cleanup */
1971 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1972 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1973 listRelease(c
->reply
);
1976 /* Remove from the list of clients */
1977 ln
= listSearchKey(server
.clients
,c
);
1978 redisAssert(ln
!= NULL
);
1979 listDelNode(server
.clients
,ln
);
1980 /* Remove from the list of clients waiting for swapped keys */
1981 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1982 ln
= listSearchKey(server
.io_ready_clients
,c
);
1984 listDelNode(server
.io_ready_clients
,ln
);
1985 server
.vm_blocked_clients
--;
1988 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1989 ln
= listFirst(c
->io_keys
);
1990 dontWaitForSwappedKey(c
,ln
->value
);
1992 listRelease(c
->io_keys
);
1993 /* Master/slave cleanup */
1994 if (c
->flags
& REDIS_SLAVE
) {
1995 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1997 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1998 ln
= listSearchKey(l
,c
);
1999 redisAssert(ln
!= NULL
);
2002 if (c
->flags
& REDIS_MASTER
) {
2003 server
.master
= NULL
;
2004 server
.replstate
= REDIS_REPL_CONNECT
;
2006 /* Release memory */
2009 freeClientMultiState(c
);
2013 #define GLUEREPLY_UP_TO (1024)
2014 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2016 char buf
[GLUEREPLY_UP_TO
];
2021 listRewind(c
->reply
,&li
);
2022 while((ln
= listNext(&li
))) {
2026 objlen
= sdslen(o
->ptr
);
2027 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2028 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2030 listDelNode(c
->reply
,ln
);
2032 if (copylen
== 0) return;
2036 /* Now the output buffer is empty, add the new single element */
2037 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2038 listAddNodeHead(c
->reply
,o
);
2041 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2042 redisClient
*c
= privdata
;
2043 int nwritten
= 0, totwritten
= 0, objlen
;
2046 REDIS_NOTUSED(mask
);
2048 /* Use writev() if we have enough buffers to send */
2049 if (!server
.glueoutputbuf
&&
2050 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2051 !(c
->flags
& REDIS_MASTER
))
2053 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2057 while(listLength(c
->reply
)) {
2058 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2059 glueReplyBuffersIfNeeded(c
);
2061 o
= listNodeValue(listFirst(c
->reply
));
2062 objlen
= sdslen(o
->ptr
);
2065 listDelNode(c
->reply
,listFirst(c
->reply
));
2069 if (c
->flags
& REDIS_MASTER
) {
2070 /* Don't reply to a master */
2071 nwritten
= objlen
- c
->sentlen
;
2073 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2074 if (nwritten
<= 0) break;
2076 c
->sentlen
+= nwritten
;
2077 totwritten
+= nwritten
;
2078 /* If we fully sent the object on head go to the next one */
2079 if (c
->sentlen
== objlen
) {
2080 listDelNode(c
->reply
,listFirst(c
->reply
));
2083 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2084 * bytes: in a single threaded server it's a good idea to serve
2085 * other clients as well, even if a very large request comes from a
2086 * super fast link that is always able to accept data (as a real world
2087 * scenario think about 'KEYS *' against the loopback interface) */
2088 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2090 if (nwritten
== -1) {
2091 if (errno
== EAGAIN
) {
2094 redisLog(REDIS_VERBOSE
,
2095 "Error writing to client: %s", strerror(errno
));
2100 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2101 if (listLength(c
->reply
) == 0) {
2103 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2107 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2109 redisClient
*c
= privdata
;
2110 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2112 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2113 int offset
, ion
= 0;
2115 REDIS_NOTUSED(mask
);
2118 while (listLength(c
->reply
)) {
2119 offset
= c
->sentlen
;
2123 /* fill-in the iov[] array */
2124 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2125 o
= listNodeValue(node
);
2126 objlen
= sdslen(o
->ptr
);
2128 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2131 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2132 break; /* no more iovecs */
2134 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2135 iov
[ion
].iov_len
= objlen
- offset
;
2136 willwrite
+= objlen
- offset
;
2137 offset
= 0; /* just for the first item */
2144 /* write all collected blocks at once */
2145 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2146 if (errno
!= EAGAIN
) {
2147 redisLog(REDIS_VERBOSE
,
2148 "Error writing to client: %s", strerror(errno
));
2155 totwritten
+= nwritten
;
2156 offset
= c
->sentlen
;
2158 /* remove written robjs from c->reply */
2159 while (nwritten
&& listLength(c
->reply
)) {
2160 o
= listNodeValue(listFirst(c
->reply
));
2161 objlen
= sdslen(o
->ptr
);
2163 if(nwritten
>= objlen
- offset
) {
2164 listDelNode(c
->reply
, listFirst(c
->reply
));
2165 nwritten
-= objlen
- offset
;
2169 c
->sentlen
+= nwritten
;
2177 c
->lastinteraction
= time(NULL
);
2179 if (listLength(c
->reply
) == 0) {
2181 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* Find a command descriptor by name. cmdTable is scanned up to the entry
 * whose name field is NULL, and the address of the first entry whose name
 * matches 'name' case-insensitively (strcasecmp) is returned.
 * NOTE(review): the result for an unknown name is not visible in this
 * excerpt; presumably NULL -- confirm. */
2185 static struct redisCommand
*lookupCommand(char *name
) {
2187 while(cmdTable
[j
].name
!= NULL
) {
2188 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2194 /* resetClient prepare the client to process the next command */
2195 static void resetClient(redisClient
*c
) {
2201 /* Call() is the core of Redis execution of a command */
2202 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2205 dirty
= server
.dirty
;
2207 dirty
= server
.dirty
-dirty
;
2209 if (server
.appendonly
&& dirty
)
2210 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2211 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2212 listLength(server
.slaves
))
2213 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2214 if (listLength(server
.monitors
))
2215 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2216 server
.stat_numcommands
++;
2219 /* If this function gets called we already read a whole
2220 * command, arguments are in the client argv/argc fields.
2221 * processCommand() executes the command or prepares the
2222 * server for a bulk read from the client.
2224 * If 1 is returned the client is still alive and valid
2225 * and other operations can be performed by the caller. Otherwise
2226 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2227 static int processCommand(redisClient
*c
) {
2228 struct redisCommand
*cmd
;
2230 /* Free some memory if needed (maxmemory setting) */
2231 if (server
.maxmemory
) freeMemoryIfNeeded();
2233 /* Handle the multi bulk command type. This is an alternative protocol
2234 * supported by Redis in order to receive commands that are composed of
2235 * multiple binary-safe "bulk" arguments. The latency of processing is
2236 * a bit higher but this allows things like multi-sets, so if this
2237 * protocol is used only for MSET and similar commands this is a big win. */
2238 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2239 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2240 if (c
->multibulk
<= 0) {
2244 decrRefCount(c
->argv
[c
->argc
-1]);
2248 } else if (c
->multibulk
) {
2249 if (c
->bulklen
== -1) {
2250 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2251 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2255 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2256 decrRefCount(c
->argv
[0]);
2257 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2259 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2264 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2268 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2269 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2273 if (c
->multibulk
== 0) {
2277 /* Here we need to swap the multi-bulk argc/argv with the
2278 * normal argc/argv of the client structure. */
2280 c
->argv
= c
->mbargv
;
2281 c
->mbargv
= auxargv
;
2284 c
->argc
= c
->mbargc
;
2285 c
->mbargc
= auxargc
;
2287 /* We need to set bulklen to something different than -1
2288 * in order for the code below to process the command without
2289 * trying to read the last argument of a bulk command as
2290 * a special argument. */
2292 /* continue below and process the command */
2299 /* -- end of multi bulk commands processing -- */
2301 /* The QUIT command is handled as a special case. Normal command
2302 * procs are unable to close the client connection safely */
2303 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2308 /* Now lookup the command and check ASAP about trivial error conditions
2309 * such as wrong arity, bad command name and so forth. */
2310 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2313 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2314 (char*)c
->argv
[0]->ptr
));
2317 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2318 (c
->argc
< -cmd
->arity
)) {
2320 sdscatprintf(sdsempty(),
2321 "-ERR wrong number of arguments for '%s' command\r\n",
2325 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2326 /* This is a bulk command, we have to read the last argument yet. */
2327 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2329 decrRefCount(c
->argv
[c
->argc
-1]);
2330 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2332 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2337 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2338 /* It is possible that the bulk read is already in the
2339 * buffer. Check this condition and handle it accordingly.
2340 * This is just a fast path, alternative to calling processInputBuffer().
2341 * It's a good idea since the code is small and this condition
2342 * happens most of the time. */
2343 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2344 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2346 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2348 /* Otherwise return... there is to read the last argument
2349 * from the socket. */
2353 /* Let's try to encode the bulk object to save space. */
2354 if (cmd
->flags
& REDIS_CMD_BULK
)
2355 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2357 /* Check if the user is authenticated */
2358 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2359 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2364 /* Handle the maxmemory directive */
2365 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2366 zmalloc_used_memory() > server
.maxmemory
)
2368 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2373 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2374 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2376 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2377 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2378 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2383 /* Exec the command */
2384 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2385 queueMultiCommand(c
,cmd
);
2386 addReply(c
,shared
.queued
);
2388 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2389 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2393 /* Prepare the client for the next command */
2398 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2403 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2404 * and we need 1 object for the first "*<count>\r\n" multibulk count, then
2405 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2406 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2409 if (argc
<= REDIS_STATIC_ARGS
) {
2412 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2415 lenobj
= createObject(REDIS_STRING
,
2416 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2417 lenobj
->refcount
= 0;
2418 outv
[outc
++] = lenobj
;
2419 for (j
= 0; j
< argc
; j
++) {
2420 lenobj
= createObject(REDIS_STRING
,
2421 sdscatprintf(sdsempty(),"$%lu\r\n",
2422 (unsigned long) stringObjectLen(argv
[j
])));
2423 lenobj
->refcount
= 0;
2424 outv
[outc
++] = lenobj
;
2425 outv
[outc
++] = argv
[j
];
2426 outv
[outc
++] = shared
.crlf
;
2429 /* Increment all the refcounts at start and decrement at end in order to
2430 * be sure to free objects if there is no slave in a replication state
2431 * able to be fed with commands */
2432 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2433 listRewind(slaves
,&li
);
2434 while((ln
= listNext(&li
))) {
2435 redisClient
*slave
= ln
->value
;
2437 /* Don't feed slaves that are still waiting for BGSAVE to start */
2438 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2440 /* Feed all the other slaves, MONITORs and so on */
2441 if (slave
->slaveseldb
!= dictid
) {
2445 case 0: selectcmd
= shared
.select0
; break;
2446 case 1: selectcmd
= shared
.select1
; break;
2447 case 2: selectcmd
= shared
.select2
; break;
2448 case 3: selectcmd
= shared
.select3
; break;
2449 case 4: selectcmd
= shared
.select4
; break;
2450 case 5: selectcmd
= shared
.select5
; break;
2451 case 6: selectcmd
= shared
.select6
; break;
2452 case 7: selectcmd
= shared
.select7
; break;
2453 case 8: selectcmd
= shared
.select8
; break;
2454 case 9: selectcmd
= shared
.select9
; break;
2456 selectcmd
= createObject(REDIS_STRING
,
2457 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2458 selectcmd
->refcount
= 0;
2461 addReply(slave
,selectcmd
);
2462 slave
->slaveseldb
= dictid
;
2464 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2466 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2467 if (outv
!= static_outv
) zfree(outv
);
2470 static void processInputBuffer(redisClient
*c
) {
2472 /* Before processing the input buffer, make sure the client is not
2473 * waiting for a blocking operation such as BLPOP. Note that on the first
2474 * iteration the client is never blocked, otherwise processInputBuffer()
2475 * would not be called at all, but after the execution of the first commands
2476 * in the input buffer the client may be blocked, and the "goto again"
2477 * will try to reiterate. The following line will make it return asap. */
2478 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2479 if (c
->bulklen
== -1) {
2480 /* Read the first line of the query */
2481 char *p
= strchr(c
->querybuf
,'\n');
2488 query
= c
->querybuf
;
2489 c
->querybuf
= sdsempty();
2490 querylen
= 1+(p
-(query
));
2491 if (sdslen(query
) > querylen
) {
2492 /* leave data after the first line of the query in the buffer */
2493 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2495 *p
= '\0'; /* remove "\n" */
2496 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2497 sdsupdatelen(query
);
2499 /* Now we can split the query in arguments */
2500 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2503 if (c
->argv
) zfree(c
->argv
);
2504 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2506 for (j
= 0; j
< argc
; j
++) {
2507 if (sdslen(argv
[j
])) {
2508 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2516 /* Execute the command. If the client is still valid
2517 * after processCommand() returns and there is something
2518 * on the query buffer try to process the next command. */
2519 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2521 /* Nothing to process, argc == 0. Just process the query
2522 * buffer if it's not empty or return to the caller */
2523 if (sdslen(c
->querybuf
)) goto again
;
2526 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2527 redisLog(REDIS_VERBOSE
, "Client protocol error");
2532 /* Bulk read handling. Note that if we are at this point
2533 the client already sent a command terminated with a newline,
2534 we are reading the bulk data that is actually the last
2535 argument of the command. */
2536 int qbl
= sdslen(c
->querybuf
);
2538 if (c
->bulklen
<= qbl
) {
2539 /* Copy everything but the final CRLF as final argument */
2540 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2542 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2543 /* Process the command. If the client is still valid after
2544 * the processing and there is more data in the buffer
2545 * try to parse it. */
2546 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2552 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2553 redisClient
*c
= (redisClient
*) privdata
;
2554 char buf
[REDIS_IOBUF_LEN
];
2557 REDIS_NOTUSED(mask
);
2559 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2561 if (errno
== EAGAIN
) {
2564 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2568 } else if (nread
== 0) {
2569 redisLog(REDIS_VERBOSE
, "Client closed connection");
2574 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2575 c
->lastinteraction
= time(NULL
);
2579 processInputBuffer(c
);
/* Make client 'c' operate on database number 'id' by pointing c->db at
 * server.db[id]. The id is first checked against the range
 * [0, server.dbnum).
 * NOTE(review): the statement executed for an out-of-range id and the
 * function's return values are not visible in this excerpt; presumably an
 * error code is returned without touching c->db -- confirm. */
2582 static int selectDb(redisClient
*c
, int id
) {
2583 if (id
< 0 || id
>= server
.dbnum
)
2585 c
->db
= &server
.db
[id
];
/* Dup method for client reply lists (createClient() installs it with
 * listSetDupMethod() on c->reply). It takes a new reference on the object
 * with incrRefCount().
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the same pointer 'o' is returned -- confirm. */
2589 static void *dupClientReplyValue(void *o
) {
2590 incrRefCount((robj
*)o
);
/* List match callback (createClient() installs it with listSetMatchMethod()
 * on c->pubsub_patterns). Returns non-zero when compareStringObjects()
 * yields 0 for 'a' and 'b', and zero otherwise. */
2594 static int listMatchObjects(void *a
, void *b
) {
2595 return compareStringObjects(a
,b
) == 0;
2598 static redisClient
*createClient(int fd
) {
2599 redisClient
*c
= zmalloc(sizeof(*c
));
2601 anetNonBlock(NULL
,fd
);
2602 anetTcpNoDelay(NULL
,fd
);
2603 if (!c
) return NULL
;
2606 c
->querybuf
= sdsempty();
2615 c
->lastinteraction
= time(NULL
);
2616 c
->authenticated
= 0;
2617 c
->replstate
= REDIS_REPL_NONE
;
2618 c
->reply
= listCreate();
2619 listSetFreeMethod(c
->reply
,decrRefCount
);
2620 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2621 c
->blockingkeys
= NULL
;
2622 c
->blockingkeysnum
= 0;
2623 c
->io_keys
= listCreate();
2624 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2625 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2626 c
->pubsub_patterns
= listCreate();
2627 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2628 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2629 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2630 readQueryFromClient
, c
) == AE_ERR
) {
2634 listAddNodeTail(server
.clients
,c
);
2635 initClientMultiState(c
);
2639 static void addReply(redisClient
*c
, robj
*obj
) {
2640 if (listLength(c
->reply
) == 0 &&
2641 (c
->replstate
== REDIS_REPL_NONE
||
2642 c
->replstate
== REDIS_REPL_ONLINE
) &&
2643 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2644 sendReplyToClient
, c
) == AE_ERR
) return;
2646 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2647 obj
= dupStringObject(obj
);
2648 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2650 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2653 static void addReplySds(redisClient
*c
, sds s
) {
2654 robj
*o
= createObject(REDIS_STRING
,s
);
2659 static void addReplyDouble(redisClient
*c
, double d
) {
2662 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2663 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2664 (unsigned long) strlen(buf
),buf
));
2667 static void addReplyLong(redisClient
*c
, long l
) {
2672 addReply(c
,shared
.czero
);
2674 } else if (l
== 1) {
2675 addReply(c
,shared
.cone
);
2678 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2679 addReplySds(c
,sdsnewlen(buf
,len
));
2682 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2687 addReply(c
,shared
.czero
);
2689 } else if (ll
== 1) {
2690 addReply(c
,shared
.cone
);
2693 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2694 addReplySds(c
,sdsnewlen(buf
,len
));
2697 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2702 addReply(c
,shared
.czero
);
2704 } else if (ul
== 1) {
2705 addReply(c
,shared
.cone
);
2708 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2709 addReplySds(c
,sdsnewlen(buf
,len
));
2712 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2715 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2716 len
= sdslen(obj
->ptr
);
2718 long n
= (long)obj
->ptr
;
2720 /* Compute how many bytes will take this integer as a radix 10 string */
2726 while((n
= n
/10) != 0) {
2730 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2733 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2734 addReplyBulkLen(c
,obj
);
2736 addReply(c
,shared
.crlf
);
2739 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2740 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2742 addReply(c
,shared
.nullbulk
);
2744 robj
*o
= createStringObject(s
,strlen(s
));
2750 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2755 REDIS_NOTUSED(mask
);
2756 REDIS_NOTUSED(privdata
);
2758 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2759 if (cfd
== AE_ERR
) {
2760 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2763 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2764 if ((c
= createClient(cfd
)) == NULL
) {
2765 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2766 close(cfd
); /* May be already closed, just ingore errors */
2769 /* If maxclient directive is set and this is one client more... close the
2770 * connection. Note that we create the client instead of checking for
2771 * this condition beforehand, since now the socket is already set in nonblocking
2772 * mode and we can send an error for free using the Kernel I/O */
2773 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2774 char *err
= "-ERR max number of clients reached\r\n";
2776 /* That's a best effort error message, don't check write errors */
2777 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2778 /* Nothing to do, Just to avoid the warning... */
2783 server
.stat_numconnections
++;
2786 /* ======================= Redis objects implementation ===================== */
2788 static robj
*createObject(int type
, void *ptr
) {
2791 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2792 if (listLength(server
.objfreelist
)) {
2793 listNode
*head
= listFirst(server
.objfreelist
);
2794 o
= listNodeValue(head
);
2795 listDelNode(server
.objfreelist
,head
);
2796 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2798 if (server
.vm_enabled
) {
2799 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2800 o
= zmalloc(sizeof(*o
));
2802 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2806 o
->encoding
= REDIS_ENCODING_RAW
;
2809 if (server
.vm_enabled
) {
2810 /* Note that this code may run in the context of an I/O thread
2811 * and accessing to server.unixtime in theory is an error
2812 * (no locks). But in practice this is safe, and even if we read
2813 * garbage Redis will not fail, as it's just a statistical info */
2814 o
->vm
.atime
= server
.unixtime
;
2815 o
->storage
= REDIS_VM_MEMORY
;
/* Build a REDIS_STRING object whose payload is the sds string produced by
 * sdsnewlen(ptr,len). The result is whatever createObject() returns. */
2820 static robj
*createStringObject(char *ptr
, size_t len
) {
2821 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2824 static robj
*createStringObjectFromLongLong(long long value
) {
2826 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2827 incrRefCount(shared
.integers
[value
]);
2828 o
= shared
.integers
[value
];
2830 o
= createObject(REDIS_STRING
, NULL
);
2831 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2832 o
->encoding
= REDIS_ENCODING_INT
;
2833 o
->ptr
= (void*)((long)value
);
2835 o
->ptr
= sdscatprintf(sdsempty(),"%lld",value
);
/* Return a new string object created from the sds payload of 'o' (o->ptr,
 * with the length reported by sdslen()). Only raw-encoded strings are
 * accepted: the function asserts o->encoding == REDIS_ENCODING_RAW. */
2841 static robj
*dupStringObject(robj
*o
) {
/* o->ptr is handed to sdslen() below, so the encoding must be raw. */
2842 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2843 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
/* Build a REDIS_LIST object wrapping a list freshly made by listCreate().
 * decrRefCount is registered as the list's free method before the list is
 * handed to createObject(). */
2846 static robj
*createListObject(void) {
2847 list
*l
= listCreate();
2849 listSetFreeMethod(l
,decrRefCount
);
2850 return createObject(REDIS_LIST
,l
);
/* Build a REDIS_SET object wrapping a new dictionary created with
 * setDictType (and a NULL private-data argument). */
2853 static robj
*createSetObject(void) {
2854 dict
*d
= dictCreate(&setDictType
,NULL
);
2855 return createObject(REDIS_SET
,d
);
2858 static robj
*createHashObject(void) {
2859 /* All the Hashes start as zipmaps. Will be automatically converted
2860 * into hash tables if there are enough elements or big elements
2862 unsigned char *zm
= zipmapNew();
2863 robj
*o
= createObject(REDIS_HASH
,zm
);
2864 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
/* Build a REDIS_ZSET object. The zset structure is allocated with zmalloc()
 * and holds two members initialised here: 'dict', a dictionary created with
 * zsetDictType, and 'zsl', the result of zslCreate(). */
2868 static robj
*createZsetObject(void) {
2869 zset
*zs
= zmalloc(sizeof(*zs
));
2871 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2872 zs
->zsl
= zslCreate();
2873 return createObject(REDIS_ZSET
,zs
);
2876 static void freeStringObject(robj
*o
) {
2877 if (o
->encoding
== REDIS_ENCODING_RAW
) {
/* Release the payload of a list object: o->ptr is treated as a list and
 * passed to listRelease(). The robj itself is not touched here. */
2882 static void freeListObject(robj
*o
) {
2883 listRelease((list
*) o
->ptr
);
/* Release the payload of a set object: o->ptr is treated as a dict and
 * passed to dictRelease(). The robj itself is not touched here. */
2886 static void freeSetObject(robj
*o
) {
2887 dictRelease((dict
*) o
->ptr
);
2890 static void freeZsetObject(robj
*o
) {
2893 dictRelease(zs
->dict
);
2898 static void freeHashObject(robj
*o
) {
2899 switch (o
->encoding
) {
2900 case REDIS_ENCODING_HT
:
2901 dictRelease((dict
*) o
->ptr
);
2903 case REDIS_ENCODING_ZIPMAP
:
2907 redisPanic("Unknown hash encoding type");
2912 static void incrRefCount(robj
*o
) {
2916 static void decrRefCount(void *obj
) {
2919 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
2920 /* Object is a key of a swapped out value, or in the process of being
2922 if (server
.vm_enabled
&&
2923 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2925 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2926 redisAssert(o
->type
== REDIS_STRING
);
2927 freeStringObject(o
);
2928 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2929 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2930 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2931 !listAddNodeHead(server
.objfreelist
,o
))
2933 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2934 server
.vm_stats_swapped_objects
--;
2937 /* Object is in memory, or in the process of being swapped out. */
2938 if (--(o
->refcount
) == 0) {
2939 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2940 vmCancelThreadedIOJob(obj
);
2942 case REDIS_STRING
: freeStringObject(o
); break;
2943 case REDIS_LIST
: freeListObject(o
); break;
2944 case REDIS_SET
: freeSetObject(o
); break;
2945 case REDIS_ZSET
: freeZsetObject(o
); break;
2946 case REDIS_HASH
: freeHashObject(o
); break;
2947 default: redisPanic("Unknown object type"); break;
2949 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2950 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2951 !listAddNodeHead(server
.objfreelist
,o
))
2953 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2957 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2958 dictEntry
*de
= dictFind(db
->dict
,key
);
2960 robj
*key
= dictGetEntryKey(de
);
2961 robj
*val
= dictGetEntryVal(de
);
2963 if (server
.vm_enabled
) {
2964 if (key
->storage
== REDIS_VM_MEMORY
||
2965 key
->storage
== REDIS_VM_SWAPPING
)
2967 /* If we were swapping the object out, stop it, this key
2969 if (key
->storage
== REDIS_VM_SWAPPING
)
2970 vmCancelThreadedIOJob(key
);
2971 /* Update the access time of the key for the aging algorithm. */
2972 key
->vm
.atime
= server
.unixtime
;
2974 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2976 /* Our value was swapped on disk. Bring it at home. */
2977 redisAssert(val
== NULL
);
2978 val
= vmLoadObject(key
);
2979 dictGetEntryVal(de
) = val
;
2981 /* Clients blocked by the VM subsystem may be waiting for
2983 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2992 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2993 expireIfNeeded(db
,key
);
2994 return lookupKey(db
,key
);
2997 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2998 deleteIfVolatile(db
,key
);
2999 return lookupKey(db
,key
);
3002 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3003 robj
*o
= lookupKeyRead(c
->db
, key
);
3004 if (!o
) addReply(c
,reply
);
3008 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3009 robj
*o
= lookupKeyWrite(c
->db
, key
);
3010 if (!o
) addReply(c
,reply
);
3014 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3015 if (o
->type
!= type
) {
3016 addReply(c
,shared
.wrongtypeerr
);
3022 static int deleteKey(redisDb
*db
, robj
*key
) {
3025 /* We need to protect key from destruction: after the first dictDelete()
3026 * it may happen that 'key' is no longer valid if we don't increment
3027 * it's count. This may happen when we get the object reference directly
3028 * from the hash table with dictRandomKey() or dict iterators */
3030 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3031 retval
= dictDelete(db
->dict
,key
);
3034 return retval
== DICT_OK
;
3037 /* Check if the nul-terminated string 's' can be represented by a long
3038 * (that is, is a number that fits into long without any other space or
3039 * character before or after the digits).
3041 * If so, the function returns REDIS_OK and *longval is set to the value
3042 * of the number. Otherwise REDIS_ERR is returned */
3043 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3044 char buf
[32], *endptr
;
3048 value
= strtol(s
, &endptr
, 10);
3049 if (endptr
[0] != '\0') return REDIS_ERR
;
3050 slen
= snprintf(buf
,32,"%ld",value
);
3052 /* If the number converted back into a string is not identical
3053 * then it's not possible to encode the string as integer */
3054 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3055 if (longval
) *longval
= value
;
3059 /* Try to encode a string object in order to save space */
3060 static robj
*tryObjectEncoding(robj
*o
) {
3064 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3065 return o
; /* Already encoded */
3067 /* It's not safe to encode shared objects: shared objects can be shared
3068 * everywhere in the "object space" of Redis. Encoded objects can only
3069 * appear as "values" (and not, for instance, as keys) */
3070 if (o
->refcount
> 1) return o
;
3072 /* Currently we try to encode only strings */
3073 redisAssert(o
->type
== REDIS_STRING
);
3075 /* Check if we can represent this string as a long integer */
3076 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3078 /* Ok, this object can be encoded */
3079 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3081 incrRefCount(shared
.integers
[value
]);
3082 return shared
.integers
[value
];
3084 o
->encoding
= REDIS_ENCODING_INT
;
3086 o
->ptr
= (void*) value
;
3091 /* Get a decoded version of an encoded object (returned as a new object).
3092 * If the object is already raw-encoded just increment the ref count. */
3093 static robj
*getDecodedObject(robj
*o
) {
3096 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3100 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3103 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3104 dec
= createStringObject(buf
,strlen(buf
));
3107 redisPanic("Unknown encoding type");
3111 /* Compare two string objects via strcmp() or alike.
3112 * Note that the objects may be integer-encoded. In such a case we
3113 * use snprintf() to get a string representation of the numbers on the stack
3114 * and compare the strings, it's much faster than calling getDecodedObject().
3116 * Important note: if objects are not integer encoded, but binary-safe strings,
3117 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3119 static int compareStringObjects(robj
*a
, robj
*b
) {
3120 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3121 char bufa
[128], bufb
[128], *astr
, *bstr
;
3124 if (a
== b
) return 0;
3125 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3126 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3132 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3133 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3139 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3142 static size_t stringObjectLen(robj
*o
) {
3143 redisAssert(o
->type
== REDIS_STRING
);
3144 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3145 return sdslen(o
->ptr
);
3149 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3153 static int getDoubleFromObject(robj
*o
, double *target
) {
3160 redisAssert(o
->type
== REDIS_STRING
);
3161 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3162 value
= strtod(o
->ptr
, &eptr
);
3163 if (eptr
[0] != '\0') return REDIS_ERR
;
3164 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3165 value
= (long)o
->ptr
;
3167 redisAssert(1 != 1);
3175 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3177 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3179 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3181 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3190 static int getLongLongFromObject(robj
*o
, long long *target
) {
3197 redisAssert(o
->type
== REDIS_STRING
);
3198 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3199 value
= strtoll(o
->ptr
, &eptr
, 10);
3200 if (eptr
[0] != '\0') return REDIS_ERR
;
3201 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3202 value
= (long)o
->ptr
;
3204 redisAssert(1 != 1);
3212 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3214 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3216 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3218 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3227 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3230 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3231 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3233 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3235 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3244 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' opcode to 'fp'.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte order.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;

    if (fwrite(&stamp,4,1,fp) != 0) return 0;
    return -1;
}
3257 /* check rdbLoadLen() comments for more info */
3258 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3259 unsigned char buf
[2];
3262 /* Save a 6 bit len */
3263 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3264 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3265 } else if (len
< (1<<14)) {
3266 /* Save a 14 bit len */
3267 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3269 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3271 /* Save a 32 bit len */
3272 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3273 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3275 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3280 /* String objects in the form "2391" "-100" without any space and with a
3281 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3282 * encoded as integers to save space */
3283 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3285 char *endptr
, buf
[32];
3287 /* Check if it's possible to encode this value as a number */
3288 value
= strtoll(s
, &endptr
, 10);
3289 if (endptr
[0] != '\0') return 0;
3290 snprintf(buf
,32,"%lld",value
);
3292 /* If the number converted back into a string is not identical
3293 * then it's not possible to encode the string as integer */
3294 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3296 /* Finally check if it fits in our ranges */
3297 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3298 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3299 enc
[1] = value
&0xFF;
3301 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3302 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3303 enc
[1] = value
&0xFF;
3304 enc
[2] = (value
>>8)&0xFF;
3306 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3307 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3308 enc
[1] = value
&0xFF;
3309 enc
[2] = (value
>>8)&0xFF;
3310 enc
[3] = (value
>>16)&0xFF;
3311 enc
[4] = (value
>>24)&0xFF;
3318 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3319 size_t comprlen
, outlen
;
3323 /* We require at least four bytes compression for this to be worth it */
3324 if (len
<= 4) return 0;
3326 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3327 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3328 if (comprlen
== 0) {
3332 /* Data compressed! Let's save it on disk */
3333 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3334 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3335 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3336 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3337 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3346 /* Save a string objet as [len][data] on disk. If the object is a string
3347 * representation of an integer value we try to safe it in a special form */
3348 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3351 /* Try integer encoding */
3353 unsigned char buf
[5];
3354 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3355 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3360 /* Try LZF compression - under 20 bytes it's unable to compress even
3361 * aaaaaaaaaaaaaaaaaa so skip it */
3362 if (server
.rdbcompression
&& len
> 20) {
3365 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3366 if (retval
== -1) return -1;
3367 if (retval
> 0) return 0;
3368 /* retval == 0 means data can't be compressed, save the old way */
3371 /* Store verbatim */
3372 if (rdbSaveLen(fp
,len
) == -1) return -1;
3373 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3377 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3378 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3381 /* Avoid incr/decr ref count business when possible.
3382 * This plays well with copy-on-write given that we are probably
3383 * in a child process (BGSAVE). Also this makes sure key objects
3384 * of swapped objects are not incRefCount-ed (an assert does not allow
3385 * this in order to avoid bugs) */
3386 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3387 obj
= getDecodedObject(obj
);
3388 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3391 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int total;

    if (isnan(val)) {
        out[0] = 253;
        total = 1;
    } else if (isfinite(val)) {
        char *text = (char*)out+1;

        snprintf(text,sizeof(out)-1,"%.17g",val);
        out[0] = strlen(text);
        total = out[0]+1;
    } else {
        out[0] = (val < 0) ? 255 : 254;
        total = 1;
    }
    return (fwrite(out,total,1,fp) == 0) ? -1 : 0;
}
3423 /* Save a Redis object. */
3424 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3425 if (o
->type
== REDIS_STRING
) {
3426 /* Save a string value */
3427 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3428 } else if (o
->type
== REDIS_LIST
) {
3429 /* Save a list value */
3430 list
*list
= o
->ptr
;
3434 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3435 listRewind(list
,&li
);
3436 while((ln
= listNext(&li
))) {
3437 robj
*eleobj
= listNodeValue(ln
);
3439 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3441 } else if (o
->type
== REDIS_SET
) {
3442 /* Save a set value */
3444 dictIterator
*di
= dictGetIterator(set
);
3447 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3448 while((de
= dictNext(di
)) != NULL
) {
3449 robj
*eleobj
= dictGetEntryKey(de
);
3451 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3453 dictReleaseIterator(di
);
3454 } else if (o
->type
== REDIS_ZSET
) {
3455 /* Save a set value */
3457 dictIterator
*di
= dictGetIterator(zs
->dict
);
3460 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3461 while((de
= dictNext(di
)) != NULL
) {
3462 robj
*eleobj
= dictGetEntryKey(de
);
3463 double *score
= dictGetEntryVal(de
);
3465 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3466 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3468 dictReleaseIterator(di
);
3469 } else if (o
->type
== REDIS_HASH
) {
3470 /* Save a hash value */
3471 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3472 unsigned char *p
= zipmapRewind(o
->ptr
);
3473 unsigned int count
= zipmapLen(o
->ptr
);
3474 unsigned char *key
, *val
;
3475 unsigned int klen
, vlen
;
3477 if (rdbSaveLen(fp
,count
) == -1) return -1;
3478 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3479 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3480 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3483 dictIterator
*di
= dictGetIterator(o
->ptr
);
3486 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3487 while((de
= dictNext(di
)) != NULL
) {
3488 robj
*key
= dictGetEntryKey(de
);
3489 robj
*val
= dictGetEntryVal(de
);
3491 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3492 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3494 dictReleaseIterator(di
);
3497 redisPanic("Unknown object type");
3502 /* Return the length the object will have on disk if saved with
3503 * the rdbSaveObject() function. Currently we use a trick to get
3504 * this length with very little changes to the code. In the future
3505 * we could switch to a faster solution. */
3506 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3507 if (fp
== NULL
) fp
= server
.devnull
;
3509 assert(rdbSaveObject(fp
,o
) != 1);
3513 /* Return the number of pages required to save this object in the swap file */
3514 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3515 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3517 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3520 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3521 static int rdbSave(char *filename
) {
3522 dictIterator
*di
= NULL
;
3527 time_t now
= time(NULL
);
3529 /* Wait for I/O therads to terminate, just in case this is a
3530 * foreground-saving, to avoid seeking the swap file descriptor at the
3532 if (server
.vm_enabled
)
3533 waitEmptyIOJobsQueue();
3535 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3536 fp
= fopen(tmpfile
,"w");
3538 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3541 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3542 for (j
= 0; j
< server
.dbnum
; j
++) {
3543 redisDb
*db
= server
.db
+j
;
3545 if (dictSize(d
) == 0) continue;
3546 di
= dictGetIterator(d
);
3552 /* Write the SELECT DB opcode */
3553 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3554 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3556 /* Iterate this DB writing every entry */
3557 while((de
= dictNext(di
)) != NULL
) {
3558 robj
*key
= dictGetEntryKey(de
);
3559 robj
*o
= dictGetEntryVal(de
);
3560 time_t expiretime
= getExpire(db
,key
);
3562 /* Save the expire time */
3563 if (expiretime
!= -1) {
3564 /* If this key is already expired skip it */
3565 if (expiretime
< now
) continue;
3566 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3567 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3569 /* Save the key and associated value. This requires special
3570 * handling if the value is swapped out. */
3571 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3572 key
->storage
== REDIS_VM_SWAPPING
) {
3573 /* Save type, key, value */
3574 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3575 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3576 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3578 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3580 /* Get a preview of the object in memory */
3581 po
= vmPreviewObject(key
);
3582 /* Save type, key, value */
3583 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3584 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3585 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3586 /* Remove the loaded object from memory */
3590 dictReleaseIterator(di
);
3593 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3595 /* Make sure data will not remain on the OS's output buffers */
3600 /* Use RENAME to make sure the DB file is changed atomically only
3601 * if the generate DB file is ok. */
3602 if (rename(tmpfile
,filename
) == -1) {
3603 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3607 redisLog(REDIS_NOTICE
,"DB saved on disk");
3609 server
.lastsave
= time(NULL
);
3615 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3616 if (di
) dictReleaseIterator(di
);
/* Start saving the DB to 'filename' from a forked child process.
 * Returns REDIS_ERR if a background save child is already registered in
 * server.bgsavechildpid. In the parent, a failed fork() is logged;
 * otherwise the child pid is recorded in server.bgsavechildpid and the
 * dict resize policy is updated.
 * NOTE(review): part of this function's statements are not visible in this
 * extract (e.g. how the child terminates after rdbSave()) - check the full
 * source before relying on this description. */
3620 static int rdbSaveBackground(char *filename
) {
/* Only one background save at a time. */
3623 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
/* With VM enabled, wait for the pending I/O jobs before forking. */
3624 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
/* fork() returns 0 in the child: this branch is the saving process. */
3625 if ((childpid
= fork()) == 0) {
3627 if (server
.vm_enabled
) vmReopenSwapFile();
3629 if (rdbSave(filename
) == REDIS_OK
) {
/* Parent side: childpid is -1 when fork() failed. */
3636 if (childpid
== -1) {
3637 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3641 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3642 server
.bgsavechildpid
= childpid
;
3643 updateDictResizePolicy();
3646 return REDIS_OK
; /* unreached */
/* Remove the temporary dump file "temp-<pid>.rdb" created by the saving
 * process with pid 'childpid'. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one byte type opcode from 'fp'.
 * Returns the type (0-255), or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;

    if (fread(&opcode,1,1,fp) != 0) return opcode;
    return -1;
}
/* Read a time saved by rdbSaveTime() (4 bytes, host byte order) from 'fp'.
 * Returns the time, or -1 if the 4 bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) != 0) return (time_t) stamp;
    return -1;
}
3668 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3669 * of this file for a description of how this are stored on disk.
3671 * isencoded is set to 1 if the readed length is not actually a length but
3672 * an "encoding type", check the above comments for more info */
3673 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3674 unsigned char buf
[2];
3678 if (isencoded
) *isencoded
= 0;
3679 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3680 type
= (buf
[0]&0xC0)>>6;
3681 if (type
== REDIS_RDB_6BITLEN
) {
3682 /* Read a 6 bit len */
3684 } else if (type
== REDIS_RDB_ENCVAL
) {
3685 /* Read a 6 bit len encoding type */
3686 if (isencoded
) *isencoded
= 1;
3688 } else if (type
== REDIS_RDB_14BITLEN
) {
3689 /* Read a 14 bit len */
3690 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3691 return ((buf
[0]&0x3F)<<8)|buf
[1];
3693 /* Read a 32 bit len */
3694 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3699 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3700 unsigned char enc
[4];
3703 if (enctype
== REDIS_RDB_ENC_INT8
) {
3704 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3705 val
= (signed char)enc
[0];
3706 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3708 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3709 v
= enc
[0]|(enc
[1]<<8);
3711 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3713 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3714 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3717 val
= 0; /* anti-warning */
3718 redisPanic("Unknown RDB integer encoding type");
3720 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3723 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3724 unsigned int len
, clen
;
3725 unsigned char *c
= NULL
;
3728 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3729 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3730 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3731 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3732 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3733 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3735 return createObject(REDIS_STRING
,val
);
3742 static robj
*rdbLoadStringObject(FILE*fp
) {
3747 len
= rdbLoadLen(fp
,&isencoded
);
3750 case REDIS_RDB_ENC_INT8
:
3751 case REDIS_RDB_ENC_INT16
:
3752 case REDIS_RDB_ENC_INT32
:
3753 return rdbLoadIntegerObject(fp
,len
);
3754 case REDIS_RDB_ENC_LZF
:
3755 return rdbLoadLzfStringObject(fp
);
3757 redisPanic("Unknown RDB encoding type");
3761 if (len
== REDIS_RDB_LENERR
) return NULL
;
3762 val
= sdsnewlen(NULL
,len
);
3763 if (len
&& fread(val
,len
,1,fp
) == 0) {
3767 return createObject(REDIS_STRING
,val
);
3770 /* For information about double serialization check rdbSaveDoubleValue() */
3771 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3775 if (fread(&len
,1,1,fp
) == 0) return -1;
3777 case 255: *val
= R_NegInf
; return 0;
3778 case 254: *val
= R_PosInf
; return 0;
3779 case 253: *val
= R_Nan
; return 0;
3781 if (fread(buf
,len
,1,fp
) == 0) return -1;
3783 sscanf(buf
, "%lg", val
);
3788 /* Load a Redis object of the specified type from the specified file.
3789 * On success a newly allocated object is returned, otherwise NULL. */
3790 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3793 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3794 if (type
== REDIS_STRING
) {
3795 /* Read string value */
3796 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3797 o
= tryObjectEncoding(o
);
3798 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3799 /* Read list/set value */
3802 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3803 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3804 /* It's faster to expand the dict to the right size asap in order
3805 * to avoid rehashing */
3806 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3807 dictExpand(o
->ptr
,listlen
);
3808 /* Load every single element of the list/set */
3812 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3813 ele
= tryObjectEncoding(ele
);
3814 if (type
== REDIS_LIST
) {
3815 listAddNodeTail((list
*)o
->ptr
,ele
);
3817 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3820 } else if (type
== REDIS_ZSET
) {
3821 /* Read list/set value */
3825 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3826 o
= createZsetObject();
3828 /* Load every single element of the list/set */
3831 double *score
= zmalloc(sizeof(double));
3833 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3834 ele
= tryObjectEncoding(ele
);
3835 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3836 dictAdd(zs
->dict
,ele
,score
);
3837 zslInsert(zs
->zsl
,*score
,ele
);
3838 incrRefCount(ele
); /* added to skiplist */
3840 } else if (type
== REDIS_HASH
) {
3843 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3844 o
= createHashObject();
3845 /* Too many entries? Use an hash table. */
3846 if (hashlen
> server
.hash_max_zipmap_entries
)
3847 convertToRealHash(o
);
3848 /* Load every key/value, then set it into the zipmap or hash
3849 * table, as needed. */
3853 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3854 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3855 /* If we are using a zipmap and there are too big values
3856 * the object is converted to real hash table encoding. */
3857 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3858 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3859 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3861 convertToRealHash(o
);
3864 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3865 unsigned char *zm
= o
->ptr
;
3867 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3868 val
->ptr
,sdslen(val
->ptr
),NULL
);
3873 key
= tryObjectEncoding(key
);
3874 val
= tryObjectEncoding(val
);
3875 dictAdd((dict
*)o
->ptr
,key
,val
);
3879 redisPanic("Unknown object type");
3884 static int rdbLoad(char *filename
) {
3886 robj
*keyobj
= NULL
;
3888 int type
, retval
, rdbver
;
3889 dict
*d
= server
.db
[0].dict
;
3890 redisDb
*db
= server
.db
+0;
3892 time_t expiretime
= -1, now
= time(NULL
);
3893 long long loadedkeys
= 0;
3895 fp
= fopen(filename
,"r");
3896 if (!fp
) return REDIS_ERR
;
3897 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3899 if (memcmp(buf
,"REDIS",5) != 0) {
3901 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3904 rdbver
= atoi(buf
+5);
3907 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3914 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3915 if (type
== REDIS_EXPIRETIME
) {
3916 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3917 /* We read the time so we need to read the object type again */
3918 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3920 if (type
== REDIS_EOF
) break;
3921 /* Handle SELECT DB opcode as a special case */
3922 if (type
== REDIS_SELECTDB
) {
3923 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3925 if (dbid
>= (unsigned)server
.dbnum
) {
3926 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3929 db
= server
.db
+dbid
;
3934 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3936 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3937 /* Add the new object in the hash table */
3938 retval
= dictAdd(d
,keyobj
,o
);
3939 if (retval
== DICT_ERR
) {
3940 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3943 /* Set the expire time if needed */
3944 if (expiretime
!= -1) {
3945 setExpire(db
,keyobj
,expiretime
);
3946 /* Delete this key if already expired */
3947 if (expiretime
< now
) deleteKey(db
,keyobj
);
3951 /* Handle swapping while loading big datasets when VM is on */
3953 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3954 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3955 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3962 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3963 if (keyobj
) decrRefCount(keyobj
);
3964 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3966 return REDIS_ERR
; /* Just to avoid warning */
3969 /*================================== Commands =============================== */
3971 static void authCommand(redisClient
*c
) {
3972 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3973 c
->authenticated
= 1;
3974 addReply(c
,shared
.ok
);
3976 c
->authenticated
= 0;
3977 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3981 static void pingCommand(redisClient
*c
) {
3982 addReply(c
,shared
.pong
);
3985 static void echoCommand(redisClient
*c
) {
3986 addReplyBulk(c
,c
->argv
[1]);
3989 /*=================================== Strings =============================== */
3991 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
3996 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
3999 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4004 if (nx
) deleteIfVolatile(c
->db
,key
);
4005 retval
= dictAdd(c
->db
->dict
,key
,val
);
4006 if (retval
== DICT_ERR
) {
4008 /* If the key is about a swapped value, we want a new key object
4009 * to overwrite the old. So we delete the old key in the database.
4010 * This will also make sure that swap pages about the old object
4011 * will be marked as free. */
4012 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4014 dictReplace(c
->db
->dict
,key
,val
);
4017 addReply(c
,shared
.czero
);
4025 removeExpire(c
->db
,key
);
4026 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4027 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4030 static void setCommand(redisClient
*c
) {
4031 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4034 static void setnxCommand(redisClient
*c
) {
4035 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4038 static void setexCommand(redisClient
*c
) {
4039 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4042 static int getGenericCommand(redisClient
*c
) {
4045 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4048 if (o
->type
!= REDIS_STRING
) {
4049 addReply(c
,shared
.wrongtypeerr
);
4057 static void getCommand(redisClient
*c
) {
4058 getGenericCommand(c
);
4061 static void getsetCommand(redisClient
*c
) {
4062 if (getGenericCommand(c
) == REDIS_ERR
) return;
4063 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4064 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4066 incrRefCount(c
->argv
[1]);
4068 incrRefCount(c
->argv
[2]);
4070 removeExpire(c
->db
,c
->argv
[1]);
4073 static void mgetCommand(redisClient
*c
) {
4076 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4077 for (j
= 1; j
< c
->argc
; j
++) {
4078 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4080 addReply(c
,shared
.nullbulk
);
4082 if (o
->type
!= REDIS_STRING
) {
4083 addReply(c
,shared
.nullbulk
);
4091 static void msetGenericCommand(redisClient
*c
, int nx
) {
4092 int j
, busykeys
= 0;
4094 if ((c
->argc
% 2) == 0) {
4095 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4098 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4099 * set nothing at all if at least one already key exists. */
4101 for (j
= 1; j
< c
->argc
; j
+= 2) {
4102 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4108 addReply(c
, shared
.czero
);
4112 for (j
= 1; j
< c
->argc
; j
+= 2) {
4115 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4116 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4117 if (retval
== DICT_ERR
) {
4118 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4119 incrRefCount(c
->argv
[j
+1]);
4121 incrRefCount(c
->argv
[j
]);
4122 incrRefCount(c
->argv
[j
+1]);
4124 removeExpire(c
->db
,c
->argv
[j
]);
4126 server
.dirty
+= (c
->argc
-1)/2;
4127 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4130 static void msetCommand(redisClient
*c
) {
4131 msetGenericCommand(c
,0);
4134 static void msetnxCommand(redisClient
*c
) {
4135 msetGenericCommand(c
,1);
4138 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4143 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4145 if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return;
4148 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4149 o
= tryObjectEncoding(o
);
4150 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4151 if (retval
== DICT_ERR
) {
4152 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4153 removeExpire(c
->db
,c
->argv
[1]);
4155 incrRefCount(c
->argv
[1]);
4158 addReply(c
,shared
.colon
);
4160 addReply(c
,shared
.crlf
);
4163 static void incrCommand(redisClient
*c
) {
4164 incrDecrCommand(c
,1);
4167 static void decrCommand(redisClient
*c
) {
4168 incrDecrCommand(c
,-1);
4171 static void incrbyCommand(redisClient
*c
) {
4174 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4175 incrDecrCommand(c
,incr
);
4178 static void decrbyCommand(redisClient
*c
) {
4181 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4182 incrDecrCommand(c
,-incr
);
4185 static void appendCommand(redisClient
*c
) {
4190 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4192 /* Create the key */
4193 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4194 incrRefCount(c
->argv
[1]);
4195 incrRefCount(c
->argv
[2]);
4196 totlen
= stringObjectLen(c
->argv
[2]);
4200 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4203 o
= dictGetEntryVal(de
);
4204 if (o
->type
!= REDIS_STRING
) {
4205 addReply(c
,shared
.wrongtypeerr
);
4208 /* If the object is specially encoded or shared we have to make
4210 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4211 robj
*decoded
= getDecodedObject(o
);
4213 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4214 decrRefCount(decoded
);
4215 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4218 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4219 o
->ptr
= sdscatlen(o
->ptr
,
4220 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4222 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4223 (unsigned long) c
->argv
[2]->ptr
);
4225 totlen
= sdslen(o
->ptr
);
4228 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4231 static void substrCommand(redisClient
*c
) {
4233 long start
= atoi(c
->argv
[2]->ptr
);
4234 long end
= atoi(c
->argv
[3]->ptr
);
4235 size_t rangelen
, strlen
;
4238 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4239 checkType(c
,o
,REDIS_STRING
)) return;
4241 o
= getDecodedObject(o
);
4242 strlen
= sdslen(o
->ptr
);
4244 /* convert negative indexes */
4245 if (start
< 0) start
= strlen
+start
;
4246 if (end
< 0) end
= strlen
+end
;
4247 if (start
< 0) start
= 0;
4248 if (end
< 0) end
= 0;
4250 /* indexes sanity checks */
4251 if (start
> end
|| (size_t)start
>= strlen
) {
4252 /* Out of range start or start > end result in null reply */
4253 addReply(c
,shared
.nullbulk
);
4257 if ((size_t)end
>= strlen
) end
= strlen
-1;
4258 rangelen
= (end
-start
)+1;
4260 /* Return the result */
4261 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4262 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4263 addReplySds(c
,range
);
4264 addReply(c
,shared
.crlf
);
4268 /* ========================= Type agnostic commands ========================= */
4270 static void delCommand(redisClient
*c
) {
4273 for (j
= 1; j
< c
->argc
; j
++) {
4274 if (deleteKey(c
->db
,c
->argv
[j
])) {
4279 addReplyLong(c
,deleted
);
4282 static void existsCommand(redisClient
*c
) {
4283 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4286 static void selectCommand(redisClient
*c
) {
4287 int id
= atoi(c
->argv
[1]->ptr
);
4289 if (selectDb(c
,id
) == REDIS_ERR
) {
4290 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4292 addReply(c
,shared
.ok
);
4296 static void randomkeyCommand(redisClient
*c
) {
4301 de
= dictGetRandomKey(c
->db
->dict
);
4302 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4306 addReply(c
,shared
.nullbulk
);
4310 key
= dictGetEntryKey(de
);
4311 if (server
.vm_enabled
) {
4312 key
= dupStringObject(key
);
4313 addReplyBulk(c
,key
);
4316 addReplyBulk(c
,key
);
4320 static void keysCommand(redisClient
*c
) {
4323 sds pattern
= c
->argv
[1]->ptr
;
4324 int plen
= sdslen(pattern
);
4325 unsigned long numkeys
= 0;
4326 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4328 di
= dictGetIterator(c
->db
->dict
);
4330 decrRefCount(lenobj
);
4331 while((de
= dictNext(di
)) != NULL
) {
4332 robj
*keyobj
= dictGetEntryKey(de
);
4334 sds key
= keyobj
->ptr
;
4335 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4336 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4337 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4338 addReplyBulk(c
,keyobj
);
4343 dictReleaseIterator(di
);
4344 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4347 static void dbsizeCommand(redisClient
*c
) {
4349 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4352 static void lastsaveCommand(redisClient
*c
) {
4354 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4357 static void typeCommand(redisClient
*c
) {
4361 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4366 case REDIS_STRING
: type
= "+string"; break;
4367 case REDIS_LIST
: type
= "+list"; break;
4368 case REDIS_SET
: type
= "+set"; break;
4369 case REDIS_ZSET
: type
= "+zset"; break;
4370 case REDIS_HASH
: type
= "+hash"; break;
4371 default: type
= "+unknown"; break;
4374 addReplySds(c
,sdsnew(type
));
4375 addReply(c
,shared
.crlf
);
4378 static void saveCommand(redisClient
*c
) {
4379 if (server
.bgsavechildpid
!= -1) {
4380 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4383 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4384 addReply(c
,shared
.ok
);
4386 addReply(c
,shared
.err
);
4390 static void bgsaveCommand(redisClient
*c
) {
4391 if (server
.bgsavechildpid
!= -1) {
4392 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4395 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4396 char *status
= "+Background saving started\r\n";
4397 addReplySds(c
,sdsnew(status
));
4399 addReply(c
,shared
.err
);
4403 static void shutdownCommand(redisClient
*c
) {
4404 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4405 /* Kill the saving child if there is a background saving in progress.
4406 We want to avoid race conditions, for instance our saving child may
4407 overwrite the synchronous save done by SHUTDOWN. */
4408 if (server
.bgsavechildpid
!= -1) {
4409 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4410 kill(server
.bgsavechildpid
,SIGKILL
);
4411 rdbRemoveTempFile(server
.bgsavechildpid
);
4413 if (server
.appendonly
) {
4414 /* Append only file: fsync() the AOF and exit */
4415 fsync(server
.appendfd
);
4416 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4419 /* Snapshotting. Perform a SYNC SAVE and exit */
4420 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4421 if (server
.daemonize
)
4422 unlink(server
.pidfile
);
4423 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4424 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4425 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4428 /* Ooops.. error saving! The best we can do is to continue
4429 * operating. Note that if there was a background saving process,
4430 * in the next cron() Redis will be notified that the background
4431 * saving aborted, handling special stuff like slaves pending for
4432 * synchronization... */
4433 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4435 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4440 static void renameGenericCommand(redisClient
*c
, int nx
) {
4443 /* To use the same key as src and dst is probably an error */
4444 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4445 addReply(c
,shared
.sameobjecterr
);
4449 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4453 deleteIfVolatile(c
->db
,c
->argv
[2]);
4454 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4457 addReply(c
,shared
.czero
);
4460 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4462 incrRefCount(c
->argv
[2]);
4464 deleteKey(c
->db
,c
->argv
[1]);
4466 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4469 static void renameCommand(redisClient
*c
) {
4470 renameGenericCommand(c
,0);
4473 static void renamenxCommand(redisClient
*c
) {
4474 renameGenericCommand(c
,1);
4477 static void moveCommand(redisClient
*c
) {
4482 /* Obtain source and target DB pointers */
4485 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4486 addReply(c
,shared
.outofrangeerr
);
4490 selectDb(c
,srcid
); /* Back to the source DB */
4492 /* If the user is moving using as target the same
4493 * DB as the source DB it is probably an error. */
4495 addReply(c
,shared
.sameobjecterr
);
4499 /* Check if the element exists and get a reference */
4500 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4502 addReply(c
,shared
.czero
);
4506 /* Try to add the element to the target DB */
4507 deleteIfVolatile(dst
,c
->argv
[1]);
4508 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4509 addReply(c
,shared
.czero
);
4512 incrRefCount(c
->argv
[1]);
4515 /* OK! key moved, free the entry in the source DB */
4516 deleteKey(src
,c
->argv
[1]);
4518 addReply(c
,shared
.cone
);
4521 /* =================================== Lists ================================ */
4522 static void pushGenericCommand(redisClient
*c
, int where
) {
4526 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4528 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4529 addReply(c
,shared
.cone
);
4532 lobj
= createListObject();
4534 if (where
== REDIS_HEAD
) {
4535 listAddNodeHead(list
,c
->argv
[2]);
4537 listAddNodeTail(list
,c
->argv
[2]);
4539 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4540 incrRefCount(c
->argv
[1]);
4541 incrRefCount(c
->argv
[2]);
4543 if (lobj
->type
!= REDIS_LIST
) {
4544 addReply(c
,shared
.wrongtypeerr
);
4547 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4548 addReply(c
,shared
.cone
);
4552 if (where
== REDIS_HEAD
) {
4553 listAddNodeHead(list
,c
->argv
[2]);
4555 listAddNodeTail(list
,c
->argv
[2]);
4557 incrRefCount(c
->argv
[2]);
4560 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4563 static void lpushCommand(redisClient
*c
) {
4564 pushGenericCommand(c
,REDIS_HEAD
);
4567 static void rpushCommand(redisClient
*c
) {
4568 pushGenericCommand(c
,REDIS_TAIL
);
4571 static void llenCommand(redisClient
*c
) {
4575 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4576 checkType(c
,o
,REDIS_LIST
)) return;
4579 addReplyUlong(c
,listLength(l
));
4582 static void lindexCommand(redisClient
*c
) {
4584 int index
= atoi(c
->argv
[2]->ptr
);
4588 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4589 checkType(c
,o
,REDIS_LIST
)) return;
4592 ln
= listIndex(list
, index
);
4594 addReply(c
,shared
.nullbulk
);
4596 robj
*ele
= listNodeValue(ln
);
4597 addReplyBulk(c
,ele
);
4601 static void lsetCommand(redisClient
*c
) {
4603 int index
= atoi(c
->argv
[2]->ptr
);
4607 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4608 checkType(c
,o
,REDIS_LIST
)) return;
4611 ln
= listIndex(list
, index
);
4613 addReply(c
,shared
.outofrangeerr
);
4615 robj
*ele
= listNodeValue(ln
);
4618 listNodeValue(ln
) = c
->argv
[3];
4619 incrRefCount(c
->argv
[3]);
4620 addReply(c
,shared
.ok
);
4625 static void popGenericCommand(redisClient
*c
, int where
) {
4630 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4631 checkType(c
,o
,REDIS_LIST
)) return;
4634 if (where
== REDIS_HEAD
)
4635 ln
= listFirst(list
);
4637 ln
= listLast(list
);
4640 addReply(c
,shared
.nullbulk
);
4642 robj
*ele
= listNodeValue(ln
);
4643 addReplyBulk(c
,ele
);
4644 listDelNode(list
,ln
);
4645 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4650 static void lpopCommand(redisClient
*c
) {
4651 popGenericCommand(c
,REDIS_HEAD
);
4654 static void rpopCommand(redisClient
*c
) {
4655 popGenericCommand(c
,REDIS_TAIL
);
4658 static void lrangeCommand(redisClient
*c
) {
4660 int start
= atoi(c
->argv
[2]->ptr
);
4661 int end
= atoi(c
->argv
[3]->ptr
);
4668 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4669 || checkType(c
,o
,REDIS_LIST
)) return;
4671 llen
= listLength(list
);
4673 /* convert negative indexes */
4674 if (start
< 0) start
= llen
+start
;
4675 if (end
< 0) end
= llen
+end
;
4676 if (start
< 0) start
= 0;
4677 if (end
< 0) end
= 0;
4679 /* indexes sanity checks */
4680 if (start
> end
|| start
>= llen
) {
4681 /* Out of range start or start > end result in empty list */
4682 addReply(c
,shared
.emptymultibulk
);
4685 if (end
>= llen
) end
= llen
-1;
4686 rangelen
= (end
-start
)+1;
4688 /* Return the result in form of a multi-bulk reply */
4689 ln
= listIndex(list
, start
);
4690 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4691 for (j
= 0; j
< rangelen
; j
++) {
4692 ele
= listNodeValue(ln
);
4693 addReplyBulk(c
,ele
);
4698 static void ltrimCommand(redisClient
*c
) {
4700 int start
= atoi(c
->argv
[2]->ptr
);
4701 int end
= atoi(c
->argv
[3]->ptr
);
4703 int j
, ltrim
, rtrim
;
4707 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4708 checkType(c
,o
,REDIS_LIST
)) return;
4710 llen
= listLength(list
);
4712 /* convert negative indexes */
4713 if (start
< 0) start
= llen
+start
;
4714 if (end
< 0) end
= llen
+end
;
4715 if (start
< 0) start
= 0;
4716 if (end
< 0) end
= 0;
4718 /* indexes sanity checks */
4719 if (start
> end
|| start
>= llen
) {
4720 /* Out of range start or start > end result in empty list */
4724 if (end
>= llen
) end
= llen
-1;
4729 /* Remove list elements to perform the trim */
4730 for (j
= 0; j
< ltrim
; j
++) {
4731 ln
= listFirst(list
);
4732 listDelNode(list
,ln
);
4734 for (j
= 0; j
< rtrim
; j
++) {
4735 ln
= listLast(list
);
4736 listDelNode(list
,ln
);
4738 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4740 addReply(c
,shared
.ok
);
4743 static void lremCommand(redisClient
*c
) {
4746 listNode
*ln
, *next
;
4747 int toremove
= atoi(c
->argv
[2]->ptr
);
4751 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4752 checkType(c
,o
,REDIS_LIST
)) return;
4756 toremove
= -toremove
;
4759 ln
= fromtail
? list
->tail
: list
->head
;
4761 robj
*ele
= listNodeValue(ln
);
4763 next
= fromtail
? ln
->prev
: ln
->next
;
4764 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4765 listDelNode(list
,ln
);
4768 if (toremove
&& removed
== toremove
) break;
4772 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4773 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4776 /* This is the semantic of this command:
4777 * RPOPLPUSH srclist dstlist:
4778 * IF LLEN(srclist) > 0
4779 * element = RPOP srclist
4780 * LPUSH dstlist element
4787 * The idea is to be able to get an element from a list in a reliable way
4788 * since the element is not just returned but pushed against another list
4789 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4791 static void rpoplpushcommand(redisClient
*c
) {
4796 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4797 checkType(c
,sobj
,REDIS_LIST
)) return;
4798 srclist
= sobj
->ptr
;
4799 ln
= listLast(srclist
);
4802 addReply(c
,shared
.nullbulk
);
4804 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4805 robj
*ele
= listNodeValue(ln
);
4808 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4809 addReply(c
,shared
.wrongtypeerr
);
4813 /* Add the element to the target list (unless it's directly
4814 * passed to some BLPOP-ing client) */
4815 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4817 /* Create the list if the key does not exist */
4818 dobj
= createListObject();
4819 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4820 incrRefCount(c
->argv
[2]);
4822 dstlist
= dobj
->ptr
;
4823 listAddNodeHead(dstlist
,ele
);
4827 /* Send the element to the client as reply as well */
4828 addReplyBulk(c
,ele
);
4830 /* Finally remove the element from the source list */
4831 listDelNode(srclist
,ln
);
4832 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4837 /* ==================================== Sets ================================ */
4839 static void saddCommand(redisClient
*c
) {
4842 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4844 set
= createSetObject();
4845 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4846 incrRefCount(c
->argv
[1]);
4848 if (set
->type
!= REDIS_SET
) {
4849 addReply(c
,shared
.wrongtypeerr
);
4853 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4854 incrRefCount(c
->argv
[2]);
4856 addReply(c
,shared
.cone
);
4858 addReply(c
,shared
.czero
);
4862 static void sremCommand(redisClient
*c
) {
4865 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4866 checkType(c
,set
,REDIS_SET
)) return;
4868 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4870 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4871 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4872 addReply(c
,shared
.cone
);
4874 addReply(c
,shared
.czero
);
4878 static void smoveCommand(redisClient
*c
) {
4879 robj
*srcset
, *dstset
;
4881 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4882 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4884 /* If the source key does not exist return 0, if it's of the wrong type
4886 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4887 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4890 /* Error if the destination key is not a set as well */
4891 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4892 addReply(c
,shared
.wrongtypeerr
);
4895 /* Remove the element from the source set */
4896 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4897 /* Key not found in the src set! return zero */
4898 addReply(c
,shared
.czero
);
4901 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4902 deleteKey(c
->db
,c
->argv
[1]);
4904 /* Add the element to the destination set */
4906 dstset
= createSetObject();
4907 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4908 incrRefCount(c
->argv
[2]);
4910 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4911 incrRefCount(c
->argv
[3]);
4912 addReply(c
,shared
.cone
);
4915 static void sismemberCommand(redisClient
*c
) {
4918 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4919 checkType(c
,set
,REDIS_SET
)) return;
4921 if (dictFind(set
->ptr
,c
->argv
[2]))
4922 addReply(c
,shared
.cone
);
4924 addReply(c
,shared
.czero
);
4927 static void scardCommand(redisClient
*c
) {
4931 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4932 checkType(c
,o
,REDIS_SET
)) return;
4935 addReplyUlong(c
,dictSize(s
));
4938 static void spopCommand(redisClient
*c
) {
4942 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4943 checkType(c
,set
,REDIS_SET
)) return;
4945 de
= dictGetRandomKey(set
->ptr
);
4947 addReply(c
,shared
.nullbulk
);
4949 robj
*ele
= dictGetEntryKey(de
);
4951 addReplyBulk(c
,ele
);
4952 dictDelete(set
->ptr
,ele
);
4953 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4954 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4959 static void srandmemberCommand(redisClient
*c
) {
4963 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4964 checkType(c
,set
,REDIS_SET
)) return;
4966 de
= dictGetRandomKey(set
->ptr
);
4968 addReply(c
,shared
.nullbulk
);
4970 robj
*ele
= dictGetEntryKey(de
);
4972 addReplyBulk(c
,ele
);
4976 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4977 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4979 return dictSize(*d1
)-dictSize(*d2
);
4982 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4983 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4986 robj
*lenobj
= NULL
, *dstset
= NULL
;
4987 unsigned long j
, cardinality
= 0;
4989 for (j
= 0; j
< setsnum
; j
++) {
4993 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4994 lookupKeyRead(c
->db
,setskeys
[j
]);
4998 if (deleteKey(c
->db
,dstkey
))
5000 addReply(c
,shared
.czero
);
5002 addReply(c
,shared
.emptymultibulk
);
5006 if (setobj
->type
!= REDIS_SET
) {
5008 addReply(c
,shared
.wrongtypeerr
);
5011 dv
[j
] = setobj
->ptr
;
5013 /* Sort sets from the smallest to largest, this will improve our
5014 * algorithm's performance */
5015 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5017 /* The first thing we should output is the total number of elements...
5018 * since this is a multi-bulk write, but at this stage we don't know
5019 * the intersection set size, so we use a trick, append an empty object
5020 * to the output list and save the pointer to later modify it with the
5023 lenobj
= createObject(REDIS_STRING
,NULL
);
5025 decrRefCount(lenobj
);
5027 /* If we have a target key where to store the resulting set
5028 * create this key with an empty set inside */
5029 dstset
= createSetObject();
5032 /* Iterate all the elements of the first (smallest) set, and test
5033 * the element against all the other sets, if at least one set does
5034 * not include the element it is discarded */
5035 di
= dictGetIterator(dv
[0]);
5037 while((de
= dictNext(di
)) != NULL
) {
5040 for (j
= 1; j
< setsnum
; j
++)
5041 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5043 continue; /* at least one set does not contain the member */
5044 ele
= dictGetEntryKey(de
);
5046 addReplyBulk(c
,ele
);
5049 dictAdd(dstset
->ptr
,ele
,NULL
);
5053 dictReleaseIterator(di
);
5056 /* Store the resulting set into the target, if the intersection
5057 * is not an empty set. */
5058 deleteKey(c
->db
,dstkey
);
5059 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5060 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5061 incrRefCount(dstkey
);
5062 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5064 decrRefCount(dstset
);
5065 addReply(c
,shared
.czero
);
5069 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5074 static void sinterCommand(redisClient
*c
) {
5075 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5078 static void sinterstoreCommand(redisClient
*c
) {
5079 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5082 #define REDIS_OP_UNION 0
5083 #define REDIS_OP_DIFF 1
5084 #define REDIS_OP_INTER 2
5086 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5087 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5090 robj
*dstset
= NULL
;
5091 int j
, cardinality
= 0;
5093 for (j
= 0; j
< setsnum
; j
++) {
5097 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5098 lookupKeyRead(c
->db
,setskeys
[j
]);
5103 if (setobj
->type
!= REDIS_SET
) {
5105 addReply(c
,shared
.wrongtypeerr
);
5108 dv
[j
] = setobj
->ptr
;
5111 /* We need a temp set object to store our union. If the dstkey
5112 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5113 * this set object will be the resulting object to set into the target key*/
5114 dstset
= createSetObject();
5116 /* Iterate all the elements of all the sets, add every element a single
5117 * time to the result set */
5118 for (j
= 0; j
< setsnum
; j
++) {
5119 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5120 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5122 di
= dictGetIterator(dv
[j
]);
5124 while((de
= dictNext(di
)) != NULL
) {
5127 /* dictAdd will not add the same element multiple times */
5128 ele
= dictGetEntryKey(de
);
5129 if (op
== REDIS_OP_UNION
|| j
== 0) {
5130 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5134 } else if (op
== REDIS_OP_DIFF
) {
5135 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5140 dictReleaseIterator(di
);
5142 /* result set is empty? Exit asap. */
5143 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5146 /* Output the content of the resulting set, if not in STORE mode */
5148 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5149 di
= dictGetIterator(dstset
->ptr
);
5150 while((de
= dictNext(di
)) != NULL
) {
5153 ele
= dictGetEntryKey(de
);
5154 addReplyBulk(c
,ele
);
5156 dictReleaseIterator(di
);
5157 decrRefCount(dstset
);
5159 /* If we have a target key where to store the resulting set
5160 * create this key with the result set inside */
5161 deleteKey(c
->db
,dstkey
);
5162 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5163 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5164 incrRefCount(dstkey
);
5165 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5167 decrRefCount(dstset
);
5168 addReply(c
,shared
.czero
);
5175 static void sunionCommand(redisClient
*c
) {
5176 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5179 static void sunionstoreCommand(redisClient
*c
) {
5180 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5183 static void sdiffCommand(redisClient
*c
) {
5184 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5187 static void sdiffstoreCommand(redisClient
*c
) {
5188 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5191 /* ==================================== ZSets =============================== */
5193 /* ZSETs are ordered sets using two data structures to hold the same elements
5194 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5197 * The elements are added to an hash table mapping Redis objects to scores.
5198 * At the same time the elements are added to a skip list mapping scores
5199 * to Redis objects (so objects are sorted by scores in this "view"). */
5201 /* This skiplist implementation is almost a C translation of the original
5202 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5203 * Alternative to Balanced Trees", modified in three ways:
5204 * a) this implementation allows for repeated values.
5205 * b) the comparison is not just by key (our 'score') but by satellite data.
5206 * c) there is a back pointer, so it's a doubly linked list with the back
5207 * pointers being only at "level 1". This allows to traverse the list
5208 * from tail to head, useful for ZREVRANGE. */
5210 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5211 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5213 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5215 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5221 static zskiplist
*zslCreate(void) {
5225 zsl
= zmalloc(sizeof(*zsl
));
5228 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5229 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5230 zsl
->header
->forward
[j
] = NULL
;
5232 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5233 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5234 zsl
->header
->span
[j
] = 0;
5236 zsl
->header
->backward
= NULL
;
5241 static void zslFreeNode(zskiplistNode
*node
) {
5242 decrRefCount(node
->obj
);
5243 zfree(node
->forward
);
5248 static void zslFree(zskiplist
*zsl
) {
5249 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5251 zfree(zsl
->header
->forward
);
5252 zfree(zsl
->header
->span
);
5255 next
= node
->forward
[0];
5262 static int zslRandomLevel(void) {
5264 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5266 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5269 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5270 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5271 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5275 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5276 /* store rank that is crossed to reach the insert position */
5277 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5279 while (x
->forward
[i
] &&
5280 (x
->forward
[i
]->score
< score
||
5281 (x
->forward
[i
]->score
== score
&&
5282 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5283 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5288 /* we assume the key is not already inside, since we allow duplicated
5289 * scores, and the re-insertion of score and redis object should never
5290 * happen since the caller of zslInsert() should test in the hash table
5291 * if the element is already inside or not. */
5292 level
= zslRandomLevel();
5293 if (level
> zsl
->level
) {
5294 for (i
= zsl
->level
; i
< level
; i
++) {
5296 update
[i
] = zsl
->header
;
5297 update
[i
]->span
[i
-1] = zsl
->length
;
5301 x
= zslCreateNode(level
,score
,obj
);
5302 for (i
= 0; i
< level
; i
++) {
5303 x
->forward
[i
] = update
[i
]->forward
[i
];
5304 update
[i
]->forward
[i
] = x
;
5306 /* update span covered by update[i] as x is inserted here */
5308 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5309 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5313 /* increment span for untouched levels */
5314 for (i
= level
; i
< zsl
->level
; i
++) {
5315 update
[i
]->span
[i
-1]++;
5318 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5320 x
->forward
[0]->backward
= x
;
5326 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5327 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5329 for (i
= 0; i
< zsl
->level
; i
++) {
5330 if (update
[i
]->forward
[i
] == x
) {
5332 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5334 update
[i
]->forward
[i
] = x
->forward
[i
];
5336 /* invariant: i > 0, because update[0]->forward[0]
5337 * is always equal to x */
5338 update
[i
]->span
[i
-1] -= 1;
5341 if (x
->forward
[0]) {
5342 x
->forward
[0]->backward
= x
->backward
;
5344 zsl
->tail
= x
->backward
;
5346 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5351 /* Delete an element with matching score/object from the skiplist. */
5352 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5353 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5357 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5358 while (x
->forward
[i
] &&
5359 (x
->forward
[i
]->score
< score
||
5360 (x
->forward
[i
]->score
== score
&&
5361 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5365 /* We may have multiple elements with the same score, what we need
5366 * is to find the element with both the right score and object. */
5368 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5369 zslDeleteNode(zsl
, x
, update
);
5373 return 0; /* not found */
5375 return 0; /* not found */
5378 /* Delete all the elements with score between min and max from the skiplist.
5379 * Min and max are inclusive, so every element with min <= score <= max is deleted.
5380 * Note that this function takes the reference to the hash table view of the
5381 * sorted set, in order to remove the elements from the hash table too. */
5382 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5383 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5384 unsigned long removed
= 0;
5388 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5389 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5393 /* We may have multiple elements with the same score, what we need
5394 * is to find the element with both the right score and object. */
5396 while (x
&& x
->score
<= max
) {
5397 zskiplistNode
*next
= x
->forward
[0];
5398 zslDeleteNode(zsl
, x
, update
);
5399 dictDelete(dict
,x
->obj
);
5404 return removed
; /* not found */
5407 /* Delete all the elements with rank between start and end from the skiplist.
5408 * Start and end are inclusive. Note that start and end need to be 1-based */
5409 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5410 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5411 unsigned long traversed
= 0, removed
= 0;
5415 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5416 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5417 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5425 while (x
&& traversed
<= end
) {
5426 zskiplistNode
*next
= x
->forward
[0];
5427 zslDeleteNode(zsl
, x
, update
);
5428 dictDelete(dict
,x
->obj
);
5437 /* Find the first node having a score equal or greater than the specified one.
5438 * Returns NULL if there is no match. */
5439 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5444 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5445 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5448 /* We may have multiple elements with the same score, what we need
5449 * is to find the element with both the right score and object. */
5450 return x
->forward
[0];
5453 /* Find the rank for an element by both score and key.
5454 * Returns 0 when the element cannot be found, rank otherwise.
5455 * Note that the rank is 1-based due to the span of zsl->header to the
5457 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5459 unsigned long rank
= 0;
5463 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5464 while (x
->forward
[i
] &&
5465 (x
->forward
[i
]->score
< score
||
5466 (x
->forward
[i
]->score
== score
&&
5467 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5468 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5472 /* x might be equal to zsl->header, so test if obj is non-NULL */
5473 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5480 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5481 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5483 unsigned long traversed
= 0;
5487 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5488 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5490 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5493 if (traversed
== rank
) {
5500 /* The actual Z-commands implementations */
5502 /* This generic command implements both ZADD and ZINCRBY.
5503 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5504 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5505 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5510 zsetobj
= lookupKeyWrite(c
->db
,key
);
5511 if (zsetobj
== NULL
) {
5512 zsetobj
= createZsetObject();
5513 dictAdd(c
->db
->dict
,key
,zsetobj
);
5516 if (zsetobj
->type
!= REDIS_ZSET
) {
5517 addReply(c
,shared
.wrongtypeerr
);
5523 /* Ok now since we implement both ZADD and ZINCRBY here the code
5524 * needs to handle the two different conditions. It's all about setting
5525 * '*score', that is, the new score to set, to the right value. */
5526 score
= zmalloc(sizeof(double));
5530 /* Read the old score. If the element was not present starts from 0 */
5531 de
= dictFind(zs
->dict
,ele
);
5533 double *oldscore
= dictGetEntryVal(de
);
5534 *score
= *oldscore
+ scoreval
;
5542 /* What follows is a simple remove and re-insert operation that is common
5543 * to both ZADD and ZINCRBY... */
5544 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5545 /* case 1: New element */
5546 incrRefCount(ele
); /* added to hash */
5547 zslInsert(zs
->zsl
,*score
,ele
);
5548 incrRefCount(ele
); /* added to skiplist */
5551 addReplyDouble(c
,*score
);
5553 addReply(c
,shared
.cone
);
5558 /* case 2: Score update operation */
5559 de
= dictFind(zs
->dict
,ele
);
5560 redisAssert(de
!= NULL
);
5561 oldscore
= dictGetEntryVal(de
);
5562 if (*score
!= *oldscore
) {
5565 /* Remove and insert the element in the skip list with new score */
5566 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5567 redisAssert(deleted
!= 0);
5568 zslInsert(zs
->zsl
,*score
,ele
);
5570 /* Update the score in the hash table */
5571 dictReplace(zs
->dict
,ele
,score
);
5577 addReplyDouble(c
,*score
);
5579 addReply(c
,shared
.czero
);
5583 static void zaddCommand(redisClient
*c
) {
5586 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5587 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5590 static void zincrbyCommand(redisClient
*c
) {
5593 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5594 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5597 static void zremCommand(redisClient
*c
) {
5604 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5605 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5608 de
= dictFind(zs
->dict
,c
->argv
[2]);
5610 addReply(c
,shared
.czero
);
5613 /* Delete from the skiplist */
5614 oldscore
= dictGetEntryVal(de
);
5615 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5616 redisAssert(deleted
!= 0);
5618 /* Delete from the hash table */
5619 dictDelete(zs
->dict
,c
->argv
[2]);
5620 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5621 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5623 addReply(c
,shared
.cone
);
5626 static void zremrangebyscoreCommand(redisClient
*c
) {
5633 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5634 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5636 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5637 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5640 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5641 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5642 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5643 server
.dirty
+= deleted
;
5644 addReplyLong(c
,deleted
);
5647 static void zremrangebyrankCommand(redisClient
*c
) {
5655 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5656 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5658 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5659 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5661 llen
= zs
->zsl
->length
;
5663 /* convert negative indexes */
5664 if (start
< 0) start
= llen
+start
;
5665 if (end
< 0) end
= llen
+end
;
5666 if (start
< 0) start
= 0;
5667 if (end
< 0) end
= 0;
5669 /* indexes sanity checks */
5670 if (start
> end
|| start
>= llen
) {
5671 addReply(c
,shared
.czero
);
5674 if (end
>= llen
) end
= llen
-1;
5676 /* increment start and end because zsl*Rank functions
5677 * use 1-based rank */
5678 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5679 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5680 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5681 server
.dirty
+= deleted
;
5682 addReplyLong(c
, deleted
);
5690 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5691 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5692 unsigned long size1
, size2
;
5693 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5694 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5695 return size1
- size2
;
5698 #define REDIS_AGGR_SUM 1
5699 #define REDIS_AGGR_MIN 2
5700 #define REDIS_AGGR_MAX 3
5702 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5703 if (aggregate
== REDIS_AGGR_SUM
) {
5704 *target
= *target
+ val
;
5705 } else if (aggregate
== REDIS_AGGR_MIN
) {
5706 *target
= val
< *target
? val
: *target
;
5707 } else if (aggregate
== REDIS_AGGR_MAX
) {
5708 *target
= val
> *target
? val
: *target
;
5711 redisPanic("Unknown ZUNION/INTER aggregate type");
5715 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5717 int aggregate
= REDIS_AGGR_SUM
;
5724 /* expect zsetnum input keys to be given */
5725 zsetnum
= atoi(c
->argv
[2]->ptr
);
5727 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5731 /* test if the expected number of keys would overflow */
5732 if (3+zsetnum
> c
->argc
) {
5733 addReply(c
,shared
.syntaxerr
);
5737 /* read keys to be used for input */
5738 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5739 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5740 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5744 if (zsetobj
->type
!= REDIS_ZSET
) {
5746 addReply(c
,shared
.wrongtypeerr
);
5749 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5752 /* default all weights to 1 */
5753 src
[i
].weight
= 1.0;
5756 /* parse optional extra arguments */
5758 int remaining
= c
->argc
- j
;
5761 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5763 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5764 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5767 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5769 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5770 aggregate
= REDIS_AGGR_SUM
;
5771 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5772 aggregate
= REDIS_AGGR_MIN
;
5773 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5774 aggregate
= REDIS_AGGR_MAX
;
5777 addReply(c
,shared
.syntaxerr
);
5783 addReply(c
,shared
.syntaxerr
);
5789 /* sort sets from the smallest to largest, this will improve our
5790 * algorithm's performance */
5791 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5793 dstobj
= createZsetObject();
5794 dstzset
= dstobj
->ptr
;
5796 if (op
== REDIS_OP_INTER
) {
5797 /* skip going over all entries if the smallest zset is NULL or empty */
5798 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5799 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5800 * from small to large, all src[i > 0].dict are non-empty too */
5801 di
= dictGetIterator(src
[0].dict
);
5802 while((de
= dictNext(di
)) != NULL
) {
5803 double *score
= zmalloc(sizeof(double)), value
;
5804 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5806 for (j
= 1; j
< zsetnum
; j
++) {
5807 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5809 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5810 zunionInterAggregate(score
, value
, aggregate
);
5816 /* skip entry when not present in every source dict */
5820 robj
*o
= dictGetEntryKey(de
);
5821 dictAdd(dstzset
->dict
,o
,score
);
5822 incrRefCount(o
); /* added to dictionary */
5823 zslInsert(dstzset
->zsl
,*score
,o
);
5824 incrRefCount(o
); /* added to skiplist */
5827 dictReleaseIterator(di
);
5829 } else if (op
== REDIS_OP_UNION
) {
5830 for (i
= 0; i
< zsetnum
; i
++) {
5831 if (!src
[i
].dict
) continue;
5833 di
= dictGetIterator(src
[i
].dict
);
5834 while((de
= dictNext(di
)) != NULL
) {
5835 /* skip key when already processed */
5836 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5838 double *score
= zmalloc(sizeof(double)), value
;
5839 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5841 /* because the zsets are sorted by size, its only possible
5842 * for sets at larger indices to hold this entry */
5843 for (j
= (i
+1); j
< zsetnum
; j
++) {
5844 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5846 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5847 zunionInterAggregate(score
, value
, aggregate
);
5851 robj
*o
= dictGetEntryKey(de
);
5852 dictAdd(dstzset
->dict
,o
,score
);
5853 incrRefCount(o
); /* added to dictionary */
5854 zslInsert(dstzset
->zsl
,*score
,o
);
5855 incrRefCount(o
); /* added to skiplist */
5857 dictReleaseIterator(di
);
5860 /* unknown operator */
5861 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5864 deleteKey(c
->db
,dstkey
);
5865 if (dstzset
->zsl
->length
) {
5866 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5867 incrRefCount(dstkey
);
5868 addReplyLong(c
, dstzset
->zsl
->length
);
5871 decrRefCount(dstobj
);
5872 addReply(c
, shared
.czero
);
5877 static void zunionCommand(redisClient
*c
) {
5878 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5881 static void zinterCommand(redisClient
*c
) {
5882 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5885 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5897 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5898 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5900 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5902 } else if (c
->argc
>= 5) {
5903 addReply(c
,shared
.syntaxerr
);
5907 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5908 || checkType(c
,o
,REDIS_ZSET
)) return;
5913 /* convert negative indexes */
5914 if (start
< 0) start
= llen
+start
;
5915 if (end
< 0) end
= llen
+end
;
5916 if (start
< 0) start
= 0;
5917 if (end
< 0) end
= 0;
5919 /* indexes sanity checks */
5920 if (start
> end
|| start
>= llen
) {
5921 /* Out of range start or start > end result in empty list */
5922 addReply(c
,shared
.emptymultibulk
);
5925 if (end
>= llen
) end
= llen
-1;
5926 rangelen
= (end
-start
)+1;
5928 /* check if starting point is trivial, before searching
5929 * the element in log(N) time */
5931 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5934 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5937 /* Return the result in form of a multi-bulk reply */
5938 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5939 withscores
? (rangelen
*2) : rangelen
));
5940 for (j
= 0; j
< rangelen
; j
++) {
5942 addReplyBulk(c
,ele
);
5944 addReplyDouble(c
,ln
->score
);
5945 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5949 static void zrangeCommand(redisClient
*c
) {
5950 zrangeGenericCommand(c
,0);
5953 static void zrevrangeCommand(redisClient
*c
) {
5954 zrangeGenericCommand(c
,1);
5957 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5958 * If justcount is non-zero, just the count is returned. */
5959 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5962 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5963 int offset
= 0, limit
= -1;
5967 /* Parse the min-max interval. If one of the values is prefixed
5968 * by the "(" character, it's considered "open". For instance
5969 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5970 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5971 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5972 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5975 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5977 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5978 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5981 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5984 /* Parse "WITHSCORES": note that if the command was called with
5985 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5986 * enter the following paths to parse WITHSCORES and LIMIT. */
5987 if (c
->argc
== 5 || c
->argc
== 8) {
5988 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5993 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5997 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6002 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6003 addReply(c
,shared
.syntaxerr
);
6005 } else if (c
->argc
== (7 + withscores
)) {
6006 offset
= atoi(c
->argv
[5]->ptr
);
6007 limit
= atoi(c
->argv
[6]->ptr
);
6008 if (offset
< 0) offset
= 0;
6011 /* Ok, lookup the key and get the range */
6012 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6014 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6016 if (o
->type
!= REDIS_ZSET
) {
6017 addReply(c
,shared
.wrongtypeerr
);
6019 zset
*zsetobj
= o
->ptr
;
6020 zskiplist
*zsl
= zsetobj
->zsl
;
6022 robj
*ele
, *lenobj
= NULL
;
6023 unsigned long rangelen
= 0;
6025 /* Get the first node with the score >= min, or with
6026 * score > min if 'minex' is true. */
6027 ln
= zslFirstWithScore(zsl
,min
);
6028 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6031 /* No element matching the specified interval */
6032 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6036 /* We don't know in advance how many matching elements there
6037 * are in the list, so we push this object that will represent
6038 * the multi-bulk length in the output buffer, and will "fix"
6041 lenobj
= createObject(REDIS_STRING
,NULL
);
6043 decrRefCount(lenobj
);
6046 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6049 ln
= ln
->forward
[0];
6052 if (limit
== 0) break;
6055 addReplyBulk(c
,ele
);
6057 addReplyDouble(c
,ln
->score
);
6059 ln
= ln
->forward
[0];
6061 if (limit
> 0) limit
--;
6064 addReplyLong(c
,(long)rangelen
);
6066 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6067 withscores
? (rangelen
*2) : rangelen
);
6073 static void zrangebyscoreCommand(redisClient
*c
) {
6074 genericZrangebyscoreCommand(c
,0);
6077 static void zcountCommand(redisClient
*c
) {
6078 genericZrangebyscoreCommand(c
,1);
6081 static void zcardCommand(redisClient
*c
) {
6085 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6086 checkType(c
,o
,REDIS_ZSET
)) return;
6089 addReplyUlong(c
,zs
->zsl
->length
);
6092 static void zscoreCommand(redisClient
*c
) {
6097 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6098 checkType(c
,o
,REDIS_ZSET
)) return;
6101 de
= dictFind(zs
->dict
,c
->argv
[2]);
6103 addReply(c
,shared
.nullbulk
);
6105 double *score
= dictGetEntryVal(de
);
6107 addReplyDouble(c
,*score
);
6111 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6119 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6120 checkType(c
,o
,REDIS_ZSET
)) return;
6124 de
= dictFind(zs
->dict
,c
->argv
[2]);
6126 addReply(c
,shared
.nullbulk
);
6130 score
= dictGetEntryVal(de
);
6131 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6134 addReplyLong(c
, zsl
->length
- rank
);
6136 addReplyLong(c
, rank
-1);
6139 addReply(c
,shared
.nullbulk
);
6143 static void zrankCommand(redisClient
*c
) {
6144 zrankGenericCommand(c
, 0);
6147 static void zrevrankCommand(redisClient
*c
) {
6148 zrankGenericCommand(c
, 1);
6151 /* ========================= Hashes utility functions ======================= */
6152 #define REDIS_HASH_KEY 1
6153 #define REDIS_HASH_VALUE 2
6155 /* Check the length of a number of objects to see if we need to convert a
6156 * zipmap to a real hash. Note that we only check string encoded objects
6157 * as their string length can be queried in constant time. */
6158 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6160 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6162 for (i
= start
; i
<= end
; i
++) {
6163 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6164 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6166 convertToRealHash(subject
);
6172 /* Encode given objects in-place when the hash uses a dict. */
6173 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6174 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6175 if (o1
) *o1
= tryObjectEncoding(*o1
);
6176 if (o2
) *o2
= tryObjectEncoding(*o2
);
6180 /* Get the value from a hash identified by key. Returns either a string
6181 * object or NULL if the value cannot be found. The refcount of the object
6182 * is always increased by 1 when the value was found. */
6183 static robj
*hashGet(robj
*o
, robj
*key
) {
6185 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6188 key
= getDecodedObject(key
);
6189 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6190 value
= createStringObject((char*)v
,vlen
);
6194 dictEntry
*de
= dictFind(o
->ptr
,key
);
6196 value
= dictGetEntryVal(de
);
6197 incrRefCount(value
);
6203 /* Test if the key exists in the given hash. Returns 1 if the key
6204 * exists and 0 when it doesn't. */
6205 static int hashExists(robj
*o
, robj
*key
) {
6206 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6207 key
= getDecodedObject(key
);
6208 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6214 if (dictFind(o
->ptr
,key
) != NULL
) {
6221 /* Add an element, discard the old if the key already exists.
6222 * Return 0 on insert and 1 on update. */
6223 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6225 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6226 key
= getDecodedObject(key
);
6227 value
= getDecodedObject(value
);
6228 o
->ptr
= zipmapSet(o
->ptr
,
6229 key
->ptr
,sdslen(key
->ptr
),
6230 value
->ptr
,sdslen(value
->ptr
), &update
);
6232 decrRefCount(value
);
6234 /* Check if the zipmap needs to be upgraded to a real hash table */
6235 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6236 convertToRealHash(o
);
6238 if (dictReplace(o
->ptr
,key
,value
)) {
6245 incrRefCount(value
);
6250 /* Delete an element from a hash.
6251 * Return 1 on deleted and 0 on not found. */
6252 static int hashDelete(robj
*o
, robj
*key
) {
6254 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6255 key
= getDecodedObject(key
);
6256 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6259 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6260 /* Always check if the dictionary needs a resize after a delete. */
6261 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6266 /* Return the number of elements in a hash. */
6267 static unsigned long hashLength(robj
*o
) {
6268 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6269 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6272 /* Structure to hold hash iteration abstraction. Note that iteration over
6273 * hashes involves both fields and values. Because it is possible that
6274 * not both are required, store pointers in the iterator to avoid
6275 * unnecessary memory allocation for fields/values. */
6279 unsigned char *zk
, *zv
;
6280 unsigned int zklen
, zvlen
;
6286 static hashIterator
*hashInitIterator(robj
*subject
) {
6287 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6288 hi
->encoding
= subject
->encoding
;
6289 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6290 hi
->zi
= zipmapRewind(subject
->ptr
);
6291 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6292 hi
->di
= dictGetIterator(subject
->ptr
);
6299 static void hashReleaseIterator(hashIterator
*hi
) {
6300 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6301 dictReleaseIterator(hi
->di
);
6306 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6307 * could be found and REDIS_ERR when the iterator reaches the end. */
6308 static int hashNext(hashIterator
*hi
) {
6309 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6310 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6311 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6313 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6318 /* Get key or value object at current iteration position.
6319 * This increases the refcount of the field object by 1. */
6320 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6322 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6323 if (what
& REDIS_HASH_KEY
) {
6324 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6326 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6329 if (what
& REDIS_HASH_KEY
) {
6330 o
= dictGetEntryKey(hi
->de
);
6332 o
= dictGetEntryVal(hi
->de
);
6339 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6340 robj
*o
= lookupKeyWrite(c
->db
,key
);
6342 o
= createHashObject();
6343 dictAdd(c
->db
->dict
,key
,o
);
6346 if (o
->type
!= REDIS_HASH
) {
6347 addReply(c
,shared
.wrongtypeerr
);
6354 /* ============================= Hash commands ============================== */
6355 static void hsetCommand(redisClient
*c
) {
6359 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6360 hashTryConversion(o
,c
->argv
,2,3);
6361 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6362 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6363 addReply(c
, update
? shared
.czero
: shared
.cone
);
6367 static void hsetnxCommand(redisClient
*c
) {
6369 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6370 hashTryConversion(o
,c
->argv
,2,3);
6372 if (hashExists(o
, c
->argv
[2])) {
6373 addReply(c
, shared
.czero
);
6375 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6376 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6377 addReply(c
, shared
.cone
);
6382 static void hmsetCommand(redisClient
*c
) {
6386 if ((c
->argc
% 2) == 1) {
6387 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6391 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6392 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6393 for (i
= 2; i
< c
->argc
; i
+= 2) {
6394 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6395 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6397 addReply(c
, shared
.ok
);
6401 static void hincrbyCommand(redisClient
*c
) {
6402 long long value
, incr
;
6403 robj
*o
, *current
, *new;
6405 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6406 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6407 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6408 if (current
->encoding
== REDIS_ENCODING_RAW
)
6409 value
= strtoll(current
->ptr
,NULL
,10);
6410 else if (current
->encoding
== REDIS_ENCODING_INT
)
6411 value
= (long)current
->ptr
;
6413 redisAssert(1 != 1);
6414 decrRefCount(current
);
6420 new = createStringObjectFromLongLong(value
);
6421 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6422 hashSet(o
,c
->argv
[2],new);
6424 addReplyLongLong(c
,value
);
6428 static void hgetCommand(redisClient
*c
) {
6430 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6431 checkType(c
,o
,REDIS_HASH
)) return;
6433 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6434 addReplyBulk(c
,value
);
6435 decrRefCount(value
);
6437 addReply(c
,shared
.nullbulk
);
6441 static void hmgetCommand(redisClient
*c
) {
6444 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6445 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6446 addReply(c
,shared
.wrongtypeerr
);
6449 /* Note the check for o != NULL happens inside the loop. This is
6450 * done because objects that cannot be found are considered to be
6451 * an empty hash. The reply should then be a series of NULLs. */
6452 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6453 for (i
= 2; i
< c
->argc
; i
++) {
6454 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6455 addReplyBulk(c
,value
);
6456 decrRefCount(value
);
6458 addReply(c
,shared
.nullbulk
);
6463 static void hdelCommand(redisClient
*c
) {
6465 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6466 checkType(c
,o
,REDIS_HASH
)) return;
6468 if (hashDelete(o
,c
->argv
[2])) {
6469 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6470 addReply(c
,shared
.cone
);
6473 addReply(c
,shared
.czero
);
6477 static void hlenCommand(redisClient
*c
) {
6479 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6480 checkType(c
,o
,REDIS_HASH
)) return;
6482 addReplyUlong(c
,hashLength(o
));
6485 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6486 robj
*o
, *lenobj
, *obj
;
6487 unsigned long count
= 0;
6490 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6491 || checkType(c
,o
,REDIS_HASH
)) return;
6493 lenobj
= createObject(REDIS_STRING
,NULL
);
6495 decrRefCount(lenobj
);
6497 hi
= hashInitIterator(o
);
6498 while (hashNext(hi
) != REDIS_ERR
) {
6499 if (flags
& REDIS_HASH_KEY
) {
6500 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6501 addReplyBulk(c
,obj
);
6505 if (flags
& REDIS_HASH_VALUE
) {
6506 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6507 addReplyBulk(c
,obj
);
6512 hashReleaseIterator(hi
);
6514 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6517 static void hkeysCommand(redisClient
*c
) {
6518 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6521 static void hvalsCommand(redisClient
*c
) {
6522 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6525 static void hgetallCommand(redisClient
*c
) {
6526 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6529 static void hexistsCommand(redisClient
*c
) {
6531 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6532 checkType(c
,o
,REDIS_HASH
)) return;
6534 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6537 static void convertToRealHash(robj
*o
) {
6538 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6539 unsigned int klen
, vlen
;
6540 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6542 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6543 p
= zipmapRewind(zm
);
6544 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6545 robj
*keyobj
, *valobj
;
6547 keyobj
= createStringObject((char*)key
,klen
);
6548 valobj
= createStringObject((char*)val
,vlen
);
6549 keyobj
= tryObjectEncoding(keyobj
);
6550 valobj
= tryObjectEncoding(valobj
);
6551 dictAdd(dict
,keyobj
,valobj
);
6553 o
->encoding
= REDIS_ENCODING_HT
;
6558 /* ========================= Non type-specific commands ==================== */
6560 static void flushdbCommand(redisClient
*c
) {
6561 server
.dirty
+= dictSize(c
->db
->dict
);
6562 dictEmpty(c
->db
->dict
);
6563 dictEmpty(c
->db
->expires
);
6564 addReply(c
,shared
.ok
);
6567 static void flushallCommand(redisClient
*c
) {
6568 server
.dirty
+= emptyDb();
6569 addReply(c
,shared
.ok
);
6570 if (server
.bgsavechildpid
!= -1) {
6571 kill(server
.bgsavechildpid
,SIGKILL
);
6572 rdbRemoveTempFile(server
.bgsavechildpid
);
6574 rdbSave(server
.dbfilename
);
6578 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6579 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6581 so
->pattern
= pattern
;
6585 /* Return the value associated to the key with a name obtained
6586 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6587 * The returned object will always have its refcount increased by 1
6588 * when it is non-NULL. */
6589 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6592 robj keyobj
, fieldobj
, *o
;
6593 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6594 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6598 char buf
[REDIS_SORTKEY_MAX
+1];
6599 } keyname
, fieldname
;
6601 /* If the pattern is "#" return the substitution object itself in order
6602 * to implement the "SORT ... GET #" feature. */
6603 spat
= pattern
->ptr
;
6604 if (spat
[0] == '#' && spat
[1] == '\0') {
6605 incrRefCount(subst
);
6609 /* The substitution object may be specially encoded. If so we create
6610 * a decoded object on the fly. Otherwise getDecodedObject will just
6611 * increment the ref count, that we'll decrement later. */
6612 subst
= getDecodedObject(subst
);
6615 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6616 p
= strchr(spat
,'*');
6618 decrRefCount(subst
);
6622 /* Find out if we're dealing with a hash dereference. */
6623 if ((f
= strstr(p
+1, "->")) != NULL
) {
6624 fieldlen
= sdslen(spat
)-(f
-spat
);
6625 /* this also copies \0 character */
6626 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6627 fieldname
.len
= fieldlen
-2;
6633 sublen
= sdslen(ssub
);
6634 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6635 memcpy(keyname
.buf
,spat
,prefixlen
);
6636 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6637 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6638 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6639 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6640 decrRefCount(subst
);
6642 /* Lookup substituted key */
6643 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6644 o
= lookupKeyRead(db
,&keyobj
);
6645 if (o
== NULL
) return NULL
;
6648 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6650 /* Retrieve value from hash by the field name. This operation
6651 * already increases the refcount of the returned object. */
6652 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6653 o
= hashGet(o
, &fieldobj
);
6655 if (o
->type
!= REDIS_STRING
) return NULL
;
6657 /* Every object that this function returns needs to have its refcount
6658 * increased. sortCommand decreases it again. */
6665 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6666 * the additional parameter is not standard but a BSD-specific we have to
6667 * pass sorting parameters via the global 'server' structure */
6668 static int sortCompare(const void *s1
, const void *s2
) {
6669 const redisSortObject
*so1
= s1
, *so2
= s2
;
6672 if (!server
.sort_alpha
) {
6673 /* Numeric sorting. Here it's trivial as we precomputed scores */
6674 if (so1
->u
.score
> so2
->u
.score
) {
6676 } else if (so1
->u
.score
< so2
->u
.score
) {
6682 /* Alphanumeric sorting */
6683 if (server
.sort_bypattern
) {
6684 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6685 /* At least one compare object is NULL */
6686 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6688 else if (so1
->u
.cmpobj
== NULL
)
6693 /* We have both the objects, use strcoll */
6694 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6697 /* Compare elements directly. */
6698 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6701 return server
.sort_desc
? -cmp
: cmp
;
6704 /* The SORT command is the most complex command in Redis. Warning: this code
6705 * is optimized for speed and a bit less for readability */
/* Overview of what the visible code does:
 *  - the key to sort is c->argv[1]; it must hold a list, a set or a sorted
 *    set, otherwise a wrong type error is replied. A missing key yields an
 *    empty multi bulk reply.
 *  - the remaining arguments are parsed as the options ASC, DESC, ALPHA,
 *    LIMIT <start> <count>, STORE <key>, BY <pattern> and GET <pattern>;
 *    anything else produces a syntax error reply.
 *  - the elements are loaded into 'vector', their sort weights computed,
 *    and the vector sorted with sortCompare().
 *  - the result is either sent to the client or, when STORE was given,
 *    saved as a list at the destination key. */
6706 static void sortCommand(redisClient
*c
) {
6709 int desc
= 0, alpha
= 0;
6710 int limit_start
= 0, limit_count
= -1, start
, end
;
6711 int j
, dontsort
= 0, vectorlen
;
6712 int getop
= 0; /* GET operation counter */
6713 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6714 redisSortObject
*vector
; /* Resulting vector to sort */
6716 /* Lookup the key to sort. It must be of the right types */
6717 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6718 if (sortval
== NULL
) {
6719 addReply(c
,shared
.emptymultibulk
);
6722 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6723 sortval
->type
!= REDIS_ZSET
)
6725 addReply(c
,shared
.wrongtypeerr
);
6729 /* Create a list of operations to perform for every sorted element.
6730 * Operations can be GET/DEL/INCR/DECR */
6731 operations
= listCreate();
/* Nodes of 'operations' are released with zfree() by listRelease(). */
6732 listSetFreeMethod(operations
,zfree
);
6735 /* Now we need to protect sortval incrementing its count, in the future
6736 * SORT may have options able to overwrite/delete keys during the sorting
6737 * and the sorted key itself may get destroyed */
6738 incrRefCount(sortval
);
6740 /* The SORT command has an SQL-alike syntax, parse it */
6741 while(j
< c
->argc
) {
/* Number of arguments that follow the option currently examined. */
6742 int leftargs
= c
->argc
-j
-1;
6743 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6745 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6747 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6749 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
/* atoi() performs no validation of the two LIMIT arguments. */
6750 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6751 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6753 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6754 storekey
= c
->argv
[j
+1];
6756 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6757 sortby
= c
->argv
[j
+1];
6758 /* If the BY pattern does not contain '*', i.e. it is constant,
6759 * we don't need to sort nor to lookup the weight keys. */
6760 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6762 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6763 listAddNodeTail(operations
,createSortOperation(
6764 REDIS_SORT_GET
,c
->argv
[j
+1]));
/* Unknown option: undo the protection taken above and reply with a
 * syntax error. */
6768 decrRefCount(sortval
);
6769 listRelease(operations
);
6770 addReply(c
,shared
.syntaxerr
);
6776 /* Load the sorting vector with all the objects to sort */
6777 switch(sortval
->type
) {
6778 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6779 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6780 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6781 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
6783 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6786 if (sortval
->type
== REDIS_LIST
) {
6787 list
*list
= sortval
->ptr
;
6791 listRewind(list
,&li
);
6792 while((ln
= listNext(&li
))) {
6793 robj
*ele
= ln
->value
;
6794 vector
[j
].obj
= ele
;
6795 vector
[j
].u
.score
= 0;
6796 vector
[j
].u
.cmpobj
= NULL
;
6804 if (sortval
->type
== REDIS_SET
) {
6807 zset
*zs
= sortval
->ptr
;
6811 di
= dictGetIterator(set
);
6812 while((setele
= dictNext(di
)) != NULL
) {
6813 vector
[j
].obj
= dictGetEntryKey(setele
);
6814 vector
[j
].u
.score
= 0;
6815 vector
[j
].u
.cmpobj
= NULL
;
6818 dictReleaseIterator(di
);
6820 redisAssert(j
== vectorlen
);
6822 /* Now it's time to load the right scores in the sorting vector */
6823 if (dontsort
== 0) {
6824 for (j
= 0; j
< vectorlen
; j
++) {
6827 /* lookup value to sort by */
6828 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6829 if (!byval
) continue;
6831 /* use object itself to sort by */
6832 byval
= vector
[j
].obj
;
6836 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6838 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6839 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6840 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
6841 /* Don't need to decode the object if it's
6842 * integer-encoded (the only encoding supported) so
6843 * far. We can just cast it */
6844 vector
[j
].u
.score
= (long)byval
->ptr
;
6846 redisAssert(1 != 1);
6850 /* when the object was retrieved using lookupKeyByPattern,
6851 * its refcount needs to be decreased. */
6853 decrRefCount(byval
);
6858 /* We are ready to sort the vector... perform a bit of sanity check
6859 * on the LIMIT option too. We'll use a partial version of quicksort. */
6860 start
= (limit_start
< 0) ? 0 : limit_start
;
6861 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6862 if (start
>= vectorlen
) {
6863 start
= vectorlen
-1;
6866 if (end
>= vectorlen
) end
= vectorlen
-1;
6868 if (dontsort
== 0) {
/* sortCompare() reads its parameters from these globals. */
6869 server
.sort_desc
= desc
;
6870 server
.sort_alpha
= alpha
;
6871 server
.sort_bypattern
= sortby
? 1 : 0;
6872 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6873 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6875 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6878 /* Send command output to the output buffer, performing the specified
6879 * GET/DEL/INCR/DECR operations if any. */
6880 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6881 if (storekey
== NULL
) {
6882 /* STORE option not specified, send the sorting result to client */
6883 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6884 for (j
= start
; j
<= end
; j
++) {
6888 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6889 listRewind(operations
,&li
);
6890 while((ln
= listNext(&li
))) {
6891 redisSortOperation
*sop
= ln
->value
;
6892 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6895 if (sop
->type
== REDIS_SORT_GET
) {
6897 addReply(c
,shared
.nullbulk
);
6899 addReplyBulk(c
,val
);
6903 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6908 robj
*listObject
= createListObject();
6909 list
*listPtr
= (list
*) listObject
->ptr
;
6911 /* STORE option specified, set the sorting result as a List object */
6912 for (j
= start
; j
<= end
; j
++) {
6917 listAddNodeTail(listPtr
,vector
[j
].obj
);
6918 incrRefCount(vector
[j
].obj
);
6920 listRewind(operations
,&li
);
6921 while((ln
= listNext(&li
))) {
6922 redisSortOperation
*sop
= ln
->value
;
6923 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6926 if (sop
->type
== REDIS_SORT_GET
) {
6928 listAddNodeTail(listPtr
,createStringObject("",0));
6930 /* We should do a incrRefCount on val because it is
6931 * added to the list, but also a decrRefCount because
6932 * it is returned by lookupKeyByPattern. This results
6933 * in doing nothing at all. */
6934 listAddNodeTail(listPtr
,val
);
6937 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6941 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6942 incrRefCount(storekey
);
6944 /* Note: we add 1 because the DB is dirty anyway since even if the
6945 * SORT result is empty a new key is set and maybe the old content
6947 server
.dirty
+= 1+outputlen
;
6948 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6952 decrRefCount(sortval
);
6953 listRelease(operations
);
6954 for (j
= 0; j
< vectorlen
; j
++) {
6955 if (alpha
&& vector
[j
].u
.cmpobj
)
6956 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer: it must be large enough for the longest
 * possible result plus the terminating NUL (32 bytes are always enough).
 * 'n' is the amount of bytes to render.
 *
 * Values below 1024 are printed as an integer followed by 'B'; larger
 * values are scaled by powers of 1024 and printed with two decimals and
 * the suffix K, M, G or T.  Everything from one tebibyte upwards uses the
 * T suffix, so the buffer is always written whatever the value of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else {
        /* Without this branch 's' was left untouched for huge values and
         * the caller printed whatever the buffer happened to contain. */
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    }
}
6982 /* Create the string returned by the INFO command. This is decoupled
6983 * by the INFO command itself as we need to report the same information
6984 * on memory corruption problems. */
/* The report is a sequence of "name:value\r\n" lines accumulated into an
 * sds string with sdscatprintf(). The arguments passed to each call must
 * stay in the same order as the conversion specifications of its format
 * string. */
6985 static sds
genRedisInfoString(void) {
/* Seconds elapsed since server.stat_starttime. */
6987 time_t uptime
= time(NULL
)-server
.stat_starttime
;
/* Human readable rendering of the memory currently in use. */
6991 bytesToHuman(hmem
,zmalloc_used_memory());
6992 info
= sdscatprintf(sdsempty(),
6993 "redis_version:%s\r\n"
6995 "multiplexing_api:%s\r\n"
6996 "process_id:%ld\r\n"
6997 "uptime_in_seconds:%ld\r\n"
6998 "uptime_in_days:%ld\r\n"
6999 "connected_clients:%d\r\n"
7000 "connected_slaves:%d\r\n"
7001 "blocked_clients:%d\r\n"
7002 "used_memory:%zu\r\n"
7003 "used_memory_human:%s\r\n"
7004 "changes_since_last_save:%lld\r\n"
7005 "bgsave_in_progress:%d\r\n"
7006 "last_save_time:%ld\r\n"
7007 "bgrewriteaof_in_progress:%d\r\n"
7008 "total_connections_received:%lld\r\n"
7009 "total_commands_processed:%lld\r\n"
7010 "expired_keys:%lld\r\n"
7011 "hash_max_zipmap_entries:%ld\r\n"
7012 "hash_max_zipmap_value:%ld\r\n"
7013 "pubsub_channels:%ld\r\n"
7014 "pubsub_patterns:%u\r\n"
7018 (sizeof(long) == 8) ? "64" : "32",
/* Slaves are kept in server.clients too, so they are subtracted here. */
7023 listLength(server
.clients
)-listLength(server
.slaves
),
7024 listLength(server
.slaves
),
7025 server
.blpop_blocked_clients
,
7026 zmalloc_used_memory(),
7029 server
.bgsavechildpid
!= -1,
7031 server
.bgrewritechildpid
!= -1,
7032 server
.stat_numconnections
,
7033 server
.stat_numcommands
,
7034 server
.stat_expiredkeys
,
7035 server
.hash_max_zipmap_entries
,
7036 server
.hash_max_zipmap_value
,
7037 dictSize(server
.pubsub_channels
),
7038 listLength(server
.pubsub_patterns
),
7039 server
.vm_enabled
!= 0,
7040 server
.masterhost
== NULL
? "master" : "slave"
/* Replication details, appended only when a master host is configured. */
7042 if (server
.masterhost
) {
7043 info
= sdscatprintf(info
,
7044 "master_host:%s\r\n"
7045 "master_port:%d\r\n"
7046 "master_link_status:%s\r\n"
7047 "master_last_io_seconds_ago:%d\r\n"
7050 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
/* -1 is reported when there is no master client object. */
7052 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
/* Virtual memory statistics, appended only when VM is enabled. */
7055 if (server
.vm_enabled
) {
7057 info
= sdscatprintf(info
,
7058 "vm_conf_max_memory:%llu\r\n"
7059 "vm_conf_page_size:%llu\r\n"
7060 "vm_conf_pages:%llu\r\n"
7061 "vm_stats_used_pages:%llu\r\n"
7062 "vm_stats_swapped_objects:%llu\r\n"
7063 "vm_stats_swappin_count:%llu\r\n"
7064 "vm_stats_swappout_count:%llu\r\n"
7065 "vm_stats_io_newjobs_len:%lu\r\n"
7066 "vm_stats_io_processing_len:%lu\r\n"
7067 "vm_stats_io_processed_len:%lu\r\n"
7068 "vm_stats_io_active_threads:%lu\r\n"
7069 "vm_stats_blocked_clients:%lu\r\n"
7070 ,(unsigned long long) server
.vm_max_memory
,
7071 (unsigned long long) server
.vm_page_size
,
7072 (unsigned long long) server
.vm_pages
,
7073 (unsigned long long) server
.vm_stats_used_pages
,
7074 (unsigned long long) server
.vm_stats_swapped_objects
,
7075 (unsigned long long) server
.vm_stats_swapins
,
7076 (unsigned long long) server
.vm_stats_swapouts
,
7077 (unsigned long) listLength(server
.io_newjobs
),
7078 (unsigned long) listLength(server
.io_processing
),
7079 (unsigned long) listLength(server
.io_processed
),
7080 (unsigned long) server
.io_active_threads
,
7081 (unsigned long) server
.vm_blocked_clients
/* One "dbN:keys=...,expires=..." line for every database that holds at
 * least one key or one expire entry. */
7085 for (j
= 0; j
< server
.dbnum
; j
++) {
7086 long long keys
, vkeys
;
7088 keys
= dictSize(server
.db
[j
].dict
);
7089 vkeys
= dictSize(server
.db
[j
].expires
);
7090 if (keys
|| vkeys
) {
7091 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7098 static void infoCommand(redisClient
*c
) {
7099 sds info
= genRedisInfoString();
7100 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7101 (unsigned long)sdslen(info
)));
7102 addReplySds(c
,info
);
7103 addReply(c
,shared
.crlf
);
7106 static void monitorCommand(redisClient
*c
) {
7107 /* ignore MONITOR if aleady slave or in monitor mode */
7108 if (c
->flags
& REDIS_SLAVE
) return;
7110 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7112 listAddNodeTail(server
.monitors
,c
);
7113 addReply(c
,shared
.ok
);
7116 /* ================================= Expire ================================= */
7117 static int removeExpire(redisDb
*db
, robj
*key
) {
7118 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7125 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7126 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7134 /* Return the expire time of the specified key, or -1 if no expire
7135 * is associated with this key (i.e. the key is non volatile) */
7136 static time_t getExpire(redisDb
*db
, robj
*key
) {
7139 /* No expire? return ASAP */
7140 if (dictSize(db
->expires
) == 0 ||
7141 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7143 return (time_t) dictGetEntryVal(de
);
7146 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7150 /* No expire? return ASAP */
7151 if (dictSize(db
->expires
) == 0 ||
7152 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7154 /* Lookup the expire */
7155 when
= (time_t) dictGetEntryVal(de
);
7156 if (time(NULL
) <= when
) return 0;
7158 /* Delete the key */
7159 dictDelete(db
->expires
,key
);
7160 server
.stat_expiredkeys
++;
7161 return dictDelete(db
->dict
,key
) == DICT_OK
;
7164 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7167 /* No expire? return ASAP */
7168 if (dictSize(db
->expires
) == 0 ||
7169 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7171 /* Delete the key */
7173 server
.stat_expiredkeys
++;
7174 dictDelete(db
->expires
,key
);
7175 return dictDelete(db
->dict
,key
) == DICT_OK
;
7178 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7182 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7186 de
= dictFind(c
->db
->dict
,key
);
7188 addReply(c
,shared
.czero
);
7192 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7193 addReply(c
, shared
.cone
);
7196 time_t when
= time(NULL
)+seconds
;
7197 if (setExpire(c
->db
,key
,when
)) {
7198 addReply(c
,shared
.cone
);
7201 addReply(c
,shared
.czero
);
7207 static void expireCommand(redisClient
*c
) {
7208 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7211 static void expireatCommand(redisClient
*c
) {
7212 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7215 static void ttlCommand(redisClient
*c
) {
7219 expire
= getExpire(c
->db
,c
->argv
[1]);
7221 ttl
= (int) (expire
-time(NULL
));
7222 if (ttl
< 0) ttl
= -1;
7224 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7227 /* ================================ MULTI/EXEC ============================== */
7229 /* Client state initialization for MULTI/EXEC */
7230 static void initClientMultiState(redisClient
*c
) {
7231 c
->mstate
.commands
= NULL
;
7232 c
->mstate
.count
= 0;
7235 /* Release all the resources associated with MULTI/EXEC state */
7236 static void freeClientMultiState(redisClient
*c
) {
7239 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7241 multiCmd
*mc
= c
->mstate
.commands
+j
;
7243 for (i
= 0; i
< mc
->argc
; i
++)
7244 decrRefCount(mc
->argv
[i
]);
7247 zfree(c
->mstate
.commands
);
7250 /* Add a new command into the MULTI commands queue */
7251 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7255 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7256 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7257 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7260 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7261 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7262 for (j
= 0; j
< c
->argc
; j
++)
7263 incrRefCount(mc
->argv
[j
]);
7267 static void multiCommand(redisClient
*c
) {
7268 c
->flags
|= REDIS_MULTI
;
7269 addReply(c
,shared
.ok
);
7272 static void discardCommand(redisClient
*c
) {
7273 if (!(c
->flags
& REDIS_MULTI
)) {
7274 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7278 freeClientMultiState(c
);
7279 initClientMultiState(c
);
7280 c
->flags
&= (~REDIS_MULTI
);
7281 addReply(c
,shared
.ok
);
7284 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7285 * implememntation for more information. */
7286 static void execCommandReplicateMulti(redisClient
*c
) {
7287 struct redisCommand
*cmd
;
7288 robj
*multistring
= createStringObject("MULTI",5);
7290 cmd
= lookupCommand("multi");
7291 if (server
.appendonly
)
7292 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7293 if (listLength(server
.slaves
))
7294 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7295 decrRefCount(multistring
);
7298 static void execCommand(redisClient
*c
) {
7303 if (!(c
->flags
& REDIS_MULTI
)) {
7304 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7308 /* Replicate a MULTI request now that we are sure the block is executed.
7309 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7310 * both the AOF and the replication link will have the same consistency
7311 * and atomicity guarantees. */
7312 execCommandReplicateMulti(c
);
7314 /* Exec all the queued commands */
7315 orig_argv
= c
->argv
;
7316 orig_argc
= c
->argc
;
7317 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7318 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7319 c
->argc
= c
->mstate
.commands
[j
].argc
;
7320 c
->argv
= c
->mstate
.commands
[j
].argv
;
7321 call(c
,c
->mstate
.commands
[j
].cmd
);
7323 c
->argv
= orig_argv
;
7324 c
->argc
= orig_argc
;
7325 freeClientMultiState(c
);
7326 initClientMultiState(c
);
7327 c
->flags
&= (~REDIS_MULTI
);
7328 /* Make sure the EXEC command is always replicated / AOF, since we
7329 * always send the MULTI command (we can't know beforehand if the
7330 * next operations will contain at least a modification to the DB). */
7334 /* =========================== Blocking Operations ========================= */
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
7365 /* Set a client in blocking mode for the specified key, with the specified
7367 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7372 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7373 c
->blockingkeysnum
= numkeys
;
7374 c
->blockingto
= timeout
;
7375 for (j
= 0; j
< numkeys
; j
++) {
7376 /* Add the key in the client structure, to map clients -> keys */
7377 c
->blockingkeys
[j
] = keys
[j
];
7378 incrRefCount(keys
[j
]);
7380 /* And in the other "side", to map keys -> clients */
7381 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7385 /* For every key we take a list of clients blocked for it */
7387 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7388 incrRefCount(keys
[j
]);
7389 assert(retval
== DICT_OK
);
7391 l
= dictGetEntryVal(de
);
7393 listAddNodeTail(l
,c
);
7395 /* Mark the client as a blocked client */
7396 c
->flags
|= REDIS_BLOCKED
;
7397 server
.blpop_blocked_clients
++;
7400 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7401 static void unblockClientWaitingData(redisClient
*c
) {
7406 assert(c
->blockingkeys
!= NULL
);
7407 /* The client may wait for multiple keys, so unblock it for every key. */
7408 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7409 /* Remove this client from the list of clients waiting for this key. */
7410 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7412 l
= dictGetEntryVal(de
);
7413 listDelNode(l
,listSearchKey(l
,c
));
7414 /* If the list is empty we need to remove it to avoid wasting memory */
7415 if (listLength(l
) == 0)
7416 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7417 decrRefCount(c
->blockingkeys
[j
]);
7419 /* Cleanup the client structure */
7420 zfree(c
->blockingkeys
);
7421 c
->blockingkeys
= NULL
;
7422 c
->flags
&= (~REDIS_BLOCKED
);
7423 server
.blpop_blocked_clients
--;
7424 /* We want to process data if there is some command waiting
7425 * in the input buffer. Note that this is safe even if
7426 * unblockClientWaitingData() gets called from freeClient() because
7427 * freeClient() will be smart enough to call this function
7428 * *after* c->querybuf was set to NULL. */
7429 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7432 /* This should be called from any function PUSHing into lists.
7433 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7434 * 'ele' is the element pushed.
7436 * If the function returns 0 there was no client waiting for a list push
7439 * If the function returns 1 there was a client waiting for a list push
7440 * against this key, the element was passed to this client thus it's not
7441 * needed to actually add it to the list and the caller should return asap. */
7442 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7443 struct dictEntry
*de
;
7444 redisClient
*receiver
;
7448 de
= dictFind(c
->db
->blockingkeys
,key
);
7449 if (de
== NULL
) return 0;
7450 l
= dictGetEntryVal(de
);
7453 receiver
= ln
->value
;
7455 addReplySds(receiver
,sdsnew("*2\r\n"));
7456 addReplyBulk(receiver
,key
);
7457 addReplyBulk(receiver
,ele
);
7458 unblockClientWaitingData(receiver
);
7462 /* Blocking RPOP/LPOP */
7463 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7468 for (j
= 1; j
< c
->argc
-1; j
++) {
7469 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7471 if (o
->type
!= REDIS_LIST
) {
7472 addReply(c
,shared
.wrongtypeerr
);
7475 list
*list
= o
->ptr
;
7476 if (listLength(list
) != 0) {
7477 /* If the list contains elements fall back to the usual
7478 * non-blocking POP operation */
7479 robj
*argv
[2], **orig_argv
;
7482 /* We need to alter the command arguments before to call
7483 * popGenericCommand() as the command takes a single key. */
7484 orig_argv
= c
->argv
;
7485 orig_argc
= c
->argc
;
7486 argv
[1] = c
->argv
[j
];
7490 /* Also the return value is different, we need to output
7491 * the multi bulk reply header and the key name. The
7492 * "real" command will add the last element (the value)
7493 * for us. If this souds like an hack to you it's just
7494 * because it is... */
7495 addReplySds(c
,sdsnew("*2\r\n"));
7496 addReplyBulk(c
,argv
[1]);
7497 popGenericCommand(c
,where
);
7499 /* Fix the client structure with the original stuff */
7500 c
->argv
= orig_argv
;
7501 c
->argc
= orig_argc
;
7507 /* If the list is empty or the key does not exists we must block */
7508 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7509 if (timeout
> 0) timeout
+= time(NULL
);
7510 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7513 static void blpopCommand(redisClient
*c
) {
7514 blockingPopGenericCommand(c
,REDIS_HEAD
);
7517 static void brpopCommand(redisClient
*c
) {
7518 blockingPopGenericCommand(c
,REDIS_TAIL
);
7521 /* =============================== Replication ============================= */
7523 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7524 ssize_t nwritten
, ret
= size
;
7525 time_t start
= time(NULL
);
7529 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7530 nwritten
= write(fd
,ptr
,size
);
7531 if (nwritten
== -1) return -1;
7535 if ((time(NULL
)-start
) > timeout
) {
7543 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7544 ssize_t nread
, totread
= 0;
7545 time_t start
= time(NULL
);
7549 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7550 nread
= read(fd
,ptr
,size
);
7551 if (nread
== -1) return -1;
7556 if ((time(NULL
)-start
) > timeout
) {
/* Read a line terminated by '\n' from 'fd' into 'ptr', one byte at a time
 * through syncRead() (each byte is given 'timeout' seconds).
 *
 * 'size' is the size of the destination buffer.  At most size-1 characters
 * are stored and the buffer is always NUL terminated as soon as at least
 * one byte has been processed.  The newline is not stored, and a '\r'
 * immediately before it is stripped as well.
 *
 * Returns the number of characters stored before the terminator (the
 * stripped '\r' is still counted), or -1 if syncRead() fails.  If the line
 * does not fit in the buffer, reading stops when the buffer is full and the
 * rest of the line is left unread. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve room for the terminating NUL. */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
            /* Account for the byte just stored: the remaining room was
             * never updated before, so a line longer than the buffer was
             * written past its end. */
            size--;
        }
    }
    return nread;
}
7585 static void syncCommand(redisClient
*c
) {
7586 /* ignore SYNC if aleady slave or in monitor mode */
7587 if (c
->flags
& REDIS_SLAVE
) return;
7589 /* SYNC can't be issued when the server has pending data to send to
7590 * the client about already issued commands. We need a fresh reply
7591 * buffer registering the differences between the BGSAVE and the current
7592 * dataset, so that we can copy to other slaves if needed. */
7593 if (listLength(c
->reply
) != 0) {
7594 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7598 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7599 /* Here we need to check if there is a background saving operation
7600 * in progress, or if it is required to start one */
7601 if (server
.bgsavechildpid
!= -1) {
7602 /* Ok a background save is in progress. Let's check if it is a good
7603 * one for replication, i.e. if there is another slave that is
7604 * registering differences since the server forked to save */
7609 listRewind(server
.slaves
,&li
);
7610 while((ln
= listNext(&li
))) {
7612 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7615 /* Perfect, the server is already registering differences for
7616 * another slave. Set the right state, and copy the buffer. */
7617 listRelease(c
->reply
);
7618 c
->reply
= listDup(slave
->reply
);
7619 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7620 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7622 /* No way, we need to wait for the next BGSAVE in order to
7623 * register differences */
7624 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7625 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7628 /* Ok we don't have a BGSAVE in progress, let's start one */
7629 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7630 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7631 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7632 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7635 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7638 c
->flags
|= REDIS_SLAVE
;
7640 listAddNodeTail(server
.slaves
,c
);
7644 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7645 redisClient
*slave
= privdata
;
7647 REDIS_NOTUSED(mask
);
7648 char buf
[REDIS_IOBUF_LEN
];
7649 ssize_t nwritten
, buflen
;
7651 if (slave
->repldboff
== 0) {
7652 /* Write the bulk write count before to transfer the DB. In theory here
7653 * we don't know how much room there is in the output buffer of the
7654 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7655 * operations) will never be smaller than the few bytes we need. */
7658 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7660 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7668 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7669 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7671 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7672 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7676 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7677 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7682 slave
->repldboff
+= nwritten
;
7683 if (slave
->repldboff
== slave
->repldbsize
) {
7684 close(slave
->repldbfd
);
7685 slave
->repldbfd
= -1;
7686 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7687 slave
->replstate
= REDIS_REPL_ONLINE
;
7688 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7689 sendReplyToClient
, slave
) == AE_ERR
) {
7693 addReplySds(slave
,sdsempty());
7694 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7698 /* This function is called at the end of every backgrond saving.
7699 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7700 * otherwise REDIS_ERR is passed to the function.
7702 * The goal of this function is to handle slaves waiting for a successful
7703 * background saving in order to perform non-blocking synchronization. */
7704 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7706 int startbgsave
= 0;
7709 listRewind(server
.slaves
,&li
);
7710 while((ln
= listNext(&li
))) {
7711 redisClient
*slave
= ln
->value
;
7713 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7715 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7716 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7717 struct redis_stat buf
;
7719 if (bgsaveerr
!= REDIS_OK
) {
7721 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7724 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7725 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7727 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7730 slave
->repldboff
= 0;
7731 slave
->repldbsize
= buf
.st_size
;
7732 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7733 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7734 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7741 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7744 listRewind(server
.slaves
,&li
);
7745 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7746 while((ln
= listNext(&li
))) {
7747 redisClient
*slave
= ln
->value
;
7749 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7756 static int syncWithMaster(void) {
7757 char buf
[1024], tmpfile
[256], authcmd
[1024];
7759 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7760 int dfd
, maxtries
= 5;
7763 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7768 /* AUTH with the master if required. */
7769 if(server
.masterauth
) {
7770 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7771 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7773 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7777 /* Read the AUTH result. */
7778 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7780 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7784 if (buf
[0] != '+') {
7786 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7791 /* Issue the SYNC command */
7792 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7794 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7798 /* Read the bulk write count */
7799 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7801 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7805 if (buf
[0] != '$') {
7807 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7810 dumpsize
= strtol(buf
+1,NULL
,10);
7811 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7812 /* Read the bulk write data on a temp file */
7814 snprintf(tmpfile
,256,
7815 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7816 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7817 if (dfd
!= -1) break;
7822 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7826 int nread
, nwritten
;
7828 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7830 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7836 nwritten
= write(dfd
,buf
,nread
);
7837 if (nwritten
== -1) {
7838 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7846 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7847 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7853 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7854 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7858 server
.master
= createClient(fd
);
7859 server
.master
->flags
|= REDIS_MASTER
;
7860 server
.master
->authenticated
= 1;
7861 server
.replstate
= REDIS_REPL_CONNECTED
;
7865 static void slaveofCommand(redisClient
*c
) {
7866 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7867 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7868 if (server
.masterhost
) {
7869 sdsfree(server
.masterhost
);
7870 server
.masterhost
= NULL
;
7871 if (server
.master
) freeClient(server
.master
);
7872 server
.replstate
= REDIS_REPL_NONE
;
7873 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7876 sdsfree(server
.masterhost
);
7877 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7878 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7879 if (server
.master
) freeClient(server
.master
);
7880 server
.replstate
= REDIS_REPL_CONNECT
;
7881 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7882 server
.masterhost
, server
.masterport
);
7884 addReply(c
,shared
.ok
);
7887 /* ============================ Maxmemory directive ======================== */
7889 /* Try to free one object from the pre-allocated objects free list.
7890 * This is useful under low mem conditions as by default we take 1 million
7891 * free objects allocated. On success REDIS_OK is returned, otherwise
7893 static int tryFreeOneObjectFromFreelist(void) {
7896 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7897 if (listLength(server
.objfreelist
)) {
7898 listNode
*head
= listFirst(server
.objfreelist
);
7899 o
= listNodeValue(head
);
7900 listDelNode(server
.objfreelist
,head
);
7901 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7905 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7910 /* This function gets called when 'maxmemory' is set on the config file to limit
7911 * the max memory used by the server, and we are out of memory.
7912 * This function will try to, in order:
7914 * - Free objects from the free list
7915 * - Try to remove keys with an EXPIRE set
7917 * If it is not possible to free enough memory to reach used-memory < maxmemory
7918 * the server will start refusing commands that will enlarge even more the
7921 static void freeMemoryIfNeeded(void) {
7922 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7923 int j
, k
, freed
= 0;
7925 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7926 for (j
= 0; j
< server
.dbnum
; j
++) {
7928 robj
*minkey
= NULL
;
7929 struct dictEntry
*de
;
7931 if (dictSize(server
.db
[j
].expires
)) {
7933 /* From a sample of three keys drop the one nearest to
7934 * the natural expire */
7935 for (k
= 0; k
< 3; k
++) {
7938 de
= dictGetRandomKey(server
.db
[j
].expires
);
7939 t
= (time_t) dictGetEntryVal(de
);
7940 if (minttl
== -1 || t
< minttl
) {
7941 minkey
= dictGetEntryKey(de
);
7945 deleteKey(server
.db
+j
,minkey
);
7948 if (!freed
) return; /* nothing to free... */
7952 /* ============================== Append Only file ========================== */
7954 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7955 sds buf
= sdsempty();
7961 /* The DB this command was targeting is not the same as the last command
7962 * we appended, so a SELECT command needs to be issued first. */
7963 if (dictid
!= server
.appendseldb
) {
7966 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7967 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7968 (unsigned long)strlen(seldb
),seldb
);
7969 server
.appendseldb
= dictid
;
7972 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7973 * EXPIREs into EXPIREAT calls */
7974 if (cmd
->proc
== expireCommand
) {
7977 tmpargv
[0] = createStringObject("EXPIREAT",8);
7978 tmpargv
[1] = argv
[1];
7979 incrRefCount(argv
[1]);
7980 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7981 tmpargv
[2] = createObject(REDIS_STRING
,
7982 sdscatprintf(sdsempty(),"%ld",when
));
7986 /* Append the actual command */
7987 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7988 for (j
= 0; j
< argc
; j
++) {
7991 o
= getDecodedObject(o
);
7992 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7993 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7994 buf
= sdscatlen(buf
,"\r\n",2);
7998 /* Free the objects from the modified argv for EXPIREAT */
7999 if (cmd
->proc
== expireCommand
) {
8000 for (j
= 0; j
< 3; j
++)
8001 decrRefCount(argv
[j
]);
8004 /* We want to perform a single write. This should be guaranteed atomic
8005 * at least if the filesystem we are writing is a real physical one.
8006 * While this will save us against the server being killed I don't think
8007 * there is much to do about the whole server stopping for power problems
8009 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
8010 if (nwritten
!= (signed)sdslen(buf
)) {
8011 /* Ooops, we are in trouble. The best thing to do for now is
8012 * to simply exit instead of giving the illusion that everything is
8013 * working as expected. */
8014 if (nwritten
== -1) {
8015 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8017 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8021 /* If a background append only file rewriting is in progress we want to
8022 * accumulate the differences between the child DB and the current one
8023 * in a buffer, so that when the child process will do its work we
8024 * can append the differences to the new append only file. */
8025 if (server
.bgrewritechildpid
!= -1)
8026 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8030 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8031 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8032 now
-server
.lastfsync
> 1))
8034 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8035 server
.lastfsync
= now
;
8039 /* In Redis commands are always executed in the context of a client, so in
8040 * order to load the append only file we need to create a fake client. */
8041 static struct redisClient
*createFakeClient(void) {
8042 struct redisClient
*c
= zmalloc(sizeof(*c
));
8046 c
->querybuf
= sdsempty();
8050 /* We set the fake client as a slave waiting for the synchronization
8051 * so that Redis will not try to send replies to this client. */
8052 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8053 c
->reply
= listCreate();
8054 listSetFreeMethod(c
->reply
,decrRefCount
);
8055 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8059 static void freeFakeClient(struct redisClient
*c
) {
8060 sdsfree(c
->querybuf
);
8061 listRelease(c
->reply
);
8065 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
8066 * error (the append only file is zero-length) REDIS_ERR is returned. On
8067 * fatal error an error message is logged and the program exits. */
8068 int loadAppendOnlyFile(char *filename
) {
8069 struct redisClient
*fakeClient
;
8070 FILE *fp
= fopen(filename
,"r");
8071 struct redis_stat sb
;
8072 unsigned long long loadedkeys
= 0;
8074 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8078 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8082 fakeClient
= createFakeClient();
8089 struct redisCommand
*cmd
;
8091 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8097 if (buf
[0] != '*') goto fmterr
;
8099 argv
= zmalloc(sizeof(robj
*)*argc
);
8100 for (j
= 0; j
< argc
; j
++) {
8101 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8102 if (buf
[0] != '$') goto fmterr
;
8103 len
= strtol(buf
+1,NULL
,10);
8104 argsds
= sdsnewlen(NULL
,len
);
8105 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8106 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8107 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8110 /* Command lookup */
8111 cmd
= lookupCommand(argv
[0]->ptr
);
8113 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8116 /* Try object encoding */
8117 if (cmd
->flags
& REDIS_CMD_BULK
)
8118 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8119 /* Run the command in the context of a fake client */
8120 fakeClient
->argc
= argc
;
8121 fakeClient
->argv
= argv
;
8122 cmd
->proc(fakeClient
);
8123 /* Discard the reply objects list from the fake client */
8124 while(listLength(fakeClient
->reply
))
8125 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8126 /* Clean up, ready for the next command */
8127 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8129 /* Handle swapping while loading big datasets when VM is on */
8131 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8132 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8133 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8138 freeFakeClient(fakeClient
);
8143 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8145 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8149 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8153 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8154 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8158 /* Avoid the incr/decr ref count business if possible to help
8159 * copy-on-write (we are often in a child process when this function
8161 * Also makes sure that key objects don't get incrRefCount-ed when VM
8163 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8164 obj
= getDecodedObject(obj
);
8167 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8168 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8169 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8171 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8172 if (decrrc
) decrRefCount(obj
);
8175 if (decrrc
) decrRefCount(obj
);
8179 /* Write binary-safe string into a file in the bulkformat
8180 * $<count>\r\n<payload>\r\n */
8181 static int fwriteBulkString(FILE *fp
, char *s
, unsigned long len
) {
8184 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(unsigned long)len
);
8185 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8186 if (len
&& fwrite(s
,len
,1,fp
) == 0) return 0;
8187 if (fwrite("\r\n",2,1,fp
) == 0) return 0;
8191 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
8192 static int fwriteBulkDouble(FILE *fp
, double d
) {
8193 char buf
[128], dbuf
[128];
8195 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
8196 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
8197 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8198 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
8202 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
8203 static int fwriteBulkLong(FILE *fp
, long l
) {
8204 char buf
[128], lbuf
[128];
8206 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
8207 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
8208 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8209 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
8213 /* Write a sequence of commands able to fully rebuild the dataset into
8214 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8215 static int rewriteAppendOnlyFile(char *filename
) {
8216 dictIterator
*di
= NULL
;
8221 time_t now
= time(NULL
);
8223 /* Note that we have to use a different temp name here compared to the
8224 * one used by rewriteAppendOnlyFileBackground() function. */
8225 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8226 fp
= fopen(tmpfile
,"w");
8228 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8231 for (j
= 0; j
< server
.dbnum
; j
++) {
8232 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8233 redisDb
*db
= server
.db
+j
;
8235 if (dictSize(d
) == 0) continue;
8236 di
= dictGetIterator(d
);
8242 /* SELECT the new DB */
8243 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8244 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8246 /* Iterate this DB writing every entry */
8247 while((de
= dictNext(di
)) != NULL
) {
8252 key
= dictGetEntryKey(de
);
8253 /* If the value for this key is swapped, load a preview in memory.
8254 * We use a "swapped" flag to remember if we need to free the
8255 * value object instead of just incrementing the ref count anyway
8256 * in order to avoid copy-on-write of pages if we are forked() */
8257 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8258 key
->storage
== REDIS_VM_SWAPPING
) {
8259 o
= dictGetEntryVal(de
);
8262 o
= vmPreviewObject(key
);
8265 expiretime
= getExpire(db
,key
);
8267 /* Save the key and associated value */
8268 if (o
->type
== REDIS_STRING
) {
8269 /* Emit a SET command */
8270 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8271 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8273 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8274 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8275 } else if (o
->type
== REDIS_LIST
) {
8276 /* Emit the RPUSHes needed to rebuild the list */
8277 list
*list
= o
->ptr
;
8281 listRewind(list
,&li
);
8282 while((ln
= listNext(&li
))) {
8283 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8284 robj
*eleobj
= listNodeValue(ln
);
8286 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8287 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8288 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8290 } else if (o
->type
== REDIS_SET
) {
8291 /* Emit the SADDs needed to rebuild the set */
8293 dictIterator
*di
= dictGetIterator(set
);
8296 while((de
= dictNext(di
)) != NULL
) {
8297 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8298 robj
*eleobj
= dictGetEntryKey(de
);
8300 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8301 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8302 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8304 dictReleaseIterator(di
);
8305 } else if (o
->type
== REDIS_ZSET
) {
8306 /* Emit the ZADDs needed to rebuild the sorted set */
8308 dictIterator
*di
= dictGetIterator(zs
->dict
);
8311 while((de
= dictNext(di
)) != NULL
) {
8312 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8313 robj
*eleobj
= dictGetEntryKey(de
);
8314 double *score
= dictGetEntryVal(de
);
8316 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8317 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8318 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8319 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8321 dictReleaseIterator(di
);
8322 } else if (o
->type
== REDIS_HASH
) {
8323 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8325 /* Emit the HSETs needed to rebuild the hash */
8326 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8327 unsigned char *p
= zipmapRewind(o
->ptr
);
8328 unsigned char *field
, *val
;
8329 unsigned int flen
, vlen
;
8331 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8332 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8333 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8334 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8336 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8340 dictIterator
*di
= dictGetIterator(o
->ptr
);
8343 while((de
= dictNext(di
)) != NULL
) {
8344 robj
*field
= dictGetEntryKey(de
);
8345 robj
*val
= dictGetEntryVal(de
);
8347 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8348 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8349 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8350 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8352 dictReleaseIterator(di
);
8355 redisPanic("Unknown object type");
8357 /* Save the expire time */
8358 if (expiretime
!= -1) {
8359 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8360 /* If this key is already expired skip it */
8361 if (expiretime
< now
) continue;
8362 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8363 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8364 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8366 if (swapped
) decrRefCount(o
);
8368 dictReleaseIterator(di
);
8371 /* Make sure data will not remain on the OS's output buffers */
8376 /* Use RENAME to make sure the DB file is changed atomically only
8377 * if the generated DB file is ok. */
8378 if (rename(tmpfile
,filename
) == -1) {
8379 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8383 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8389 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8390 if (di
) dictReleaseIterator(di
);
8394 /* This is how rewriting of the append only file in background works:
8396 * 1) The user calls BGREWRITEAOF
8397 * 2) Redis calls this function, that forks():
8398 * 2a) the child rewrites the append only file in a temp file.
8399 * 2b) the parent accumulates differences in server.bgrewritebuf.
8400 * 3) When the child finishes '2a' it exits.
8401 * 4) The parent will trap the exit code, if it's OK, will append the
8402 * data accumulated into server.bgrewritebuf into the temp file, and
8403 * finally will rename(2) the temp file to the actual file name.
8404 * Then the new file is reopened as the new append only file. Profit!
8406 static int rewriteAppendOnlyFileBackground(void) {
8409 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8410 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8411 if ((childpid
= fork()) == 0) {
8415 if (server
.vm_enabled
) vmReopenSwapFile();
8417 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8418 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8425 if (childpid
== -1) {
8426 redisLog(REDIS_WARNING
,
8427 "Can't rewrite append only file in background: fork: %s",
8431 redisLog(REDIS_NOTICE
,
8432 "Background append only file rewriting started by pid %d",childpid
);
8433 server
.bgrewritechildpid
= childpid
;
8434 updateDictResizePolicy();
8435 /* We set appendseldb to -1 in order to force the next call to the
8436 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8437 * accumulated by the parent into server.bgrewritebuf will start
8438 * with a SELECT statement and it will be safe to merge. */
8439 server
.appendseldb
= -1;
8442 return REDIS_OK
; /* unreached */
8445 static void bgrewriteaofCommand(redisClient
*c
) {
8446 if (server
.bgrewritechildpid
!= -1) {
8447 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8450 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8451 char *status
= "+Background append only file rewriting started\r\n";
8452 addReplySds(c
,sdsnew(status
));
8454 addReply(c
,shared
.err
);
8458 static void aofRemoveTempFile(pid_t childpid
) {
8461 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8465 /* Virtual Memory is composed mainly of two subsystems:
8466 * - Blocking Virtual Memory
8467 * - Threaded Virtual Memory I/O
8468 * The two parts are not fully decoupled, but functions are split among two
8469 * different sections of the source code (delimited by comments) in order to
8470 * make more clear what functionality is about the blocking VM and what about
8471 * the threaded (not blocking) VM.
8475 * Redis VM is a blocking VM (one that blocks reading swapped values from
8476 * disk into memory when a value swapped out is needed in memory) that is made
8477 * unblocking by trying to examine the command argument vector in order to
8478 * load in background values that will likely be needed in order to exec
8479 * the command. The command is executed only once all the relevant keys
8480 * are loaded into memory.
8482 * This basically is almost as simple as a blocking VM, but almost as parallel
8483 * as a fully non-blocking VM.
8486 /* =================== Virtual Memory - Blocking Side ====================== */
8488 /* substitute the first occurrence of '%p' with the process pid in the
8489 * swap file name. */
8490 static void expandVmSwapFilename(void) {
8491 char *p
= strstr(server
.vm_swap_file
,"%p");
8497 new = sdscat(new,server
.vm_swap_file
);
8498 new = sdscatprintf(new,"%ld",(long) getpid());
8499 new = sdscat(new,p
+2);
8500 zfree(server
.vm_swap_file
);
8501 server
.vm_swap_file
= new;
8504 static void vmInit(void) {
8509 if (server
.vm_max_threads
!= 0)
8510 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8512 expandVmSwapFilename();
8513 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8514 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8515 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8517 if (server
.vm_fp
== NULL
) {
8518 redisLog(REDIS_WARNING
,
8519 "Impossible to open the swap file: %s. Exiting.",
8523 server
.vm_fd
= fileno(server
.vm_fp
);
8524 server
.vm_next_page
= 0;
8525 server
.vm_near_pages
= 0;
8526 server
.vm_stats_used_pages
= 0;
8527 server
.vm_stats_swapped_objects
= 0;
8528 server
.vm_stats_swapouts
= 0;
8529 server
.vm_stats_swapins
= 0;
8530 totsize
= server
.vm_pages
*server
.vm_page_size
;
8531 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8532 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8533 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8537 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8539 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8540 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8541 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8542 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8544 /* Initialize threaded I/O (used by Virtual Memory) */
8545 server
.io_newjobs
= listCreate();
8546 server
.io_processing
= listCreate();
8547 server
.io_processed
= listCreate();
8548 server
.io_ready_clients
= listCreate();
8549 pthread_mutex_init(&server
.io_mutex
,NULL
);
8550 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8551 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8552 server
.io_active_threads
= 0;
8553 if (pipe(pipefds
) == -1) {
8554 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8558 server
.io_ready_pipe_read
= pipefds
[0];
8559 server
.io_ready_pipe_write
= pipefds
[1];
8560 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8561 /* LZF requires a lot of stack */
8562 pthread_attr_init(&server
.io_threads_attr
);
8563 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8564 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8565 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8566 /* Listen for events in the threaded I/O pipe */
8567 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8568 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8569 oom("creating file event");
8572 /* Mark the page as used */
8573 static void vmMarkPageUsed(off_t page
) {
8574 off_t byte
= page
/8;
8576 redisAssert(vmFreePage(page
) == 1);
8577 server
.vm_bitmap
[byte
] |= 1<<bit
;
8580 /* Mark N contiguous pages as used, with 'page' being the first. */
8581 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8584 for (j
= 0; j
< count
; j
++)
8585 vmMarkPageUsed(page
+j
);
8586 server
.vm_stats_used_pages
+= count
;
8587 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8588 (long long)count
, (long long)page
);
8591 /* Mark the page as free */
8592 static void vmMarkPageFree(off_t page
) {
8593 off_t byte
= page
/8;
8595 redisAssert(vmFreePage(page
) == 0);
8596 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8599 /* Mark N contiguous pages as free, with 'page' being the first. */
8600 static void vmMarkPagesFree(off_t page
, off_t count
) {
8603 for (j
= 0; j
< count
; j
++)
8604 vmMarkPageFree(page
+j
);
8605 server
.vm_stats_used_pages
-= count
;
8606 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8607 (long long)count
, (long long)page
);
8610 /* Test if the page is free */
8611 static int vmFreePage(off_t page
) {
8612 off_t byte
= page
/8;
8614 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8617 /* Find N contiguous free pages storing the first page of the cluster in *first.
8618 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8619 * REDIS_ERR is returned.
8621 * This function uses a simple algorithm: we try to allocate
8622 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8623 * again from the start of the swap file searching for free spaces.
8625 * If it looks pretty clear that there are no free pages near our offset
8626 * we try to find less populated places doing a forward jump of
8627 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8628 * without hurry, and then we jump again and so forth...
8630 * This function can be improved using a free list to avoid guessing
8631 * too much, since we could collect data about freed pages.
8633 * note: I implemented this function just after watching an episode of
8634 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8636 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8637 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8639 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8640 server
.vm_near_pages
= 0;
8641 server
.vm_next_page
= 0;
8643 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8644 base
= server
.vm_next_page
;
8646 while(offset
< server
.vm_pages
) {
8647 off_t
this = base
+offset
;
8649 /* If we overflow, restart from page zero */
8650 if (this >= server
.vm_pages
) {
8651 this -= server
.vm_pages
;
8653 /* Just overflowed, what we found on tail is no longer
8654 * interesting, as it's no longer contiguous. */
8658 if (vmFreePage(this)) {
8659 /* This is a free page */
8661 /* Already got N free pages? Return to the caller, with success */
8663 *first
= this-(n
-1);
8664 server
.vm_next_page
= this+1;
8665 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8669 /* The current one is not a free page */
8673 /* Fast-forward if the current page is not free and we already
8674 * searched enough near this place. */
8676 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8677 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8679 /* Note that even if we rewind after the jump, we don't need
8680 * to make sure numfree is set to zero as we only jump *if* it
8681 * is set to zero. */
8683 /* Otherwise just check the next page */
8690 /* Write the specified object at the specified page of the swap file */
8691 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8692 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8693 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8694 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8695 redisLog(REDIS_WARNING
,
8696 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8700 rdbSaveObject(server
.vm_fp
,o
);
8701 fflush(server
.vm_fp
);
8702 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8706 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8707 * needed to later retrieve the object into the key object.
8708 * If we can't find enough contiguous empty pages to swap the object on disk
8709 * REDIS_ERR is returned. */
8710 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8711 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8714 assert(key
->storage
== REDIS_VM_MEMORY
);
8715 assert(key
->refcount
== 1);
8716 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8717 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8718 key
->vm
.page
= page
;
8719 key
->vm
.usedpages
= pages
;
8720 key
->storage
= REDIS_VM_SWAPPED
;
8721 key
->vtype
= val
->type
;
8722 decrRefCount(val
); /* Deallocate the object from memory. */
8723 vmMarkPagesUsed(page
,pages
);
8724 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8725 (unsigned char*) key
->ptr
,
8726 (unsigned long long) page
, (unsigned long long) pages
);
8727 server
.vm_stats_swapped_objects
++;
8728 server
.vm_stats_swapouts
++;
8732 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8735 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8736 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8737 redisLog(REDIS_WARNING
,
8738 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8742 o
= rdbLoadObject(type
,server
.vm_fp
);
8744 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8747 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8751 /* Load the value object relative to the 'key' object from swap to memory.
8752 * The newly allocated object is returned.
8754 * If preview is true the unserialized object is returned to the caller but
8755 * no changes are made to the key object, nor the pages are marked as freed */
8756 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8759 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8760 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8762 key
->storage
= REDIS_VM_MEMORY
;
8763 key
->vm
.atime
= server
.unixtime
;
8764 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8765 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8766 (unsigned char*) key
->ptr
);
8767 server
.vm_stats_swapped_objects
--;
8769 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8770 (unsigned char*) key
->ptr
);
8772 server
.vm_stats_swapins
++;
8776 /* Plain object loading, from swap to memory */
8777 static robj
*vmLoadObject(robj
*key
) {
8778 /* If we are loading the object in background, stop it, we
8779 * need to load this object synchronously ASAP. */
8780 if (key
->storage
== REDIS_VM_LOADING
)
8781 vmCancelThreadedIOJob(key
);
8782 return vmGenericLoadObject(key
,0);
8785 /* Just load the value on disk, without to modify the key.
8786 * This is useful when we want to perform some operation on the value
8787 * without to really bring it from swap to memory, like while saving the
8788 * dataset or rewriting the append only log. */
8789 static robj
*vmPreviewObject(robj
*key
) {
8790 return vmGenericLoadObject(key
,1);
8793 /* How a good candidate is this object for swapping?
8794 * The better candidate it is, the greater the returned value.
8796 * Currently we try to perform a fast estimation of the object size in
8797 * memory, and combine it with aging informations.
8799 * Basically swappability = idle-time * log(estimated size)
8801 * Bigger objects are preferred over smaller objects, but not
8802 * proportionally, this is why we use the logarithm. This algorithm is
8803 * just a first try and will probably be tuned later. */
8804 static double computeObjectSwappability(robj
*o
) {
8805 time_t age
= server
.unixtime
- o
->vm
.atime
;
8809 struct dictEntry
*de
;
8812 if (age
<= 0) return 0;
8815 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8818 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8823 listNode
*ln
= listFirst(l
);
8825 asize
= sizeof(list
);
8827 robj
*ele
= ln
->value
;
8830 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8831 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8833 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8838 z
= (o
->type
== REDIS_ZSET
);
8839 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8841 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8842 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8847 de
= dictGetRandomKey(d
);
8848 ele
= dictGetEntryKey(de
);
8849 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8850 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8852 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8853 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8857 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8858 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8859 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8860 unsigned int klen
, vlen
;
8861 unsigned char *key
, *val
;
8863 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8867 asize
= len
*(klen
+vlen
+3);
8868 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8870 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8875 de
= dictGetRandomKey(d
);
8876 ele
= dictGetEntryKey(de
);
8877 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8878 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8880 ele
= dictGetEntryVal(de
);
8881 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8882 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8884 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8889 return (double)age
*log(1+asize
);
8892 /* Try to swap an object that's a good candidate for swapping.
8893 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8894 * to swap any object at all.
8896 * If 'usethreads' is true, Redis will try to swap the object in background
8897 * using I/O threads. */
8898 static int vmSwapOneObject(int usethreads
) {
8900 struct dictEntry
*best
= NULL
;
8901 double best_swappability
= 0;
8902 redisDb
*best_db
= NULL
;
8905 for (j
= 0; j
< server
.dbnum
; j
++) {
8906 redisDb
*db
= server
.db
+j
;
8907 /* Why maxtries is set to 100?
8908 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8909 * are swappable objects */
8912 if (dictSize(db
->dict
) == 0) continue;
8913 for (i
= 0; i
< 5; i
++) {
8915 double swappability
;
8917 if (maxtries
) maxtries
--;
8918 de
= dictGetRandomKey(db
->dict
);
8919 key
= dictGetEntryKey(de
);
8920 val
= dictGetEntryVal(de
);
8921 /* Only swap objects that are currently in memory.
8923 * Also don't swap shared objects if threaded VM is on, as we
8924 * try to ensure that the main thread does not touch the
8925 * object while the I/O thread is using it, but we can't
8926 * control other keys without adding additional mutex. */
8927 if (key
->storage
!= REDIS_VM_MEMORY
||
8928 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8929 if (maxtries
) i
--; /* don't count this try */
8932 swappability
= computeObjectSwappability(val
);
8933 if (!best
|| swappability
> best_swappability
) {
8935 best_swappability
= swappability
;
8940 if (best
== NULL
) return REDIS_ERR
;
8941 key
= dictGetEntryKey(best
);
8942 val
= dictGetEntryVal(best
);
8944 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8945 key
->ptr
, best_swappability
);
8947 /* Unshare the key if needed */
8948 if (key
->refcount
> 1) {
8949 robj
*newkey
= dupStringObject(key
);
8951 key
= dictGetEntryKey(best
) = newkey
;
8955 vmSwapObjectThreaded(key
,val
,best_db
);
8958 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8959 dictGetEntryVal(best
) = NULL
;
/* Swap out one object using the blocking (non threaded) implementation.
 * Returns whatever vmSwapOneObject() returns.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype, like the other no-argument functions in this file. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object asking vmSwapOneObject() to use the I/O threads.
 * Returns whatever vmSwapOneObject() returns.
 *
 * The parameter list is spelled (void) so that the definition is a proper
 * prototype, like the other no-argument functions in this file. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8975 /* Return true if it's safe to swap out objects in a given moment.
8976 * Basically we don't want to swap objects out while there is a BGSAVE
8977 * or a BGAEOREWRITE running in backgroud. */
8978 static int vmCanSwapOut(void) {
8979 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8982 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8983 * and was deleted. Otherwise 0 is returned. */
8984 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8988 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8989 foundkey
= dictGetEntryKey(de
);
8990 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8995 /* =================== Virtual Memory - Threaded I/O ======================= */
8997 static void freeIOJob(iojob
*j
) {
8998 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8999 j
->type
== REDIS_IOJOB_DO_SWAP
||
9000 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9001 decrRefCount(j
->val
);
9002 /* We don't decrRefCount the j->key field as we did't incremented
9003 * the count creating IO Jobs. This is because the key field here is
9004 * just used as an indentifier and if a key is removed the Job should
9005 * never be touched again. */
9009 /* Every time a thread finished a Job, it writes a byte into the write side
9010 * of an unix pipe in order to "awake" the main thread, and this function
9012 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9016 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9018 REDIS_NOTUSED(mask
);
9019 REDIS_NOTUSED(privdata
);
9021 /* For every byte we read in the read side of the pipe, there is one
9022 * I/O job completed to process. */
9023 while((retval
= read(fd
,buf
,1)) == 1) {
9027 struct dictEntry
*de
;
9029 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9031 /* Get the processed element (the oldest one) */
9033 assert(listLength(server
.io_processed
) != 0);
9034 if (toprocess
== -1) {
9035 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9036 if (toprocess
<= 0) toprocess
= 1;
9038 ln
= listFirst(server
.io_processed
);
9040 listDelNode(server
.io_processed
,ln
);
9042 /* If this job is marked as canceled, just ignore it */
9047 /* Post process it in the main thread, as there are things we
9048 * can do just here to avoid race conditions and/or invasive locks */
9049 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9050 de
= dictFind(j
->db
->dict
,j
->key
);
9052 key
= dictGetEntryKey(de
);
9053 if (j
->type
== REDIS_IOJOB_LOAD
) {
9056 /* Key loaded, bring it at home */
9057 key
->storage
= REDIS_VM_MEMORY
;
9058 key
->vm
.atime
= server
.unixtime
;
9059 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9060 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9061 (unsigned char*) key
->ptr
);
9062 server
.vm_stats_swapped_objects
--;
9063 server
.vm_stats_swapins
++;
9064 dictGetEntryVal(de
) = j
->val
;
9065 incrRefCount(j
->val
);
9068 /* Handle clients waiting for this key to be loaded. */
9069 handleClientsBlockedOnSwappedKey(db
,key
);
9070 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9071 /* Now we know the amount of pages required to swap this object.
9072 * Let's find some space for it, and queue this task again
9073 * rebranded as REDIS_IOJOB_DO_SWAP. */
9074 if (!vmCanSwapOut() ||
9075 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9077 /* Ooops... no space or we can't swap as there is
9078 * a fork()ed Redis trying to save stuff on disk. */
9080 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9082 /* Note that we need to mark this pages as used now,
9083 * if the job will be canceled, we'll mark them as freed
9085 vmMarkPagesUsed(j
->page
,j
->pages
);
9086 j
->type
= REDIS_IOJOB_DO_SWAP
;
9091 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9094 /* Key swapped. We can finally free some memory. */
9095 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9096 printf("key->storage: %d\n",key
->storage
);
9097 printf("key->name: %s\n",(char*)key
->ptr
);
9098 printf("key->refcount: %d\n",key
->refcount
);
9099 printf("val: %p\n",(void*)j
->val
);
9100 printf("val->type: %d\n",j
->val
->type
);
9101 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9103 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9104 val
= dictGetEntryVal(de
);
9105 key
->vm
.page
= j
->page
;
9106 key
->vm
.usedpages
= j
->pages
;
9107 key
->storage
= REDIS_VM_SWAPPED
;
9108 key
->vtype
= j
->val
->type
;
9109 decrRefCount(val
); /* Deallocate the object from memory. */
9110 dictGetEntryVal(de
) = NULL
;
9111 redisLog(REDIS_DEBUG
,
9112 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9113 (unsigned char*) key
->ptr
,
9114 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9115 server
.vm_stats_swapped_objects
++;
9116 server
.vm_stats_swapouts
++;
9118 /* Put a few more swap requests in queue if we are still
9120 if (trytoswap
&& vmCanSwapOut() &&
9121 zmalloc_used_memory() > server
.vm_max_memory
)
9126 more
= listLength(server
.io_newjobs
) <
9127 (unsigned) server
.vm_max_threads
;
9129 /* Don't waste CPU time if swappable objects are rare. */
9130 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9138 if (processed
== toprocess
) return;
9140 if (retval
< 0 && errno
!= EAGAIN
) {
9141 redisLog(REDIS_WARNING
,
9142 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9147 static void lockThreadedIO(void) {
9148 pthread_mutex_lock(&server
.io_mutex
);
9151 static void unlockThreadedIO(void) {
9152 pthread_mutex_unlock(&server
.io_mutex
);
9155 /* Remove the specified object from the threaded I/O queue if still not
9156 * processed, otherwise make sure to flag it as canceled. */
9157 static void vmCancelThreadedIOJob(robj
*o
) {
9159 server
.io_newjobs
, /* 0 */
9160 server
.io_processing
, /* 1 */
9161 server
.io_processed
/* 2 */
9165 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9168 /* Search for a matching key in one of the queues */
9169 for (i
= 0; i
< 3; i
++) {
9173 listRewind(lists
[i
],&li
);
9174 while ((ln
= listNext(&li
)) != NULL
) {
9175 iojob
*job
= ln
->value
;
9177 if (job
->canceled
) continue; /* Skip this, already canceled. */
9178 if (job
->key
== o
) {
9179 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9180 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9181 /* Mark the pages as free since the swap didn't happened
9182 * or happened but is now discarded. */
9183 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9184 vmMarkPagesFree(job
->page
,job
->pages
);
9185 /* Cancel the job. It depends on the list the job is
9188 case 0: /* io_newjobs */
9189 /* If the job was yet not processed the best thing to do
9190 * is to remove it from the queue at all */
9192 listDelNode(lists
[i
],ln
);
9194 case 1: /* io_processing */
9195 /* Oh Shi- the thread is messing with the Job:
9197 * Probably it's accessing the object if this is a
9198 * PREPARE_SWAP or DO_SWAP job.
9199 * If it's a LOAD job it may be reading from disk and
9200 * if we don't wait for the job to terminate before to
9201 * cancel it, maybe in a few microseconds data can be
9202 * corrupted in this pages. So the short story is:
9204 * Better to wait for the job to move into the
9205 * next queue (processed)... */
9207 /* We try again and again until the job is completed. */
9209 /* But let's wait some time for the I/O thread
9210 * to finish with this job. After all this condition
9211 * should be very rare. */
9214 case 2: /* io_processed */
9215 /* The job was already processed, that's easy...
9216 * just mark it as canceled so that we'll ignore it
9217 * when processing completed jobs. */
9221 /* Finally we have to adjust the storage type of the object
9222 * in order to "UNDO" the operation. */
9223 if (o
->storage
== REDIS_VM_LOADING
)
9224 o
->storage
= REDIS_VM_SWAPPED
;
9225 else if (o
->storage
== REDIS_VM_SWAPPING
)
9226 o
->storage
= REDIS_VM_MEMORY
;
9233 assert(1 != 1); /* We should never reach this */
9236 static void *IOThreadEntryPoint(void *arg
) {
9241 pthread_detach(pthread_self());
9243 /* Get a new job to process */
9245 if (listLength(server
.io_newjobs
) == 0) {
9246 /* No new jobs in queue, exit. */
9247 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9248 (long) pthread_self());
9249 server
.io_active_threads
--;
9253 ln
= listFirst(server
.io_newjobs
);
9255 listDelNode(server
.io_newjobs
,ln
);
9256 /* Add the job in the processing queue */
9257 j
->thread
= pthread_self();
9258 listAddNodeTail(server
.io_processing
,j
);
9259 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9261 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9262 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9264 /* Process the Job */
9265 if (j
->type
== REDIS_IOJOB_LOAD
) {
9266 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9267 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9268 FILE *fp
= fopen("/dev/null","w+");
9269 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9271 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9272 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9276 /* Done: insert the job into the processed queue */
9277 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9278 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9280 listDelNode(server
.io_processing
,ln
);
9281 listAddNodeTail(server
.io_processed
,j
);
9284 /* Signal the main thread there is new stuff to process */
9285 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9287 return NULL
; /* never reached */
9290 static void spawnIOThread(void) {
9292 sigset_t mask
, omask
;
9296 sigaddset(&mask
,SIGCHLD
);
9297 sigaddset(&mask
,SIGHUP
);
9298 sigaddset(&mask
,SIGPIPE
);
9299 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9300 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9301 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9305 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9306 server
.io_active_threads
++;
9309 /* We need to wait for the last thread to exit before we are able to
9310 * fork() in order to BGSAVE or BGREWRITEAOF. */
9311 static void waitEmptyIOJobsQueue(void) {
9313 int io_processed_len
;
9316 if (listLength(server
.io_newjobs
) == 0 &&
9317 listLength(server
.io_processing
) == 0 &&
9318 server
.io_active_threads
== 0)
9323 /* While waiting for empty jobs queue condition we post-process some
9324 * finshed job, as I/O threads may be hanging trying to write against
9325 * the io_ready_pipe_write FD but there are so much pending jobs that
9327 io_processed_len
= listLength(server
.io_processed
);
9329 if (io_processed_len
) {
9330 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9331 usleep(1000); /* 1 millisecond */
9333 usleep(10000); /* 10 milliseconds */
9338 static void vmReopenSwapFile(void) {
9339 /* Note: we don't close the old one as we are in the child process
9340 * and don't want to mess at all with the original file object. */
9341 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9342 if (server
.vm_fp
== NULL
) {
9343 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9344 server
.vm_swap_file
);
9347 server
.vm_fd
= fileno(server
.vm_fp
);
9350 /* This function must be called while with threaded IO locked */
9351 static void queueIOJob(iojob
*j
) {
9352 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9353 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9354 listAddNodeTail(server
.io_newjobs
,j
);
9355 if (server
.io_active_threads
< server
.vm_max_threads
)
9359 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9362 assert(key
->storage
== REDIS_VM_MEMORY
);
9363 assert(key
->refcount
== 1);
9365 j
= zmalloc(sizeof(*j
));
9366 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9372 j
->thread
= (pthread_t
) -1;
9373 key
->storage
= REDIS_VM_SWAPPING
;
9381 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9383 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9384 * If there is not already a job loading the key, it is created.
9385 * The key is added to the io_keys list in the client structure, and also
9386 * in the hash table mapping swapped keys to waiting clients, that is,
9387 * server.io_waited_keys. */
9388 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9389 struct dictEntry
*de
;
9393 /* If the key does not exist or is already in RAM we don't need to
9394 * block the client at all. */
9395 de
= dictFind(c
->db
->dict
,key
);
9396 if (de
== NULL
) return 0;
9397 o
= dictGetEntryKey(de
);
9398 if (o
->storage
== REDIS_VM_MEMORY
) {
9400 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9401 /* We were swapping the key, undo it! */
9402 vmCancelThreadedIOJob(o
);
9406 /* OK: the key is either swapped, or being loaded just now. */
9408 /* Add the key to the list of keys this client is waiting for.
9409 * This maps clients to keys they are waiting for. */
9410 listAddNodeTail(c
->io_keys
,key
);
9413 /* Add the client to the swapped keys => clients waiting map. */
9414 de
= dictFind(c
->db
->io_keys
,key
);
9418 /* For every key we take a list of clients blocked for it */
9420 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9422 assert(retval
== DICT_OK
);
9424 l
= dictGetEntryVal(de
);
9426 listAddNodeTail(l
,c
);
9428 /* Are we already loading the key from disk? If not create a job */
9429 if (o
->storage
== REDIS_VM_SWAPPED
) {
9432 o
->storage
= REDIS_VM_LOADING
;
9433 j
= zmalloc(sizeof(*j
));
9434 j
->type
= REDIS_IOJOB_LOAD
;
9437 j
->key
->vtype
= o
->vtype
;
9438 j
->page
= o
->vm
.page
;
9441 j
->thread
= (pthread_t
) -1;
9449 /* Preload keys needed for the ZUNION and ZINTER commands. */
9450 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9452 num
= atoi(c
->argv
[2]->ptr
);
9453 for (i
= 0; i
< num
; i
++) {
9454 waitForSwappedKey(c
,c
->argv
[3+i
]);
9458 /* Is this client attempting to run a command against swapped keys?
9459 * If so, block it ASAP, load the keys in background, then resume it.
9461 * The important idea about this function is that it can fail! If keys will
9462 * still be swapped when the client is resumed, this key lookups will
9463 * just block loading keys from disk. In practical terms this should only
9464 * happen with SORT BY command or if there is a bug in this function.
9466 * Return 1 if the client is marked as blocked, 0 if the client can
9467 * continue as the keys it is going to access appear to be in memory. */
9468 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9471 if (cmd
->vm_preload_proc
!= NULL
) {
9472 cmd
->vm_preload_proc(c
);
9474 if (cmd
->vm_firstkey
== 0) return 0;
9475 last
= cmd
->vm_lastkey
;
9476 if (last
< 0) last
= c
->argc
+last
;
9477 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9478 waitForSwappedKey(c
,c
->argv
[j
]);
9481 /* If the client was blocked for at least one key, mark it as blocked. */
9482 if (listLength(c
->io_keys
)) {
9483 c
->flags
|= REDIS_IO_WAIT
;
9484 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9485 server
.vm_blocked_clients
++;
9492 /* Remove the 'key' from the list of blocked keys for a given client.
9494 * The function returns 1 when there are no longer blocking keys after
9495 * the current one was removed (and the client can be unblocked). */
9496 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9500 struct dictEntry
*de
;
9502 /* Remove the key from the list of keys this client is waiting for. */
9503 listRewind(c
->io_keys
,&li
);
9504 while ((ln
= listNext(&li
)) != NULL
) {
9505 if (compareStringObjects(ln
->value
,key
) == 0) {
9506 listDelNode(c
->io_keys
,ln
);
9512 /* Remove the client form the key => waiting clients map. */
9513 de
= dictFind(c
->db
->io_keys
,key
);
9515 l
= dictGetEntryVal(de
);
9516 ln
= listSearchKey(l
,c
);
9519 if (listLength(l
) == 0)
9520 dictDelete(c
->db
->io_keys
,key
);
9522 return listLength(c
->io_keys
) == 0;
9525 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9526 struct dictEntry
*de
;
9531 de
= dictFind(db
->io_keys
,key
);
9534 l
= dictGetEntryVal(de
);
9535 len
= listLength(l
);
9536 /* Note: we can't use something like while(listLength(l)) as the list
9537 * can be freed by the calling function when we remove the last element. */
9540 redisClient
*c
= ln
->value
;
9542 if (dontWaitForSwappedKey(c
,key
)) {
9543 /* Put the client in the list of clients ready to go as we
9544 * loaded all the keys about it. */
9545 listAddNodeTail(server
.io_ready_clients
,c
);
9550 /* =========================== Remote Configuration ========================= */
9552 static void configSetCommand(redisClient
*c
) {
9553 robj
*o
= getDecodedObject(c
->argv
[3]);
9554 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9555 zfree(server
.dbfilename
);
9556 server
.dbfilename
= zstrdup(o
->ptr
);
9557 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9558 zfree(server
.requirepass
);
9559 server
.requirepass
= zstrdup(o
->ptr
);
9560 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9561 zfree(server
.masterauth
);
9562 server
.masterauth
= zstrdup(o
->ptr
);
9563 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9564 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9566 addReplySds(c
,sdscatprintf(sdsempty(),
9567 "-ERR not supported CONFIG parameter %s\r\n",
9568 (char*)c
->argv
[2]->ptr
));
9573 addReply(c
,shared
.ok
);
9576 static void configGetCommand(redisClient
*c
) {
9577 robj
*o
= getDecodedObject(c
->argv
[2]);
9578 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9579 char *pattern
= o
->ptr
;
9583 decrRefCount(lenobj
);
9585 if (stringmatch(pattern
,"dbfilename",0)) {
9586 addReplyBulkCString(c
,"dbfilename");
9587 addReplyBulkCString(c
,server
.dbfilename
);
9590 if (stringmatch(pattern
,"requirepass",0)) {
9591 addReplyBulkCString(c
,"requirepass");
9592 addReplyBulkCString(c
,server
.requirepass
);
9595 if (stringmatch(pattern
,"masterauth",0)) {
9596 addReplyBulkCString(c
,"masterauth");
9597 addReplyBulkCString(c
,server
.masterauth
);
9600 if (stringmatch(pattern
,"maxmemory",0)) {
9603 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9604 addReplyBulkCString(c
,"maxmemory");
9605 addReplyBulkCString(c
,buf
);
9609 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9612 static void configCommand(redisClient
*c
) {
9613 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9614 if (c
->argc
!= 4) goto badarity
;
9615 configSetCommand(c
);
9616 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9617 if (c
->argc
!= 3) goto badarity
;
9618 configGetCommand(c
);
9619 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9620 if (c
->argc
!= 2) goto badarity
;
9621 server
.stat_numcommands
= 0;
9622 server
.stat_numconnections
= 0;
9623 server
.stat_expiredkeys
= 0;
9624 server
.stat_starttime
= time(NULL
);
9625 addReply(c
,shared
.ok
);
9627 addReplySds(c
,sdscatprintf(sdsempty(),
9628 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9633 addReplySds(c
,sdscatprintf(sdsempty(),
9634 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9635 (char*) c
->argv
[1]->ptr
));
9638 /* =========================== Pubsub implementation ======================== */
9640 static void freePubsubPattern(void *p
) {
9641 pubsubPattern
*pat
= p
;
9643 decrRefCount(pat
->pattern
);
9647 static int listMatchPubsubPattern(void *a
, void *b
) {
9648 pubsubPattern
*pa
= a
, *pb
= b
;
9650 return (pa
->client
== pb
->client
) &&
9651 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9654 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9655 * 0 if the client was already subscribed to that channel. */
9656 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9657 struct dictEntry
*de
;
9658 list
*clients
= NULL
;
9661 /* Add the channel to the client -> channels hash table */
9662 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9664 incrRefCount(channel
);
9665 /* Add the client to the channel -> list of clients hash table */
9666 de
= dictFind(server
.pubsub_channels
,channel
);
9668 clients
= listCreate();
9669 dictAdd(server
.pubsub_channels
,channel
,clients
);
9670 incrRefCount(channel
);
9672 clients
= dictGetEntryVal(de
);
9674 listAddNodeTail(clients
,c
);
9676 /* Notify the client */
9677 addReply(c
,shared
.mbulk3
);
9678 addReply(c
,shared
.subscribebulk
);
9679 addReplyBulk(c
,channel
);
9680 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9684 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9685 * 0 if the client was not subscribed to the specified channel. */
9686 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9687 struct dictEntry
*de
;
9692 /* Remove the channel from the client -> channels hash table */
9693 incrRefCount(channel
); /* channel may be just a pointer to the same object
9694 we have in the hash tables. Protect it... */
9695 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9697 /* Remove the client from the channel -> clients list hash table */
9698 de
= dictFind(server
.pubsub_channels
,channel
);
9700 clients
= dictGetEntryVal(de
);
9701 ln
= listSearchKey(clients
,c
);
9703 listDelNode(clients
,ln
);
9704 if (listLength(clients
) == 0) {
9705 /* Free the list and associated hash entry at all if this was
9706 * the latest client, so that it will not be possible to abuse
9707 * Redis PUBSUB creating millions of channels. */
9708 dictDelete(server
.pubsub_channels
,channel
);
9711 /* Notify the client */
9713 addReply(c
,shared
.mbulk3
);
9714 addReply(c
,shared
.unsubscribebulk
);
9715 addReplyBulk(c
,channel
);
9716 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9717 listLength(c
->pubsub_patterns
));
9720 decrRefCount(channel
); /* it is finally safe to release it */
9724 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
9725 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9728 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9731 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9732 incrRefCount(pattern
);
9733 pat
= zmalloc(sizeof(*pat
));
9734 pat
->pattern
= getDecodedObject(pattern
);
9736 listAddNodeTail(server
.pubsub_patterns
,pat
);
9738 /* Notify the client */
9739 addReply(c
,shared
.mbulk3
);
9740 addReply(c
,shared
.psubscribebulk
);
9741 addReplyBulk(c
,pattern
);
9742 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9746 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
9747 * 0 if the client was not subscribed to the specified pattern. */
9748 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9753 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9754 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9756 listDelNode(c
->pubsub_patterns
,ln
);
9758 pat
.pattern
= pattern
;
9759 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9760 listDelNode(server
.pubsub_patterns
,ln
);
9762 /* Notify the client */
9764 addReply(c
,shared
.mbulk3
);
9765 addReply(c
,shared
.punsubscribebulk
);
9766 addReplyBulk(c
,pattern
);
9767 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9768 listLength(c
->pubsub_patterns
));
9770 decrRefCount(pattern
);
9774 /* Unsubscribe from all the channels. Return the number of channels the
9775 * client was subscribed from. */
9776 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9777 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9781 while((de
= dictNext(di
)) != NULL
) {
9782 robj
*channel
= dictGetEntryKey(de
);
9784 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9786 dictReleaseIterator(di
);
9790 /* Unsubscribe from all the patterns. Return the number of patterns the
9791 * client was subscribed from. */
9792 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9797 listRewind(c
->pubsub_patterns
,&li
);
9798 while ((ln
= listNext(&li
)) != NULL
) {
9799 robj
*pattern
= ln
->value
;
9801 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9806 /* Publish a message */
9807 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9809 struct dictEntry
*de
;
9813 /* Send to clients listening for that channel */
9814 de
= dictFind(server
.pubsub_channels
,channel
);
9816 list
*list
= dictGetEntryVal(de
);
9820 listRewind(list
,&li
);
9821 while ((ln
= listNext(&li
)) != NULL
) {
9822 redisClient
*c
= ln
->value
;
9824 addReply(c
,shared
.mbulk3
);
9825 addReply(c
,shared
.messagebulk
);
9826 addReplyBulk(c
,channel
);
9827 addReplyBulk(c
,message
);
9831 /* Send to clients listening to matching channels */
9832 if (listLength(server
.pubsub_patterns
)) {
9833 listRewind(server
.pubsub_patterns
,&li
);
9834 channel
= getDecodedObject(channel
);
9835 while ((ln
= listNext(&li
)) != NULL
) {
9836 pubsubPattern
*pat
= ln
->value
;
9838 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9839 sdslen(pat
->pattern
->ptr
),
9840 (char*)channel
->ptr
,
9841 sdslen(channel
->ptr
),0)) {
9842 addReply(pat
->client
,shared
.mbulk4
);
9843 addReply(pat
->client
,shared
.pmessagebulk
);
9844 addReplyBulk(pat
->client
,pat
->pattern
);
9845 addReplyBulk(pat
->client
,channel
);
9846 addReplyBulk(pat
->client
,message
);
9850 decrRefCount(channel
);
9855 static void subscribeCommand(redisClient
*c
) {
9858 for (j
= 1; j
< c
->argc
; j
++)
9859 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9862 static void unsubscribeCommand(redisClient
*c
) {
9864 pubsubUnsubscribeAllChannels(c
,1);
9869 for (j
= 1; j
< c
->argc
; j
++)
9870 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9874 static void psubscribeCommand(redisClient
*c
) {
9877 for (j
= 1; j
< c
->argc
; j
++)
9878 pubsubSubscribePattern(c
,c
->argv
[j
]);
9881 static void punsubscribeCommand(redisClient
*c
) {
9883 pubsubUnsubscribeAllPatterns(c
,1);
9888 for (j
= 1; j
< c
->argc
; j
++)
9889 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9893 static void publishCommand(redisClient
*c
) {
9894 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9895 addReplyLong(c
,receivers
);
9898 /* ================================= Debugging ============================== */
9900 static void debugCommand(redisClient
*c
) {
9901 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9903 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9904 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9905 addReply(c
,shared
.err
);
9909 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9910 addReply(c
,shared
.err
);
9913 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9914 addReply(c
,shared
.ok
);
9915 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9917 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9918 addReply(c
,shared
.err
);
9921 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9922 addReply(c
,shared
.ok
);
9923 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9924 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9928 addReply(c
,shared
.nokeyerr
);
9931 key
= dictGetEntryKey(de
);
9932 val
= dictGetEntryVal(de
);
9933 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9934 key
->storage
== REDIS_VM_SWAPPING
)) {
9938 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9939 strenc
= strencoding
[val
->encoding
];
9941 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9944 addReplySds(c
,sdscatprintf(sdsempty(),
9945 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9946 "encoding:%s serializedlength:%lld\r\n",
9947 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9948 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9950 addReplySds(c
,sdscatprintf(sdsempty(),
9951 "+Key at:%p refcount:%d, value swapped at: page %llu "
9952 "using %llu pages\r\n",
9953 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9954 (unsigned long long) key
->vm
.usedpages
));
9956 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9957 lookupKeyRead(c
->db
,c
->argv
[2]);
9958 addReply(c
,shared
.ok
);
9959 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9960 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9963 if (!server
.vm_enabled
) {
9964 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9968 addReply(c
,shared
.nokeyerr
);
9971 key
= dictGetEntryKey(de
);
9972 val
= dictGetEntryVal(de
);
9973 /* If the key is shared we want to create a copy */
9974 if (key
->refcount
> 1) {
9975 robj
*newkey
= dupStringObject(key
);
9977 key
= dictGetEntryKey(de
) = newkey
;
9980 if (key
->storage
!= REDIS_VM_MEMORY
) {
9981 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9982 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9983 dictGetEntryVal(de
) = NULL
;
9984 addReply(c
,shared
.ok
);
9986 addReply(c
,shared
.err
);
9989 addReplySds(c
,sdsnew(
9990 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9994 static void _redisAssert(char *estr
, char *file
, int line
) {
9995 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9996 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9997 #ifdef HAVE_BACKTRACE
9998 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10003 static void _redisPanic(char *msg
, char *file
, int line
) {
10004 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10005 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10006 #ifdef HAVE_BACKTRACE
10007 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10008 *((char*)-1) = 'x';
10012 /* =================================== Main! ================================ */
/* Read /proc/sys/vm/overcommit_memory and return its content converted
 * with atoi(). Returns -1 if the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,64,fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
10029 void linuxOvercommitMemoryWarning(void) {
10030 if (linuxOvercommitMemoryValue() == 0) {
10031 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10034 #endif /* __linux__ */
10036 static void daemonize(void) {
10040 if (fork() != 0) exit(0); /* parent exits */
10041 setsid(); /* create a new session */
10043 /* Every output goes to /dev/null. If Redis is daemonized but
10044 * the 'logfile' is set to 'stdout' in the configuration file
10045 * it will not log at all. */
10046 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10047 dup2(fd
, STDIN_FILENO
);
10048 dup2(fd
, STDOUT_FILENO
);
10049 dup2(fd
, STDERR_FILENO
);
10050 if (fd
> STDERR_FILENO
) close(fd
);
10052 /* Try to write the pid file */
10053 fp
= fopen(server
.pidfile
,"w");
10055 fprintf(fp
,"%d\n",getpid());
10060 static void version() {
10061 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on standard error and terminate the
 * process with exit status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
10071 int main(int argc
, char **argv
) {
10074 initServerConfig();
10076 if (strcmp(argv
[1], "-v") == 0 ||
10077 strcmp(argv
[1], "--version") == 0) version();
10078 if (strcmp(argv
[1], "--help") == 0) usage();
10079 resetServerSaveParams();
10080 loadServerConfig(argv
[1]);
10081 } else if ((argc
> 2)) {
10084 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10086 if (server
.daemonize
) daemonize();
10088 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10090 linuxOvercommitMemoryWarning();
10092 start
= time(NULL
);
10093 if (server
.appendonly
) {
10094 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10095 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10097 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10098 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10100 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10101 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10103 aeDeleteEventLoop(server
.el
);
10107 /* ============================= Backtrace support ========================= */
10109 #ifdef HAVE_BACKTRACE
10110 static char *findFuncName(void *pointer
, unsigned long *offset
);
10112 static void *getMcontextEip(ucontext_t
*uc
) {
10113 #if defined(__FreeBSD__)
10114 return (void*) uc
->uc_mcontext
.mc_eip
;
10115 #elif defined(__dietlibc__)
10116 return (void*) uc
->uc_mcontext
.eip
;
10117 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10119 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10121 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10123 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10124 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10125 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10127 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10129 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10130 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10131 #elif defined(__ia64__) /* Linux IA64 */
10132 return (void*) uc
->uc_mcontext
.sc_ip
;
10138 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10140 char **messages
= NULL
;
10141 int i
, trace_size
= 0;
10142 unsigned long offset
=0;
10143 ucontext_t
*uc
= (ucontext_t
*) secret
;
10145 REDIS_NOTUSED(info
);
10147 redisLog(REDIS_WARNING
,
10148 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10149 infostring
= genRedisInfoString();
10150 redisLog(REDIS_WARNING
, "%s",infostring
);
10151 /* It's not safe to sdsfree() the returned string under memory
10152 * corruption conditions. Let it leak as we are going to abort */
10154 trace_size
= backtrace(trace
, 100);
10155 /* overwrite sigaction with caller's address */
10156 if (getMcontextEip(uc
) != NULL
) {
10157 trace
[1] = getMcontextEip(uc
);
10159 messages
= backtrace_symbols(trace
, trace_size
);
10161 for (i
=1; i
<trace_size
; ++i
) {
10162 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
10164 p
= strchr(messages
[i
],'+');
10165 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10166 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
10168 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10171 /* free(messages); Don't call free() with possibly corrupted memory. */
10175 static void setupSigSegvAction(void) {
10176 struct sigaction act
;
10178 sigemptyset (&act
.sa_mask
);
10179 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10180 * is used. Otherwise, sa_handler is used */
10181 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10182 act
.sa_sigaction
= segvHandler
;
10183 sigaction (SIGSEGV
, &act
, NULL
);
10184 sigaction (SIGBUS
, &act
, NULL
);
10185 sigaction (SIGFPE
, &act
, NULL
);
10186 sigaction (SIGILL
, &act
, NULL
);
10187 sigaction (SIGBUS
, &act
, NULL
);
10191 #include "staticsymbols.h"
10192 /* This function try to convert a pointer into a function name. It's used in
10193 * oreder to provide a backtrace under segmentation fault that's able to
10194 * display functions declared as static (otherwise the backtrace is useless). */
10195 static char *findFuncName(void *pointer
, unsigned long *offset
){
10197 unsigned long off
, minoff
= 0;
10199 /* Try to match against the Symbol with the smallest offset */
10200 for (i
=0; symsTable
[i
].pointer
; i
++) {
10201 unsigned long lp
= (unsigned long) pointer
;
10203 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10204 off
=lp
-symsTable
[i
].pointer
;
10205 if (ret
< 0 || off
< minoff
) {
10211 if (ret
== -1) return NULL
;
10213 return symsTable
[ret
].name
;
10215 #else /* HAVE_BACKTRACE */
/* Built without backtrace support: there is no crash handler to install,
 * so this is a no-op. */
static void setupSigSegvAction(void) {
    return;
}
10218 #endif /* HAVE_BACKTRACE */