2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.10"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
64 #include "solarisfixes.h"
68 #include "ae.h" /* Event driven programming library */
69 #include "sds.h" /* Dynamic safe strings */
70 #include "anet.h" /* Networking the easy way */
71 #include "dict.h" /* Hash tables */
72 #include "adlist.h" /* Linked lists */
73 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
74 #include "lzf.h" /* LZF compression library */
75 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
82 /* Static server configuration */
83 #define REDIS_SERVERPORT 6379 /* TCP port */
84 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
85 #define REDIS_IOBUF_LEN 1024
86 #define REDIS_LOADBUF_LEN 1024
87 #define REDIS_STATIC_ARGS 8
88 #define REDIS_DEFAULT_DBNUM 16
89 #define REDIS_CONFIGLINE_MAX 1024
90 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
91 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
92 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
93 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
94 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
96 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
97 #define REDIS_WRITEV_THRESHOLD 3
98 /* Max number of iovecs used for each writev call */
99 #define REDIS_WRITEV_IOVEC_COUNT 256
101 /* Hash table parameters */
102 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
105 #define REDIS_CMD_BULK 1 /* Bulk write command */
106 #define REDIS_CMD_INLINE 2 /* Inline command */
107 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
108 this flag will return an error when the 'maxmemory' option is set in the
109 config file and the server is using more than maxmemory bytes of memory.
110 In short these commands are denied on low memory conditions. */
111 #define REDIS_CMD_DENYOOM 4
112 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
115 #define REDIS_STRING 0
121 /* Objects encoding. Some kinds of objects, like Strings and Hashes, can be
122 * internally represented in multiple ways. The 'encoding' field of the object
123 * is set to one of these values for this object. */
124 #define REDIS_ENCODING_RAW 0 /* Raw representation */
125 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
126 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
127 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
129 static char* strencoding
[] = {
130 "raw", "int", "zipmap", "hashtable"
133 /* Object types only used for dumping to disk */
134 #define REDIS_EXPIRETIME 253
135 #define REDIS_SELECTDB 254
136 #define REDIS_EOF 255
138 /* Defines related to the dump file format. To store 32 bits lengths for short
139 * keys requires a lot of space, so we check the most significant 2 bits of
140 * the first byte to interpret the length:
142 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
143 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
144 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
145 * 11|000000 this means: specially encoded object will follow. The six bits
146 * number specifies the kind of object that follows.
147 * See the REDIS_RDB_ENC_* defines.
149 * Lengths up to 63 are stored using a single byte, most DB keys, and many
150 * values, will fit inside. */
151 #define REDIS_RDB_6BITLEN 0
152 #define REDIS_RDB_14BITLEN 1
153 #define REDIS_RDB_32BITLEN 2
154 #define REDIS_RDB_ENCVAL 3
155 #define REDIS_RDB_LENERR UINT_MAX
157 /* When a length of a string object stored on disk has the first two bits
158 * set, the remaining two bits specify a special encoding for the object
159 * according to the following defines: */
160 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
161 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
162 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
163 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
165 /* Virtual memory object->where field. */
166 #define REDIS_VM_MEMORY 0 /* The object is on memory */
167 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
168 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
169 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
171 /* Virtual memory static configuration stuff.
172 * Check vmFindContiguousPages() to know more about these magic numbers. */
173 #define REDIS_VM_MAX_NEAR_PAGES 65536
174 #define REDIS_VM_MAX_RANDOM_JUMP 4096
175 #define REDIS_VM_MAX_THREADS 32
176 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
177 /* The following is the *percentage* of completed I/O jobs to process when the
178 * handler is called. While Virtual Memory I/O operations are performed by
179 * threads, these operations must be processed by the main thread when completed
180 * in order to take effect. */
181 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
184 #define REDIS_SLAVE 1 /* This client is a slave server */
185 #define REDIS_MASTER 2 /* This client is a master server */
186 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
187 #define REDIS_MULTI 8 /* This client is in a MULTI context */
188 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
189 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
191 /* Slave replication state - slave side */
192 #define REDIS_REPL_NONE 0 /* No active replication */
193 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
194 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
196 /* Slave replication state - from the point of view of master
197 * Note that in SEND_BULK and ONLINE state the slave receives new updates
198 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
199 * to start the next background saving in order to send updates to it. */
200 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
201 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
202 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
203 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
205 /* List related stuff */
209 /* Sort operations */
210 #define REDIS_SORT_GET 0
211 #define REDIS_SORT_ASC 1
212 #define REDIS_SORT_DESC 2
213 #define REDIS_SORTKEY_MAX 1024
216 #define REDIS_DEBUG 0
217 #define REDIS_VERBOSE 1
218 #define REDIS_NOTICE 2
219 #define REDIS_WARNING 3
/* Anti-warning macro: casts its argument to void so that an otherwise unused
 * variable or parameter counts as used. The argument is parenthesized so a
 * compound expression such as REDIS_NOTUSED(a + b) is discarded as a whole
 * instead of expanding to ((void) a + b), which does not compile. */
#define REDIS_NOTUSED(V) ((void) (V))
224 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
225 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
227 /* Append only defines */
228 #define APPENDFSYNC_NO 0
229 #define APPENDFSYNC_ALWAYS 1
230 #define APPENDFSYNC_EVERYSEC 2
232 /* Hashes related defaults */
233 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
234 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): evaluates _e; when it is false, hands the stringified
 *                  expression plus __FILE__ / __LINE__ to _redisAssert() and
 *                  then calls _exit(1).
 * redisPanic(_e):  unconditionally hands the stringified argument plus
 *                  __FILE__ / __LINE__ to _redisPanic() and then calls
 *                  _exit(1).
 *
 * Both expansions are wrapped in parentheses so that each use is a single
 * expression: without them the comma inside redisPanic() would be parsed as
 * an argument separator or split an enclosing expression. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))

/* Reporting helpers used by the macros above; defined elsewhere in this file.
 * estr / msg receive the stringified macro argument, file and line the
 * location of the macro invocation. */
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at witch the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last save succeeede */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
373 sds aofbuf
; /* AOF buffer, written before entering the event loop */
374 struct saveparam
*saveparams
;
379 char *appendfilename
;
383 /* Replication related */
388 redisClient
*master
; /* client that is master for this slave */
390 unsigned int maxclients
;
391 unsigned long long maxmemory
;
392 unsigned int blpop_blocked_clients
;
393 unsigned int vm_blocked_clients
;
394 /* Sort parameters - qsort_r() is only available under BSD so we
395 * have to take this state global, in order to pass it to sortCompare() */
399 /* Virtual memory configuration */
404 unsigned long long vm_max_memory
;
406 size_t hash_max_zipmap_entries
;
407 size_t hash_max_zipmap_value
;
408 /* Virtual memory state */
411 off_t vm_next_page
; /* Next probably empty page */
412 off_t vm_near_pages
; /* Number of pages allocated sequentially */
413 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
414 time_t unixtime
; /* Unix time sampled every second. */
415 /* Virtual memory I/O threads stuff */
416 /* An I/O thread processes an element taken from the io_newjobs queue and
417 * puts the result of the operation in the io_processed list. While the
418 * job is being processed, it's put on the io_processing queue. */
419 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
420 list
*io_processing
; /* List of VM I/O jobs being processed */
421 list
*io_processed
; /* List of VM I/O jobs already processed */
422 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
423 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
424 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
425 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
426 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
427 int io_active_threads
; /* Number of running I/O threads */
428 int vm_max_threads
; /* Max number of I/O threads running at the same time */
429 /* Our main thread is blocked on the event loop, looking for sockets ready
430 * to be read or written, so when a threaded I/O operation is ready to be
431 * processed by the main thread, the I/O thread will use a unix pipe to
432 * awake the main thread. The following are the two pipe FDs. */
433 int io_ready_pipe_read
;
434 int io_ready_pipe_write
;
435 /* Virtual memory stats */
436 unsigned long long vm_stats_used_pages
;
437 unsigned long long vm_stats_swapped_objects
;
438 unsigned long long vm_stats_swapouts
;
439 unsigned long long vm_stats_swapins
;
441 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
442 list
*pubsub_patterns
; /* A list of pubsub_patterns */
447 typedef struct pubsubPattern
{
452 typedef void redisCommandProc(redisClient
*c
);
453 struct redisCommand
{
455 redisCommandProc
*proc
;
458 /* Use a function to determine which keys need to be loaded
459 * in the background prior to executing this command. Takes precedence
460 * over vm_firstkey and others, ignored when NULL */
461 redisCommandProc
*vm_preload_proc
;
462 /* What keys should be loaded in background when calling this command? */
463 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
464 int vm_lastkey
; /* THe last argument that's a key */
465 int vm_keystep
; /* The step between first and last key */
468 struct redisFunctionSym
{
470 unsigned long pointer
;
473 typedef struct _redisSortObject
{
481 typedef struct _redisSortOperation
{
484 } redisSortOperation
;
486 /* ZSETs use a specialized version of Skiplists */
488 typedef struct zskiplistNode
{
489 struct zskiplistNode
**forward
;
490 struct zskiplistNode
*backward
;
496 typedef struct zskiplist
{
497 struct zskiplistNode
*header
, *tail
;
498 unsigned long length
;
502 typedef struct zset
{
507 /* Our shared "common" objects */
509 #define REDIS_SHARED_INTEGERS 10000
510 struct sharedObjectsStruct
{
511 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
512 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
513 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
514 *outofrangeerr
, *plus
,
515 *select0
, *select1
, *select2
, *select3
, *select4
,
516 *select5
, *select6
, *select7
, *select8
, *select9
,
517 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
518 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
519 *integers
[REDIS_SHARED_INTEGERS
];
522 /* Global vars that are actually used as constants. The following double
523 * values are used for double on-disk serialization, and are initialized
524 * at runtime to avoid strange compiler optimizations. */
526 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
528 /* VM threaded I/O request message */
529 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
530 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
531 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
532 typedef struct iojob
{
533 int type
; /* Request type, REDIS_IOJOB_* */
534 redisDb
*db
;/* Redis database */
535 robj
*key
; /* This I/O request is about swapping this key */
536 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
537 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
538 off_t page
; /* Swap page where to read/write the object */
539 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
540 int canceled
; /* True if this command was canceled by blocking side of VM */
541 pthread_t thread
; /* ID of the thread processing this entry */
544 /*================================ Prototypes =============================== */
546 static void freeStringObject(robj
*o
);
547 static void freeListObject(robj
*o
);
548 static void freeSetObject(robj
*o
);
549 static void decrRefCount(void *o
);
550 static robj
*createObject(int type
, void *ptr
);
551 static void freeClient(redisClient
*c
);
552 static int rdbLoad(char *filename
);
553 static void addReply(redisClient
*c
, robj
*obj
);
554 static void addReplySds(redisClient
*c
, sds s
);
555 static void incrRefCount(robj
*o
);
556 static int rdbSaveBackground(char *filename
);
557 static robj
*createStringObject(char *ptr
, size_t len
);
558 static robj
*dupStringObject(robj
*o
);
559 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
560 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
561 static void flushAppendOnlyFile(void);
562 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
563 static int syncWithMaster(void);
564 static robj
*tryObjectEncoding(robj
*o
);
565 static robj
*getDecodedObject(robj
*o
);
566 static int removeExpire(redisDb
*db
, robj
*key
);
567 static int expireIfNeeded(redisDb
*db
, robj
*key
);
568 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
569 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
570 static int deleteKey(redisDb
*db
, robj
*key
);
571 static time_t getExpire(redisDb
*db
, robj
*key
);
572 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
573 static void updateSlavesWaitingBgsave(int bgsaveerr
);
574 static void freeMemoryIfNeeded(void);
575 static int processCommand(redisClient
*c
);
576 static void setupSigSegvAction(void);
577 static void rdbRemoveTempFile(pid_t childpid
);
578 static void aofRemoveTempFile(pid_t childpid
);
579 static size_t stringObjectLen(robj
*o
);
580 static void processInputBuffer(redisClient
*c
);
581 static zskiplist
*zslCreate(void);
582 static void zslFree(zskiplist
*zsl
);
583 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
584 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
585 static void initClientMultiState(redisClient
*c
);
586 static void freeClientMultiState(redisClient
*c
);
587 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
588 static void unblockClientWaitingData(redisClient
*c
);
589 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
590 static void vmInit(void);
591 static void vmMarkPagesFree(off_t page
, off_t count
);
592 static robj
*vmLoadObject(robj
*key
);
593 static robj
*vmPreviewObject(robj
*key
);
594 static int vmSwapOneObjectBlocking(void);
595 static int vmSwapOneObjectThreaded(void);
596 static int vmCanSwapOut(void);
597 static int tryFreeOneObjectFromFreelist(void);
598 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
599 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static void vmCancelThreadedIOJob(robj
*o
);
601 static void lockThreadedIO(void);
602 static void unlockThreadedIO(void);
603 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
604 static void freeIOJob(iojob
*j
);
605 static void queueIOJob(iojob
*j
);
606 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
607 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
608 static void waitEmptyIOJobsQueue(void);
609 static void vmReopenSwapFile(void);
610 static int vmFreePage(off_t page
);
611 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
612 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
613 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
614 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
615 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
616 static struct redisCommand
*lookupCommand(char *name
);
617 static void call(redisClient
*c
, struct redisCommand
*cmd
);
618 static void resetClient(redisClient
*c
);
619 static void convertToRealHash(robj
*o
);
620 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
621 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
622 static void freePubsubPattern(void *p
);
623 static int listMatchPubsubPattern(void *a
, void *b
);
624 static int compareStringObjects(robj
*a
, robj
*b
);
625 static int equalStringObjects(robj
*a
, robj
*b
);
627 static int rewriteAppendOnlyFileBackground(void);
628 static int vmSwapObjectBlocking(robj
*key
, robj
*val
);
630 static void authCommand(redisClient
*c
);
631 static void pingCommand(redisClient
*c
);
632 static void echoCommand(redisClient
*c
);
633 static void setCommand(redisClient
*c
);
634 static void setnxCommand(redisClient
*c
);
635 static void setexCommand(redisClient
*c
);
636 static void getCommand(redisClient
*c
);
637 static void delCommand(redisClient
*c
);
638 static void existsCommand(redisClient
*c
);
639 static void incrCommand(redisClient
*c
);
640 static void decrCommand(redisClient
*c
);
641 static void incrbyCommand(redisClient
*c
);
642 static void decrbyCommand(redisClient
*c
);
643 static void selectCommand(redisClient
*c
);
644 static void randomkeyCommand(redisClient
*c
);
645 static void keysCommand(redisClient
*c
);
646 static void dbsizeCommand(redisClient
*c
);
647 static void lastsaveCommand(redisClient
*c
);
648 static void saveCommand(redisClient
*c
);
649 static void bgsaveCommand(redisClient
*c
);
650 static void bgrewriteaofCommand(redisClient
*c
);
651 static void shutdownCommand(redisClient
*c
);
652 static void moveCommand(redisClient
*c
);
653 static void renameCommand(redisClient
*c
);
654 static void renamenxCommand(redisClient
*c
);
655 static void lpushCommand(redisClient
*c
);
656 static void rpushCommand(redisClient
*c
);
657 static void lpopCommand(redisClient
*c
);
658 static void rpopCommand(redisClient
*c
);
659 static void llenCommand(redisClient
*c
);
660 static void lindexCommand(redisClient
*c
);
661 static void lrangeCommand(redisClient
*c
);
662 static void ltrimCommand(redisClient
*c
);
663 static void typeCommand(redisClient
*c
);
664 static void lsetCommand(redisClient
*c
);
665 static void saddCommand(redisClient
*c
);
666 static void sremCommand(redisClient
*c
);
667 static void smoveCommand(redisClient
*c
);
668 static void sismemberCommand(redisClient
*c
);
669 static void scardCommand(redisClient
*c
);
670 static void spopCommand(redisClient
*c
);
671 static void srandmemberCommand(redisClient
*c
);
672 static void sinterCommand(redisClient
*c
);
673 static void sinterstoreCommand(redisClient
*c
);
674 static void sunionCommand(redisClient
*c
);
675 static void sunionstoreCommand(redisClient
*c
);
676 static void sdiffCommand(redisClient
*c
);
677 static void sdiffstoreCommand(redisClient
*c
);
678 static void syncCommand(redisClient
*c
);
679 static void flushdbCommand(redisClient
*c
);
680 static void flushallCommand(redisClient
*c
);
681 static void sortCommand(redisClient
*c
);
682 static void lremCommand(redisClient
*c
);
683 static void rpoplpushcommand(redisClient
*c
);
684 static void infoCommand(redisClient
*c
);
685 static void mgetCommand(redisClient
*c
);
686 static void monitorCommand(redisClient
*c
);
687 static void expireCommand(redisClient
*c
);
688 static void expireatCommand(redisClient
*c
);
689 static void getsetCommand(redisClient
*c
);
690 static void ttlCommand(redisClient
*c
);
691 static void slaveofCommand(redisClient
*c
);
692 static void debugCommand(redisClient
*c
);
693 static void msetCommand(redisClient
*c
);
694 static void msetnxCommand(redisClient
*c
);
695 static void zaddCommand(redisClient
*c
);
696 static void zincrbyCommand(redisClient
*c
);
697 static void zrangeCommand(redisClient
*c
);
698 static void zrangebyscoreCommand(redisClient
*c
);
699 static void zcountCommand(redisClient
*c
);
700 static void zrevrangeCommand(redisClient
*c
);
701 static void zcardCommand(redisClient
*c
);
702 static void zremCommand(redisClient
*c
);
703 static void zscoreCommand(redisClient
*c
);
704 static void zremrangebyscoreCommand(redisClient
*c
);
705 static void multiCommand(redisClient
*c
);
706 static void execCommand(redisClient
*c
);
707 static void discardCommand(redisClient
*c
);
708 static void blpopCommand(redisClient
*c
);
709 static void brpopCommand(redisClient
*c
);
710 static void appendCommand(redisClient
*c
);
711 static void substrCommand(redisClient
*c
);
712 static void zrankCommand(redisClient
*c
);
713 static void zrevrankCommand(redisClient
*c
);
714 static void hsetCommand(redisClient
*c
);
715 static void hsetnxCommand(redisClient
*c
);
716 static void hgetCommand(redisClient
*c
);
717 static void hmsetCommand(redisClient
*c
);
718 static void hmgetCommand(redisClient
*c
);
719 static void hdelCommand(redisClient
*c
);
720 static void hlenCommand(redisClient
*c
);
721 static void zremrangebyrankCommand(redisClient
*c
);
722 static void zunionCommand(redisClient
*c
);
723 static void zinterCommand(redisClient
*c
);
724 static void hkeysCommand(redisClient
*c
);
725 static void hvalsCommand(redisClient
*c
);
726 static void hgetallCommand(redisClient
*c
);
727 static void hexistsCommand(redisClient
*c
);
728 static void configCommand(redisClient
*c
);
729 static void hincrbyCommand(redisClient
*c
);
730 static void subscribeCommand(redisClient
*c
);
731 static void unsubscribeCommand(redisClient
*c
);
732 static void psubscribeCommand(redisClient
*c
);
733 static void punsubscribeCommand(redisClient
*c
);
734 static void publishCommand(redisClient
*c
);
736 /*================================= Globals ================================= */
739 static struct redisServer server
; /* server global state */
740 static struct redisCommand cmdTable
[] = {
741 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
742 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
743 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
744 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
745 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
746 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
748 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
750 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
751 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
752 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
753 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
754 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
761 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
764 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
765 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
766 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
767 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
768 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
769 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
771 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
773 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
774 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
775 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
776 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
777 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
778 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
780 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
781 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
782 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
783 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
784 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
785 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
786 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
790 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
793 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
794 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
797 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
799 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
800 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
801 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
806 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
807 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
808 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
809 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
810 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
811 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
814 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
815 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
816 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
823 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
829 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
831 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
836 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
839 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
842 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
846 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
847 {NULL
,NULL
,0,0,NULL
,0,0,0}
850 /*============================ Utility functions ============================ */
852 /* Glob-style pattern matching.
 *
 * Matches the first stringLen bytes of 'string' against the first
 * patternLen bytes of 'pattern'. The return sites visible here yield 1 on
 * a match and 0 otherwise.
 *
 * The visible branches deal with runs of '*' (the remainder of the pattern
 * is tried through a recursive call), with a character class that can be
 * negated by a leading '^', may contain backslash escapes and "a-z" style
 * ranges, and is closed by ']', and with plain characters. Character tests
 * come in an exact flavour and a tolower() flavour; presumably 'nocase'
 * selects between them -- confirm against the full body. */
853 static int stringmatchlen(const char *pattern
, int patternLen
,
854 const char *string
, int stringLen
, int nocase
)
/* Collapse consecutive '*' characters. */
859 while (pattern
[1] == '*') {
864 return 1; /* match */
/* Try to match what follows the '*' against the current subject. */
866 if (stringmatchlen(pattern
+1, patternLen
-1,
867 string
, stringLen
, nocase
))
868 return 1; /* match */
872 return 0; /* no match */
876 return 0; /* no match */
/* A leading '^' sets the negation flag for the class. */
886 not = pattern
[0] == '^';
893 if (pattern
[0] == '\\') {
896 if (pattern
[0] == string
[0])
898 } else if (pattern
[0] == ']') {
900 } else if (patternLen
== 0) {
/* "start-end" range: needs at least three pattern bytes. */
904 } else if (pattern
[1] == '-' && patternLen
>= 3) {
905 int start
= pattern
[0];
906 int end
= pattern
[2];
/* Fold the range start to lower case. */
914 start
= tolower(start
);
920 if (c
>= start
&& c
<= end
)
924 if (pattern
[0] == string
[0])
927 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
937 return 0; /* no match */
943 if (patternLen
>= 2) {
950 if (pattern
[0] != string
[0])
951 return 0; /* no match */
953 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
954 return 0; /* no match */
/* Subject exhausted: only '*' characters are still consumed here. */
962 if (stringLen
== 0) {
963 while(*pattern
== '*') {
/* Both pattern and subject fully consumed. */
970 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated arguments:
 * both lengths are obtained with strlen() and 'nocase' is passed through
 * unchanged. Returns whatever stringmatchlen() returns. */
975 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
976 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
979 /* Convert a string representing an amount of memory into the number of
980 * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
983 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
/* Implementation notes: the run of leading decimal digits in 'p' is located
 * first, the text that follows it is compared case-insensitively against
 * the known unit suffixes to choose 'mul', and the digits are then copied
 * into a local buffer and converted with strtoll() in base 10.
 *
 * NOTE(review): the comment preceding this function uses "1Gi" as its
 * example, but no "gi" suffix is compared in the branches visible here;
 * "gb" is the suffix that selects 1024*1024*1024 -- confirm and adjust the
 * example if needed. */
985 static long long memtoll(const char *p
, int *err
) {
988 long mul
; /* unit multiplier */
993 /* Search the first non digit character. */
996 while(*u
&& isdigit(*u
)) u
++;
/* No suffix at all, or a plain "b". */
997 if (*u
== '\0' || !strcasecmp(u
,"b")) {
999 } else if (!strcasecmp(u
,"k")) {
1001 } else if (!strcasecmp(u
,"kb")) {
1003 } else if (!strcasecmp(u
,"m")) {
1005 } else if (!strcasecmp(u
,"mb")) {
1007 } else if (!strcasecmp(u
,"g")) {
/* "g" is the decimal unit: 10^9. */
1008 mul
= 1000L*1000*1000;
1009 } else if (!strcasecmp(u
,"gb")) {
/* "gb" is the binary unit: 2^30. */
1010 mul
= 1024L*1024*1024;
/* Digit runs as long as buf (or longer) take this branch. */
1016 if (digits
>= sizeof(buf
)) {
1020 memcpy(buf
,p
,digits
);
1022 val
= strtoll(buf
,NULL
,10);
/* Writes one formatted line to the server log.
 *
 * The destination is stdout when server.logfile is NULL; otherwise the
 * file is opened in append mode for this call and closed again at the end.
 * The message is emitted only when 'level' is at least server.verbosity,
 * and is prefixed with the process id, a "%d %b %H:%M:%S" local timestamp
 * and a per-level marker character taken from c[level]. 'fmt' and the
 * variadic arguments follow printf conventions (they are handed to
 * vfprintf()). */
1026 static void redisLog(int level
, const char *fmt
, ...) {
1030 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1034 if (level
>= server
.verbosity
) {
1040 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1041 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1042 vfprintf(fp
, fmt
, ap
);
/* Only a stream opened by this call is closed; stdout is left alone. */
1048 if (server
.logfile
) fclose(fp
);
1051 /*====================== Hash table type implementation ==================== */
1053 /* This is a hash table type that uses the SDS dynamic strings library as
1054 * keys and redis objects as values (objects can hold SDS strings,
/* Value destructor callback for dict entries; 'privdata' is explicitly
 * marked as unused. It is installed as the val destructor of zsetDictType,
 * where the values are described as malloc(sizeof(double)) blocks, so it
 * presumably just frees 'val' -- confirm against the full body. */
1057 static void dictVanillaFree(void *privdata
, void *val
)
1059 DICT_NOTUSED(privdata
);
/* Value destructor callback for dict entries whose value is a list: the
 * list is released with listRelease(). 'privdata' is unused. Installed as
 * the val destructor of keylistDictType. */
1063 static void dictListDestructor(void *privdata
, void *val
)
1065 DICT_NOTUSED(privdata
);
1066 listRelease((list
*)val
);
/* Key comparison callback for sds keys. Two keys are equal when their
 * sdslen() lengths match and memcmp() over that length finds no
 * difference. Returns 1 for equal keys and 0 otherwise. 'privdata' is
 * unused. */
1069 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1073 DICT_NOTUSED(privdata
);
1075 l1
= sdslen((sds
)key1
);
1076 l2
= sdslen((sds
)key2
);
/* Different lengths can never be equal: skip the byte comparison. */
1077 if (l1
!= l2
) return 0;
1078 return memcmp(key1
, key2
, l1
) == 0;
/* Destructor callback used for dict keys and values that are redis
 * objects (see the dictType tables below). 'privdata' is unused and a NULL
 * 'val' is accepted and ignored. How a non-NULL object is released is not
 * shown on the lines visible here. */
1081 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1083 DICT_NOTUSED(privdata
);
1085 if (val
== NULL
) return; /* Values of swapped out keys are set to NULL */
/* Key comparison callback for robj keys: delegates to sdsDictKeyCompare()
 * on the ptr fields of the two objects, which are therefore treated as sds
 * strings. Returns what sdsDictKeyCompare() returns. */
1089 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1092 const robj
*o1
= key1
, *o2
= key2
;
1093 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for robj keys: feeds the sdslen() bytes of the sds string
 * stored in o->ptr to dictGenHashFunction(). */
1096 static unsigned int dictObjHash(const void *key
) {
1097 const robj
*o
= key
;
1098 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
/* Key comparison callback for robj keys that may be integer encoded.
 *
 * When both objects use REDIS_ENCODING_INT their ptr fields are compared
 * directly. Otherwise both objects are passed through getDecodedObject()
 * and the resulting strings are compared with sdsDictKeyCompare(). */
1101 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1104 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
/* Fast path: two integer-encoded objects. */
1107 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1108 o2
->encoding
== REDIS_ENCODING_INT
)
1109 return o1
->ptr
== o2
->ptr
;
/* NOTE(review): the decoded objects are presumably released once 'cmp'
 * has been computed; that part is not visible here -- confirm. */
1111 o1
= getDecodedObject(o1
);
1112 o2
= getDecodedObject(o2
);
1113 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for robj keys that may be integer encoded.
 *
 * REDIS_ENCODING_RAW objects are hashed over their sds bytes. For
 * REDIS_ENCODING_INT objects the value held in ptr is printed as "%ld"
 * into a local buffer and that text is hashed, presumably so that the
 * same value hashes identically in either encoding -- confirm. The
 * remaining path decodes the object with getDecodedObject() and hashes
 * the decoded string. */
1119 static unsigned int dictEncObjHash(const void *key
) {
1120 robj
*o
= (robj
*) key
;
1122 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1123 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1125 if (o
->encoding
== REDIS_ENCODING_INT
) {
/* Hash the decimal text form of the integer. */
1129 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1130 return dictGenHashFunction((unsigned char*)buf
, len
);
1134 o
= getDecodedObject(o
);
1135 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1142 /* Sets type and expires.
 * Keys are hashed with dictEncObjHash and compared with
 * dictEncObjKeyCompare, so they may be integer-encoded objects; keys are
 * released through dictRedisObjectDestructor and no val destructor is
 * installed. */
1143 static dictType setDictType
= {
1144 dictEncObjHash
, /* hash function */
1147 dictEncObjKeyCompare
, /* key compare */
1148 dictRedisObjectDestructor
, /* key destructor */
1149 NULL
/* val destructor */
1152 /* Sorted sets hash (note: a skiplist is used in addition to the hash table).
 * Same key handling as setDictType; the values are released with
 * dictVanillaFree. */
1153 static dictType zsetDictType
= {
1154 dictEncObjHash
, /* hash function */
1157 dictEncObjKeyCompare
, /* key compare */
1158 dictRedisObjectDestructor
, /* key destructor */
1159 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
/* Dict type of the main key space of each database: initServer() creates
 * server.db[j].dict with it. Keys are hashed and compared as plain sds
 * backed objects (dictObjHash / dictObjKeyCompare); both keys and values
 * are released through dictRedisObjectDestructor. */
1163 static dictType dbDictType
= {
1164 dictObjHash
, /* hash function */
1167 dictObjKeyCompare
, /* key compare */
1168 dictRedisObjectDestructor
, /* key destructor */
1169 dictRedisObjectDestructor
/* val destructor */
/* Dict type used by initServer() for server.db[j].expires. Keys are handled
 * like in dbDictType; no val destructor is installed, so values are not
 * released by the dict. */
1173 static dictType keyptrDictType
= {
1174 dictObjHash
, /* hash function */
1177 dictObjKeyCompare
, /* key compare */
1178 dictRedisObjectDestructor
, /* key destructor */
1179 NULL
/* val destructor */
1182 /* Hash type hash table (note that small hashes are represented with zipmaps).
 * Keys may be integer-encoded objects (dictEncObjHash /
 * dictEncObjKeyCompare); keys and values are both released through
 * dictRedisObjectDestructor. */
1183 static dictType hashDictType
= {
1184 dictEncObjHash
, /* hash function */
1187 dictEncObjKeyCompare
, /* key compare */
1188 dictRedisObjectDestructor
, /* key destructor */
1189 dictRedisObjectDestructor
/* val destructor */
1192 /* Keylist hash table type has unencoded redis objects as keys and
1193 * lists as values. It's used for blocking operations (BLPOP) and to
1194 * map swapped keys to a list of clients waiting for these keys to be loaded.
 * Values are released with dictListDestructor, i.e. listRelease(). */
1195 static dictType keylistDictType
= {
1196 dictObjHash
, /* hash function */
1199 dictObjKeyCompare
, /* key compare */
1200 dictRedisObjectDestructor
, /* key destructor */
1201 dictListDestructor
/* val destructor */
1204 static void version();
1206 /* ========================= Random utility functions ======================= */
1208 /* Redis generally does not try to recover from out of memory conditions
1209 * when allocating objects or strings, it is not clear if it will be possible
1210 * to report this condition to the client since the networking layer itself
1211 * is based on heap allocation for send buffers, so we simply abort.
1212 * At least the code will be simpler to read...
 *
 * 'msg' names the operation that failed; it becomes the prefix of the
 * "<msg>: Out of memory" line logged at REDIS_WARNING level. */
1213 static void oom(const char *msg
) {
1214 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1219 /* ====================== Redis server networking stuff ===================== */
/* Walks server.clients looking for clients that timed out.
 *
 * A client counts as idle when server.maxidletime is non-zero, it is
 * neither a slave nor a master, it holds no pub/sub channel or pattern
 * subscription, and more than maxidletime seconds passed since its last
 * interaction; such clients are reported with "Closing idle client".
 * Clients flagged REDIS_BLOCKED whose blockingto deadline is set and
 * already in the past are instead sent shared.nullmultibulk and
 * unblocked. */
1220 static void closeTimedoutClients(void) {
1223 time_t now
= time(NULL
);
1226 listRewind(server
.clients
,&li
);
1227 while ((ln
= listNext(&li
)) != NULL
) {
1228 c
= listNodeValue(ln
);
1229 if (server
.maxidletime
&&
1230 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1231 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1232 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1233 listLength(c
->pubsub_patterns
) == 0 &&
1234 (now
- c
->lastinteraction
> server
.maxidletime
))
1236 redisLog(REDIS_VERBOSE
,"Closing idle client");
1238 } else if (c
->flags
& REDIS_BLOCKED
) {
/* A blockingto of 0 means no deadline is checked for this client. */
1239 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1240 addReply(c
,shared
.nullmultibulk
);
1241 unblockClientWaitingData(c
);
/* Returns non-zero when 'dict' is worth shrinking: it has slots and
 * entries, it has more than DICT_HT_INITIAL_SIZE slots, and its fill
 * percentage (used*100/size) is below REDIS_HT_MINFILL. */
1247 static int htNeedsResize(dict
*dict
) {
1248 long long size
, used
;
1250 size
= dictSlots(dict
);
1251 used
= dictSize(dict
);
1252 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1253 (used
*100/size
< REDIS_HT_MINFILL
));
1256 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1257 * we resize the hash table to save memory. Both the main dict and the
 * expires dict of every database are checked with htNeedsResize(). */
1258 static void tryResizeHashTables(void) {
1261 for (j
= 0; j
< server
.dbnum
; j
++) {
1262 if (htNeedsResize(server
.db
[j
].dict
))
1263 dictResize(server
.db
[j
].dict
);
1264 if (htNeedsResize(server
.db
[j
].expires
))
1265 dictResize(server
.db
[j
].expires
);
1269 /* Our hash table implementation performs rehashing incrementally while
1270 * we write/read from the hash table. Still if the server is idle, the hash
1271 * table will use two tables for a long time. So we try to use 1 millisecond
1272 * of CPU time at every serverCron() loop in order to rehash some key.
 *
 * Only the first database whose main dict is currently rehashing gets the
 * millisecond; the loop stops right after it. */
1273 static void incrementallyRehash(void) {
1276 for (j
= 0; j
< server
.dbnum
; j
++) {
1277 if (dictIsRehashing(server
.db
[j
].dict
)) {
1278 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1279 break; /* already used our millisecond for this loop... */
1284 /* A background saving child (BGSAVE) terminated its work. Handle this.
 *
 * 'statloc' is the wait status of the child. A normal exit with code 0 is
 * logged as success and refreshes server.lastsave; a normal exit with a
 * non-zero code is logged as an error; termination by signal is logged
 * with the signal number and the child's temp file is removed. In every
 * case server.bgsavechildpid is reset to -1 and the slaves waiting for the
 * dump are notified. */
1285 void backgroundSaveDoneHandler(int statloc
) {
1286 int exitcode
= WEXITSTATUS(statloc
);
1287 int bysignal
= WIFSIGNALED(statloc
);
1289 if (!bysignal
&& exitcode
== 0) {
1290 redisLog(REDIS_NOTICE
,
1291 "Background saving terminated with success");
1293 server
.lastsave
= time(NULL
);
1294 } else if (!bysignal
&& exitcode
!= 0) {
1295 redisLog(REDIS_WARNING
, "Background saving error");
1297 redisLog(REDIS_WARNING
,
1298 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1299 rdbRemoveTempFile(server
.bgsavechildpid
);
1301 server
.bgsavechildpid
= -1;
1302 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1303 * (the first stage of SYNC is a bulk transfer of dump.rdb)
 *
 * NOTE(review): only 'exitcode' is tested here. POSIX gives
 * WEXITSTATUS() a meaning only when WIFEXITED() is true, so after a
 * child killed by a signal this may still report REDIS_OK to the
 * slaves -- confirm whether 'bysignal' should be tested as well. */
1304 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1307 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
/* 'statloc' is the wait status of the rewriting child.
 *
 * On a clean exit (not by signal, exit code 0) the handler:
 *   1. opens the child's temp file "temp-rewriteaof-bg-<pid>.aof" for
 *      appending;
 *   2. writes server.bgrewritebuf (the changes accumulated by the parent
 *      in the meantime) to it;
 *   3. renames it onto server.appendfilename;
 *   4. if an append only file is currently open (server.appendfd != -1),
 *      closes the old descriptor, adopts the new one and resets
 *      server.appendseldb to -1.
 * Failures and signals are logged. Before returning, the rewrite buffer is
 * replaced with an empty one, the temp file of the child is removed and
 * server.bgrewritechildpid is reset to -1. */
1309 void backgroundRewriteDoneHandler(int statloc
) {
1310 int exitcode
= WEXITSTATUS(statloc
);
1311 int bysignal
= WIFSIGNALED(statloc
);
1313 if (!bysignal
&& exitcode
== 0) {
1317 redisLog(REDIS_NOTICE
,
1318 "Background append only file rewriting terminated with success");
1319 /* Now it's time to flush the differences accumulated by the parent */
1320 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1321 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1323 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1326 /* Flush our data... */
1327 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1328 (signed) sdslen(server
.bgrewritebuf
)) {
1329 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1333 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1334 /* Now our work is to rename the temp file into the stable file. And
1335 * switch the file descriptor used by the server for append only. */
1336 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1337 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1341 /* Mission completed... almost */
1342 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1343 if (server
.appendfd
!= -1) {
1344 /* If append only is actually enabled... */
1345 close(server
.appendfd
);
1346 server
.appendfd
= fd
;
1348 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1349 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1351 /* If append only is disabled we just generate a dump in this
1352 * format. Why not? */
1355 } else if (!bysignal
&& exitcode
!= 0) {
1356 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1358 redisLog(REDIS_WARNING
,
1359 "Background append only file rewriting terminated by signal %d",
/* Common cleanup: drop the accumulated diff and forget the child. */
1363 sdsfree(server
.bgrewritebuf
);
1364 server
.bgrewritebuf
= sdsempty();
1365 aofRemoveTempFile(server
.bgrewritechildpid
);
1366 server
.bgrewritechildpid
= -1;
1369 /* This function is called once a background process of some kind terminates,
1370 * as we want to avoid resizing the hash tables when there is a child in order
1371 * to play well with copy-on-write (otherwise when a resize happens lots of
1372 * memory pages are copied). The goal of this function is to update the ability
1373 * for dict.c to resize the hash tables according to whether or not we have
1374 * running children: the test below is true when neither a BGSAVE nor a
 * BGREWRITEAOF child is active (both pids are -1). */
1375 static void updateDictResizePolicy(void) {
1376 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1379 dictDisableResize();
/* Periodic housekeeping, registered as a time event by initServer().
 *
 * 'eventLoop' and 'clientData' are unused. server.cronloops is
 * incremented on every call and its previous value ('loops') paces the
 * less frequent jobs. In order, the visible code:
 *   - refreshes the cached server.unixtime;
 *   - every 50 loops logs per-database key counts;
 *   - when no background child is running, tries to shrink the hash
 *     tables every 10 loops and, if server.activerehashing is set,
 *     performs incremental rehashing;
 *   - every 50 loops logs client counts and memory usage;
 *   - closes timed out clients (every 100 loops when a max idle time is
 *     configured, and on every loop while clients are blocked);
 *   - reaps a finished BGSAVE / BGREWRITEAOF child with a non-blocking
 *     wait3(), or else starts a background save when a save point is
 *     reached;
 *   - expires a sample of volatile keys in every database;
 *   - swaps objects out while over server.vm_max_memory;
 *   - every 10 loops tries to sync with the master when the replication
 *     state is REDIS_REPL_CONNECT. */
1382 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1383 int j
, loops
= server
.cronloops
++;
1384 REDIS_NOTUSED(eventLoop
);
1386 REDIS_NOTUSED(clientData
);
1388 /* We take a cached value of the unix time in the global state because
1389 * with virtual memory and aging there is the need to store the current time
1390 * in objects at every object access, and accuracy is not needed.
1391 * To access a global var is faster than calling time(NULL) */
1392 server
.unixtime
= time(NULL
);
1394 /* Show some info about non-empty databases */
1395 for (j
= 0; j
< server
.dbnum
; j
++) {
1396 long long size
, used
, vkeys
;
1398 size
= dictSlots(server
.db
[j
].dict
);
1399 used
= dictSize(server
.db
[j
].dict
);
1400 vkeys
= dictSize(server
.db
[j
].expires
);
1401 if (!(loops
% 50) && (used
|| vkeys
)) {
1402 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1403 /* dictPrintStats(server.dict); */
1407 /* We don't want to resize the hash tables while a background saving
1408 * is in progress: the saving child is created using fork() that is
1409 * implemented with a copy-on-write semantic in most modern systems, so
1410 * if we resize the HT while there is the saving child at work actually
1411 * a lot of memory movements in the parent will cause a lot of pages
1413 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1414 if (!(loops
% 10)) tryResizeHashTables();
1415 if (server
.activerehashing
) incrementallyRehash();
1418 /* Show information about connected clients */
1419 if (!(loops
% 50)) {
1420 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1421 listLength(server
.clients
)-listLength(server
.slaves
),
1422 listLength(server
.slaves
),
1423 zmalloc_used_memory());
1426 /* Close connections of timedout clients */
1427 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1428 closeTimedoutClients();
1430 /* Check if a background saving or AOF rewrite in progress terminated */
1431 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
/* WNOHANG: wait3() yields 0 when no child changed state yet. */
1435 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1436 if (pid
== server
.bgsavechildpid
) {
1437 backgroundSaveDoneHandler(statloc
);
1439 backgroundRewriteDoneHandler(statloc
);
1441 updateDictResizePolicy();
1444 /* If there is not a background saving in progress check if
1445 * we have to save now */
1446 time_t now
= time(NULL
);
1447 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1448 struct saveparam
*sp
= server
.saveparams
+j
;
/* A save point fires when enough changes piled up AND enough
 * time elapsed since the last save. */
1450 if (server
.dirty
>= sp
->changes
&&
1451 now
-server
.lastsave
> sp
->seconds
) {
1452 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1453 sp
->changes
, sp
->seconds
);
1454 rdbSaveBackground(server
.dbfilename
);
1460 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1461 * will use few CPU cycles if there are few expiring keys, otherwise
1462 * it will get more aggressive to avoid that too much memory is used by
1463 * keys that can be removed from the keyspace. */
1464 for (j
= 0; j
< server
.dbnum
; j
++) {
1466 redisDb
*db
= server
.db
+j
;
1468 /* Continue to expire if at the end of the cycle more than 25%
1469 * of the keys were expired. */
1471 long num
= dictSize(db
->expires
);
1472 time_t now
= time(NULL
);
/* Never sample more than REDIS_EXPIRELOOKUPS_PER_CRON keys. */
1475 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1476 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1481 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
/* The expire time is stored directly in the entry value. */
1482 t
= (time_t) dictGetEntryVal(de
);
1484 deleteKey(db
,dictGetEntryKey(de
));
1486 server
.stat_expiredkeys
++;
1489 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1492 /* Swap a few keys on disk if we are over the memory limit and VM
1493 * is enabled. Try to free objects from the free list first. */
1494 if (vmCanSwapOut()) {
1495 while (server
.vm_enabled
&& zmalloc_used_memory() >
1496 server
.vm_max_memory
)
1500 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
/* No swap threads configured: swap synchronously. */
1501 retval
= (server
.vm_max_threads
== 0) ?
1502 vmSwapOneObjectBlocking() :
1503 vmSwapOneObjectThreaded();
1504 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1505 zmalloc_used_memory() >
1506 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1508 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1510 /* Note that when using threaded I/O we free just one object,
1511 * because anyway when the I/O thread in charge to swap this
1512 * object out will finish, the handler of completed jobs
1513 * will try to swap more objects if we are still out of memory. */
1514 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1518 /* Check if we should connect to a MASTER */
1519 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1520 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1521 if (syncWithMaster() == REDIS_OK
) {
1522 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
/* With append only enabled, a rewrite is started after the sync. */
1523 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1529 /* This function gets called every time Redis is entering the
1530 * main loop of the event driven library, that is, before to sleep
1531 * for ready file descriptors.
 *
 * With VM enabled it drains server.io_ready_clients: each client is
 * removed from the list, loses its REDIS_IO_WAIT flag, gets its readable
 * event handler installed again and has any pending input processed.
 * Finally the AOF buffer is flushed. 'eventLoop' is unused. */
1532 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1533 REDIS_NOTUSED(eventLoop
);
1535 /* Awake clients that got all the swapped keys they requested */
1536 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1540 listRewind(server
.io_ready_clients
,&li
);
1541 while((ln
= listNext(&li
))) {
1542 redisClient
*c
= ln
->value
;
1543 struct redisCommand
*cmd
;
1545 /* Resume the client. */
1546 listDelNode(server
.io_ready_clients
,ln
);
1547 c
->flags
&= (~REDIS_IO_WAIT
);
1548 server
.vm_blocked_clients
--;
1549 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1550 readQueryFromClient
, c
);
/* The command that was waiting is looked up again by its name. */
1551 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1552 assert(cmd
!= NULL
);
1555 /* There may be more data to process in the input buffer. */
1556 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1557 processInputBuffer(c
);
1560 /* Write the AOF buffer on disk */
1561 flushAppendOnlyFile();
/* Creates the objects stored in the global 'shared' structure: protocol
 * fragments and canned replies (CRLF, +OK, -ERR, null/empty bulk and
 * multi bulk, :0, :1, +PONG, +QUEUED), common error replies, single
 * character separators, "select N" commands for databases 0-9, the bulk
 * headers used by pub/sub messages, two multi bulk headers, and
 * REDIS_SHARED_INTEGERS integer-encoded objects. */
1564 static void createSharedObjects(void) {
1567 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1568 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1569 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1570 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1571 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1572 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1573 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1574 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1575 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1576 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1577 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
/* Canned error replies. */
1578 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1579 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1580 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1581 "-ERR no such key\r\n"));
1582 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1583 "-ERR syntax error\r\n"));
1584 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1585 "-ERR source and destination objects are the same\r\n"));
1586 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1587 "-ERR index out of range\r\n"));
1588 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1589 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1590 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
/* From here on createStringObject() is given an explicit length; each
 * length equals the byte count of its literal (CR and LF count one byte
 * each). */
1591 shared
.select0
= createStringObject("select 0\r\n",10);
1592 shared
.select1
= createStringObject("select 1\r\n",10);
1593 shared
.select2
= createStringObject("select 2\r\n",10);
1594 shared
.select3
= createStringObject("select 3\r\n",10);
1595 shared
.select4
= createStringObject("select 4\r\n",10);
1596 shared
.select5
= createStringObject("select 5\r\n",10);
1597 shared
.select6
= createStringObject("select 6\r\n",10);
1598 shared
.select7
= createStringObject("select 7\r\n",10);
1599 shared
.select8
= createStringObject("select 8\r\n",10);
1600 shared
.select9
= createStringObject("select 9\r\n",10);
1601 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1602 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1603 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1604 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1605 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1606 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1607 shared
.mbulk3
= createStringObject("*3\r\n",4);
1608 shared
.mbulk4
= createStringObject("*4\r\n",4);
/* Integer-encoded objects: the value j itself is stored in the ptr field
 * and the encoding is switched to REDIS_ENCODING_INT. */
1609 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1610 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1611 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Appends one save point to server.saveparams: the array is grown by one
 * element with zrealloc(), the (seconds, changes) pair is stored in the new
 * slot and server.saveparamslen is incremented. */
1615 static void appendServerSaveParams(time_t seconds
, int changes
) {
1616 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1617 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1618 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1619 server
.saveparamslen
++;
/* Discards every configured save point: the array is freed and both the
 * pointer and the element count are reset. */
1622 static void resetServerSaveParams() {
1623 zfree(server
.saveparams
);
1624 server
.saveparams
= NULL
;
1625 server
.saveparamslen
= 0;
/* Fills the global 'server' structure with its default settings: network
 * and logging options, persistence (RDB file name, append only file and
 * fsync policy, default save points), virtual memory parameters, hash
 * encoding thresholds and replication state. It also initializes the
 * R_PosInf / R_NegInf / R_Nan double constants. */
1628 static void initServerConfig() {
1629 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1630 server
.port
= REDIS_SERVERPORT
;
1631 server
.verbosity
= REDIS_VERBOSE
;
1632 server
.maxidletime
= REDIS_MAXIDLETIME
;
1633 server
.saveparams
= NULL
;
1634 server
.logfile
= NULL
; /* NULL = log on standard output */
1635 server
.bindaddr
= NULL
;
1636 server
.glueoutputbuf
= 1;
1637 server
.daemonize
= 0;
1638 server
.appendonly
= 0;
1639 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1640 server
.lastfsync
= time(NULL
);
1641 server
.appendfd
= -1;
1642 server
.appendseldb
= -1; /* Make sure the first time will not match */
/* String settings are heap copies made with zstrdup(). */
1643 server
.pidfile
= zstrdup("/var/run/redis.pid");
1644 server
.dbfilename
= zstrdup("dump.rdb");
1645 server
.appendfilename
= zstrdup("appendonly.aof");
1646 server
.requirepass
= NULL
;
1647 server
.rdbcompression
= 1;
1648 server
.activerehashing
= 1;
1649 server
.maxclients
= 0;
1650 server
.blpop_blocked_clients
= 0;
1651 server
.maxmemory
= 0;
1652 server
.vm_enabled
= 0;
1653 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1654 server
.vm_page_size
= 256; /* 256 bytes per page */
1655 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1656 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1657 server
.vm_max_threads
= 4;
1658 server
.vm_blocked_clients
= 0;
1659 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1660 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
/* Start from an empty list, then install the default save points. */
1662 resetServerSaveParams();
1664 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1665 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1666 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1667 /* Replication related */
1669 server
.masterauth
= NULL
;
1670 server
.masterhost
= NULL
;
1671 server
.masterport
= 6379;
1672 server
.master
= NULL
;
1673 server
.replstate
= REDIS_REPL_NONE
;
1675 /* Double constants initialization */
/* The infinities and the NaN are produced by dividing by R_Zero. */
1677 R_PosInf
= 1.0/R_Zero
;
1678 R_NegInf
= -1.0/R_Zero
;
1679 R_Nan
= R_Zero
/R_Zero
;
/* Builds the runtime state of the server from the current configuration.
 *
 * SIGHUP and SIGPIPE are ignored and the SIGSEGV handling is installed;
 * /dev/null is opened; the client, slave, monitor and object free lists,
 * the shared objects and the event loop are created; the listening TCP
 * socket is opened; every database gets its dict, expires and
 * blockingkeys tables (plus io_keys when VM is enabled); the pub/sub
 * structures, background child bookkeeping and statistics are reset; the
 * serverCron time event and the accept handler are registered; and, if
 * configured, the append only file is opened and the VM is initialized. */
1682 static void initServer() {
1685 signal(SIGHUP
, SIG_IGN
);
1686 signal(SIGPIPE
, SIG_IGN
);
1687 setupSigSegvAction();
1689 server
.devnull
= fopen("/dev/null","w");
1690 if (server
.devnull
== NULL
) {
/* NOTE(review): fopen() does not write to server.neterr, so the detail
 * printed here is whatever that buffer held before; strerror(errno)
 * looks like the intended value -- confirm. */
1691 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1694 server
.clients
= listCreate();
1695 server
.slaves
= listCreate();
1696 server
.monitors
= listCreate();
1697 server
.objfreelist
= listCreate();
1698 createSharedObjects();
1699 server
.el
= aeCreateEventLoop();
1700 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
/* On failure anetTcpServer() is expected to describe the error in
 * server.neterr, which is what gets logged below. */
1701 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1702 if (server
.fd
== -1) {
1703 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1706 for (j
= 0; j
< server
.dbnum
; j
++) {
1707 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1708 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1709 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
/* io_keys is only created when VM is enabled. */
1710 if (server
.vm_enabled
)
1711 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1712 server
.db
[j
].id
= j
;
1714 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1715 server
.pubsub_patterns
= listCreate();
1716 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1717 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1718 server
.cronloops
= 0;
/* -1 means "no such child is running". */
1719 server
.bgsavechildpid
= -1;
1720 server
.bgrewritechildpid
= -1;
1721 server
.bgrewritebuf
= sdsempty();
1722 server
.aofbuf
= sdsempty();
1723 server
.lastsave
= time(NULL
);
1725 server
.stat_numcommands
= 0;
1726 server
.stat_numconnections
= 0;
1727 server
.stat_expiredkeys
= 0;
1728 server
.stat_starttime
= time(NULL
);
1729 server
.unixtime
= time(NULL
);
1730 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1731 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1732 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1734 if (server
.appendonly
) {
/* Created if missing, always opened for appending. */
1735 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1736 if (server
.appendfd
== -1) {
1737 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1743 if (server
.vm_enabled
) vmInit();
1746 /* Empty the whole database: the main dict and the expires dict of every
 * database are emptied. 'removed' accumulates the number of keys found in
 * the main dicts before they are emptied; presumably it is the value
 * returned to the caller -- confirm. */
1747 static long long emptyDb() {
1749 long long removed
= 0;
1751 for (j
= 0; j
< server
.dbnum
; j
++) {
1752 removed
+= dictSize(server
.db
[j
].dict
);
1753 dictEmpty(server
.db
[j
].dict
);
1754 dictEmpty(server
.db
[j
].expires
);
/* Converts "yes" / "no" (compared case-insensitively) to 1 / 0. The result
 * for any other string is not shown on the lines visible here. */
1759 static int yesnotoi(char *s
) {
1760 if (!strcasecmp(s
,"yes")) return 1;
1761 else if (!strcasecmp(s
,"no")) return 0;
1765 /* I agree, this is a very rudimentary way to load a configuration...
1766 will improve later if the config gets more complex */
1767 static void loadServerConfig(char *filename
) {
1769 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1773 if (filename
[0] == '-' && filename
[1] == '\0')
1776 if ((fp
= fopen(filename
,"r")) == NULL
) {
1777 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1782 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1788 line
= sdstrim(line
," \t\r\n");
1790 /* Skip comments and blank lines*/
1791 if (line
[0] == '#' || line
[0] == '\0') {
1796 /* Split into arguments */
1797 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1798 sdstolower(argv
[0]);
1800 /* Execute config directives */
1801 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1802 server
.maxidletime
= atoi(argv
[1]);
1803 if (server
.maxidletime
< 0) {
1804 err
= "Invalid timeout value"; goto loaderr
;
1806 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1807 server
.port
= atoi(argv
[1]);
1808 if (server
.port
< 1 || server
.port
> 65535) {
1809 err
= "Invalid port"; goto loaderr
;
1811 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1812 server
.bindaddr
= zstrdup(argv
[1]);
1813 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1814 int seconds
= atoi(argv
[1]);
1815 int changes
= atoi(argv
[2]);
1816 if (seconds
< 1 || changes
< 0) {
1817 err
= "Invalid save parameters"; goto loaderr
;
1819 appendServerSaveParams(seconds
,changes
);
1820 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1821 if (chdir(argv
[1]) == -1) {
1822 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1823 argv
[1], strerror(errno
));
1826 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1827 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1828 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1829 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1830 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1832 err
= "Invalid log level. Must be one of debug, notice, warning";
1835 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1838 server
.logfile
= zstrdup(argv
[1]);
1839 if (!strcasecmp(server
.logfile
,"stdout")) {
1840 zfree(server
.logfile
);
1841 server
.logfile
= NULL
;
1843 if (server
.logfile
) {
1844 /* Test if we are able to open the file. The server will not
1845 * be able to abort just for this problem later... */
1846 logfp
= fopen(server
.logfile
,"a");
1847 if (logfp
== NULL
) {
1848 err
= sdscatprintf(sdsempty(),
1849 "Can't open the log file: %s", strerror(errno
));
1854 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1855 server
.dbnum
= atoi(argv
[1]);
1856 if (server
.dbnum
< 1) {
1857 err
= "Invalid number of databases"; goto loaderr
;
1859 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1860 loadServerConfig(argv
[1]);
1861 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1862 server
.maxclients
= atoi(argv
[1]);
1863 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1864 server
.maxmemory
= memtoll(argv
[1],NULL
);
1865 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1866 server
.masterhost
= sdsnew(argv
[1]);
1867 server
.masterport
= atoi(argv
[2]);
1868 server
.replstate
= REDIS_REPL_CONNECT
;
1869 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1870 server
.masterauth
= zstrdup(argv
[1]);
1871 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1872 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1873 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1875 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1876 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1877 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1879 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1880 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1881 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1883 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1884 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1885 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1887 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1888 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1889 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1891 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1892 if (!strcasecmp(argv
[1],"no")) {
1893 server
.appendfsync
= APPENDFSYNC_NO
;
1894 } else if (!strcasecmp(argv
[1],"always")) {
1895 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1896 } else if (!strcasecmp(argv
[1],"everysec")) {
1897 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1899 err
= "argument must be 'no', 'always' or 'everysec'";
1902 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1903 server
.requirepass
= zstrdup(argv
[1]);
1904 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1905 zfree(server
.pidfile
);
1906 server
.pidfile
= zstrdup(argv
[1]);
1907 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1908 zfree(server
.dbfilename
);
1909 server
.dbfilename
= zstrdup(argv
[1]);
1910 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1911 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1912 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1914 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1915 zfree(server
.vm_swap_file
);
1916 server
.vm_swap_file
= zstrdup(argv
[1]);
1917 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1918 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1919 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1920 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1921 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1922 server
.vm_pages
= memtoll(argv
[1], NULL
);
1923 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1924 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1925 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1926 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1927 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1928 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1930 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1932 for (j
= 0; j
< argc
; j
++)
1937 if (fp
!= stdin
) fclose(fp
);
1941 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1942 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1943 fprintf(stderr
, ">>> '%s'\n", line
);
1944 fprintf(stderr
, "%s\n", err
);
1948 static void freeClientArgv(redisClient
*c
) {
1951 for (j
= 0; j
< c
->argc
; j
++)
1952 decrRefCount(c
->argv
[j
]);
1953 for (j
= 0; j
< c
->mbargc
; j
++)
1954 decrRefCount(c
->mbargv
[j
]);
1959 static void freeClient(redisClient
*c
) {
1962 /* Note that if the client we are freeing is blocked into a blocking
1963 * call, we have to set querybuf to NULL *before* to call
1964 * unblockClientWaitingData() to avoid processInputBuffer() will get
1965 * called. Also it is important to remove the file events after
1966 * this, because this call adds the READABLE event. */
1967 sdsfree(c
->querybuf
);
1969 if (c
->flags
& REDIS_BLOCKED
)
1970 unblockClientWaitingData(c
);
1972 /* Unsubscribe from all the pubsub channels */
1973 pubsubUnsubscribeAllChannels(c
,0);
1974 pubsubUnsubscribeAllPatterns(c
,0);
1975 dictRelease(c
->pubsub_channels
);
1976 listRelease(c
->pubsub_patterns
);
1977 /* Obvious cleanup */
1978 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1979 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1980 listRelease(c
->reply
);
1983 /* Remove from the list of clients */
1984 ln
= listSearchKey(server
.clients
,c
);
1985 redisAssert(ln
!= NULL
);
1986 listDelNode(server
.clients
,ln
);
1987 /* Remove from the list of clients waiting for swapped keys */
1988 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1989 ln
= listSearchKey(server
.io_ready_clients
,c
);
1991 listDelNode(server
.io_ready_clients
,ln
);
1992 server
.vm_blocked_clients
--;
1995 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1996 ln
= listFirst(c
->io_keys
);
1997 dontWaitForSwappedKey(c
,ln
->value
);
1999 listRelease(c
->io_keys
);
2000 /* Master/slave cleanup */
2001 if (c
->flags
& REDIS_SLAVE
) {
2002 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2004 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2005 ln
= listSearchKey(l
,c
);
2006 redisAssert(ln
!= NULL
);
2009 if (c
->flags
& REDIS_MASTER
) {
2010 server
.master
= NULL
;
2011 server
.replstate
= REDIS_REPL_CONNECT
;
2013 /* Release memory */
2016 freeClientMultiState(c
);
2020 #define GLUEREPLY_UP_TO (1024)
2021 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2023 char buf
[GLUEREPLY_UP_TO
];
2028 listRewind(c
->reply
,&li
);
2029 while((ln
= listNext(&li
))) {
2033 objlen
= sdslen(o
->ptr
);
2034 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2035 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2037 listDelNode(c
->reply
,ln
);
2039 if (copylen
== 0) return;
2043 /* Now the output buffer is empty, add the new single element */
2044 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2045 listAddNodeHead(c
->reply
,o
);
2048 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2049 redisClient
*c
= privdata
;
2050 int nwritten
= 0, totwritten
= 0, objlen
;
2053 REDIS_NOTUSED(mask
);
2055 /* Use writev() if we have enough buffers to send */
2056 if (!server
.glueoutputbuf
&&
2057 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2058 !(c
->flags
& REDIS_MASTER
))
2060 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2064 while(listLength(c
->reply
)) {
2065 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2066 glueReplyBuffersIfNeeded(c
);
2068 o
= listNodeValue(listFirst(c
->reply
));
2069 objlen
= sdslen(o
->ptr
);
2072 listDelNode(c
->reply
,listFirst(c
->reply
));
2076 if (c
->flags
& REDIS_MASTER
) {
2077 /* Don't reply to a master */
2078 nwritten
= objlen
- c
->sentlen
;
2080 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2081 if (nwritten
<= 0) break;
2083 c
->sentlen
+= nwritten
;
2084 totwritten
+= nwritten
;
2085 /* If we fully sent the object on head go to the next one */
2086 if (c
->sentlen
== objlen
) {
2087 listDelNode(c
->reply
,listFirst(c
->reply
));
2090 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2091 * bytes, in a single threaded server it's a good idea to serve
2092 * other clients as well, even if a very large request comes from
2093 * super fast link that is always able to accept data (in real world
2094 * scenario think about 'KEYS *' against the loopback interfae) */
2095 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2097 if (nwritten
== -1) {
2098 if (errno
== EAGAIN
) {
2101 redisLog(REDIS_VERBOSE
,
2102 "Error writing to client: %s", strerror(errno
));
2107 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2108 if (listLength(c
->reply
) == 0) {
2110 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2114 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2116 redisClient
*c
= privdata
;
2117 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2119 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2120 int offset
, ion
= 0;
2122 REDIS_NOTUSED(mask
);
2125 while (listLength(c
->reply
)) {
2126 offset
= c
->sentlen
;
2130 /* fill-in the iov[] array */
2131 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2132 o
= listNodeValue(node
);
2133 objlen
= sdslen(o
->ptr
);
2135 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2138 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2139 break; /* no more iovecs */
2141 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2142 iov
[ion
].iov_len
= objlen
- offset
;
2143 willwrite
+= objlen
- offset
;
2144 offset
= 0; /* just for the first item */
2151 /* write all collected blocks at once */
2152 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2153 if (errno
!= EAGAIN
) {
2154 redisLog(REDIS_VERBOSE
,
2155 "Error writing to client: %s", strerror(errno
));
2162 totwritten
+= nwritten
;
2163 offset
= c
->sentlen
;
2165 /* remove written robjs from c->reply */
2166 while (nwritten
&& listLength(c
->reply
)) {
2167 o
= listNodeValue(listFirst(c
->reply
));
2168 objlen
= sdslen(o
->ptr
);
2170 if(nwritten
>= objlen
- offset
) {
2171 listDelNode(c
->reply
, listFirst(c
->reply
));
2172 nwritten
-= objlen
- offset
;
2176 c
->sentlen
+= nwritten
;
2184 c
->lastinteraction
= time(NULL
);
2186 if (listLength(c
->reply
) == 0) {
2188 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2192 static struct redisCommand
*lookupCommand(char *name
) {
2194 while(cmdTable
[j
].name
!= NULL
) {
2195 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2201 /* resetClient prepare the client to process the next command */
2202 static void resetClient(redisClient
*c
) {
2208 /* Call() is the core of Redis execution of a command */
2209 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2212 dirty
= server
.dirty
;
2214 dirty
= server
.dirty
-dirty
;
2216 if (server
.appendonly
&& dirty
)
2217 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2218 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2219 listLength(server
.slaves
))
2220 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2221 if (listLength(server
.monitors
))
2222 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2223 server
.stat_numcommands
++;
2226 /* If this function gets called we already read a whole
2227 * command, argments are in the client argv/argc fields.
2228 * processCommand() execute the command or prepare the
2229 * server for a bulk read from the client.
2231 * If 1 is returned the client is still alive and valid and
2232 * and other operations can be performed by the caller. Otherwise
2233 * if 0 is returned the client was destroied (i.e. after QUIT). */
2234 static int processCommand(redisClient
*c
) {
2235 struct redisCommand
*cmd
;
2237 /* Free some memory if needed (maxmemory setting) */
2238 if (server
.maxmemory
) freeMemoryIfNeeded();
2240 /* Handle the multi bulk command type. This is an alternative protocol
2241 * supported by Redis in order to receive commands that are composed of
2242 * multiple binary-safe "bulk" arguments. The latency of processing is
2243 * a bit higher but this allows things like multi-sets, so if this
2244 * protocol is used only for MSET and similar commands this is a big win. */
2245 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2246 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2247 if (c
->multibulk
<= 0) {
2251 decrRefCount(c
->argv
[c
->argc
-1]);
2255 } else if (c
->multibulk
) {
2256 if (c
->bulklen
== -1) {
2257 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2258 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2262 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2263 decrRefCount(c
->argv
[0]);
2264 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2266 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2271 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2275 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2276 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2280 if (c
->multibulk
== 0) {
2284 /* Here we need to swap the multi-bulk argc/argv with the
2285 * normal argc/argv of the client structure. */
2287 c
->argv
= c
->mbargv
;
2288 c
->mbargv
= auxargv
;
2291 c
->argc
= c
->mbargc
;
2292 c
->mbargc
= auxargc
;
2294 /* We need to set bulklen to something different than -1
2295 * in order for the code below to process the command without
2296 * to try to read the last argument of a bulk command as
2297 * a special argument. */
2299 /* continue below and process the command */
2306 /* -- end of multi bulk commands processing -- */
2308 /* The QUIT command is handled as a special case. Normal command
2309 * procs are unable to close the client connection safely */
2310 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2315 /* Now lookup the command and check ASAP about trivial error conditions
2316 * such wrong arity, bad command name and so forth. */
2317 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2320 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2321 (char*)c
->argv
[0]->ptr
));
2324 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2325 (c
->argc
< -cmd
->arity
)) {
2327 sdscatprintf(sdsempty(),
2328 "-ERR wrong number of arguments for '%s' command\r\n",
2332 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2333 /* This is a bulk command, we have to read the last argument yet. */
2334 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2336 decrRefCount(c
->argv
[c
->argc
-1]);
2337 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2339 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2344 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2345 /* It is possible that the bulk read is already in the
2346 * buffer. Check this condition and handle it accordingly.
2347 * This is just a fast path, alternative to call processInputBuffer().
2348 * It's a good idea since the code is small and this condition
2349 * happens most of the times. */
2350 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2351 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2353 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2355 /* Otherwise return... there is to read the last argument
2356 * from the socket. */
2360 /* Let's try to encode the bulk object to save space. */
2361 if (cmd
->flags
& REDIS_CMD_BULK
)
2362 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2364 /* Check if the user is authenticated */
2365 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2366 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2371 /* Handle the maxmemory directive */
2372 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2373 zmalloc_used_memory() > server
.maxmemory
)
2375 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2380 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2381 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2383 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2384 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2385 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2390 /* Exec the command */
2391 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2392 queueMultiCommand(c
,cmd
);
2393 addReply(c
,shared
.queued
);
2395 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2396 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2400 /* Prepare the client for the next command */
2405 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2410 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2411 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2412 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2413 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2416 if (argc
<= REDIS_STATIC_ARGS
) {
2419 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2422 lenobj
= createObject(REDIS_STRING
,
2423 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2424 lenobj
->refcount
= 0;
2425 outv
[outc
++] = lenobj
;
2426 for (j
= 0; j
< argc
; j
++) {
2427 lenobj
= createObject(REDIS_STRING
,
2428 sdscatprintf(sdsempty(),"$%lu\r\n",
2429 (unsigned long) stringObjectLen(argv
[j
])));
2430 lenobj
->refcount
= 0;
2431 outv
[outc
++] = lenobj
;
2432 outv
[outc
++] = argv
[j
];
2433 outv
[outc
++] = shared
.crlf
;
2436 /* Increment all the refcounts at start and decrement at end in order to
2437 * be sure to free objects if there is no slave in a replication state
2438 * able to be feed with commands */
2439 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2440 listRewind(slaves
,&li
);
2441 while((ln
= listNext(&li
))) {
2442 redisClient
*slave
= ln
->value
;
2444 /* Don't feed slaves that are still waiting for BGSAVE to start */
2445 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2447 /* Feed all the other slaves, MONITORs and so on */
2448 if (slave
->slaveseldb
!= dictid
) {
2452 case 0: selectcmd
= shared
.select0
; break;
2453 case 1: selectcmd
= shared
.select1
; break;
2454 case 2: selectcmd
= shared
.select2
; break;
2455 case 3: selectcmd
= shared
.select3
; break;
2456 case 4: selectcmd
= shared
.select4
; break;
2457 case 5: selectcmd
= shared
.select5
; break;
2458 case 6: selectcmd
= shared
.select6
; break;
2459 case 7: selectcmd
= shared
.select7
; break;
2460 case 8: selectcmd
= shared
.select8
; break;
2461 case 9: selectcmd
= shared
.select9
; break;
2463 selectcmd
= createObject(REDIS_STRING
,
2464 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2465 selectcmd
->refcount
= 0;
2468 addReply(slave
,selectcmd
);
2469 slave
->slaveseldb
= dictid
;
2471 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2473 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2474 if (outv
!= static_outv
) zfree(outv
);
2477 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2478 s
= sdscatlen(s
,"\"",1);
2483 s
= sdscatprintf(s
,"\\%c",*p
);
2485 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2486 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2487 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2488 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2489 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2492 s
= sdscatprintf(s
,"%c",*p
);
2494 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2499 return sdscatlen(s
,"\"",1);
2502 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2506 sds cmdrepr
= sdsnew("+");
2510 gettimeofday(&tv
,NULL
);
2511 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2512 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2514 for (j
= 0; j
< argc
; j
++) {
2515 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2516 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2518 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2519 sdslen(argv
[j
]->ptr
));
2522 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2524 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2525 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2527 listRewind(monitors
,&li
);
2528 while((ln
= listNext(&li
))) {
2529 redisClient
*monitor
= ln
->value
;
2530 addReply(monitor
,cmdobj
);
2532 decrRefCount(cmdobj
);
2535 static void processInputBuffer(redisClient
*c
) {
2537 /* Before to process the input buffer, make sure the client is not
2538 * waitig for a blocking operation such as BLPOP. Note that the first
2539 * iteration the client is never blocked, otherwise the processInputBuffer
2540 * would not be called at all, but after the execution of the first commands
2541 * in the input buffer the client may be blocked, and the "goto again"
2542 * will try to reiterate. The following line will make it return asap. */
2543 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2544 if (c
->bulklen
== -1) {
2545 /* Read the first line of the query */
2546 char *p
= strchr(c
->querybuf
,'\n');
2553 query
= c
->querybuf
;
2554 c
->querybuf
= sdsempty();
2555 querylen
= 1+(p
-(query
));
2556 if (sdslen(query
) > querylen
) {
2557 /* leave data after the first line of the query in the buffer */
2558 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2560 *p
= '\0'; /* remove "\n" */
2561 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2562 sdsupdatelen(query
);
2564 /* Now we can split the query in arguments */
2565 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2568 if (c
->argv
) zfree(c
->argv
);
2569 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2571 for (j
= 0; j
< argc
; j
++) {
2572 if (sdslen(argv
[j
])) {
2573 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2581 /* Execute the command. If the client is still valid
2582 * after processCommand() return and there is something
2583 * on the query buffer try to process the next command. */
2584 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2586 /* Nothing to process, argc == 0. Just process the query
2587 * buffer if it's not empty or return to the caller */
2588 if (sdslen(c
->querybuf
)) goto again
;
2591 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2592 redisLog(REDIS_VERBOSE
, "Client protocol error");
2597 /* Bulk read handling. Note that if we are at this point
2598 the client already sent a command terminated with a newline,
2599 we are reading the bulk data that is actually the last
2600 argument of the command. */
2601 int qbl
= sdslen(c
->querybuf
);
2603 if (c
->bulklen
<= qbl
) {
2604 /* Copy everything but the final CRLF as final argument */
2605 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2607 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2608 /* Process the command. If the client is still valid after
2609 * the processing and there is more data in the buffer
2610 * try to parse it. */
2611 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2617 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2618 redisClient
*c
= (redisClient
*) privdata
;
2619 char buf
[REDIS_IOBUF_LEN
];
2622 REDIS_NOTUSED(mask
);
2624 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2626 if (errno
== EAGAIN
) {
2629 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2633 } else if (nread
== 0) {
2634 redisLog(REDIS_VERBOSE
, "Client closed connection");
2639 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2640 c
->lastinteraction
= time(NULL
);
2644 processInputBuffer(c
);
2647 static int selectDb(redisClient
*c
, int id
) {
2648 if (id
< 0 || id
>= server
.dbnum
)
2650 c
->db
= &server
.db
[id
];
2654 static void *dupClientReplyValue(void *o
) {
2655 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: the result of
 * equalStringObjects() on the two values is returned as is. */
static int listMatchObjects(void *a, void *b) {
    int same = equalStringObjects(a,b);

    return same;
}
2663 static redisClient
*createClient(int fd
) {
2664 redisClient
*c
= zmalloc(sizeof(*c
));
2666 anetNonBlock(NULL
,fd
);
2667 anetTcpNoDelay(NULL
,fd
);
2668 if (!c
) return NULL
;
2671 c
->querybuf
= sdsempty();
2680 c
->lastinteraction
= time(NULL
);
2681 c
->authenticated
= 0;
2682 c
->replstate
= REDIS_REPL_NONE
;
2683 c
->reply
= listCreate();
2684 listSetFreeMethod(c
->reply
,decrRefCount
);
2685 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2686 c
->blockingkeys
= NULL
;
2687 c
->blockingkeysnum
= 0;
2688 c
->io_keys
= listCreate();
2689 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2690 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2691 c
->pubsub_patterns
= listCreate();
2692 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2693 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2694 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2695 readQueryFromClient
, c
) == AE_ERR
) {
2699 listAddNodeTail(server
.clients
,c
);
2700 initClientMultiState(c
);
2704 static void addReply(redisClient
*c
, robj
*obj
) {
2705 if (listLength(c
->reply
) == 0 &&
2706 (c
->replstate
== REDIS_REPL_NONE
||
2707 c
->replstate
== REDIS_REPL_ONLINE
) &&
2708 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2709 sendReplyToClient
, c
) == AE_ERR
) return;
2711 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2712 obj
= dupStringObject(obj
);
2713 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2715 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2718 static void addReplySds(redisClient
*c
, sds s
) {
2719 robj
*o
= createObject(REDIS_STRING
,s
);
2724 static void addReplyDouble(redisClient
*c
, double d
) {
2727 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2728 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2729 (unsigned long) strlen(buf
),buf
));
2732 static void addReplyLong(redisClient
*c
, long l
) {
2737 addReply(c
,shared
.czero
);
2739 } else if (l
== 1) {
2740 addReply(c
,shared
.cone
);
2743 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2744 addReplySds(c
,sdsnewlen(buf
,len
));
2747 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2752 addReply(c
,shared
.czero
);
2754 } else if (ll
== 1) {
2755 addReply(c
,shared
.cone
);
2758 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2759 addReplySds(c
,sdsnewlen(buf
,len
));
2762 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2767 addReply(c
,shared
.czero
);
2769 } else if (ul
== 1) {
2770 addReply(c
,shared
.cone
);
2773 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2774 addReplySds(c
,sdsnewlen(buf
,len
));
2777 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2780 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2781 len
= sdslen(obj
->ptr
);
2783 long n
= (long)obj
->ptr
;
2785 /* Compute how many bytes will take this integer as a radix 10 string */
2791 while((n
= n
/10) != 0) {
2795 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2798 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2799 addReplyBulkLen(c
,obj
);
2801 addReply(c
,shared
.crlf
);
2804 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2805 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2807 addReply(c
,shared
.nullbulk
);
2809 robj
*o
= createStringObject(s
,strlen(s
));
2815 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2820 REDIS_NOTUSED(mask
);
2821 REDIS_NOTUSED(privdata
);
2823 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2824 if (cfd
== AE_ERR
) {
2825 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2828 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2829 if ((c
= createClient(cfd
)) == NULL
) {
2830 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2831 close(cfd
); /* May be already closed, just ingore errors */
2834 /* If maxclient directive is set and this is one client more... close the
2835 * connection. Note that we create the client instead to check before
2836 * for this condition, since now the socket is already set in nonblocking
2837 * mode and we can send an error for free using the Kernel I/O */
2838 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2839 char *err
= "-ERR max number of clients reached\r\n";
2841 /* That's a best effort error message, don't check write errors */
2842 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2843 /* Nothing to do, Just to avoid the warning... */
2848 server
.stat_numconnections
++;
2851 /* ======================= Redis objects implementation ===================== */
2853 static robj
*createObject(int type
, void *ptr
) {
2856 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2857 if (listLength(server
.objfreelist
)) {
2858 listNode
*head
= listFirst(server
.objfreelist
);
2859 o
= listNodeValue(head
);
2860 listDelNode(server
.objfreelist
,head
);
2861 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2863 if (server
.vm_enabled
) {
2864 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2865 o
= zmalloc(sizeof(*o
));
2867 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2871 o
->encoding
= REDIS_ENCODING_RAW
;
2874 if (server
.vm_enabled
) {
2875 /* Note that this code may run in the context of an I/O thread
2876 * and accessing to server.unixtime in theory is an error
2877 * (no locks). But in practice this is safe, and even if we read
2878 * garbage Redis will not fail, as it's just a statistical info */
2879 o
->vm
.atime
= server
.unixtime
;
2880 o
->storage
= REDIS_VM_MEMORY
;
2885 static robj
*createStringObject(char *ptr
, size_t len
) {
2886 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2889 static robj
*createStringObjectFromLongLong(long long value
) {
2891 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2892 incrRefCount(shared
.integers
[value
]);
2893 o
= shared
.integers
[value
];
2895 o
= createObject(REDIS_STRING
, NULL
);
2896 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2897 o
->encoding
= REDIS_ENCODING_INT
;
2898 o
->ptr
= (void*)((long)value
);
2900 o
->ptr
= sdscatprintf(sdsempty(),"%lld",value
);
2906 static robj
*dupStringObject(robj
*o
) {
2907 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2908 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2911 static robj
*createListObject(void) {
2912 list
*l
= listCreate();
2914 listSetFreeMethod(l
,decrRefCount
);
2915 return createObject(REDIS_LIST
,l
);
2918 static robj
*createSetObject(void) {
2919 dict
*d
= dictCreate(&setDictType
,NULL
);
2920 return createObject(REDIS_SET
,d
);
2923 static robj
*createHashObject(void) {
2924 /* All the Hashes start as zipmaps. Will be automatically converted
2925 * into hash tables if there are enough elements or big elements
2927 unsigned char *zm
= zipmapNew();
2928 robj
*o
= createObject(REDIS_HASH
,zm
);
2929 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2933 static robj
*createZsetObject(void) {
2934 zset
*zs
= zmalloc(sizeof(*zs
));
2936 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2937 zs
->zsl
= zslCreate();
2938 return createObject(REDIS_ZSET
,zs
);
2941 static void freeStringObject(robj
*o
) {
2942 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2947 static void freeListObject(robj
*o
) {
2948 listRelease((list
*) o
->ptr
);
2951 static void freeSetObject(robj
*o
) {
2952 dictRelease((dict
*) o
->ptr
);
2955 static void freeZsetObject(robj
*o
) {
2958 dictRelease(zs
->dict
);
2963 static void freeHashObject(robj
*o
) {
2964 switch (o
->encoding
) {
2965 case REDIS_ENCODING_HT
:
2966 dictRelease((dict
*) o
->ptr
);
2968 case REDIS_ENCODING_ZIPMAP
:
2972 redisPanic("Unknown hash encoding type");
2977 static void incrRefCount(robj
*o
) {
2981 static void decrRefCount(void *obj
) {
2984 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
2985 /* Object is a key of a swapped out value, or in the process of being
2987 if (server
.vm_enabled
&&
2988 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2990 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2991 redisAssert(o
->type
== REDIS_STRING
);
2992 freeStringObject(o
);
2993 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2994 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2995 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2996 !listAddNodeHead(server
.objfreelist
,o
))
2998 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2999 server
.vm_stats_swapped_objects
--;
3002 /* Object is in memory, or in the process of being swapped out. */
3003 if (--(o
->refcount
) == 0) {
3004 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3005 vmCancelThreadedIOJob(obj
);
3007 case REDIS_STRING
: freeStringObject(o
); break;
3008 case REDIS_LIST
: freeListObject(o
); break;
3009 case REDIS_SET
: freeSetObject(o
); break;
3010 case REDIS_ZSET
: freeZsetObject(o
); break;
3011 case REDIS_HASH
: freeHashObject(o
); break;
3012 default: redisPanic("Unknown object type"); break;
3014 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3015 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3016 !listAddNodeHead(server
.objfreelist
,o
))
3018 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3022 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3023 dictEntry
*de
= dictFind(db
->dict
,key
);
3025 robj
*key
= dictGetEntryKey(de
);
3026 robj
*val
= dictGetEntryVal(de
);
3028 if (server
.vm_enabled
) {
3029 if (key
->storage
== REDIS_VM_MEMORY
||
3030 key
->storage
== REDIS_VM_SWAPPING
)
3032 /* If we were swapping the object out, stop it, this key
3034 if (key
->storage
== REDIS_VM_SWAPPING
)
3035 vmCancelThreadedIOJob(key
);
3036 /* Update the access time of the key for the aging algorithm. */
3037 key
->vm
.atime
= server
.unixtime
;
3039 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3041 /* Our value was swapped on disk. Bring it at home. */
3042 redisAssert(val
== NULL
);
3043 val
= vmLoadObject(key
);
3044 dictGetEntryVal(de
) = val
;
3046 /* Clients blocked by the VM subsystem may be waiting for
3048 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3057 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3058 expireIfNeeded(db
,key
);
3059 return lookupKey(db
,key
);
3062 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3063 deleteIfVolatile(db
,key
);
3064 return lookupKey(db
,key
);
3067 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3068 robj
*o
= lookupKeyRead(c
->db
, key
);
3069 if (!o
) addReply(c
,reply
);
3073 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3074 robj
*o
= lookupKeyWrite(c
->db
, key
);
3075 if (!o
) addReply(c
,reply
);
3079 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3080 if (o
->type
!= type
) {
3081 addReply(c
,shared
.wrongtypeerr
);
/* Delete 'key' from the database 'db'.
 *
 * The key is removed from db->expires (only attempted when that dict is
 * not empty) and then from db->dict. The return value is non-zero when the
 * dictDelete() call on db->dict returned DICT_OK, zero otherwise.
 *
 * NOTE(review): this listing jumps from line 3093 to 3095 and from 3096 to
 * 3099, so the statements on the skipped lines (presumably the reference
 * count protection described in the comment below) are not visible here. */
3087 static int deleteKey(redisDb
*db
, robj
*key
) {
3090 /* We need to protect key from destruction: after the first dictDelete()
3091 * it may happen that 'key' is no longer valid if we don't increment
3092 * it's count. This may happen when we get the object reference directly
3093 * from the hash table with dictRandomKey() or dict iterators */
3095 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3096 retval
= dictDelete(db
->dict
,key
);
3099 return retval
== DICT_OK
;
3102 /* Check if the nul-terminated string 's' can be represented by a long
3103 * (that is, is a number that fits into long without any other space or
3104 * character before or after the digits).
3106 * If so, the function returns REDIS_OK and *longval is set to the value
3107 * of the number. Otherwise REDIS_ERR is returned */
3108 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3109 char buf
[32], *endptr
;
3113 value
= strtol(s
, &endptr
, 10);
3114 if (endptr
[0] != '\0') return REDIS_ERR
;
3115 slen
= snprintf(buf
,32,"%ld",value
);
3117 /* If the number converted back into a string is not identical
3118 * then it's not possible to encode the string as integer */
3119 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3120 if (longval
) *longval
= value
;
3124 /* Try to encode a string object in order to save space */
3125 static robj
*tryObjectEncoding(robj
*o
) {
3129 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3130 return o
; /* Already encoded */
3132 /* It's not safe to encode shared objects: shared objects can be shared
3133 * everywhere in the "object space" of Redis. Encoded objects can only
3134 * appear as "values" (and not, for instance, as keys) */
3135 if (o
->refcount
> 1) return o
;
3137 /* Currently we try to encode only strings */
3138 redisAssert(o
->type
== REDIS_STRING
);
3140 /* Check if we can represent this string as a long integer */
3141 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3143 /* Ok, this object can be encoded */
3144 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3146 incrRefCount(shared
.integers
[value
]);
3147 return shared
.integers
[value
];
3149 o
->encoding
= REDIS_ENCODING_INT
;
3151 o
->ptr
= (void*) value
;
3156 /* Get a decoded version of an encoded object (returned as a new object).
3157 * If the object is already raw-encoded just increment the ref count. */
3158 static robj
*getDecodedObject(robj
*o
) {
3161 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3165 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3168 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3169 dec
= createStringObject(buf
,strlen(buf
));
3172 redisPanic("Unknown encoding type");
3176 /* Compare two string objects via strcmp() or alike.
3177 * Note that the objects may be integer-encoded. In such a case we
3178 * use snprintf() to get a string representation of the numbers on the stack
3179 * and compare the strings, it's much faster than calling getDecodedObject().
3181 * Important note: if objects are not integer encoded, but binary-safe strings,
3182 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3184 static int compareStringObjects(robj
*a
, robj
*b
) {
3185 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3186 char bufa
[128], bufb
[128], *astr
, *bstr
;
3189 if (a
== b
) return 0;
3190 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3191 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3197 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3198 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3204 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3207 /* Equal string objects return 1 if the two objects are the same from the
3208 * point of view of a string comparison, otherwise 0 is returned. Note that
3209 * this function is faster then checking for (compareStringObject(a,b) == 0)
3210 * because it can perform some more optimization. */
3211 static int equalStringObjects(robj
*a
, robj
*b
) {
3212 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3213 return a
->ptr
== b
->ptr
;
3215 return compareStringObjects(a
,b
) == 0;
3219 static size_t stringObjectLen(robj
*o
) {
3220 redisAssert(o
->type
== REDIS_STRING
);
3221 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3222 return sdslen(o
->ptr
);
3226 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3230 static int getDoubleFromObject(robj
*o
, double *target
) {
3237 redisAssert(o
->type
== REDIS_STRING
);
3238 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3239 value
= strtod(o
->ptr
, &eptr
);
3240 if (eptr
[0] != '\0') return REDIS_ERR
;
3241 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3242 value
= (long)o
->ptr
;
3244 redisPanic("Unknown string encoding");
3252 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3254 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3256 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3258 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3267 static int getLongLongFromObject(robj
*o
, long long *target
) {
3274 redisAssert(o
->type
== REDIS_STRING
);
3275 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3276 value
= strtoll(o
->ptr
, &eptr
, 10);
3277 if (eptr
[0] != '\0') return REDIS_ERR
;
3278 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3279 value
= (long)o
->ptr
;
3281 redisPanic("Unknown string encoding");
3289 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3291 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3293 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3295 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3304 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3307 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3308 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3310 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3312 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3321 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if fwrite() could not write the byte. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    if (fwrite(&type,1,1,fp) != 1) return -1;
    return 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer in host byte
 * order (the value is narrowed with a cast to int32_t).
 * Returns 0 on success, -1 if fwrite() could not write the value. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;

    if (fwrite(&t32,sizeof(t32),1,fp) != 1) return -1;
    return 0;
}
3334 /* check rdbLoadLen() comments for more info */
/* Write the length 'len' to 'fp' in one of three layouts. The two most
 * significant bits of the first byte carry the layout marker:
 *
 *   REDIS_RDB_6BITLEN   one byte, the length in the remaining bits
 *   REDIS_RDB_14BITLEN  two bytes, used when len < (1<<14)
 *   REDIS_RDB_32BITLEN  one marker byte followed by 4 bytes taken from 'len'
 *
 * Returns -1 as soon as an fwrite() reports that nothing was written.
 *
 * NOTE(review): numbered lines 3337-3338, 3345, 3347, 3351 and those after
 * 3352 are absent from this listing; they include the condition of the
 * first branch and the value returned on success. */
3335 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3336 unsigned char buf
[2];
3339 /* Save a 6 bit len */
3340 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3341 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3342 } else if (len
< (1<<14)) {
3343 /* Save a 14 bit len */
3344 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3346 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3348 /* Save a 32 bit len */
3349 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3350 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3352 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3357 /* String objects in the form "2391" "-100" without any space and with a
3358 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3359 * encoded as integers to save space */
3360 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3362 char *endptr
, buf
[32];
3364 /* Check if it's possible to encode this value as a number */
3365 value
= strtoll(s
, &endptr
, 10);
3366 if (endptr
[0] != '\0') return 0;
3367 snprintf(buf
,32,"%lld",value
);
3369 /* If the number converted back into a string is not identical
3370 * then it's not possible to encode the string as integer */
3371 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3373 /* Finally check if it fits in our ranges */
3374 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3375 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3376 enc
[1] = value
&0xFF;
3378 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3379 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3380 enc
[1] = value
&0xFF;
3381 enc
[2] = (value
>>8)&0xFF;
3383 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3384 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3385 enc
[1] = value
&0xFF;
3386 enc
[2] = (value
>>8)&0xFF;
3387 enc
[3] = (value
>>16)&0xFF;
3388 enc
[4] = (value
>>24)&0xFF;
3395 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3396 size_t comprlen
, outlen
;
3400 /* We require at least four bytes compression for this to be worth it */
3401 if (len
<= 4) return 0;
3403 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3404 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3405 if (comprlen
== 0) {
3409 /* Data compressed! Let's save it on disk */
3410 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3411 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3412 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3413 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3414 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3423 /* Save a string object as [len][data] on disk. If the object is a string
3424 * representation of an integer value we try to save it in a special form */
3425 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3428 /* Try integer encoding */
3430 unsigned char buf
[5];
3431 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3432 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3437 /* Try LZF compression - under 20 bytes it's unable to compress even
3438 * aaaaaaaaaaaaaaaaaa so skip it */
3439 if (server
.rdbcompression
&& len
> 20) {
3442 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3443 if (retval
== -1) return -1;
3444 if (retval
> 0) return 0;
3445 /* retval == 0 means data can't be compressed, save the old way */
3448 /* Store verbatim */
3449 if (rdbSaveLen(fp
,len
) == -1) return -1;
3450 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3454 /* Like rdbSaveRawString() but handles encoded objects */
3455 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3458 /* Avoid incr/decr ref count business when possible.
3459 * This plays well with copy-on-write given that we are probably
3460 * in a child process (BGSAVE). Also this makes sure key objects
3461 * of swapped objects are not incRefCount-ed (an assert does not allow
3462 * this in order to avoid bugs) */
3463 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3464 obj
= getDecodedObject(obj
);
3465 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3468 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3473 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3474 * 8 bit integer specifying the length of the representation.
3475 * This 8 bit integer has special values in order to specify the following
3481 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3482 unsigned char buf
[128];
3488 } else if (!isfinite(val
)) {
3490 buf
[0] = (val
< 0) ? 255 : 254;
3492 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3493 buf
[0] = strlen((char*)buf
+1);
3496 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3500 /* Save a Redis object. */
3501 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3502 if (o
->type
== REDIS_STRING
) {
3503 /* Save a string value */
3504 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3505 } else if (o
->type
== REDIS_LIST
) {
3506 /* Save a list value */
3507 list
*list
= o
->ptr
;
3511 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3512 listRewind(list
,&li
);
3513 while((ln
= listNext(&li
))) {
3514 robj
*eleobj
= listNodeValue(ln
);
3516 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3518 } else if (o
->type
== REDIS_SET
) {
3519 /* Save a set value */
3521 dictIterator
*di
= dictGetIterator(set
);
3524 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3525 while((de
= dictNext(di
)) != NULL
) {
3526 robj
*eleobj
= dictGetEntryKey(de
);
3528 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3530 dictReleaseIterator(di
);
3531 } else if (o
->type
== REDIS_ZSET
) {
3532 /* Save a sorted set value */
3534 dictIterator
*di
= dictGetIterator(zs
->dict
);
3537 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3538 while((de
= dictNext(di
)) != NULL
) {
3539 robj
*eleobj
= dictGetEntryKey(de
);
3540 double *score
= dictGetEntryVal(de
);
3542 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3543 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3545 dictReleaseIterator(di
);
3546 } else if (o
->type
== REDIS_HASH
) {
3547 /* Save a hash value */
3548 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3549 unsigned char *p
= zipmapRewind(o
->ptr
);
3550 unsigned int count
= zipmapLen(o
->ptr
);
3551 unsigned char *key
, *val
;
3552 unsigned int klen
, vlen
;
3554 if (rdbSaveLen(fp
,count
) == -1) return -1;
3555 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3556 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3557 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3560 dictIterator
*di
= dictGetIterator(o
->ptr
);
3563 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3564 while((de
= dictNext(di
)) != NULL
) {
3565 robj
*key
= dictGetEntryKey(de
);
3566 robj
*val
= dictGetEntryVal(de
);
3568 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3569 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3571 dictReleaseIterator(di
);
3574 redisPanic("Unknown object type");
3579 /* Return the length the object will have on disk if saved with
3580 * the rdbSaveObject() function. Currently we use a trick to get
3581 * this length with very little changes to the code. In the future
3582 * we could switch to a faster solution. */
/* 'o' is the object to measure. 'fp' is the stream handed to
 * rdbSaveObject(); when it is NULL, server.devnull is used instead.
 *
 * NOTE(review): rdbSaveObject() is called inside assert(), so the call
 * itself is compiled out when assert() is disabled with NDEBUG. The result
 * is also compared against 1, while the error returns visible in
 * rdbSaveObject() are all -1 -- confirm whether -1 was intended.
 * NOTE(review): numbered lines 3585 and 3587 are absent from this listing,
 * so the statements around the save call and the returned value are not
 * visible here. */
3583 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3584 if (fp
== NULL
) fp
= server
.devnull
;
3586 assert(rdbSaveObject(fp
,o
) != 1);
3590 /* Return the number of pages required to save this object in the swap file */
3591 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3592 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3594 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3597 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3598 static int rdbSave(char *filename
) {
3599 dictIterator
*di
= NULL
;
3604 time_t now
= time(NULL
);
3606 /* Wait for I/O threads to terminate, just in case this is a
3607 * foreground-saving, to avoid seeking the swap file descriptor at the
3609 if (server
.vm_enabled
)
3610 waitEmptyIOJobsQueue();
3612 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3613 fp
= fopen(tmpfile
,"w");
3615 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3618 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3619 for (j
= 0; j
< server
.dbnum
; j
++) {
3620 redisDb
*db
= server
.db
+j
;
3622 if (dictSize(d
) == 0) continue;
3623 di
= dictGetIterator(d
);
3629 /* Write the SELECT DB opcode */
3630 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3631 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3633 /* Iterate this DB writing every entry */
3634 while((de
= dictNext(di
)) != NULL
) {
3635 robj
*key
= dictGetEntryKey(de
);
3636 robj
*o
= dictGetEntryVal(de
);
3637 time_t expiretime
= getExpire(db
,key
);
3639 /* Save the expire time */
3640 if (expiretime
!= -1) {
3641 /* If this key is already expired skip it */
3642 if (expiretime
< now
) continue;
3643 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3644 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3646 /* Save the key and associated value. This requires special
3647 * handling if the value is swapped out. */
3648 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3649 key
->storage
== REDIS_VM_SWAPPING
) {
3650 /* Save type, key, value */
3651 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3652 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3653 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3655 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3657 /* Get a preview of the object in memory */
3658 po
= vmPreviewObject(key
);
3659 /* Save type, key, value */
3660 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3661 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3662 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3663 /* Remove the loaded object from memory */
3667 dictReleaseIterator(di
);
3670 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3672 /* Make sure data will not remain on the OS's output buffers */
3677 /* Use RENAME to make sure the DB file is changed atomically only
3678 * if the generate DB file is ok. */
3679 if (rename(tmpfile
,filename
) == -1) {
3680 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3684 redisLog(REDIS_NOTICE
,"DB saved on disk");
3686 server
.lastsave
= time(NULL
);
3692 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3693 if (di
) dictReleaseIterator(di
);
3697 static int rdbSaveBackground(char *filename
) {
3700 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3701 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3702 if ((childpid
= fork()) == 0) {
3704 if (server
.vm_enabled
) vmReopenSwapFile();
3706 if (rdbSave(filename
) == REDIS_OK
) {
3713 if (childpid
== -1) {
3714 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3718 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3719 server
.bgsavechildpid
= childpid
;
3720 updateDictResizePolicy();
3723 return REDIS_OK
; /* unreached */
3726 static void rdbRemoveTempFile(pid_t childpid
) {
3729 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read one byte from 'fp' and return it as a value in the range 0-255.
 * Returns -1 when fread() could not read the byte. */
static int rdbLoadType(FILE *fp) {
    /* Unsigned so that a 0xFF byte is returned as 255 and can never be
     * mistaken for the -1 error value. */
    unsigned char type;

    if (fread(&type,1,1,fp) != 1) return -1;
    return type;
}
/* Read a 32 bit signed integer (host byte order) from 'fp' and return it
 * converted to time_t. Returns -1 when fread() could not read the value. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;

    if (fread(&t32,sizeof(t32),1,fp) != 1) return -1;
    return (time_t) t32;
}
3745 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3746 * of this file for a description of how these are stored on disk.
3748 * isencoded is set to 1 if the length read is not actually a length but
3749 * an "encoding type", check the above comments for more info */
/* Read a length written in the 6 bit / 14 bit / 32 bit layouts from 'fp'.
 *
 * The layout is selected by the two most significant bits of the first
 * byte. '*isencoded' (when the pointer is not NULL) is first cleared and
 * then set to 1 only for the REDIS_RDB_ENCVAL marker. For the 14 bit layout
 * the result is the low 6 bits of the first byte followed by the second
 * byte. REDIS_RDB_LENERR is returned when any fread() fails.
 *
 * NOTE(review): numbered lines 3752-3754, 3760, 3764, 3769 and those after
 * 3771 are absent from this listing, so the declarations of 'type' and
 * 'len' and the values returned by the 6 bit, ENCVAL and 32 bit branches
 * are not visible here. */
3750 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3751 unsigned char buf
[2];
3755 if (isencoded
) *isencoded
= 0;
3756 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3757 type
= (buf
[0]&0xC0)>>6;
3758 if (type
== REDIS_RDB_6BITLEN
) {
3759 /* Read a 6 bit len */
3761 } else if (type
== REDIS_RDB_ENCVAL
) {
3762 /* Read a 6 bit len encoding type */
3763 if (isencoded
) *isencoded
= 1;
3765 } else if (type
== REDIS_RDB_14BITLEN
) {
3766 /* Read a 14 bit len */
3767 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3768 return ((buf
[0]&0x3F)<<8)|buf
[1];
3770 /* Read a 32 bit len */
3771 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
/* Load an integer stored with the encoding 'enctype' (REDIS_RDB_ENC_INT8,
 * REDIS_RDB_ENC_INT16 or REDIS_RDB_ENC_INT32) from 'fp' and return it as a
 * REDIS_STRING object whose payload is the "%lld" text form of the number.
 *
 * The value is read as 1, 2 or 4 bytes and reassembled least significant
 * byte first. NULL is returned when fread() fails; an unknown 'enctype'
 * ends in redisPanic().
 *
 * NOTE(review): in the 32 bit branch enc[3] is promoted to int before
 * being shifted left by 24, so a byte >= 0x80 shifts into the sign bit of
 * a signed int; an unsigned cast before the shift would avoid relying on
 * that.
 * NOTE(review): numbered lines 3778-3779, 3784, 3787, 3789, 3792-3793 and
 * 3796 are absent from this listing, so the declarations of 'val' and 'v'
 * and the assignments of 'v' to 'val' are not visible here. */
3776 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3777 unsigned char enc
[4];
3780 if (enctype
== REDIS_RDB_ENC_INT8
) {
3781 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3782 val
= (signed char)enc
[0];
3783 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3785 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3786 v
= enc
[0]|(enc
[1]<<8);
3788 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3790 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3791 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3794 val
= 0; /* anti-warning */
3795 redisPanic("Unknown RDB integer encoding type");
3797 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3800 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3801 unsigned int len
, clen
;
3802 unsigned char *c
= NULL
;
3805 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3806 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3807 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3808 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3809 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3810 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3812 return createObject(REDIS_STRING
,val
);
3819 static robj
*rdbLoadStringObject(FILE*fp
) {
3824 len
= rdbLoadLen(fp
,&isencoded
);
3827 case REDIS_RDB_ENC_INT8
:
3828 case REDIS_RDB_ENC_INT16
:
3829 case REDIS_RDB_ENC_INT32
:
3830 return rdbLoadIntegerObject(fp
,len
);
3831 case REDIS_RDB_ENC_LZF
:
3832 return rdbLoadLzfStringObject(fp
);
3834 redisPanic("Unknown RDB encoding type");
3838 if (len
== REDIS_RDB_LENERR
) return NULL
;
3839 val
= sdsnewlen(NULL
,len
);
3840 if (len
&& fread(val
,len
,1,fp
) == 0) {
3844 return createObject(REDIS_STRING
,val
);
3847 /* For information about double serialization check rdbSaveDoubleValue() */
/* Read a double from 'fp' into '*val'.
 *
 * A one byte length is read first. The values 255, 254 and 253 are special
 * markers that store R_NegInf, R_PosInf and R_Nan respectively and return
 * 0 at once. For any other length that many bytes are read into 'buf' and
 * parsed with sscanf("%lg"). -1 is returned when an fread() fails.
 *
 * NOTE(review): the return value of sscanf() is not checked, so a payload
 * that does not parse leaves '*val' unchanged.
 * NOTE(review): numbered lines 3849-3851, 3853, 3857, 3859 and those after
 * 3860 are absent from this listing, so the declarations of 'buf' and
 * 'len', the switch header and the final return are not visible here;
 * confirm that 'buf' is large enough for the biggest non-special length. */
3848 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3852 if (fread(&len
,1,1,fp
) == 0) return -1;
3854 case 255: *val
= R_NegInf
; return 0;
3855 case 254: *val
= R_PosInf
; return 0;
3856 case 253: *val
= R_Nan
; return 0;
3858 if (fread(buf
,len
,1,fp
) == 0) return -1;
3860 sscanf(buf
, "%lg", val
);
3865 /* Load a Redis object of the specified type from the specified file.
3866 * On success a newly allocated object is returned, otherwise NULL. */
3867 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3870 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3871 if (type
== REDIS_STRING
) {
3872 /* Read string value */
3873 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3874 o
= tryObjectEncoding(o
);
3875 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3876 /* Read list/set value */
3879 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3880 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3881 /* It's faster to expand the dict to the right size asap in order
3882 * to avoid rehashing */
3883 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3884 dictExpand(o
->ptr
,listlen
);
3885 /* Load every single element of the list/set */
3889 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3890 ele
= tryObjectEncoding(ele
);
3891 if (type
== REDIS_LIST
) {
3892 listAddNodeTail((list
*)o
->ptr
,ele
);
3894 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3897 } else if (type
== REDIS_ZSET
) {
3898 /* Read sorted set value */
3902 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3903 o
= createZsetObject();
3905 /* Load every single element of the list/set */
3908 double *score
= zmalloc(sizeof(double));
3910 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3911 ele
= tryObjectEncoding(ele
);
3912 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3913 dictAdd(zs
->dict
,ele
,score
);
3914 zslInsert(zs
->zsl
,*score
,ele
);
3915 incrRefCount(ele
); /* added to skiplist */
3917 } else if (type
== REDIS_HASH
) {
3920 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3921 o
= createHashObject();
3922 /* Too many entries? Use an hash table. */
3923 if (hashlen
> server
.hash_max_zipmap_entries
)
3924 convertToRealHash(o
);
3925 /* Load every key/value, then set it into the zipmap or hash
3926 * table, as needed. */
3930 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3931 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3932 /* If we are using a zipmap and there are too big values
3933 * the object is converted to real hash table encoding. */
3934 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3935 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3936 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3938 convertToRealHash(o
);
3941 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3942 unsigned char *zm
= o
->ptr
;
3944 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3945 val
->ptr
,sdslen(val
->ptr
),NULL
);
3950 key
= tryObjectEncoding(key
);
3951 val
= tryObjectEncoding(val
);
3952 dictAdd((dict
*)o
->ptr
,key
,val
);
3956 redisPanic("Unknown object type");
3961 static int rdbLoad(char *filename
) {
3964 int type
, retval
, rdbver
;
3965 int swap_all_values
= 0;
3966 dict
*d
= server
.db
[0].dict
;
3967 redisDb
*db
= server
.db
+0;
3969 time_t expiretime
, now
= time(NULL
);
3970 long long loadedkeys
= 0;
3972 fp
= fopen(filename
,"r");
3973 if (!fp
) return REDIS_ERR
;
3974 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3976 if (memcmp(buf
,"REDIS",5) != 0) {
3978 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3981 rdbver
= atoi(buf
+5);
3984 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3992 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3993 if (type
== REDIS_EXPIRETIME
) {
3994 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3995 /* We read the time so we need to read the object type again */
3996 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3998 if (type
== REDIS_EOF
) break;
3999 /* Handle SELECT DB opcode as a special case */
4000 if (type
== REDIS_SELECTDB
) {
4001 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4003 if (dbid
>= (unsigned)server
.dbnum
) {
4004 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4007 db
= server
.db
+dbid
;
4012 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4014 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4015 /* Check if the key already expired */
4016 if (expiretime
!= -1 && expiretime
< now
) {
4021 /* Add the new object in the hash table */
4022 retval
= dictAdd(d
,key
,val
);
4023 if (retval
== DICT_ERR
) {
4024 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4028 /* Set the expire time if needed */
4029 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4031 /* Handle swapping while loading big datasets when VM is on */
4033 /* If we detect we are hopeless about fitting something in memory
4034 * we just swap every new key on disk. Directly...
4035 * Note that's important to check for this condition before resorting
4036 * to random sampling, otherwise we may try to swap already
4038 if (swap_all_values
) {
4039 dictEntry
*de
= dictFind(d
,key
);
4041 /* de may be NULL since the key already expired */
4043 key
= dictGetEntryKey(de
);
4044 val
= dictGetEntryVal(de
);
4046 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
4047 dictGetEntryVal(de
) = NULL
;
4053 /* If we have still some hope of having some value fitting memory
4054 * then we try random sampling. */
4055 if (!swap_all_values
&& server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4056 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4057 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4059 if (zmalloc_used_memory() > server
.vm_max_memory
)
4060 swap_all_values
= 1; /* We are already using too much mem */
4066 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4067 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4069 return REDIS_ERR
; /* Just to avoid warning */
4072 /*================================== Commands =============================== */
4074 static void authCommand(redisClient
*c
) {
4075 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4076 c
->authenticated
= 1;
4077 addReply(c
,shared
.ok
);
4079 c
->authenticated
= 0;
4080 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4084 static void pingCommand(redisClient
*c
) {
4085 addReply(c
,shared
.pong
);
4088 static void echoCommand(redisClient
*c
) {
4089 addReplyBulk(c
,c
->argv
[1]);
4092 /*=================================== Strings =============================== */
4094 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4096 long seconds
= 0; /* initialized to avoid an harmness warning */
4099 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4102 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4107 if (nx
) deleteIfVolatile(c
->db
,key
);
4108 retval
= dictAdd(c
->db
->dict
,key
,val
);
4109 if (retval
== DICT_ERR
) {
4111 /* If the key is about a swapped value, we want a new key object
4112 * to overwrite the old. So we delete the old key in the database.
4113 * This will also make sure that swap pages about the old object
4114 * will be marked as free. */
4115 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4117 dictReplace(c
->db
->dict
,key
,val
);
4120 addReply(c
,shared
.czero
);
4128 removeExpire(c
->db
,key
);
4129 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4130 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4133 static void setCommand(redisClient
*c
) {
4134 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4137 static void setnxCommand(redisClient
*c
) {
4138 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4141 static void setexCommand(redisClient
*c
) {
4142 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4145 static int getGenericCommand(redisClient
*c
) {
4148 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4151 if (o
->type
!= REDIS_STRING
) {
4152 addReply(c
,shared
.wrongtypeerr
);
4160 static void getCommand(redisClient
*c
) {
4161 getGenericCommand(c
);
4164 static void getsetCommand(redisClient
*c
) {
4165 if (getGenericCommand(c
) == REDIS_ERR
) return;
4166 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4167 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4169 incrRefCount(c
->argv
[1]);
4171 incrRefCount(c
->argv
[2]);
4173 removeExpire(c
->db
,c
->argv
[1]);
4176 static void mgetCommand(redisClient
*c
) {
4179 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4180 for (j
= 1; j
< c
->argc
; j
++) {
4181 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4183 addReply(c
,shared
.nullbulk
);
4185 if (o
->type
!= REDIS_STRING
) {
4186 addReply(c
,shared
.nullbulk
);
4194 static void msetGenericCommand(redisClient
*c
, int nx
) {
4195 int j
, busykeys
= 0;
4197 if ((c
->argc
% 2) == 0) {
4198 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4201 /* Handle the NX flag. The MSETNX semantic is to return zero and not
4202 * set anything at all if at least one key already exists. */
4204 for (j
= 1; j
< c
->argc
; j
+= 2) {
4205 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4211 addReply(c
, shared
.czero
);
4215 for (j
= 1; j
< c
->argc
; j
+= 2) {
4218 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4219 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4220 if (retval
== DICT_ERR
) {
4221 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4222 incrRefCount(c
->argv
[j
+1]);
4224 incrRefCount(c
->argv
[j
]);
4225 incrRefCount(c
->argv
[j
+1]);
4227 removeExpire(c
->db
,c
->argv
[j
]);
4229 server
.dirty
+= (c
->argc
-1)/2;
4230 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4233 static void msetCommand(redisClient
*c
) {
4234 msetGenericCommand(c
,0);
4237 static void msetnxCommand(redisClient
*c
) {
4238 msetGenericCommand(c
,1);
4241 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4246 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4248 if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return;
4251 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4252 o
= tryObjectEncoding(o
);
4253 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4254 if (retval
== DICT_ERR
) {
4255 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4256 removeExpire(c
->db
,c
->argv
[1]);
4258 incrRefCount(c
->argv
[1]);
4261 addReply(c
,shared
.colon
);
4263 addReply(c
,shared
.crlf
);
4266 static void incrCommand(redisClient
*c
) {
4267 incrDecrCommand(c
,1);
4270 static void decrCommand(redisClient
*c
) {
4271 incrDecrCommand(c
,-1);
4274 static void incrbyCommand(redisClient
*c
) {
4277 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4278 incrDecrCommand(c
,incr
);
4281 static void decrbyCommand(redisClient
*c
) {
4284 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4285 incrDecrCommand(c
,-incr
);
4288 static void appendCommand(redisClient
*c
) {
4293 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4295 /* Create the key */
4296 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4297 incrRefCount(c
->argv
[1]);
4298 incrRefCount(c
->argv
[2]);
4299 totlen
= stringObjectLen(c
->argv
[2]);
4303 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4306 o
= dictGetEntryVal(de
);
4307 if (o
->type
!= REDIS_STRING
) {
4308 addReply(c
,shared
.wrongtypeerr
);
4311 /* If the object is specially encoded or shared we have to make
4313 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4314 robj
*decoded
= getDecodedObject(o
);
4316 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4317 decrRefCount(decoded
);
4318 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4321 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4322 o
->ptr
= sdscatlen(o
->ptr
,
4323 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4325 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4326 (unsigned long) c
->argv
[2]->ptr
);
4328 totlen
= sdslen(o
->ptr
);
4331 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4334 static void substrCommand(redisClient
*c
) {
4336 long start
= atoi(c
->argv
[2]->ptr
);
4337 long end
= atoi(c
->argv
[3]->ptr
);
4338 size_t rangelen
, strlen
;
4341 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4342 checkType(c
,o
,REDIS_STRING
)) return;
4344 o
= getDecodedObject(o
);
4345 strlen
= sdslen(o
->ptr
);
4347 /* convert negative indexes */
4348 if (start
< 0) start
= strlen
+start
;
4349 if (end
< 0) end
= strlen
+end
;
4350 if (start
< 0) start
= 0;
4351 if (end
< 0) end
= 0;
4353 /* indexes sanity checks */
4354 if (start
> end
|| (size_t)start
>= strlen
) {
4355 /* Out of range start or start > end result in null reply */
4356 addReply(c
,shared
.nullbulk
);
4360 if ((size_t)end
>= strlen
) end
= strlen
-1;
4361 rangelen
= (end
-start
)+1;
4363 /* Return the result */
4364 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4365 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4366 addReplySds(c
,range
);
4367 addReply(c
,shared
.crlf
);
4371 /* ========================= Type agnostic commands ========================= */
4373 static void delCommand(redisClient
*c
) {
4376 for (j
= 1; j
< c
->argc
; j
++) {
4377 if (deleteKey(c
->db
,c
->argv
[j
])) {
4382 addReplyLong(c
,deleted
);
4385 static void existsCommand(redisClient
*c
) {
4386 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4389 static void selectCommand(redisClient
*c
) {
4390 int id
= atoi(c
->argv
[1]->ptr
);
4392 if (selectDb(c
,id
) == REDIS_ERR
) {
4393 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4395 addReply(c
,shared
.ok
);
4399 static void randomkeyCommand(redisClient
*c
) {
4404 de
= dictGetRandomKey(c
->db
->dict
);
4405 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4409 addReply(c
,shared
.nullbulk
);
4413 key
= dictGetEntryKey(de
);
4414 if (server
.vm_enabled
) {
4415 key
= dupStringObject(key
);
4416 addReplyBulk(c
,key
);
4419 addReplyBulk(c
,key
);
4423 static void keysCommand(redisClient
*c
) {
4426 sds pattern
= c
->argv
[1]->ptr
;
4427 int plen
= sdslen(pattern
);
4428 unsigned long numkeys
= 0;
4429 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4431 di
= dictGetIterator(c
->db
->dict
);
4433 decrRefCount(lenobj
);
4434 while((de
= dictNext(di
)) != NULL
) {
4435 robj
*keyobj
= dictGetEntryKey(de
);
4437 sds key
= keyobj
->ptr
;
4438 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4439 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4440 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4441 addReplyBulk(c
,keyobj
);
4446 dictReleaseIterator(di
);
4447 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4450 static void dbsizeCommand(redisClient
*c
) {
4452 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4455 static void lastsaveCommand(redisClient
*c
) {
4457 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4460 static void typeCommand(redisClient
*c
) {
4464 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4469 case REDIS_STRING
: type
= "+string"; break;
4470 case REDIS_LIST
: type
= "+list"; break;
4471 case REDIS_SET
: type
= "+set"; break;
4472 case REDIS_ZSET
: type
= "+zset"; break;
4473 case REDIS_HASH
: type
= "+hash"; break;
4474 default: type
= "+unknown"; break;
4477 addReplySds(c
,sdsnew(type
));
4478 addReply(c
,shared
.crlf
);
4481 static void saveCommand(redisClient
*c
) {
4482 if (server
.bgsavechildpid
!= -1) {
4483 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4486 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4487 addReply(c
,shared
.ok
);
4489 addReply(c
,shared
.err
);
4493 static void bgsaveCommand(redisClient
*c
) {
4494 if (server
.bgsavechildpid
!= -1) {
4495 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4498 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4499 char *status
= "+Background saving started\r\n";
4500 addReplySds(c
,sdsnew(status
));
4502 addReply(c
,shared
.err
);
4506 static void shutdownCommand(redisClient
*c
) {
4507 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4508 /* Kill the saving child if there is a background saving in progress.
4509 We want to avoid race conditions, for instance our saving child may
4510 overwrite the synchronous saving did by SHUTDOWN. */
4511 if (server
.bgsavechildpid
!= -1) {
4512 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4513 kill(server
.bgsavechildpid
,SIGKILL
);
4514 rdbRemoveTempFile(server
.bgsavechildpid
);
4516 if (server
.appendonly
) {
4517 /* Append only file: fsync() the AOF and exit */
4518 fsync(server
.appendfd
);
4519 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4522 /* Snapshotting. Perform a SYNC SAVE and exit */
4523 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4524 if (server
.daemonize
)
4525 unlink(server
.pidfile
);
4526 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4527 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4530 /* Ooops.. error saving! The best we can do is to continue
4531 * operating. Note that if there was a background saving process,
4532 * in the next cron() Redis will be notified that the background
4533 * saving aborted, handling special stuff like slaves pending for
4534 * synchronization... */
4535 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4537 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4542 static void renameGenericCommand(redisClient
*c
, int nx
) {
4545 /* To use the same key as src and dst is probably an error */
4546 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4547 addReply(c
,shared
.sameobjecterr
);
4551 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4555 deleteIfVolatile(c
->db
,c
->argv
[2]);
4556 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4559 addReply(c
,shared
.czero
);
4562 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4564 incrRefCount(c
->argv
[2]);
4566 deleteKey(c
->db
,c
->argv
[1]);
4568 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4571 static void renameCommand(redisClient
*c
) {
4572 renameGenericCommand(c
,0);
4575 static void renamenxCommand(redisClient
*c
) {
4576 renameGenericCommand(c
,1);
4579 static void moveCommand(redisClient
*c
) {
4584 /* Obtain source and target DB pointers */
4587 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4588 addReply(c
,shared
.outofrangeerr
);
4592 selectDb(c
,srcid
); /* Back to the source DB */
4594 /* If the user is moving using as target the same
4595 * DB as the source DB it is probably an error. */
4597 addReply(c
,shared
.sameobjecterr
);
4601 /* Check if the element exists and get a reference */
4602 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4604 addReply(c
,shared
.czero
);
4608 /* Try to add the element to the target DB */
4609 deleteIfVolatile(dst
,c
->argv
[1]);
4610 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4611 addReply(c
,shared
.czero
);
4614 incrRefCount(c
->argv
[1]);
4617 /* OK! key moved, free the entry in the source DB */
4618 deleteKey(src
,c
->argv
[1]);
4620 addReply(c
,shared
.cone
);
4623 /* =================================== Lists ================================ */
4624 static void pushGenericCommand(redisClient
*c
, int where
) {
4628 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4630 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4631 addReply(c
,shared
.cone
);
4634 lobj
= createListObject();
4636 if (where
== REDIS_HEAD
) {
4637 listAddNodeHead(list
,c
->argv
[2]);
4639 listAddNodeTail(list
,c
->argv
[2]);
4641 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4642 incrRefCount(c
->argv
[1]);
4643 incrRefCount(c
->argv
[2]);
4645 if (lobj
->type
!= REDIS_LIST
) {
4646 addReply(c
,shared
.wrongtypeerr
);
4649 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4650 addReply(c
,shared
.cone
);
4654 if (where
== REDIS_HEAD
) {
4655 listAddNodeHead(list
,c
->argv
[2]);
4657 listAddNodeTail(list
,c
->argv
[2]);
4659 incrRefCount(c
->argv
[2]);
4662 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4665 static void lpushCommand(redisClient
*c
) {
4666 pushGenericCommand(c
,REDIS_HEAD
);
4669 static void rpushCommand(redisClient
*c
) {
4670 pushGenericCommand(c
,REDIS_TAIL
);
4673 static void llenCommand(redisClient
*c
) {
4677 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4678 checkType(c
,o
,REDIS_LIST
)) return;
4681 addReplyUlong(c
,listLength(l
));
4684 static void lindexCommand(redisClient
*c
) {
4686 int index
= atoi(c
->argv
[2]->ptr
);
4690 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4691 checkType(c
,o
,REDIS_LIST
)) return;
4694 ln
= listIndex(list
, index
);
4696 addReply(c
,shared
.nullbulk
);
4698 robj
*ele
= listNodeValue(ln
);
4699 addReplyBulk(c
,ele
);
4703 static void lsetCommand(redisClient
*c
) {
4705 int index
= atoi(c
->argv
[2]->ptr
);
4709 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4710 checkType(c
,o
,REDIS_LIST
)) return;
4713 ln
= listIndex(list
, index
);
4715 addReply(c
,shared
.outofrangeerr
);
4717 robj
*ele
= listNodeValue(ln
);
4720 listNodeValue(ln
) = c
->argv
[3];
4721 incrRefCount(c
->argv
[3]);
4722 addReply(c
,shared
.ok
);
4727 static void popGenericCommand(redisClient
*c
, int where
) {
4732 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4733 checkType(c
,o
,REDIS_LIST
)) return;
4736 if (where
== REDIS_HEAD
)
4737 ln
= listFirst(list
);
4739 ln
= listLast(list
);
4742 addReply(c
,shared
.nullbulk
);
4744 robj
*ele
= listNodeValue(ln
);
4745 addReplyBulk(c
,ele
);
4746 listDelNode(list
,ln
);
4747 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4752 static void lpopCommand(redisClient
*c
) {
4753 popGenericCommand(c
,REDIS_HEAD
);
4756 static void rpopCommand(redisClient
*c
) {
4757 popGenericCommand(c
,REDIS_TAIL
);
4760 static void lrangeCommand(redisClient
*c
) {
4762 int start
= atoi(c
->argv
[2]->ptr
);
4763 int end
= atoi(c
->argv
[3]->ptr
);
4770 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4771 || checkType(c
,o
,REDIS_LIST
)) return;
4773 llen
= listLength(list
);
4775 /* convert negative indexes */
4776 if (start
< 0) start
= llen
+start
;
4777 if (end
< 0) end
= llen
+end
;
4778 if (start
< 0) start
= 0;
4779 if (end
< 0) end
= 0;
4781 /* indexes sanity checks */
4782 if (start
> end
|| start
>= llen
) {
4783 /* Out of range start or start > end result in empty list */
4784 addReply(c
,shared
.emptymultibulk
);
4787 if (end
>= llen
) end
= llen
-1;
4788 rangelen
= (end
-start
)+1;
4790 /* Return the result in form of a multi-bulk reply */
4791 ln
= listIndex(list
, start
);
4792 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4793 for (j
= 0; j
< rangelen
; j
++) {
4794 ele
= listNodeValue(ln
);
4795 addReplyBulk(c
,ele
);
4800 static void ltrimCommand(redisClient
*c
) {
4802 int start
= atoi(c
->argv
[2]->ptr
);
4803 int end
= atoi(c
->argv
[3]->ptr
);
4805 int j
, ltrim
, rtrim
;
4809 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4810 checkType(c
,o
,REDIS_LIST
)) return;
4812 llen
= listLength(list
);
4814 /* convert negative indexes */
4815 if (start
< 0) start
= llen
+start
;
4816 if (end
< 0) end
= llen
+end
;
4817 if (start
< 0) start
= 0;
4818 if (end
< 0) end
= 0;
4820 /* indexes sanity checks */
4821 if (start
> end
|| start
>= llen
) {
4822 /* Out of range start or start > end result in empty list */
4826 if (end
>= llen
) end
= llen
-1;
4831 /* Remove list elements to perform the trim */
4832 for (j
= 0; j
< ltrim
; j
++) {
4833 ln
= listFirst(list
);
4834 listDelNode(list
,ln
);
4836 for (j
= 0; j
< rtrim
; j
++) {
4837 ln
= listLast(list
);
4838 listDelNode(list
,ln
);
4840 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4842 addReply(c
,shared
.ok
);
4845 static void lremCommand(redisClient
*c
) {
4848 listNode
*ln
, *next
;
4849 int toremove
= atoi(c
->argv
[2]->ptr
);
4853 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4854 checkType(c
,o
,REDIS_LIST
)) return;
4858 toremove
= -toremove
;
4861 ln
= fromtail
? list
->tail
: list
->head
;
4863 robj
*ele
= listNodeValue(ln
);
4865 next
= fromtail
? ln
->prev
: ln
->next
;
4866 if (equalStringObjects(ele
,c
->argv
[3])) {
4867 listDelNode(list
,ln
);
4870 if (toremove
&& removed
== toremove
) break;
4874 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4875 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4878 /* This is the semantic of this command:
4879 * RPOPLPUSH srclist dstlist:
4880 * IF LLEN(srclist) > 0
4881 * element = RPOP srclist
4882 * LPUSH dstlist element
4889 * The idea is to be able to get an element from a list in a reliable way
4890 * since the element is not just returned but pushed against another list
4891 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4893 static void rpoplpushcommand(redisClient
*c
) {
4898 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4899 checkType(c
,sobj
,REDIS_LIST
)) return;
4900 srclist
= sobj
->ptr
;
4901 ln
= listLast(srclist
);
4904 addReply(c
,shared
.nullbulk
);
4906 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4907 robj
*ele
= listNodeValue(ln
);
4910 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4911 addReply(c
,shared
.wrongtypeerr
);
4915 /* Add the element to the target list (unless it's directly
4916 * passed to some BLPOP-ing client */
4917 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4919 /* Create the list if the key does not exist */
4920 dobj
= createListObject();
4921 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4922 incrRefCount(c
->argv
[2]);
4924 dstlist
= dobj
->ptr
;
4925 listAddNodeHead(dstlist
,ele
);
4929 /* Send the element to the client as reply as well */
4930 addReplyBulk(c
,ele
);
4932 /* Finally remove the element from the source list */
4933 listDelNode(srclist
,ln
);
4934 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4939 /* ==================================== Sets ================================ */
4941 static void saddCommand(redisClient
*c
) {
4944 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4946 set
= createSetObject();
4947 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4948 incrRefCount(c
->argv
[1]);
4950 if (set
->type
!= REDIS_SET
) {
4951 addReply(c
,shared
.wrongtypeerr
);
4955 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4956 incrRefCount(c
->argv
[2]);
4958 addReply(c
,shared
.cone
);
4960 addReply(c
,shared
.czero
);
4964 static void sremCommand(redisClient
*c
) {
4967 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4968 checkType(c
,set
,REDIS_SET
)) return;
4970 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4972 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4973 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4974 addReply(c
,shared
.cone
);
4976 addReply(c
,shared
.czero
);
4980 static void smoveCommand(redisClient
*c
) {
4981 robj
*srcset
, *dstset
;
4983 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4984 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4986 /* If the source key does not exist return 0, if it's of the wrong type
4988 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4989 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4992 /* Error if the destination key is not a set as well */
4993 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4994 addReply(c
,shared
.wrongtypeerr
);
4997 /* Remove the element from the source set */
4998 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4999 /* Key not found in the src set! return zero */
5000 addReply(c
,shared
.czero
);
5003 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5004 deleteKey(c
->db
,c
->argv
[1]);
5006 /* Add the element to the destination set */
5008 dstset
= createSetObject();
5009 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
5010 incrRefCount(c
->argv
[2]);
5012 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5013 incrRefCount(c
->argv
[3]);
5014 addReply(c
,shared
.cone
);
5017 static void sismemberCommand(redisClient
*c
) {
5020 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5021 checkType(c
,set
,REDIS_SET
)) return;
5023 if (dictFind(set
->ptr
,c
->argv
[2]))
5024 addReply(c
,shared
.cone
);
5026 addReply(c
,shared
.czero
);
5029 static void scardCommand(redisClient
*c
) {
5033 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5034 checkType(c
,o
,REDIS_SET
)) return;
5037 addReplyUlong(c
,dictSize(s
));
5040 static void spopCommand(redisClient
*c
) {
5044 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5045 checkType(c
,set
,REDIS_SET
)) return;
5047 de
= dictGetRandomKey(set
->ptr
);
5049 addReply(c
,shared
.nullbulk
);
5051 robj
*ele
= dictGetEntryKey(de
);
5053 addReplyBulk(c
,ele
);
5054 dictDelete(set
->ptr
,ele
);
5055 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5056 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5061 static void srandmemberCommand(redisClient
*c
) {
5065 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5066 checkType(c
,set
,REDIS_SET
)) return;
5068 de
= dictGetRandomKey(set
->ptr
);
5070 addReply(c
,shared
.nullbulk
);
5072 robj
*ele
= dictGetEntryKey(de
);
5074 addReplyBulk(c
,ele
);
5078 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5079 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5081 return dictSize(*d1
)-dictSize(*d2
);
5084 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5085 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5088 robj
*lenobj
= NULL
, *dstset
= NULL
;
5089 unsigned long j
, cardinality
= 0;
5091 for (j
= 0; j
< setsnum
; j
++) {
5095 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5096 lookupKeyRead(c
->db
,setskeys
[j
]);
5100 if (deleteKey(c
->db
,dstkey
))
5102 addReply(c
,shared
.czero
);
5104 addReply(c
,shared
.emptymultibulk
);
5108 if (setobj
->type
!= REDIS_SET
) {
5110 addReply(c
,shared
.wrongtypeerr
);
5113 dv
[j
] = setobj
->ptr
;
5115 /* Sort sets from the smallest to largest, this will improve our
5116 * algorithm's performance */
5117 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5119 /* The first thing we should output is the total number of elements...
5120 * since this is a multi-bulk write, but at this stage we don't know
5121 * the intersection set size, so we use a trick, append an empty object
5122 * to the output list and save the pointer to later modify it with the
5125 lenobj
= createObject(REDIS_STRING
,NULL
);
5127 decrRefCount(lenobj
);
5129 /* If we have a target key where to store the resulting set
5130 * create this key with an empty set inside */
5131 dstset
= createSetObject();
5134 /* Iterate all the elements of the first (smallest) set, and test
5135 * the element against all the other sets, if at least one set does
5136 * not include the element it is discarded */
5137 di
= dictGetIterator(dv
[0]);
5139 while((de
= dictNext(di
)) != NULL
) {
5142 for (j
= 1; j
< setsnum
; j
++)
5143 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5145 continue; /* at least one set does not contain the member */
5146 ele
= dictGetEntryKey(de
);
5148 addReplyBulk(c
,ele
);
5151 dictAdd(dstset
->ptr
,ele
,NULL
);
5155 dictReleaseIterator(di
);
5158 /* Store the resulting set into the target, if the intersection
5159 * is not an empty set. */
5160 deleteKey(c
->db
,dstkey
);
5161 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5162 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5163 incrRefCount(dstkey
);
5164 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5166 decrRefCount(dstset
);
5167 addReply(c
,shared
.czero
);
5171 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5176 static void sinterCommand(redisClient
*c
) {
5177 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5180 static void sinterstoreCommand(redisClient
*c
) {
5181 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Set operation selectors. sunionDiffGenericCommand() takes one of these in
 * its `op` argument and compares it against REDIS_OP_UNION and
 * REDIS_OP_DIFF. */
5184 #define REDIS_OP_UNION 0
5185 #define REDIS_OP_DIFF 1
5186 #define REDIS_OP_INTER 2
5188 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5189 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5192 robj
*dstset
= NULL
;
5193 int j
, cardinality
= 0;
5195 for (j
= 0; j
< setsnum
; j
++) {
5199 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5200 lookupKeyRead(c
->db
,setskeys
[j
]);
5205 if (setobj
->type
!= REDIS_SET
) {
5207 addReply(c
,shared
.wrongtypeerr
);
5210 dv
[j
] = setobj
->ptr
;
5213 /* We need a temp set object to store our union. If the dstkey
5214 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5215 * this set object will be the resulting object to set into the target key*/
5216 dstset
= createSetObject();
5218 /* Iterate all the elements of all the sets, add every element a single
5219 * time to the result set */
5220 for (j
= 0; j
< setsnum
; j
++) {
5221 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5222 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5224 di
= dictGetIterator(dv
[j
]);
5226 while((de
= dictNext(di
)) != NULL
) {
5229 /* dictAdd will not add the same element multiple times */
5230 ele
= dictGetEntryKey(de
);
5231 if (op
== REDIS_OP_UNION
|| j
== 0) {
5232 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5236 } else if (op
== REDIS_OP_DIFF
) {
5237 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5242 dictReleaseIterator(di
);
5244 /* result set is empty? Exit asap. */
5245 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5248 /* Output the content of the resulting set, if not in STORE mode */
5250 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5251 di
= dictGetIterator(dstset
->ptr
);
5252 while((de
= dictNext(di
)) != NULL
) {
5255 ele
= dictGetEntryKey(de
);
5256 addReplyBulk(c
,ele
);
5258 dictReleaseIterator(di
);
5259 decrRefCount(dstset
);
5261 /* If we have a target key where to store the resulting set
5262 * create this key with the result set inside */
5263 deleteKey(c
->db
,dstkey
);
5264 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5265 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5266 incrRefCount(dstkey
);
5267 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5269 decrRefCount(dstset
);
5270 addReply(c
,shared
.czero
);
5277 static void sunionCommand(redisClient
*c
) {
5278 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5281 static void sunionstoreCommand(redisClient
*c
) {
5282 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5285 static void sdiffCommand(redisClient
*c
) {
5286 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5289 static void sdiffstoreCommand(redisClient
*c
) {
5290 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5293 /* ==================================== ZSets =============================== */
5295 /* ZSETs are ordered sets using two data structures to hold the same elements
5296 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5299 * The elements are added to an hash table mapping Redis objects to scores.
5300 * At the same time the elements are added to a skip list mapping scores
5301 * to Redis objects (so objects are sorted by scores in this "view"). */
5303 /* This skiplist implementation is almost a C translation of the original
5304 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5305 * Alternative to Balanced Trees", modified in three ways:
5306 * a) this implementation allows for repeated values.
5307 * b) the comparison is not just by key (our 'score') but by satellite data.
5308 * c) there is a back pointer, so it's a doubly linked list with the back
5309 * pointers being only at "level 1". This allows to traverse the list
5310 * from tail to head, useful for ZREVRANGE. */
5312 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5313 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5315 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5317 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5323 static zskiplist
*zslCreate(void) {
5327 zsl
= zmalloc(sizeof(*zsl
));
5330 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5331 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5332 zsl
->header
->forward
[j
] = NULL
;
5334 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5335 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5336 zsl
->header
->span
[j
] = 0;
5338 zsl
->header
->backward
= NULL
;
5343 static void zslFreeNode(zskiplistNode
*node
) {
5344 decrRefCount(node
->obj
);
5345 zfree(node
->forward
);
5350 static void zslFree(zskiplist
*zsl
) {
5351 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5353 zfree(zsl
->header
->forward
);
5354 zfree(zsl
->header
->span
);
5357 next
= node
->forward
[0];
5364 static int zslRandomLevel(void) {
5366 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5368 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5371 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5372 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5373 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5377 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5378 /* store rank that is crossed to reach the insert position */
5379 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5381 while (x
->forward
[i
] &&
5382 (x
->forward
[i
]->score
< score
||
5383 (x
->forward
[i
]->score
== score
&&
5384 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5385 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5390 /* we assume the key is not already inside, since we allow duplicated
5391 * scores, and the re-insertion of score and redis object should never
5392 * happen since the caller of zslInsert() should test in the hash table
5393 * if the element is already inside or not. */
5394 level
= zslRandomLevel();
5395 if (level
> zsl
->level
) {
5396 for (i
= zsl
->level
; i
< level
; i
++) {
5398 update
[i
] = zsl
->header
;
5399 update
[i
]->span
[i
-1] = zsl
->length
;
5403 x
= zslCreateNode(level
,score
,obj
);
5404 for (i
= 0; i
< level
; i
++) {
5405 x
->forward
[i
] = update
[i
]->forward
[i
];
5406 update
[i
]->forward
[i
] = x
;
5408 /* update span covered by update[i] as x is inserted here */
5410 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5411 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5415 /* increment span for untouched levels */
5416 for (i
= level
; i
< zsl
->level
; i
++) {
5417 update
[i
]->span
[i
-1]++;
5420 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5422 x
->forward
[0]->backward
= x
;
5428 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5429 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5431 for (i
= 0; i
< zsl
->level
; i
++) {
5432 if (update
[i
]->forward
[i
] == x
) {
5434 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5436 update
[i
]->forward
[i
] = x
->forward
[i
];
5438 /* invariant: i > 0, because update[0]->forward[0]
5439 * is always equal to x */
5440 update
[i
]->span
[i
-1] -= 1;
5443 if (x
->forward
[0]) {
5444 x
->forward
[0]->backward
= x
->backward
;
5446 zsl
->tail
= x
->backward
;
5448 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5453 /* Delete an element with matching score/object from the skiplist. */
5454 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5455 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5459 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5460 while (x
->forward
[i
] &&
5461 (x
->forward
[i
]->score
< score
||
5462 (x
->forward
[i
]->score
== score
&&
5463 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5467 /* We may have multiple elements with the same score, what we need
5468 * is to find the element with both the right score and object. */
5470 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5471 zslDeleteNode(zsl
, x
, update
);
5475 return 0; /* not found */
5477 return 0; /* not found */
5480 /* Delete all the elements with score between min and max from the skiplist.
5481 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5482 * Note that this function takes the reference to the hash table view of the
5483 * sorted set, in order to remove the elements from the hash table too. */
5484 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5485 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5486 unsigned long removed
= 0;
5490 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5491 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5495 /* Nodes with a score < min have been skipped: delete the following ones
5496 * for as long as their score is <= max. */
5498 while (x
&& x
->score
<= max
) {
5499 zskiplistNode
*next
= x
->forward
[0];
5500 zslDeleteNode(zsl
, x
, update
);
5501 dictDelete(dict
,x
->obj
);
5506 return removed
; /* not found */
5509 /* Delete all the elements with rank between start and end from the skiplist.
5510 * Start and end are inclusive. Note that start and end need to be 1-based */
5511 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5512 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5513 unsigned long traversed
= 0, removed
= 0;
5517 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5518 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5519 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5527 while (x
&& traversed
<= end
) {
5528 zskiplistNode
*next
= x
->forward
[0];
5529 zslDeleteNode(zsl
, x
, update
);
5530 dictDelete(dict
,x
->obj
);
5539 /* Find the first node having a score equal or greater than the specified one.
5540 * Returns NULL if there is no match. */
5541 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5546 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5547 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5550 /* Nodes with a lower score have been skipped: the next node at level 0,
5551 * if any, is the first one with a score >= the requested one. */
5552 return x
->forward
[0];
5555 /* Find the rank for an element by both score and key.
5556 * Returns 0 when the element cannot be found, rank otherwise.
5557 * Note that the rank is 1-based due to the span of zsl->header to the
5559 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5561 unsigned long rank
= 0;
5565 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5566 while (x
->forward
[i
] &&
5567 (x
->forward
[i
]->score
< score
||
5568 (x
->forward
[i
]->score
== score
&&
5569 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5570 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5574 /* x might be equal to zsl->header, so test if obj is non-NULL */
5575 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5582 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5583 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5585 unsigned long traversed
= 0;
5589 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5590 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5592 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5595 if (traversed
== rank
) {
5602 /* The actual Z-commands implementations */
5604 /* This generic command implements both ZADD and ZINCRBY.
5605 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5606 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5607 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5612 zsetobj
= lookupKeyWrite(c
->db
,key
);
5613 if (zsetobj
== NULL
) {
5614 zsetobj
= createZsetObject();
5615 dictAdd(c
->db
->dict
,key
,zsetobj
);
5618 if (zsetobj
->type
!= REDIS_ZSET
) {
5619 addReply(c
,shared
.wrongtypeerr
);
5625 /* Ok now since we implement both ZADD and ZINCRBY here the code
5626 * needs to handle the two different conditions. It's all about setting
5627 * '*score', that is, the new score to set, to the right value. */
5628 score
= zmalloc(sizeof(double));
5632 /* Read the old score. If the element is not present, start from 0 */
5633 de
= dictFind(zs
->dict
,ele
);
5635 double *oldscore
= dictGetEntryVal(de
);
5636 *score
= *oldscore
+ scoreval
;
5644 /* What follows is a simple remove and re-insert operation that is common
5645 * to both ZADD and ZINCRBY... */
5646 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5647 /* case 1: New element */
5648 incrRefCount(ele
); /* added to hash */
5649 zslInsert(zs
->zsl
,*score
,ele
);
5650 incrRefCount(ele
); /* added to skiplist */
5653 addReplyDouble(c
,*score
);
5655 addReply(c
,shared
.cone
);
5660 /* case 2: Score update operation */
5661 de
= dictFind(zs
->dict
,ele
);
5662 redisAssert(de
!= NULL
);
5663 oldscore
= dictGetEntryVal(de
);
5664 if (*score
!= *oldscore
) {
5667 /* Remove and insert the element in the skip list with new score */
5668 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5669 redisAssert(deleted
!= 0);
5670 zslInsert(zs
->zsl
,*score
,ele
);
5672 /* Update the score in the hash table */
5673 dictReplace(zs
->dict
,ele
,score
);
5679 addReplyDouble(c
,*score
);
5681 addReply(c
,shared
.czero
);
5685 static void zaddCommand(redisClient
*c
) {
5688 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5689 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5692 static void zincrbyCommand(redisClient
*c
) {
5695 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5696 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5699 static void zremCommand(redisClient
*c
) {
5706 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5707 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5710 de
= dictFind(zs
->dict
,c
->argv
[2]);
5712 addReply(c
,shared
.czero
);
5715 /* Delete from the skiplist */
5716 oldscore
= dictGetEntryVal(de
);
5717 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5718 redisAssert(deleted
!= 0);
5720 /* Delete from the hash table */
5721 dictDelete(zs
->dict
,c
->argv
[2]);
5722 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5723 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5725 addReply(c
,shared
.cone
);
5728 static void zremrangebyscoreCommand(redisClient
*c
) {
5735 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5736 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5738 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5739 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5742 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5743 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5744 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5745 server
.dirty
+= deleted
;
5746 addReplyLong(c
,deleted
);
5749 static void zremrangebyrankCommand(redisClient
*c
) {
5757 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5758 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5760 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5761 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5763 llen
= zs
->zsl
->length
;
5765 /* convert negative indexes */
5766 if (start
< 0) start
= llen
+start
;
5767 if (end
< 0) end
= llen
+end
;
5768 if (start
< 0) start
= 0;
5769 if (end
< 0) end
= 0;
5771 /* indexes sanity checks */
5772 if (start
> end
|| start
>= llen
) {
5773 addReply(c
,shared
.czero
);
5776 if (end
>= llen
) end
= llen
-1;
5778 /* increment start and end because zsl*Rank functions
5779 * use 1-based rank */
5780 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5781 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5782 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5783 server
.dirty
+= deleted
;
5784 addReplyLong(c
, deleted
);
5792 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5793 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5794 unsigned long size1
, size2
;
5795 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5796 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5797 return size1
- size2
;
5800 #define REDIS_AGGR_SUM 1
5801 #define REDIS_AGGR_MIN 2
5802 #define REDIS_AGGR_MAX 3
5804 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5805 if (aggregate
== REDIS_AGGR_SUM
) {
5806 *target
= *target
+ val
;
5807 } else if (aggregate
== REDIS_AGGR_MIN
) {
5808 *target
= val
< *target
? val
: *target
;
5809 } else if (aggregate
== REDIS_AGGR_MAX
) {
5810 *target
= val
> *target
? val
: *target
;
5813 redisPanic("Unknown ZUNION/INTER aggregate type");
5817 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5819 int aggregate
= REDIS_AGGR_SUM
;
5826 /* expect zsetnum input keys to be given */
5827 zsetnum
= atoi(c
->argv
[2]->ptr
);
5829 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5833 /* test if the expected number of keys would overflow */
5834 if (3+zsetnum
> c
->argc
) {
5835 addReply(c
,shared
.syntaxerr
);
5839 /* read keys to be used for input */
5840 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5841 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5842 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5846 if (zsetobj
->type
!= REDIS_ZSET
) {
5848 addReply(c
,shared
.wrongtypeerr
);
5851 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5854 /* default all weights to 1 */
5855 src
[i
].weight
= 1.0;
5858 /* parse optional extra arguments */
5860 int remaining
= c
->argc
- j
;
5863 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5865 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5866 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5869 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5871 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5872 aggregate
= REDIS_AGGR_SUM
;
5873 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5874 aggregate
= REDIS_AGGR_MIN
;
5875 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5876 aggregate
= REDIS_AGGR_MAX
;
5879 addReply(c
,shared
.syntaxerr
);
5885 addReply(c
,shared
.syntaxerr
);
5891 /* sort sets from the smallest to largest, this will improve our
5892 * algorithm's performance */
5893 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5895 dstobj
= createZsetObject();
5896 dstzset
= dstobj
->ptr
;
5898 if (op
== REDIS_OP_INTER
) {
5899 /* skip going over all entries if the smallest zset is NULL or empty */
5900 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5901 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5902 * from small to large, all src[i > 0].dict are non-empty too */
5903 di
= dictGetIterator(src
[0].dict
);
5904 while((de
= dictNext(di
)) != NULL
) {
5905 double *score
= zmalloc(sizeof(double)), value
;
5906 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5908 for (j
= 1; j
< zsetnum
; j
++) {
5909 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5911 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5912 zunionInterAggregate(score
, value
, aggregate
);
5918 /* skip entry when not present in every source dict */
5922 robj
*o
= dictGetEntryKey(de
);
5923 dictAdd(dstzset
->dict
,o
,score
);
5924 incrRefCount(o
); /* added to dictionary */
5925 zslInsert(dstzset
->zsl
,*score
,o
);
5926 incrRefCount(o
); /* added to skiplist */
5929 dictReleaseIterator(di
);
5931 } else if (op
== REDIS_OP_UNION
) {
5932 for (i
= 0; i
< zsetnum
; i
++) {
5933 if (!src
[i
].dict
) continue;
5935 di
= dictGetIterator(src
[i
].dict
);
5936 while((de
= dictNext(di
)) != NULL
) {
5937 /* skip key when already processed */
5938 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5940 double *score
= zmalloc(sizeof(double)), value
;
5941 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5943 /* because the zsets are sorted by size, it's only possible
5944 * for sets at larger indices to hold this entry */
5945 for (j
= (i
+1); j
< zsetnum
; j
++) {
5946 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5948 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5949 zunionInterAggregate(score
, value
, aggregate
);
5953 robj
*o
= dictGetEntryKey(de
);
5954 dictAdd(dstzset
->dict
,o
,score
);
5955 incrRefCount(o
); /* added to dictionary */
5956 zslInsert(dstzset
->zsl
,*score
,o
);
5957 incrRefCount(o
); /* added to skiplist */
5959 dictReleaseIterator(di
);
5962 /* unknown operator */
5963 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5966 deleteKey(c
->db
,dstkey
);
5967 if (dstzset
->zsl
->length
) {
5968 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5969 incrRefCount(dstkey
);
5970 addReplyLong(c
, dstzset
->zsl
->length
);
5973 decrRefCount(dstobj
);
5974 addReply(c
, shared
.czero
);
5979 static void zunionCommand(redisClient
*c
) {
5980 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5983 static void zinterCommand(redisClient
*c
) {
5984 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5987 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5999 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6000 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6002 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6004 } else if (c
->argc
>= 5) {
6005 addReply(c
,shared
.syntaxerr
);
6009 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6010 || checkType(c
,o
,REDIS_ZSET
)) return;
6015 /* convert negative indexes */
6016 if (start
< 0) start
= llen
+start
;
6017 if (end
< 0) end
= llen
+end
;
6018 if (start
< 0) start
= 0;
6019 if (end
< 0) end
= 0;
6021 /* indexes sanity checks */
6022 if (start
> end
|| start
>= llen
) {
6023 /* Out of range start or start > end result in empty list */
6024 addReply(c
,shared
.emptymultibulk
);
6027 if (end
>= llen
) end
= llen
-1;
6028 rangelen
= (end
-start
)+1;
6030 /* check if starting point is trivial, before searching
6031 * the element in log(N) time */
6033 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6036 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6039 /* Return the result in form of a multi-bulk reply */
6040 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6041 withscores
? (rangelen
*2) : rangelen
));
6042 for (j
= 0; j
< rangelen
; j
++) {
6044 addReplyBulk(c
,ele
);
6046 addReplyDouble(c
,ln
->score
);
6047 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6051 static void zrangeCommand(redisClient
*c
) {
6052 zrangeGenericCommand(c
,0);
6055 static void zrevrangeCommand(redisClient
*c
) {
6056 zrangeGenericCommand(c
,1);
6059 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6060 * If justcount is non-zero, just the count is returned. */
6061 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6064 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6065 int offset
= 0, limit
= -1;
6069 /* Parse the min-max interval. If one of the values is prefixed
6070 * by the "(" character, it's considered "open". For instance
6071 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6072 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6073 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6074 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6077 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6079 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6080 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6083 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6086 /* Parse "WITHSCORES": note that if the command was called with
6087 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6088 * enter the following paths to parse WITHSCORES and LIMIT. */
6089 if (c
->argc
== 5 || c
->argc
== 8) {
6090 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6095 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6099 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6104 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6105 addReply(c
,shared
.syntaxerr
);
6107 } else if (c
->argc
== (7 + withscores
)) {
6108 offset
= atoi(c
->argv
[5]->ptr
);
6109 limit
= atoi(c
->argv
[6]->ptr
);
6110 if (offset
< 0) offset
= 0;
6113 /* Ok, lookup the key and get the range */
6114 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6116 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6118 if (o
->type
!= REDIS_ZSET
) {
6119 addReply(c
,shared
.wrongtypeerr
);
6121 zset
*zsetobj
= o
->ptr
;
6122 zskiplist
*zsl
= zsetobj
->zsl
;
6124 robj
*ele
, *lenobj
= NULL
;
6125 unsigned long rangelen
= 0;
6127 /* Get the first node with the score >= min, or with
6128 * score > min if 'minex' is true. */
6129 ln
= zslFirstWithScore(zsl
,min
);
6130 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6133 /* No element matching the specified interval */
6134 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6138 /* We don't know in advance how many matching elements there
6139 * are in the list, so we push this object that will represent
6140 * the multi-bulk length in the output buffer, and will "fix"
6143 lenobj
= createObject(REDIS_STRING
,NULL
);
6145 decrRefCount(lenobj
);
6148 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6151 ln
= ln
->forward
[0];
6154 if (limit
== 0) break;
6157 addReplyBulk(c
,ele
);
6159 addReplyDouble(c
,ln
->score
);
6161 ln
= ln
->forward
[0];
6163 if (limit
> 0) limit
--;
6166 addReplyLong(c
,(long)rangelen
);
6168 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6169 withscores
? (rangelen
*2) : rangelen
);
6175 static void zrangebyscoreCommand(redisClient
*c
) {
6176 genericZrangebyscoreCommand(c
,0);
6179 static void zcountCommand(redisClient
*c
) {
6180 genericZrangebyscoreCommand(c
,1);
6183 static void zcardCommand(redisClient
*c
) {
6187 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6188 checkType(c
,o
,REDIS_ZSET
)) return;
6191 addReplyUlong(c
,zs
->zsl
->length
);
6194 static void zscoreCommand(redisClient
*c
) {
6199 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6200 checkType(c
,o
,REDIS_ZSET
)) return;
6203 de
= dictFind(zs
->dict
,c
->argv
[2]);
6205 addReply(c
,shared
.nullbulk
);
6207 double *score
= dictGetEntryVal(de
);
6209 addReplyDouble(c
,*score
);
6213 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6221 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6222 checkType(c
,o
,REDIS_ZSET
)) return;
6226 de
= dictFind(zs
->dict
,c
->argv
[2]);
6228 addReply(c
,shared
.nullbulk
);
6232 score
= dictGetEntryVal(de
);
6233 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6236 addReplyLong(c
, zsl
->length
- rank
);
6238 addReplyLong(c
, rank
-1);
6241 addReply(c
,shared
.nullbulk
);
6245 static void zrankCommand(redisClient
*c
) {
6246 zrankGenericCommand(c
, 0);
6249 static void zrevrankCommand(redisClient
*c
) {
6250 zrankGenericCommand(c
, 1);
6253 /* ========================= Hashes utility functions ======================= */
6254 #define REDIS_HASH_KEY 1
6255 #define REDIS_HASH_VALUE 2
6257 /* Check the length of a number of objects to see if we need to convert a
6258 * zipmap to a real hash. Note that we only check string encoded objects
6259 * as their string length can be queried in constant time. */
6260 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6262 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6264 for (i
= start
; i
<= end
; i
++) {
6265 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6266 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6268 convertToRealHash(subject
);
6274 /* Encode given objects in-place when the hash uses a dict. */
6275 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6276 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6277 if (o1
) *o1
= tryObjectEncoding(*o1
);
6278 if (o2
) *o2
= tryObjectEncoding(*o2
);
6282 /* Get the value from a hash identified by key. Returns either a string
6283 * object or NULL if the value cannot be found. The refcount of the object
6284 * is always increased by 1 when the value was found. */
6285 static robj
*hashGet(robj
*o
, robj
*key
) {
6287 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6290 key
= getDecodedObject(key
);
6291 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6292 value
= createStringObject((char*)v
,vlen
);
6296 dictEntry
*de
= dictFind(o
->ptr
,key
);
6298 value
= dictGetEntryVal(de
);
6299 incrRefCount(value
);
6305 /* Test if the key exists in the given hash. Returns 1 if the key
6306 * exists and 0 when it doesn't. */
6307 static int hashExists(robj
*o
, robj
*key
) {
6308 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6309 key
= getDecodedObject(key
);
6310 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6316 if (dictFind(o
->ptr
,key
) != NULL
) {
6323 /* Add an element, discard the old if the key already exists.
6324 * Return 0 on insert and 1 on update. */
6325 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6327 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6328 key
= getDecodedObject(key
);
6329 value
= getDecodedObject(value
);
6330 o
->ptr
= zipmapSet(o
->ptr
,
6331 key
->ptr
,sdslen(key
->ptr
),
6332 value
->ptr
,sdslen(value
->ptr
), &update
);
6334 decrRefCount(value
);
6336 /* Check if the zipmap needs to be upgraded to a real hash table */
6337 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6338 convertToRealHash(o
);
6340 if (dictReplace(o
->ptr
,key
,value
)) {
6347 incrRefCount(value
);
6352 /* Delete an element from a hash.
6353 * Return 1 on deleted and 0 on not found. */
6354 static int hashDelete(robj
*o
, robj
*key
) {
6356 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6357 key
= getDecodedObject(key
);
6358 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6361 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6362 /* Always check if the dictionary needs a resize after a delete. */
6363 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6368 /* Return the number of elements in a hash. */
6369 static unsigned long hashLength(robj
*o
) {
6370 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6371 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6374 /* Structure to hold hash iteration abstraction. Note that iteration over
6375 * hashes involves both fields and values. Because it is possible that
6376 * not both are required, store pointers in the iterator to avoid
6377 * unnecessary memory allocation for fields/values. */
6381 unsigned char *zk
, *zv
;
6382 unsigned int zklen
, zvlen
;
6388 static hashIterator
*hashInitIterator(robj
*subject
) {
6389 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6390 hi
->encoding
= subject
->encoding
;
6391 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6392 hi
->zi
= zipmapRewind(subject
->ptr
);
6393 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6394 hi
->di
= dictGetIterator(subject
->ptr
);
6401 static void hashReleaseIterator(hashIterator
*hi
) {
6402 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6403 dictReleaseIterator(hi
->di
);
6408 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6409 * could be found and REDIS_ERR when the iterator reaches the end. */
6410 static int hashNext(hashIterator
*hi
) {
6411 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6412 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6413 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6415 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6420 /* Get key or value object at current iteration position.
6421 * This increases the refcount of the field object by 1. */
6422 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6424 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6425 if (what
& REDIS_HASH_KEY
) {
6426 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6428 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6431 if (what
& REDIS_HASH_KEY
) {
6432 o
= dictGetEntryKey(hi
->de
);
6434 o
= dictGetEntryVal(hi
->de
);
6441 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6442 robj
*o
= lookupKeyWrite(c
->db
,key
);
6444 o
= createHashObject();
6445 dictAdd(c
->db
->dict
,key
,o
);
6448 if (o
->type
!= REDIS_HASH
) {
6449 addReply(c
,shared
.wrongtypeerr
);
6456 /* ============================= Hash commands ============================== */
6457 static void hsetCommand(redisClient
*c
) {
6461 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6462 hashTryConversion(o
,c
->argv
,2,3);
6463 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6464 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6465 addReply(c
, update
? shared
.czero
: shared
.cone
);
6469 static void hsetnxCommand(redisClient
*c
) {
6471 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6472 hashTryConversion(o
,c
->argv
,2,3);
6474 if (hashExists(o
, c
->argv
[2])) {
6475 addReply(c
, shared
.czero
);
6477 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6478 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6479 addReply(c
, shared
.cone
);
6484 static void hmsetCommand(redisClient
*c
) {
6488 if ((c
->argc
% 2) == 1) {
6489 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6493 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6494 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6495 for (i
= 2; i
< c
->argc
; i
+= 2) {
6496 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6497 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6499 addReply(c
, shared
.ok
);
6503 static void hincrbyCommand(redisClient
*c
) {
6504 long long value
, incr
;
6505 robj
*o
, *current
, *new;
6507 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6508 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6509 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6510 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6511 "hash value is not an integer") != REDIS_OK
) {
6512 decrRefCount(current
);
6515 decrRefCount(current
);
6521 new = createStringObjectFromLongLong(value
);
6522 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6523 hashSet(o
,c
->argv
[2],new);
6525 addReplyLongLong(c
,value
);
6529 static void hgetCommand(redisClient
*c
) {
6531 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6532 checkType(c
,o
,REDIS_HASH
)) return;
6534 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6535 addReplyBulk(c
,value
);
6536 decrRefCount(value
);
6538 addReply(c
,shared
.nullbulk
);
6542 static void hmgetCommand(redisClient
*c
) {
6545 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6546 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6547 addReply(c
,shared
.wrongtypeerr
);
6550 /* Note the check for o != NULL happens inside the loop. This is
6551 * done because objects that cannot be found are considered to be
6552 * an empty hash. The reply should then be a series of NULLs. */
6553 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6554 for (i
= 2; i
< c
->argc
; i
++) {
6555 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6556 addReplyBulk(c
,value
);
6557 decrRefCount(value
);
6559 addReply(c
,shared
.nullbulk
);
6564 static void hdelCommand(redisClient
*c
) {
6566 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6567 checkType(c
,o
,REDIS_HASH
)) return;
6569 if (hashDelete(o
,c
->argv
[2])) {
6570 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6571 addReply(c
,shared
.cone
);
6574 addReply(c
,shared
.czero
);
6578 static void hlenCommand(redisClient
*c
) {
6580 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6581 checkType(c
,o
,REDIS_HASH
)) return;
6583 addReplyUlong(c
,hashLength(o
));
6586 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6587 robj
*o
, *lenobj
, *obj
;
6588 unsigned long count
= 0;
6591 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6592 || checkType(c
,o
,REDIS_HASH
)) return;
6594 lenobj
= createObject(REDIS_STRING
,NULL
);
6596 decrRefCount(lenobj
);
6598 hi
= hashInitIterator(o
);
6599 while (hashNext(hi
) != REDIS_ERR
) {
6600 if (flags
& REDIS_HASH_KEY
) {
6601 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6602 addReplyBulk(c
,obj
);
6606 if (flags
& REDIS_HASH_VALUE
) {
6607 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6608 addReplyBulk(c
,obj
);
6613 hashReleaseIterator(hi
);
6615 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6618 static void hkeysCommand(redisClient
*c
) {
6619 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6622 static void hvalsCommand(redisClient
*c
) {
6623 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6626 static void hgetallCommand(redisClient
*c
) {
6627 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6630 static void hexistsCommand(redisClient
*c
) {
6632 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6633 checkType(c
,o
,REDIS_HASH
)) return;
6635 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
/* Convert the hash object 'o' to the REDIS_ENCODING_HT encoding.
 *
 * The current payload (o->ptr) is walked with zipmapRewind()/zipmapNext();
 * every key/value pair is turned into two string objects, passed through
 * tryObjectEncoding() and added to a dict created with hashDictType.
 * The assert requires 'o' to be a REDIS_HASH that is not yet encoded as
 * REDIS_ENCODING_HT.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (the visible part never stores the
 * new dict into 'o' nor releases the old payload); verify against the
 * complete file. */
6638 static void convertToRealHash(robj
*o
) {
6639 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6640 unsigned int klen
, vlen
;
6641 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6643 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6644 p
= zipmapRewind(zm
);
6645 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6646 robj
*keyobj
, *valobj
;
6648 keyobj
= createStringObject((char*)key
,klen
);
6649 valobj
= createStringObject((char*)val
,vlen
);
6650 keyobj
= tryObjectEncoding(keyobj
);
6651 valobj
= tryObjectEncoding(valobj
);
6652 dictAdd(dict
,keyobj
,valobj
);
6654 o
->encoding
= REDIS_ENCODING_HT
;
6659 /* ========================= Non type-specific commands ==================== */
6661 static void flushdbCommand(redisClient
*c
) {
6662 server
.dirty
+= dictSize(c
->db
->dict
);
6663 dictEmpty(c
->db
->dict
);
6664 dictEmpty(c
->db
->expires
);
6665 addReply(c
,shared
.ok
);
/* FLUSHALL
 *
 * Adds the value returned by emptyDb() to server.dirty and replies with
 * shared.ok. If a background save child is recorded in
 * server.bgsavechildpid it is sent SIGKILL and rdbRemoveTempFile() is
 * called for that pid; rdbSave(server.dbfilename) is invoked afterwards.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete file. */
6668 static void flushallCommand(redisClient
*c
) {
6669 server
.dirty
+= emptyDb();
6670 addReply(c
,shared
.ok
);
6671 if (server
.bgsavechildpid
!= -1) {
6672 kill(server
.bgsavechildpid
,SIGKILL
);
6673 rdbRemoveTempFile(server
.bgsavechildpid
);
6675 rdbSave(server
.dbfilename
);
/* Allocate a redisSortOperation with zmalloc() and store 'pattern' in it.
 * The pattern pointer is stored as is (no copy is made by the visible code).
 * NOTE(review): the embedded line numbers skip; presumably the missing
 * lines store 'type' and return 'so' - verify against the complete file. */
6679 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6680 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6682 so
->pattern
= pattern
;
6686 /* Return the value associated to the key with a name obtained
6687 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6688 * The returned object will always have its refcount increased by 1
6689 * when it is non-NULL.
 *
 * A pattern equal to "#" selects 'subst' itself. A "->" found after the
 * '*' marks a hash dereference: the text following it is used as the name
 * of the hash field to fetch with hashGet(). NULL is returned when the
 * pattern plus the substitution would not fit in REDIS_SORTKEY_MAX bytes,
 * when the substituted key is not found, or when the object found has an
 * unexpected type.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete
 * file. In particular, check whether the "too long" early return releases
 * the reference taken by getDecodedObject(). */
6690 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6693 robj keyobj
, fieldobj
, *o
;
6694 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6695 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6699 char buf
[REDIS_SORTKEY_MAX
+1];
6700 } keyname
, fieldname
;
6702 /* If the pattern is "#" return the substitution object itself in order
6703 * to implement the "SORT ... GET #" feature. */
6704 spat
= pattern
->ptr
;
6705 if (spat
[0] == '#' && spat
[1] == '\0') {
6706 incrRefCount(subst
);
6710 /* The substitution object may be specially encoded. If so we create
6711 * a decoded object on the fly. Otherwise getDecodedObject will just
6712 * increment the ref count, that we'll decrement later. */
6713 subst
= getDecodedObject(subst
);
6716 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6717 p
= strchr(spat
,'*');
6719 decrRefCount(subst
);
6723 /* Find out if we're dealing with a hash dereference. */
6724 if ((f
= strstr(p
+1, "->")) != NULL
) {
6725 fieldlen
= sdslen(spat
)-(f
-spat
);
6726 /* this also copies \0 character */
6727 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6728 fieldname
.len
= fieldlen
-2;
6734 sublen
= sdslen(ssub
);
6735 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6736 memcpy(keyname
.buf
,spat
,prefixlen
);
6737 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6738 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6739 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6740 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6741 decrRefCount(subst
);
6743 /* Lookup substituted key */
6744 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6745 o
= lookupKeyRead(db
,&keyobj
);
6746 if (o
== NULL
) return NULL
;
6749 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6751 /* Retrieve value from hash by the field name. This operation
6752 * already increases the refcount of the returned object. */
6753 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6754 o
= hashGet(o
, &fieldobj
);
6756 if (o
->type
!= REDIS_STRING
) return NULL
;
6758 /* Every object that this function returns needs to have its refcount
6759 * increased. sortCommand decreases it again. */
6766 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6767 * the additional parameter is not standard but a BSD-specific we have to
6768 * pass sorting parameters via the global 'server' structure.
 *
 * Parameters read from 'server':
 *   sort_alpha     - when zero the precomputed u.score values are compared
 *                    numerically, otherwise strings are compared.
 *   sort_bypattern - in alpha mode, compare u.cmpobj with strcoll() (with
 *                    dedicated handling when a compare object is NULL)
 *                    instead of comparing the elements themselves with
 *                    compareStringObjects().
 *   sort_desc      - when set the comparison result is negated.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (e.g. the assignments to 'cmp' in
 * the numeric and NULL branches); verify against the complete file. */
6769 static int sortCompare(const void *s1
, const void *s2
) {
6770 const redisSortObject
*so1
= s1
, *so2
= s2
;
6773 if (!server
.sort_alpha
) {
6774 /* Numeric sorting. Here it's trivial as we precomputed scores */
6775 if (so1
->u
.score
> so2
->u
.score
) {
6777 } else if (so1
->u
.score
< so2
->u
.score
) {
6783 /* Alphanumeric sorting */
6784 if (server
.sort_bypattern
) {
6785 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6786 /* At least one compare object is NULL */
6787 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6789 else if (so1
->u
.cmpobj
== NULL
)
6794 /* We have both the objects, use strcoll */
6795 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6798 /* Compare elements directly. */
6799 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6802 return server
.sort_desc
? -cmp
: cmp
;
/* SORT key [ASC|DESC] [ALPHA] [LIMIT start count] [BY pattern]
 *          [GET pattern ...] [STORE dstkey]
 *
 * Outline of the visible code:
 *  1. Look up argv[1]; it must be a list, a set or a sorted set.
 *  2. Parse the options (case insensitive). A BY pattern without '*' sets
 *     'dontsort'. Every GET pattern becomes a redisSortOperation.
 *  3. Load all the elements into 'vector'.
 *  4. Unless 'dontsort' is set, compute the sort weight of every element
 *     (via lookupKeyByPattern() when BY is used) and sort with qsort(), or
 *     with pqsort() when BY is used together with a LIMIT sub-range.
 *  5. Emit the selected range, either to the client or - with STORE - as a
 *     new list object saved at 'storekey'.
 *  6. Release the temporary objects.
 * NOTE(review): the embedded line numbers skip, i.e. many lines of this
 * function are missing from this listing; verify against the complete file.
 * LIMIT arguments are parsed with atoi(), so non numeric input is not
 * reported as an error by the visible code. */
6805 /* The SORT command is the most complex command in Redis. Warning: this code
6806 * is optimized for speed and a bit less for readability */
6807 static void sortCommand(redisClient
*c
) {
6810 int desc
= 0, alpha
= 0;
6811 int limit_start
= 0, limit_count
= -1, start
, end
;
6812 int j
, dontsort
= 0, vectorlen
;
6813 int getop
= 0; /* GET operation counter */
6814 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6815 redisSortObject
*vector
; /* Resulting vector to sort */
6817 /* Lookup the key to sort. It must be of the right types */
6818 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6819 if (sortval
== NULL
) {
6820 addReply(c
,shared
.emptymultibulk
);
6823 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6824 sortval
->type
!= REDIS_ZSET
)
6826 addReply(c
,shared
.wrongtypeerr
);
6830 /* Create a list of operations to perform for every sorted element.
6831 * Operations can be GET/DEL/INCR/DECR */
6832 operations
= listCreate();
6833 listSetFreeMethod(operations
,zfree
);
6836 /* Now we need to protect sortval incrementing its count, in the future
6837 * SORT may have options able to overwrite/delete keys during the sorting
6838 * and the sorted key itself may get destroyed */
6839 incrRefCount(sortval
);
6841 /* The SORT command has an SQL-alike syntax, parse it */
6842 while(j
< c
->argc
) {
6843 int leftargs
= c
->argc
-j
-1;
6844 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6846 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6848 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6850 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6851 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6852 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6854 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6855 storekey
= c
->argv
[j
+1];
6857 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6858 sortby
= c
->argv
[j
+1];
6859 /* If the BY pattern does not contain '*', i.e. it is constant,
6860 * we don't need to sort nor to lookup the weight keys. */
6861 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6863 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6864 listAddNodeTail(operations
,createSortOperation(
6865 REDIS_SORT_GET
,c
->argv
[j
+1]));
6869 decrRefCount(sortval
);
6870 listRelease(operations
);
6871 addReply(c
,shared
.syntaxerr
);
6877 /* Load the sorting vector with all the objects to sort */
6878 switch(sortval
->type
) {
6879 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6880 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6881 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6882 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
6884 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6887 if (sortval
->type
== REDIS_LIST
) {
6888 list
*list
= sortval
->ptr
;
6892 listRewind(list
,&li
);
6893 while((ln
= listNext(&li
))) {
6894 robj
*ele
= ln
->value
;
6895 vector
[j
].obj
= ele
;
6896 vector
[j
].u
.score
= 0;
6897 vector
[j
].u
.cmpobj
= NULL
;
6905 if (sortval
->type
== REDIS_SET
) {
6908 zset
*zs
= sortval
->ptr
;
6912 di
= dictGetIterator(set
);
6913 while((setele
= dictNext(di
)) != NULL
) {
6914 vector
[j
].obj
= dictGetEntryKey(setele
);
6915 vector
[j
].u
.score
= 0;
6916 vector
[j
].u
.cmpobj
= NULL
;
6919 dictReleaseIterator(di
);
6921 redisAssert(j
== vectorlen
);
6923 /* Now it's time to load the right scores in the sorting vector */
6924 if (dontsort
== 0) {
6925 for (j
= 0; j
< vectorlen
; j
++) {
6928 /* lookup value to sort by */
6929 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6930 if (!byval
) continue;
6932 /* use object itself to sort by */
6933 byval
= vector
[j
].obj
;
6937 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6939 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6940 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6941 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
6942 /* Don't need to decode the object if it's
6943 * integer-encoded (the only encoding supported) so
6944 * far. We can just cast it */
6945 vector
[j
].u
.score
= (long)byval
->ptr
;
6947 redisAssert(1 != 1);
6951 /* when the object was retrieved using lookupKeyByPattern,
6952 * its refcount needs to be decreased. */
6954 decrRefCount(byval
);
6959 /* We are ready to sort the vector... perform a bit of sanity check
6960 * on the LIMIT option too. We'll use a partial version of quicksort. */
6961 start
= (limit_start
< 0) ? 0 : limit_start
;
6962 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6963 if (start
>= vectorlen
) {
6964 start
= vectorlen
-1;
6967 if (end
>= vectorlen
) end
= vectorlen
-1;
6969 if (dontsort
== 0) {
6970 server
.sort_desc
= desc
;
6971 server
.sort_alpha
= alpha
;
6972 server
.sort_bypattern
= sortby
? 1 : 0;
6973 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6974 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6976 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6979 /* Send command output to the output buffer, performing the specified
6980 * GET/DEL/INCR/DECR operations if any. */
6981 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6982 if (storekey
== NULL
) {
6983 /* STORE option not specified, send the sorting result to client */
6984 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6985 for (j
= start
; j
<= end
; j
++) {
6989 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6990 listRewind(operations
,&li
);
6991 while((ln
= listNext(&li
))) {
6992 redisSortOperation
*sop
= ln
->value
;
6993 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6996 if (sop
->type
== REDIS_SORT_GET
) {
6998 addReply(c
,shared
.nullbulk
);
7000 addReplyBulk(c
,val
);
7004 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7009 robj
*listObject
= createListObject();
7010 list
*listPtr
= (list
*) listObject
->ptr
;
7012 /* STORE option specified, set the sorting result as a List object */
7013 for (j
= start
; j
<= end
; j
++) {
7018 listAddNodeTail(listPtr
,vector
[j
].obj
);
7019 incrRefCount(vector
[j
].obj
);
7021 listRewind(operations
,&li
);
7022 while((ln
= listNext(&li
))) {
7023 redisSortOperation
*sop
= ln
->value
;
7024 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7027 if (sop
->type
== REDIS_SORT_GET
) {
7029 listAddNodeTail(listPtr
,createStringObject("",0));
7031 /* We should do a incrRefCount on val because it is
7032 * added to the list, but also a decrRefCount because
7033 * it is returned by lookupKeyByPattern. This results
7034 * in doing nothing at all. */
7035 listAddNodeTail(listPtr
,val
);
7038 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7042 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7043 incrRefCount(storekey
);
7045 /* Note: we add 1 because the DB is dirty anyway since even if the
7046 * SORT result is empty a new key is set and maybe the old content
7048 server
.dirty
+= 1+outputlen
;
7049 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7053 decrRefCount(sortval
);
7054 listRelease(operations
);
7055 for (j
= 0; j
< vectorlen
; j
++) {
7056 if (alpha
&& vector
[j
].u
.cmpobj
)
7057 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * s: destination buffer, always written as a NUL terminated string. The
 *    longest possible output is 20 digits plus the unit plus the
 *    terminator, so the buffer must hold at least 22 bytes.
 * n: amount of bytes.
 *
 * Values below 1024 are printed as an exact integer followed by 'B'.
 * Larger values are divided by the biggest power of 1024 not exceeding
 * them and printed with two decimals followed by K, M, G, T, P or E.
 * The units from T upwards make sure 's' is written for every possible
 * 'n': amounts of 1 TiB or more used to leave the buffer untouched. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
    const int nunits = (int)(sizeof(units) / sizeof(units[0]));
    unsigned long long scale = 1024; /* Size in bytes of the current unit */
    int u = 0;

    if (n < 1024) {
        /* Bytes */
        sprintf(s, "%lluB", n);
        return;
    }

    /* Move to the next unit while at least 1024 of the current one are
     * needed. 'scale' never exceeds 1024^6 (2^60), so it cannot overflow. */
    while (u + 1 < nunits && n / scale >= 1024) {
        scale *= 1024;
        u++;
    }
    sprintf(s, "%.2f%c", (double)n / (double)scale, units[u]);
}
7083 /* Create the string returned by the INFO command. This is decoupled
7084 * from the INFO command itself as we need to report the same information
7085 * on memory corruption problems.
 *
 * The result is an sds string made of "name:value\r\n" lines: a general
 * section first, then the replication fields when server.masterhost is
 * set, the virtual memory fields when server.vm_enabled is set, and one
 * "dbN:keys=...,expires=..." line for every database for which the keys
 * or expires dictionary is non empty.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (several format lines and
 * arguments among them); verify against the complete file. */
7086 static sds
genRedisInfoString(void) {
7088 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7092 bytesToHuman(hmem
,zmalloc_used_memory());
7093 info
= sdscatprintf(sdsempty(),
7094 "redis_version:%s\r\n"
7096 "multiplexing_api:%s\r\n"
7097 "process_id:%ld\r\n"
7098 "uptime_in_seconds:%ld\r\n"
7099 "uptime_in_days:%ld\r\n"
7100 "connected_clients:%d\r\n"
7101 "connected_slaves:%d\r\n"
7102 "blocked_clients:%d\r\n"
7103 "used_memory:%zu\r\n"
7104 "used_memory_human:%s\r\n"
7105 "changes_since_last_save:%lld\r\n"
7106 "bgsave_in_progress:%d\r\n"
7107 "last_save_time:%ld\r\n"
7108 "bgrewriteaof_in_progress:%d\r\n"
7109 "total_connections_received:%lld\r\n"
7110 "total_commands_processed:%lld\r\n"
7111 "expired_keys:%lld\r\n"
7112 "hash_max_zipmap_entries:%ld\r\n"
7113 "hash_max_zipmap_value:%ld\r\n"
7114 "pubsub_channels:%ld\r\n"
7115 "pubsub_patterns:%u\r\n"
7119 (sizeof(long) == 8) ? "64" : "32",
7124 listLength(server
.clients
)-listLength(server
.slaves
),
7125 listLength(server
.slaves
),
7126 server
.blpop_blocked_clients
,
7127 zmalloc_used_memory(),
7130 server
.bgsavechildpid
!= -1,
7132 server
.bgrewritechildpid
!= -1,
7133 server
.stat_numconnections
,
7134 server
.stat_numcommands
,
7135 server
.stat_expiredkeys
,
7136 server
.hash_max_zipmap_entries
,
7137 server
.hash_max_zipmap_value
,
7138 dictSize(server
.pubsub_channels
),
7139 listLength(server
.pubsub_patterns
),
7140 server
.vm_enabled
!= 0,
7141 server
.masterhost
== NULL
? "master" : "slave"
7143 if (server
.masterhost
) {
7144 info
= sdscatprintf(info
,
7145 "master_host:%s\r\n"
7146 "master_port:%d\r\n"
7147 "master_link_status:%s\r\n"
7148 "master_last_io_seconds_ago:%d\r\n"
7151 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7153 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7156 if (server
.vm_enabled
) {
7158 info
= sdscatprintf(info
,
7159 "vm_conf_max_memory:%llu\r\n"
7160 "vm_conf_page_size:%llu\r\n"
7161 "vm_conf_pages:%llu\r\n"
7162 "vm_stats_used_pages:%llu\r\n"
7163 "vm_stats_swapped_objects:%llu\r\n"
7164 "vm_stats_swappin_count:%llu\r\n"
7165 "vm_stats_swappout_count:%llu\r\n"
7166 "vm_stats_io_newjobs_len:%lu\r\n"
7167 "vm_stats_io_processing_len:%lu\r\n"
7168 "vm_stats_io_processed_len:%lu\r\n"
7169 "vm_stats_io_active_threads:%lu\r\n"
7170 "vm_stats_blocked_clients:%lu\r\n"
7171 ,(unsigned long long) server
.vm_max_memory
,
7172 (unsigned long long) server
.vm_page_size
,
7173 (unsigned long long) server
.vm_pages
,
7174 (unsigned long long) server
.vm_stats_used_pages
,
7175 (unsigned long long) server
.vm_stats_swapped_objects
,
7176 (unsigned long long) server
.vm_stats_swapins
,
7177 (unsigned long long) server
.vm_stats_swapouts
,
7178 (unsigned long) listLength(server
.io_newjobs
),
7179 (unsigned long) listLength(server
.io_processing
),
7180 (unsigned long) listLength(server
.io_processed
),
7181 (unsigned long) server
.io_active_threads
,
7182 (unsigned long) server
.vm_blocked_clients
7186 for (j
= 0; j
< server
.dbnum
; j
++) {
7187 long long keys
, vkeys
;
7189 keys
= dictSize(server
.db
[j
].dict
);
7190 vkeys
= dictSize(server
.db
[j
].expires
);
7191 if (keys
|| vkeys
) {
7192 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7199 static void infoCommand(redisClient
*c
) {
7200 sds info
= genRedisInfoString();
7201 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7202 (unsigned long)sdslen(info
)));
7203 addReplySds(c
,info
);
7204 addReply(c
,shared
.crlf
);
/* MONITOR: flag the client with REDIS_SLAVE|REDIS_MONITOR, append it to
 * server.monitors and reply with shared.ok. Clients that already have the
 * REDIS_SLAVE flag set are left untouched and get no reply.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete file. */
7207 static void monitorCommand(redisClient
*c
) {
7208 /* ignore MONITOR if already slave or in monitor mode */
7209 if (c
->flags
& REDIS_SLAVE
) return;
7211 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7213 listAddNodeTail(server
.monitors
,c
);
7214 addReply(c
,shared
.ok
);
7217 /* ================================= Expire ================================= */
/* Remove the expire entry of 'key' from db->expires. The outcome depends on
 * dictDelete() returning DICT_OK.
 * NOTE(review): the return statements are among the lines missing from this
 * listing (the embedded line numbers skip); verify the exact return values
 * against the complete file. */
7218 static int removeExpire(redisDb
*db
, robj
*key
) {
7219 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* Associate the expire time 'when' to 'key' by adding an entry to
 * db->expires; the time is stored directly in the entry value, cast to a
 * pointer. dictAdd() returning DICT_ERR is handled as a distinct case.
 * NOTE(review): the return statements are among the lines missing from this
 * listing (the embedded line numbers skip); verify the exact return values
 * against the complete file. */
7226 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7227 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7235 /* Return the expire time of the specified key, or -1 if no expire
7236 * is associated with this key (i.e. the key is non volatile) */
7237 static time_t getExpire(redisDb
*db
, robj
*key
) {
7240 /* No expire? return ASAP */
7241 if (dictSize(db
->expires
) == 0 ||
7242 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7244 return (time_t) dictGetEntryVal(de
);
7247 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7251 /* No expire? return ASAP */
7252 if (dictSize(db
->expires
) == 0 ||
7253 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7255 /* Lookup the expire */
7256 when
= (time_t) dictGetEntryVal(de
);
7257 if (time(NULL
) <= when
) return 0;
7259 /* Delete the key */
7260 dictDelete(db
->expires
,key
);
7261 server
.stat_expiredkeys
++;
7262 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Delete 'key' from 'db' if it has an entry in db->expires, regardless of
 * the time stored there. Returns 0 when there is no such entry; otherwise
 * server.stat_expiredkeys is incremented, the expire entry is removed and
 * the return value tells whether dictDelete() on db->dict returned DICT_OK.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete file. */
7265 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7268 /* No expire? return ASAP */
7269 if (dictSize(db
->expires
) == 0 ||
7270 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7272 /* Delete the key */
7274 server
.stat_expiredkeys
++;
7275 dictDelete(db
->expires
,key
);
7276 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* Shared implementation of EXPIRE and EXPIREAT.
 *
 * 'param' is parsed as a long with getLongFromObjectOrReply(); 'offset' is
 * 0 for EXPIRE and the current time for EXPIREAT (see the two callers).
 * The visible replies are shared.czero and shared.cone; one branch deletes
 * the key right away with deleteKey(), another one computes the absolute
 * expire time as time(NULL)+seconds and stores it with setExpire().
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (the use of 'offset' and the
 * conditions selecting each branch among them); verify against the
 * complete file. */
7279 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7283 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7287 de
= dictFind(c
->db
->dict
,key
);
7289 addReply(c
,shared
.czero
);
7293 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7294 addReply(c
, shared
.cone
);
7297 time_t when
= time(NULL
)+seconds
;
7298 if (setExpire(c
->db
,key
,when
)) {
7299 addReply(c
,shared
.cone
);
7302 addReply(c
,shared
.czero
);
7308 static void expireCommand(redisClient
*c
) {
7309 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7312 static void expireatCommand(redisClient
*c
) {
7313 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
/* TTL key: reply with an integer (":<ttl>\r\n"). The value is derived from
 * getExpire() minus the current time; a negative difference is reported as
 * -1.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (the declarations and presumably
 * the handling of keys without an expire); verify against the complete
 * file. */
7316 static void ttlCommand(redisClient
*c
) {
7320 expire
= getExpire(c
->db
,c
->argv
[1]);
7322 ttl
= (int) (expire
-time(NULL
));
7323 if (ttl
< 0) ttl
= -1;
7325 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7328 /* ================================ MULTI/EXEC ============================== */
7330 /* Client state initialization for MULTI/EXEC */
7331 static void initClientMultiState(redisClient
*c
) {
7332 c
->mstate
.commands
= NULL
;
7333 c
->mstate
.count
= 0;
7336 /* Release all the resources associated with MULTI/EXEC state.
 *
 * For each of the c->mstate.count queued commands the reference held on
 * every argument is dropped with decrRefCount(); the commands array itself
 * is released with zfree() at the end. The fields of c->mstate are not
 * reset by the visible code (see initClientMultiState()).
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete file. */
7337 static void freeClientMultiState(redisClient
*c
) {
7340 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7342 multiCmd
*mc
= c
->mstate
.commands
+j
;
7344 for (i
= 0; i
< mc
->argc
; i
++)
7345 decrRefCount(mc
->argv
[i
]);
7348 zfree(c
->mstate
.commands
);
7351 /* Add a new command into the MULTI commands queue.
 *
 * The commands array is grown by one slot with zrealloc(); the new slot
 * gets its own copy of the client's argument vector, and every argument
 * has its reference count incremented so that it outlives the current
 * request.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (presumably storing 'cmd' and the
 * argument count and incrementing c->mstate.count); verify against the
 * complete file. */
7352 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7356 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7357 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7358 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7361 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7362 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7363 for (j
= 0; j
< c
->argc
; j
++)
7364 incrRefCount(mc
->argv
[j
]);
7368 static void multiCommand(redisClient
*c
) {
7369 c
->flags
|= REDIS_MULTI
;
7370 addReply(c
,shared
.ok
);
/* DISCARD: drop the commands queued since MULTI. The MULTI state is freed
 * and re-initialized, the REDIS_MULTI flag is cleared and shared.ok is
 * sent. A client that is not flagged REDIS_MULTI gets the
 * "-ERR DISCARD without MULTI" error.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (presumably the early return
 * after the error reply); verify against the complete file. */
7373 static void discardCommand(redisClient
*c
) {
7374 if (!(c
->flags
& REDIS_MULTI
)) {
7375 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7379 freeClientMultiState(c
);
7380 initClientMultiState(c
);
7381 c
->flags
&= (~REDIS_MULTI
);
7382 addReply(c
,shared
.ok
);
7385 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7386 * implememntation for more information. */
7387 static void execCommandReplicateMulti(redisClient
*c
) {
7388 struct redisCommand
*cmd
;
7389 robj
*multistring
= createStringObject("MULTI",5);
7391 cmd
= lookupCommand("multi");
7392 if (server
.appendonly
)
7393 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7394 if (listLength(server
.slaves
))
7395 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7396 decrRefCount(multistring
);
/* EXEC: run all the commands queued since MULTI.
 *
 * A client not flagged REDIS_MULTI gets "-ERR EXEC without MULTI".
 * Otherwise a MULTI is propagated first (execCommandReplicateMulti()), a
 * multi bulk header with the number of queued commands is sent, and every
 * queued command is executed through call() after temporarily replacing
 * the client's argc/argv with the queued ones. The original argc/argv are
 * restored afterwards and the MULTI state is released and reset.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (declarations, the early return
 * and the statement following the final comment); verify against the
 * complete file. */
7399 static void execCommand(redisClient
*c
) {
7404 if (!(c
->flags
& REDIS_MULTI
)) {
7405 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7409 /* Replicate a MULTI request now that we are sure the block is executed.
7410 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7411 * both the AOF and the replication link will have the same consistency
7412 * and atomicity guarantees. */
7413 execCommandReplicateMulti(c
);
7415 /* Exec all the queued commands */
7416 orig_argv
= c
->argv
;
7417 orig_argc
= c
->argc
;
7418 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7419 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7420 c
->argc
= c
->mstate
.commands
[j
].argc
;
7421 c
->argv
= c
->mstate
.commands
[j
].argv
;
7422 call(c
,c
->mstate
.commands
[j
].cmd
);
7424 c
->argv
= orig_argv
;
7425 c
->argc
= orig_argc
;
7426 freeClientMultiState(c
);
7427 initClientMultiState(c
);
7428 c
->flags
&= (~REDIS_MULTI
);
7429 /* Make sure the EXEC command is always replicated / AOF, since we
7430 * always send the MULTI command (we can't know beforehand if the
7431 * next operations will contain at least a modification to the DB). */
7435 /* =========================== Blocking Operations ========================= */
7437 /* Currently Redis blocking operations support is limited to list POP ops,
7438 * so the current implementation is not fully generic, but it is also not
7439 * completely specific so it will not require a rewrite to support new
7440 * kinds of blocking operations in the future.
7442 * Still it's important to note that list blocking operations can be already
7443 * used as a notification mechanism in order to implement other blocking
7444 * operations at application level, so there must be a very strong evidence
7445 * of usefulness and generality before new blocking operations are implemented.
7447 * This is how the current blocking POP works, we use BLPOP as example:
7448 * - If the user calls BLPOP and the key exists and contains a non empty list
7449 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7450 * if there is no need to block.
7451 * - If instead BLPOP is called and the key does not exist or the list is
7452 * empty we need to block. In order to do so we remove the notification for
7453 * new data to read in the client socket (so that we'll not serve new
7454 * requests if the blocking request is not served). Also we put the client
7455 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7456 * blocking for these keys.
7457 * - If a PUSH operation against a key with blocked clients waiting is
7458 * performed, we serve the first in the list: basically instead of pushing
7459 * the new element inside the list we return it to the (first / oldest)
7460 * blocking client, unblock the client, and remove it from the list.
7462 * The above comment and the source code should be enough in order to understand
7463 * the implementation and modify / fix it later.
/* blockForKeys(): put client 'c' in blocking mode waiting for any of the
 * 'numkeys' keys in 'keys'; 'timeout' is stored in c->blockingto.
 *
 * Two mappings are maintained. The client remembers the keys it waits for
 * (c->blockingkeys, one reference taken per key), and c->db->blockingkeys
 * maps every key to the list of clients blocked on it; the client is
 * appended to the tail of that list. Finally the client is flagged
 * REDIS_BLOCKED and server.blpop_blocked_clients is incremented.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (the end of the comment below
 * among them); verify against the complete file. */
7466 /* Set a client in blocking mode for the specified key, with the specified
7468 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7473 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7474 c
->blockingkeysnum
= numkeys
;
7475 c
->blockingto
= timeout
;
7476 for (j
= 0; j
< numkeys
; j
++) {
7477 /* Add the key in the client structure, to map clients -> keys */
7478 c
->blockingkeys
[j
] = keys
[j
];
7479 incrRefCount(keys
[j
]);
7481 /* And in the other "side", to map keys -> clients */
7482 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7486 /* For every key we take a list of clients blocked for it */
7488 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7489 incrRefCount(keys
[j
]);
7490 assert(retval
== DICT_OK
);
7492 l
= dictGetEntryVal(de
);
7494 listAddNodeTail(l
,c
);
7496 /* Mark the client as a blocked client */
7497 c
->flags
|= REDIS_BLOCKED
;
7498 server
.blpop_blocked_clients
++;
7501 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 *
 * The client is removed from the list of waiters of every key it was
 * blocked on (an empty list is removed from c->db->blockingkeys), the
 * references held on the keys are released, c->blockingkeys is freed and
 * set to NULL, the REDIS_BLOCKED flag is cleared and
 * server.blpop_blocked_clients is decremented. Pending data in the query
 * buffer, if any, is then processed.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete file. */
7502 static void unblockClientWaitingData(redisClient
*c
) {
7507 assert(c
->blockingkeys
!= NULL
);
7508 /* The client may wait for multiple keys, so unblock it for every key. */
7509 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7510 /* Remove this client from the list of clients waiting for this key. */
7511 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7513 l
= dictGetEntryVal(de
);
7514 listDelNode(l
,listSearchKey(l
,c
));
7515 /* If the list is empty we need to remove it to avoid wasting memory */
7516 if (listLength(l
) == 0)
7517 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7518 decrRefCount(c
->blockingkeys
[j
]);
7520 /* Cleanup the client structure */
7521 zfree(c
->blockingkeys
);
7522 c
->blockingkeys
= NULL
;
7523 c
->flags
&= (~REDIS_BLOCKED
);
7524 server
.blpop_blocked_clients
--;
7525 /* We want to process data if there is some command waiting
7526 * in the input buffer. Note that this is safe even if
7527 * unblockClientWaitingData() gets called from freeClient() because
7528 * freeClient() will be smart enough to call this function
7529 * *after* c->querybuf was set to NULL. */
7530 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7533 /* This should be called from any function PUSHing into lists.
7534 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7535 * 'ele' is the element pushed.
7537 * If the function returns 0 there was no client waiting for a list push
7540 * If the function returns 1 there was a client waiting for a list push
7541 * against this key, the element was passed to this client thus it's not
7542 * needed to actually add it to the list and the caller should return asap.
 *
 * The receiver gets a two elements multi bulk reply (key name, element)
 * and is then unblocked with unblockClientWaitingData().
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function and of the comment above are missing from this listing; verify
 * against the complete file. */
7543 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7544 struct dictEntry
*de
;
7545 redisClient
*receiver
;
7549 de
= dictFind(c
->db
->blockingkeys
,key
);
7550 if (de
== NULL
) return 0;
7551 l
= dictGetEntryVal(de
);
7554 receiver
= ln
->value
;
7556 addReplySds(receiver
,sdsnew("*2\r\n"));
7557 addReplyBulk(receiver
,key
);
7558 addReplyBulk(receiver
,ele
);
7559 unblockClientWaitingData(receiver
);
7563 /* Blocking RPOP/LPOP
 *
 * Arguments: argv[1] .. argv[argc-2] are the keys, argv[argc-1] is the
 * timeout. 'where' is REDIS_HEAD for BLPOP and REDIS_TAIL for BRPOP and is
 * forwarded to popGenericCommand(). The keys are scanned in order: the
 * first one holding a non empty list is served immediately with a two
 * elements multi bulk reply (key name, popped value); a key holding a non
 * list value produces shared.wrongtypeerr. If no key can be served the
 * client is blocked with blockForKeys(); a timeout greater than zero is
 * converted to an absolute time first.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete
 * file. The timeout is parsed with strtol() without checking for invalid
 * input in the visible code. */
7564 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7569 for (j
= 1; j
< c
->argc
-1; j
++) {
7570 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7572 if (o
->type
!= REDIS_LIST
) {
7573 addReply(c
,shared
.wrongtypeerr
);
7576 list
*list
= o
->ptr
;
7577 if (listLength(list
) != 0) {
7578 /* If the list contains elements fall back to the usual
7579 * non-blocking POP operation */
7580 robj
*argv
[2], **orig_argv
;
7583 /* We need to alter the command arguments before to call
7584 * popGenericCommand() as the command takes a single key. */
7585 orig_argv
= c
->argv
;
7586 orig_argc
= c
->argc
;
7587 argv
[1] = c
->argv
[j
];
7591 /* Also the return value is different, we need to output
7592 * the multi bulk reply header and the key name. The
7593 * "real" command will add the last element (the value)
7594 * for us. If this sounds like a hack to you it's just
7595 * because it is... */
7596 addReplySds(c
,sdsnew("*2\r\n"));
7597 addReplyBulk(c
,argv
[1]);
7598 popGenericCommand(c
,where
);
7600 /* Fix the client structure with the original stuff */
7601 c
->argv
= orig_argv
;
7602 c
->argc
= orig_argc
;
7608 /* If the list is empty or the key does not exist we must block */
7609 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7610 if (timeout
> 0) timeout
+= time(NULL
);
7611 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7614 static void blpopCommand(redisClient
*c
) {
7615 blockingPopGenericCommand(c
,REDIS_HEAD
);
7618 static void brpopCommand(redisClient
*c
) {
7619 blockingPopGenericCommand(c
,REDIS_TAIL
);
7622 /* =============================== Replication ============================= */
/* Write 'size' bytes from 'ptr' to 'fd', waiting for the descriptor to
 * become writable with aeWait() in slices of 1000 milliseconds.
 * Returns -1 when write() fails; 'ret' is initialized to 'size', which is
 * presumably the value returned on success. The elapsed time is compared
 * against 'timeout' (in seconds, as it is compared with time() values).
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (the loop itself and the timeout
 * handling among them); verify against the complete file. */
7624 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7625 ssize_t nwritten
, ret
= size
;
7626 time_t start
= time(NULL
);
7630 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7631 nwritten
= write(fd
,ptr
,size
);
7632 if (nwritten
== -1) return -1;
7636 if ((time(NULL
)-start
) > timeout
) {
/* Read up to 'size' bytes from 'fd' into 'ptr', waiting for the descriptor
 * to become readable with aeWait() in slices of 1000 milliseconds.
 * Returns -1 when read() fails. The elapsed time is compared against
 * 'timeout' (in seconds, as it is compared with time() values).
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing (the loop itself, the accounting
 * of 'totread' and the timeout handling among them); verify against the
 * complete file. */
7644 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7645 ssize_t nread
, totread
= 0;
7646 time_t start
= time(NULL
);
7650 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7651 nread
= read(fd
,ptr
,size
);
7652 if (nread
== -1) return -1;
7657 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into 'ptr' one byte at a time using syncRead().
 * Returns -1 as soon as syncRead() fails. A '\r' stored right before the
 * current position is replaced by a NUL terminator.
 * NOTE(review): the embedded line numbers skip, i.e. most lines of this
 * function are missing from this listing (how 'size' bounds the read is
 * not visible); verify against the complete file. */
7665 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7672 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7675 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* SYNC: a slave asks for a full synchronization.
 *
 * The request is ignored for clients already flagged REDIS_SLAVE and
 * refused when the client has pending replies. Then:
 *  - if a background save is in progress and another slave is already in
 *    the REDIS_REPL_WAIT_BGSAVE_END state, its reply buffer is duplicated
 *    and this client joins the same BGSAVE;
 *  - if a background save is in progress but cannot be shared, the client
 *    waits for the next one (REDIS_REPL_WAIT_BGSAVE_START);
 *  - otherwise a new background save is started.
 * The client is finally flagged REDIS_SLAVE and added to server.slaves.
 * NOTE(review): the embedded line numbers skip, i.e. some lines of this
 * function are missing from this listing; verify against the complete file. */
7686 static void syncCommand(redisClient
*c
) {
7687 /* ignore SYNC if already slave or in monitor mode */
7688 if (c
->flags
& REDIS_SLAVE
) return;
7690 /* SYNC can't be issued when the server has pending data to send to
7691 * the client about already issued commands. We need a fresh reply
7692 * buffer registering the differences between the BGSAVE and the current
7693 * dataset, so that we can copy to other slaves if needed. */
7694 if (listLength(c
->reply
) != 0) {
7695 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7699 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7700 /* Here we need to check if there is a background saving operation
7701 * in progress, or if it is required to start one */
7702 if (server
.bgsavechildpid
!= -1) {
7703 /* Ok a background save is in progress. Let's check if it is a good
7704 * one for replication, i.e. if there is another slave that is
7705 * registering differences since the server forked to save */
7710 listRewind(server
.slaves
,&li
);
7711 while((ln
= listNext(&li
))) {
7713 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7716 /* Perfect, the server is already registering differences for
7717 * another slave. Set the right state, and copy the buffer. */
7718 listRelease(c
->reply
);
7719 c
->reply
= listDup(slave
->reply
);
7720 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7721 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7723 /* No way, we need to wait for the next BGSAVE in order to
7724 * register differences */
7725 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7726 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7729 /* Ok we don't have a BGSAVE in progress, let's start one */
7730 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7731 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7732 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7733 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7736 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7739 c
->flags
|= REDIS_SLAVE
;
7741 listAddNodeTail(server
.slaves
,c
);
7745 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7746 redisClient
*slave
= privdata
;
7748 REDIS_NOTUSED(mask
);
7749 char buf
[REDIS_IOBUF_LEN
];
7750 ssize_t nwritten
, buflen
;
7752 if (slave
->repldboff
== 0) {
7753 /* Write the bulk write count before transferring the DB. In theory here
7754 * we don't know how much room there is in the output buffer of the
7755 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7756 * operations) will never be smaller than the few bytes we need. */
7759 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7761 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7769 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7770 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7772 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7773 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7777 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7778 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7783 slave
->repldboff
+= nwritten
;
7784 if (slave
->repldboff
== slave
->repldbsize
) {
7785 close(slave
->repldbfd
);
7786 slave
->repldbfd
= -1;
7787 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7788 slave
->replstate
= REDIS_REPL_ONLINE
;
7789 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7790 sendReplyToClient
, slave
) == AE_ERR
) {
7794 addReplySds(slave
,sdsempty());
7795 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7799 /* This function is called at the end of every background saving.
7800 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7801 * otherwise REDIS_ERR is passed to the function.
7803 * The goal of this function is to handle slaves waiting for a successful
7804 * background saving in order to perform non-blocking synchronization. */
7805 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7807 int startbgsave
= 0;
7810 listRewind(server
.slaves
,&li
);
7811 while((ln
= listNext(&li
))) {
7812 redisClient
*slave
= ln
->value
;
7814 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7816 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7817 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7818 struct redis_stat buf
;
7820 if (bgsaveerr
!= REDIS_OK
) {
7822 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7825 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7826 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7828 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7831 slave
->repldboff
= 0;
7832 slave
->repldbsize
= buf
.st_size
;
7833 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7834 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7835 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7842 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7845 listRewind(server
.slaves
,&li
);
7846 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7847 while((ln
= listNext(&li
))) {
7848 redisClient
*slave
= ln
->value
;
7850 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7857 static int syncWithMaster(void) {
7858 char buf
[1024], tmpfile
[256], authcmd
[1024];
7860 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7861 int dfd
, maxtries
= 5;
7864 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7869 /* AUTH with the master if required. */
7870 if(server
.masterauth
) {
7871 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7872 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7874 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7878 /* Read the AUTH result. */
7879 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7881 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7885 if (buf
[0] != '+') {
7887 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7892 /* Issue the SYNC command */
7893 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7895 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7899 /* Read the bulk write count */
7900 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7902 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7906 if (buf
[0] != '$') {
7908 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7911 dumpsize
= strtol(buf
+1,NULL
,10);
7912 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7913 /* Read the bulk write data on a temp file */
7915 snprintf(tmpfile
,256,
7916 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7917 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7918 if (dfd
!= -1) break;
7923 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7927 int nread
, nwritten
;
7929 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7931 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7937 nwritten
= write(dfd
,buf
,nread
);
7938 if (nwritten
== -1) {
7939 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7947 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7948 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7954 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7955 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7959 server
.master
= createClient(fd
);
7960 server
.master
->flags
|= REDIS_MASTER
;
7961 server
.master
->authenticated
= 1;
7962 server
.replstate
= REDIS_REPL_CONNECTED
;
7966 static void slaveofCommand(redisClient
*c
) {
7967 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7968 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7969 if (server
.masterhost
) {
7970 sdsfree(server
.masterhost
);
7971 server
.masterhost
= NULL
;
7972 if (server
.master
) freeClient(server
.master
);
7973 server
.replstate
= REDIS_REPL_NONE
;
7974 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7977 sdsfree(server
.masterhost
);
7978 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7979 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7980 if (server
.master
) freeClient(server
.master
);
7981 server
.replstate
= REDIS_REPL_CONNECT
;
7982 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7983 server
.masterhost
, server
.masterport
);
7985 addReply(c
,shared
.ok
);
7988 /* ============================ Maxmemory directive ======================== */
7990 /* Try to free one object from the pre-allocated objects free list.
7991 * This is useful under low mem conditions as by default we take 1 million
7992 * free objects allocated. On success REDIS_OK is returned, otherwise
7994 static int tryFreeOneObjectFromFreelist(void) {
7997 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7998 if (listLength(server
.objfreelist
)) {
7999 listNode
*head
= listFirst(server
.objfreelist
);
8000 o
= listNodeValue(head
);
8001 listDelNode(server
.objfreelist
,head
);
8002 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8006 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8011 /* This function gets called when 'maxmemory' is set on the config file to limit
8012 * the max memory used by the server, and we are out of memory.
8013 * This function will try to, in order:
8015 * - Free objects from the free list
8016 * - Try to remove keys with an EXPIRE set
8018 * If it is not possible to free enough memory to reach used-memory < maxmemory,
8019 * the server will start refusing commands that will enlarge even more the
8022 static void freeMemoryIfNeeded(void) {
8023 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8024 int j
, k
, freed
= 0;
8026 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8027 for (j
= 0; j
< server
.dbnum
; j
++) {
8029 robj
*minkey
= NULL
;
8030 struct dictEntry
*de
;
8032 if (dictSize(server
.db
[j
].expires
)) {
8034 /* From a sample of three keys drop the one nearest to
8035 * the natural expire */
8036 for (k
= 0; k
< 3; k
++) {
8039 de
= dictGetRandomKey(server
.db
[j
].expires
);
8040 t
= (time_t) dictGetEntryVal(de
);
8041 if (minttl
== -1 || t
< minttl
) {
8042 minkey
= dictGetEntryKey(de
);
8046 deleteKey(server
.db
+j
,minkey
);
8049 if (!freed
) return; /* nothing to free... */
8053 /* ============================== Append Only file ========================== */
8055 /* Write the append only file buffer on disk.
8057 * Since we are required to write the AOF before replying to the client,
8058 * and the only way the client socket can get a write is when entering
8059 * the event loop, we accumulate all the AOF writes in a memory
8060 * buffer and write it on disk using this function just before entering
8061 * the event loop again. */
8062 static void flushAppendOnlyFile(void) {
8066 if (sdslen(server
.aofbuf
) == 0) return;
8068 /* We want to perform a single write. This should be guaranteed atomic
8069 * at least if the filesystem we are writing is a real physical one.
8070 * While this will save us against the server being killed I don't think
8071 * there is much to do about the whole server stopping for power problems
8073 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8074 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8075 /* Oops, we are in trouble. The best thing to do for now is
8076 * aborting instead of giving the illusion that everything is
8077 * working as expected. */
8078 if (nwritten
== -1) {
8079 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8081 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8085 sdsfree(server
.aofbuf
);
8086 server
.aofbuf
= sdsempty();
8088 /* Fsync if needed */
8090 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8091 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8092 now
-server
.lastfsync
> 1))
8094 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8095 * flushing metadata. */
8096 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8097 server
.lastfsync
= now
;
8101 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8102 sds buf
= sdsempty();
8106 /* The DB this command was targeting is not the same as the last command
8107 * we appended. A SELECT command needs to be issued first. */
8108 if (dictid
!= server
.appendseldb
) {
8111 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8112 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8113 (unsigned long)strlen(seldb
),seldb
);
8114 server
.appendseldb
= dictid
;
8117 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
8118 * EXPIREs into EXPIREAT calls */
8119 if (cmd
->proc
== expireCommand
) {
8122 tmpargv
[0] = createStringObject("EXPIREAT",8);
8123 tmpargv
[1] = argv
[1];
8124 incrRefCount(argv
[1]);
8125 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
8126 tmpargv
[2] = createObject(REDIS_STRING
,
8127 sdscatprintf(sdsempty(),"%ld",when
));
8131 /* Append the actual command */
8132 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8133 for (j
= 0; j
< argc
; j
++) {
8136 o
= getDecodedObject(o
);
8137 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8138 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8139 buf
= sdscatlen(buf
,"\r\n",2);
8143 /* Free the objects from the modified argv for EXPIREAT */
8144 if (cmd
->proc
== expireCommand
) {
8145 for (j
= 0; j
< 3; j
++)
8146 decrRefCount(argv
[j
]);
8149 /* Append to the AOF buffer. This will be flushed on disk just before
8150 * re-entering the event loop, so before the client will get a
8151 * positive reply about the operation performed. */
8152 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8154 /* If a background append only file rewriting is in progress we want to
8155 * accumulate the differences between the child DB and the current one
8156 * in a buffer, so that when the child process will do its work we
8157 * can append the differences to the new append only file. */
8158 if (server
.bgrewritechildpid
!= -1)
8159 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8164 /* In Redis commands are always executed in the context of a client, so in
8165 * order to load the append only file we need to create a fake client. */
8166 static struct redisClient
*createFakeClient(void) {
8167 struct redisClient
*c
= zmalloc(sizeof(*c
));
8171 c
->querybuf
= sdsempty();
8175 /* We set the fake client as a slave waiting for the synchronization
8176 * so that Redis will not try to send replies to this client. */
8177 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8178 c
->reply
= listCreate();
8179 listSetFreeMethod(c
->reply
,decrRefCount
);
8180 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8181 initClientMultiState(c
);
8185 static void freeFakeClient(struct redisClient
*c
) {
8186 sdsfree(c
->querybuf
);
8187 listRelease(c
->reply
);
8188 freeClientMultiState(c
);
8192 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
8193 * error (the append only file is zero-length) REDIS_ERR is returned. On
8194 * fatal error an error message is logged and the program exits. */
8195 int loadAppendOnlyFile(char *filename
) {
8196 struct redisClient
*fakeClient
;
8197 FILE *fp
= fopen(filename
,"r");
8198 struct redis_stat sb
;
8199 unsigned long long loadedkeys
= 0;
8200 int appendonly
= server
.appendonly
;
8202 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8206 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8210 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8211 * to the same file we're about to read. */
8212 server
.appendonly
= 0;
8214 fakeClient
= createFakeClient();
8221 struct redisCommand
*cmd
;
8223 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8229 if (buf
[0] != '*') goto fmterr
;
8231 argv
= zmalloc(sizeof(robj
*)*argc
);
8232 for (j
= 0; j
< argc
; j
++) {
8233 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8234 if (buf
[0] != '$') goto fmterr
;
8235 len
= strtol(buf
+1,NULL
,10);
8236 argsds
= sdsnewlen(NULL
,len
);
8237 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8238 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8239 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8242 /* Command lookup */
8243 cmd
= lookupCommand(argv
[0]->ptr
);
8245 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8248 /* Try object encoding */
8249 if (cmd
->flags
& REDIS_CMD_BULK
)
8250 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8251 /* Run the command in the context of a fake client */
8252 fakeClient
->argc
= argc
;
8253 fakeClient
->argv
= argv
;
8254 cmd
->proc(fakeClient
);
8255 /* Discard the reply objects list from the fake client */
8256 while(listLength(fakeClient
->reply
))
8257 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8258 /* Clean up, ready for the next command */
8259 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8261 /* Handle swapping while loading big datasets when VM is on */
8263 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8264 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8265 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8270 /* This point can only be reached when EOF is reached without errors.
8271 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8272 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8275 freeFakeClient(fakeClient
);
8276 server
.appendonly
= appendonly
;
8281 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8283 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8287 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8291 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8292 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8296 /* Avoid the incr/decr ref count business if possible to help
8297 * copy-on-write (we are often in a child process when this function
8299 * Also makes sure that key objects don't get incrRefCount-ed when VM
8301 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8302 obj
= getDecodedObject(obj
);
8305 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8306 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8307 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8309 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8310 if (decrrc
) decrRefCount(obj
);
8313 if (decrrc
) decrRefCount(obj
);
8317 /* Write binary-safe string into a file in the bulkformat
8318 * $<count>\r\n<payload>\r\n */
8319 static int fwriteBulkString(FILE *fp
, char *s
, unsigned long len
) {
8322 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(unsigned long)len
);
8323 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8324 if (len
&& fwrite(s
,len
,1,fp
) == 0) return 0;
8325 if (fwrite("\r\n",2,1,fp
) == 0) return 0;
8329 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
8330 static int fwriteBulkDouble(FILE *fp
, double d
) {
8331 char buf
[128], dbuf
[128];
8333 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
8334 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
8335 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8336 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
8340 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
8341 static int fwriteBulkLong(FILE *fp
, long l
) {
8342 char buf
[128], lbuf
[128];
8344 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
8345 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
8346 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8347 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
8351 /* Write a sequence of commands able to fully rebuild the dataset into
8352 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8353 static int rewriteAppendOnlyFile(char *filename
) {
8354 dictIterator
*di
= NULL
;
8359 time_t now
= time(NULL
);
8361 /* Note that we have to use a different temp name here compared to the
8362 * one used by rewriteAppendOnlyFileBackground() function. */
8363 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8364 fp
= fopen(tmpfile
,"w");
8366 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8369 for (j
= 0; j
< server
.dbnum
; j
++) {
8370 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8371 redisDb
*db
= server
.db
+j
;
8373 if (dictSize(d
) == 0) continue;
8374 di
= dictGetIterator(d
);
8380 /* SELECT the new DB */
8381 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8382 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8384 /* Iterate this DB writing every entry */
8385 while((de
= dictNext(di
)) != NULL
) {
8390 key
= dictGetEntryKey(de
);
8391 /* If the value for this key is swapped, load a preview in memory.
8392 * We use a "swapped" flag to remember if we need to free the
8393 * value object instead of just incrementing the ref count anyway
8394 * in order to avoid copy-on-write of pages if we are forked() */
8395 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8396 key
->storage
== REDIS_VM_SWAPPING
) {
8397 o
= dictGetEntryVal(de
);
8400 o
= vmPreviewObject(key
);
8403 expiretime
= getExpire(db
,key
);
8405 /* Save the key and associated value */
8406 if (o
->type
== REDIS_STRING
) {
8407 /* Emit a SET command */
8408 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8409 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8411 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8412 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8413 } else if (o
->type
== REDIS_LIST
) {
8414 /* Emit the RPUSHes needed to rebuild the list */
8415 list
*list
= o
->ptr
;
8419 listRewind(list
,&li
);
8420 while((ln
= listNext(&li
))) {
8421 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8422 robj
*eleobj
= listNodeValue(ln
);
8424 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8425 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8426 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8428 } else if (o
->type
== REDIS_SET
) {
8429 /* Emit the SADDs needed to rebuild the set */
8431 dictIterator
*di
= dictGetIterator(set
);
8434 while((de
= dictNext(di
)) != NULL
) {
8435 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8436 robj
*eleobj
= dictGetEntryKey(de
);
8438 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8439 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8440 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8442 dictReleaseIterator(di
);
8443 } else if (o
->type
== REDIS_ZSET
) {
8444 /* Emit the ZADDs needed to rebuild the sorted set */
8446 dictIterator
*di
= dictGetIterator(zs
->dict
);
8449 while((de
= dictNext(di
)) != NULL
) {
8450 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8451 robj
*eleobj
= dictGetEntryKey(de
);
8452 double *score
= dictGetEntryVal(de
);
8454 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8455 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8456 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8457 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8459 dictReleaseIterator(di
);
8460 } else if (o
->type
== REDIS_HASH
) {
8461 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8463 /* Emit the HSETs needed to rebuild the hash */
8464 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8465 unsigned char *p
= zipmapRewind(o
->ptr
);
8466 unsigned char *field
, *val
;
8467 unsigned int flen
, vlen
;
8469 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8470 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8471 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8472 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8474 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8478 dictIterator
*di
= dictGetIterator(o
->ptr
);
8481 while((de
= dictNext(di
)) != NULL
) {
8482 robj
*field
= dictGetEntryKey(de
);
8483 robj
*val
= dictGetEntryVal(de
);
8485 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8486 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8487 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8488 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8490 dictReleaseIterator(di
);
8493 redisPanic("Unknown object type");
8495 /* Save the expire time */
8496 if (expiretime
!= -1) {
8497 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8498 /* If this key is already expired skip it */
8499 if (expiretime
< now
) continue;
8500 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8501 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8502 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8504 if (swapped
) decrRefCount(o
);
8506 dictReleaseIterator(di
);
8509 /* Make sure data will not remain on the OS's output buffers */
8514 /* Use RENAME to make sure the DB file is changed atomically only
8515 * if the generated DB file is ok. */
8516 if (rename(tmpfile
,filename
) == -1) {
8517 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8521 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8527 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8528 if (di
) dictReleaseIterator(di
);
8532 /* This is how rewriting of the append only file in background works:
8534 * 1) The user calls BGREWRITEAOF
8535 * 2) Redis calls this function, that forks():
8536 * 2a) the child rewrites the append only file in a temp file.
8537 * 2b) the parent accumulates differences in server.bgrewritebuf.
8538 * 3) When the child finishes '2a' it exits.
8539 * 4) The parent will trap the exit code, if it's OK, will append the
8540 * data accumulated into server.bgrewritebuf into the temp file, and
8541 * finally will rename(2) the temp file in the actual file name.
8542 * Then the new file is reopened as the new append only file. Profit!
8544 static int rewriteAppendOnlyFileBackground(void) {
8547 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8548 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8549 if ((childpid
= fork()) == 0) {
8553 if (server
.vm_enabled
) vmReopenSwapFile();
8555 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8556 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8563 if (childpid
== -1) {
8564 redisLog(REDIS_WARNING
,
8565 "Can't rewrite append only file in background: fork: %s",
8569 redisLog(REDIS_NOTICE
,
8570 "Background append only file rewriting started by pid %d",childpid
);
8571 server
.bgrewritechildpid
= childpid
;
8572 updateDictResizePolicy();
8573 /* We set appendseldb to -1 in order to force the next call to the
8574 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8575 * accumulated by the parent into server.bgrewritebuf will start
8576 * with a SELECT statement and it will be safe to merge. */
8577 server
.appendseldb
= -1;
8580 return REDIS_OK
; /* unreached */
8583 static void bgrewriteaofCommand(redisClient
*c
) {
8584 if (server
.bgrewritechildpid
!= -1) {
8585 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8588 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8589 char *status
= "+Background append only file rewriting started\r\n";
8590 addReplySds(c
,sdsnew(status
));
8592 addReply(c
,shared
.err
);
8596 static void aofRemoveTempFile(pid_t childpid
) {
8599 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8603 /* Virtual Memory is composed mainly of two subsystems:
8604 * - Blocking Virtual Memory
8605 * - Threaded Virtual Memory I/O
8606 * The two parts are not fully decoupled, but functions are split among two
8607 * different sections of the source code (delimited by comments) in order to
8608 * make more clear what functionality is about the blocking VM and what about
8609 * the threaded (not blocking) VM.
8613 * Redis VM is a blocking VM (one that blocks reading swapped values from
8614 * disk into memory when a value swapped out is needed in memory) that is made
8615 * unblocking by trying to examine the command argument vector in order to
8616 * load in background values that will likely be needed in order to exec
8617 * the command. The command is executed only once all the relevant keys
8618 * are loaded into memory.
8620 * This basically is almost as simple as a blocking VM, but almost as parallel
8621 * as a fully non-blocking VM.
8624 /* =================== Virtual Memory - Blocking Side ====================== */
8626 static void vmInit(void) {
8632 if (server
.vm_max_threads
!= 0)
8633 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8635 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8636 /* Try to open the old swap file, otherwise create it */
8637 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8638 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8640 if (server
.vm_fp
== NULL
) {
8641 redisLog(REDIS_WARNING
,
8642 "Can't open the swap file: %s. Exiting.",
8646 server
.vm_fd
= fileno(server
.vm_fp
);
8647 /* Lock the swap file for writing, this is useful in order to prevent
8648 * another instance from using the same swap file due to a config error. */
8649 fl
.l_type
= F_WRLCK
;
8650 fl
.l_whence
= SEEK_SET
;
8651 fl
.l_start
= fl
.l_len
= 0;
8652 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8653 redisLog(REDIS_WARNING
,
8654 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8658 server
.vm_next_page
= 0;
8659 server
.vm_near_pages
= 0;
8660 server
.vm_stats_used_pages
= 0;
8661 server
.vm_stats_swapped_objects
= 0;
8662 server
.vm_stats_swapouts
= 0;
8663 server
.vm_stats_swapins
= 0;
8664 totsize
= server
.vm_pages
*server
.vm_page_size
;
8665 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8666 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8667 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8671 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8673 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8674 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8675 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8676 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8678 /* Initialize threaded I/O (used by Virtual Memory) */
8679 server
.io_newjobs
= listCreate();
8680 server
.io_processing
= listCreate();
8681 server
.io_processed
= listCreate();
8682 server
.io_ready_clients
= listCreate();
8683 pthread_mutex_init(&server
.io_mutex
,NULL
);
8684 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8685 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8686 server
.io_active_threads
= 0;
8687 if (pipe(pipefds
) == -1) {
8688 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8692 server
.io_ready_pipe_read
= pipefds
[0];
8693 server
.io_ready_pipe_write
= pipefds
[1];
8694 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8695 /* LZF requires a lot of stack */
8696 pthread_attr_init(&server
.io_threads_attr
);
8697 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8698 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8699 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8700 /* Listen for events in the threaded I/O pipe */
8701 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8702 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8703 oom("creating file event");
8706 /* Mark the page as used */
8707 static void vmMarkPageUsed(off_t page
) {
8708 off_t byte
= page
/8;
8710 redisAssert(vmFreePage(page
) == 1);
8711 server
.vm_bitmap
[byte
] |= 1<<bit
;
8714 /* Mark N contiguous pages as used, with 'page' being the first. */
8715 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8718 for (j
= 0; j
< count
; j
++)
8719 vmMarkPageUsed(page
+j
);
8720 server
.vm_stats_used_pages
+= count
;
8721 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8722 (long long)count
, (long long)page
);
8725 /* Mark the page as free */
8726 static void vmMarkPageFree(off_t page
) {
8727 off_t byte
= page
/8;
8729 redisAssert(vmFreePage(page
) == 0);
8730 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8733 /* Mark N contiguous pages as free, with 'page' being the first. */
8734 static void vmMarkPagesFree(off_t page
, off_t count
) {
8737 for (j
= 0; j
< count
; j
++)
8738 vmMarkPageFree(page
+j
);
8739 server
.vm_stats_used_pages
-= count
;
8740 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8741 (long long)count
, (long long)page
);
8744 /* Test if the page is free */
8745 static int vmFreePage(off_t page
) {
8746 off_t byte
= page
/8;
8748 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8751 /* Find N contiguous free pages storing the first page of the cluster in *first.
8752 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8753 * REDIS_ERR is returned.
8755 * This function uses a simple algorithm: we try to allocate
8756 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8757 * again from the start of the swap file searching for free spaces.
8759 * If it looks pretty clear that there are no free pages near our offset
8760 * we try to find less populated places doing a forward jump of
8761 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8762 * without hurry, and then we jump again and so forth...
8764 * This function can be improved using a free list to avoid to guess
8765 * too much, since we could collect data about freed pages.
8767 * note: I implemented this function just after watching an episode of
8768 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8770 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8771 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8773 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8774 server
.vm_near_pages
= 0;
8775 server
.vm_next_page
= 0;
8777 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8778 base
= server
.vm_next_page
;
8780 while(offset
< server
.vm_pages
) {
8781 off_t
this = base
+offset
;
8783 /* If we overflow, restart from page zero */
8784 if (this >= server
.vm_pages
) {
8785 this -= server
.vm_pages
;
8787 /* Just overflowed, what we found on tail is no longer
8788 * interesting, as it's no longer contiguous. */
8792 if (vmFreePage(this)) {
8793 /* This is a free page */
8795 /* Already got N free pages? Return to the caller, with success */
8797 *first
= this-(n
-1);
8798 server
.vm_next_page
= this+1;
8799 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8803 /* The current one is not a free page */
8807 /* Fast-forward if the current page is not free and we already
8808 * searched enough near this place. */
8810 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8811 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8813 /* Note that even if we rewind after the jump, we are don't need
8814 * to make sure numfree is set to zero as we only jump *if* it
8815 * is set to zero. */
8817 /* Otherwise just check the next page */
8824 /* Write the specified object at the specified page of the swap file */
8825 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8826 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8827 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8828 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8829 redisLog(REDIS_WARNING
,
8830 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8834 rdbSaveObject(server
.vm_fp
,o
);
8835 fflush(server
.vm_fp
);
8836 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8840 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8841 * needed to later retrieve the object into the key object.
8842 * If we can't find enough contiguous empty pages to swap the object on disk
8843 * REDIS_ERR is returned. */
8844 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8845 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8848 assert(key
->storage
== REDIS_VM_MEMORY
);
8849 assert(key
->refcount
== 1);
8850 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8851 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8852 key
->vm
.page
= page
;
8853 key
->vm
.usedpages
= pages
;
8854 key
->storage
= REDIS_VM_SWAPPED
;
8855 key
->vtype
= val
->type
;
8856 decrRefCount(val
); /* Deallocate the object from memory. */
8857 vmMarkPagesUsed(page
,pages
);
8858 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8859 (unsigned char*) key
->ptr
,
8860 (unsigned long long) page
, (unsigned long long) pages
);
8861 server
.vm_stats_swapped_objects
++;
8862 server
.vm_stats_swapouts
++;
8866 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8869 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8870 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8871 redisLog(REDIS_WARNING
,
8872 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8876 o
= rdbLoadObject(type
,server
.vm_fp
);
8878 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8881 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8885 /* Load the value object relative to the 'key' object from swap to memory.
8886 * The newly allocated object is returned.
8888 * If preview is true the unserialized object is returned to the caller but
8889 * no changes are made to the key object, nor the pages are marked as freed */
8890 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8893 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8894 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8896 key
->storage
= REDIS_VM_MEMORY
;
8897 key
->vm
.atime
= server
.unixtime
;
8898 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8899 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8900 (unsigned char*) key
->ptr
);
8901 server
.vm_stats_swapped_objects
--;
8903 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8904 (unsigned char*) key
->ptr
);
8906 server
.vm_stats_swapins
++;
8910 /* Plain object loading, from swap to memory */
8911 static robj
*vmLoadObject(robj
*key
) {
8912 /* If we are loading the object in background, stop it, we
8913 * need to load this object synchronously ASAP. */
8914 if (key
->storage
== REDIS_VM_LOADING
)
8915 vmCancelThreadedIOJob(key
);
8916 return vmGenericLoadObject(key
,0);
8919 /* Just load the value on disk, without to modify the key.
8920 * This is useful when we want to perform some operation on the value
8921 * without to really bring it from swap to memory, like while saving the
8922 * dataset or rewriting the append only log. */
8923 static robj
*vmPreviewObject(robj
*key
) {
8924 return vmGenericLoadObject(key
,1);
8927 /* How a good candidate is this object for swapping?
8928 * The better candidate it is, the greater the returned value.
8930 * Currently we try to perform a fast estimation of the object size in
8931 * memory, and combine it with aging informations.
8933 * Basically swappability = idle-time * log(estimated size)
8935 * Bigger objects are preferred over smaller objects, but not
8936 * proportionally, this is why we use the logarithm. This algorithm is
8937 * just a first try and will probably be tuned later. */
8938 static double computeObjectSwappability(robj
*o
) {
8939 time_t age
= server
.unixtime
- o
->vm
.atime
;
8943 struct dictEntry
*de
;
8946 if (age
<= 0) return 0;
8949 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8952 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8957 listNode
*ln
= listFirst(l
);
8959 asize
= sizeof(list
);
8961 robj
*ele
= ln
->value
;
8964 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8965 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8967 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8972 z
= (o
->type
== REDIS_ZSET
);
8973 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8975 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8976 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8981 de
= dictGetRandomKey(d
);
8982 ele
= dictGetEntryKey(de
);
8983 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8984 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8986 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8987 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8991 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8992 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8993 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8994 unsigned int klen
, vlen
;
8995 unsigned char *key
, *val
;
8997 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9001 asize
= len
*(klen
+vlen
+3);
9002 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9004 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9009 de
= dictGetRandomKey(d
);
9010 ele
= dictGetEntryKey(de
);
9011 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9012 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9014 ele
= dictGetEntryVal(de
);
9015 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9016 (sizeof(*o
)+sdslen(ele
->ptr
)) :
9018 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9023 return (double)age
*log(1+asize
);
9026 /* Try to swap an object that's a good candidate for swapping.
9027 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9028 * to swap any object at all.
9030 * If 'usethreaded' is true, Redis will try to swap the object in background
9031 * using I/O threads. */
9032 static int vmSwapOneObject(int usethreads
) {
9034 struct dictEntry
*best
= NULL
;
9035 double best_swappability
= 0;
9036 redisDb
*best_db
= NULL
;
9039 for (j
= 0; j
< server
.dbnum
; j
++) {
9040 redisDb
*db
= server
.db
+j
;
9041 /* Why maxtries is set to 100?
9042 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9043 * are swappable objects */
9046 if (dictSize(db
->dict
) == 0) continue;
9047 for (i
= 0; i
< 5; i
++) {
9049 double swappability
;
9051 if (maxtries
) maxtries
--;
9052 de
= dictGetRandomKey(db
->dict
);
9053 key
= dictGetEntryKey(de
);
9054 val
= dictGetEntryVal(de
);
9055 /* Only swap objects that are currently in memory.
9057 * Also don't swap shared objects if threaded VM is on, as we
9058 * try to ensure that the main thread does not touch the
9059 * object while the I/O thread is using it, but we can't
9060 * control other keys without adding additional mutex. */
9061 if (key
->storage
!= REDIS_VM_MEMORY
||
9062 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9063 if (maxtries
) i
--; /* don't count this try */
9066 swappability
= computeObjectSwappability(val
);
9067 if (!best
|| swappability
> best_swappability
) {
9069 best_swappability
= swappability
;
9074 if (best
== NULL
) return REDIS_ERR
;
9075 key
= dictGetEntryKey(best
);
9076 val
= dictGetEntryVal(best
);
9078 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9079 key
->ptr
, best_swappability
);
9081 /* Unshare the key if needed */
9082 if (key
->refcount
> 1) {
9083 robj
*newkey
= dupStringObject(key
);
9085 key
= dictGetEntryKey(best
) = newkey
;
9089 vmSwapObjectThreaded(key
,val
,best_db
);
9092 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9093 dictGetEntryVal(best
) = NULL
;
/* Swap one object out synchronously. See vmSwapOneObject() for the return
 * value. Declared with a (void) parameter list so that calls passing
 * arguments are diagnosed by the compiler. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads. See vmSwapOneObject() for the
 * return value. Declared with a (void) parameter list so that calls
 * passing arguments are diagnosed by the compiler. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9109 /* Return true if it's safe to swap out objects in a given moment.
9110 * Basically we don't want to swap objects out while there is a BGSAVE
9111 * or a BGAEOREWRITE running in backgroud. */
9112 static int vmCanSwapOut(void) {
9113 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9116 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9117 * and was deleted. Otherwise 0 is returned. */
9118 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9122 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9123 foundkey
= dictGetEntryKey(de
);
9124 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9129 /* =================== Virtual Memory - Threaded I/O ======================= */
9131 static void freeIOJob(iojob
*j
) {
9132 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9133 j
->type
== REDIS_IOJOB_DO_SWAP
||
9134 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9135 decrRefCount(j
->val
);
9136 /* We don't decrRefCount the j->key field as we did't incremented
9137 * the count creating IO Jobs. This is because the key field here is
9138 * just used as an indentifier and if a key is removed the Job should
9139 * never be touched again. */
9143 /* Every time a thread finished a Job, it writes a byte into the write side
9144 * of an unix pipe in order to "awake" the main thread, and this function
9146 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9150 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9152 REDIS_NOTUSED(mask
);
9153 REDIS_NOTUSED(privdata
);
9155 /* For every byte we read in the read side of the pipe, there is one
9156 * I/O job completed to process. */
9157 while((retval
= read(fd
,buf
,1)) == 1) {
9161 struct dictEntry
*de
;
9163 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9165 /* Get the processed element (the oldest one) */
9167 assert(listLength(server
.io_processed
) != 0);
9168 if (toprocess
== -1) {
9169 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9170 if (toprocess
<= 0) toprocess
= 1;
9172 ln
= listFirst(server
.io_processed
);
9174 listDelNode(server
.io_processed
,ln
);
9176 /* If this job is marked as canceled, just ignore it */
9181 /* Post process it in the main thread, as there are things we
9182 * can do just here to avoid race conditions and/or invasive locks */
9183 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9184 de
= dictFind(j
->db
->dict
,j
->key
);
9186 key
= dictGetEntryKey(de
);
9187 if (j
->type
== REDIS_IOJOB_LOAD
) {
9190 /* Key loaded, bring it at home */
9191 key
->storage
= REDIS_VM_MEMORY
;
9192 key
->vm
.atime
= server
.unixtime
;
9193 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9194 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9195 (unsigned char*) key
->ptr
);
9196 server
.vm_stats_swapped_objects
--;
9197 server
.vm_stats_swapins
++;
9198 dictGetEntryVal(de
) = j
->val
;
9199 incrRefCount(j
->val
);
9202 /* Handle clients waiting for this key to be loaded. */
9203 handleClientsBlockedOnSwappedKey(db
,key
);
9204 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9205 /* Now we know the amount of pages required to swap this object.
9206 * Let's find some space for it, and queue this task again
9207 * rebranded as REDIS_IOJOB_DO_SWAP. */
9208 if (!vmCanSwapOut() ||
9209 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9211 /* Ooops... no space or we can't swap as there is
9212 * a fork()ed Redis trying to save stuff on disk. */
9214 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9216 /* Note that we need to mark this pages as used now,
9217 * if the job will be canceled, we'll mark them as freed
9219 vmMarkPagesUsed(j
->page
,j
->pages
);
9220 j
->type
= REDIS_IOJOB_DO_SWAP
;
9225 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9228 /* Key swapped. We can finally free some memory. */
9229 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9230 printf("key->storage: %d\n",key
->storage
);
9231 printf("key->name: %s\n",(char*)key
->ptr
);
9232 printf("key->refcount: %d\n",key
->refcount
);
9233 printf("val: %p\n",(void*)j
->val
);
9234 printf("val->type: %d\n",j
->val
->type
);
9235 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9237 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9238 val
= dictGetEntryVal(de
);
9239 key
->vm
.page
= j
->page
;
9240 key
->vm
.usedpages
= j
->pages
;
9241 key
->storage
= REDIS_VM_SWAPPED
;
9242 key
->vtype
= j
->val
->type
;
9243 decrRefCount(val
); /* Deallocate the object from memory. */
9244 dictGetEntryVal(de
) = NULL
;
9245 redisLog(REDIS_DEBUG
,
9246 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9247 (unsigned char*) key
->ptr
,
9248 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9249 server
.vm_stats_swapped_objects
++;
9250 server
.vm_stats_swapouts
++;
9252 /* Put a few more swap requests in queue if we are still
9254 if (trytoswap
&& vmCanSwapOut() &&
9255 zmalloc_used_memory() > server
.vm_max_memory
)
9260 more
= listLength(server
.io_newjobs
) <
9261 (unsigned) server
.vm_max_threads
;
9263 /* Don't waste CPU time if swappable objects are rare. */
9264 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9272 if (processed
== toprocess
) return;
9274 if (retval
< 0 && errno
!= EAGAIN
) {
9275 redisLog(REDIS_WARNING
,
9276 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9281 static void lockThreadedIO(void) {
9282 pthread_mutex_lock(&server
.io_mutex
);
9285 static void unlockThreadedIO(void) {
9286 pthread_mutex_unlock(&server
.io_mutex
);
9289 /* Remove the specified object from the threaded I/O queue if still not
9290 * processed, otherwise make sure to flag it as canceled. */
9291 static void vmCancelThreadedIOJob(robj
*o
) {
9293 server
.io_newjobs
, /* 0 */
9294 server
.io_processing
, /* 1 */
9295 server
.io_processed
/* 2 */
9299 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9302 /* Search for a matching key in one of the queues */
9303 for (i
= 0; i
< 3; i
++) {
9307 listRewind(lists
[i
],&li
);
9308 while ((ln
= listNext(&li
)) != NULL
) {
9309 iojob
*job
= ln
->value
;
9311 if (job
->canceled
) continue; /* Skip this, already canceled. */
9312 if (job
->key
== o
) {
9313 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9314 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9315 /* Mark the pages as free since the swap didn't happened
9316 * or happened but is now discarded. */
9317 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9318 vmMarkPagesFree(job
->page
,job
->pages
);
9319 /* Cancel the job. It depends on the list the job is
9322 case 0: /* io_newjobs */
9323 /* If the job was yet not processed the best thing to do
9324 * is to remove it from the queue at all */
9326 listDelNode(lists
[i
],ln
);
9328 case 1: /* io_processing */
9329 /* Oh Shi- the thread is messing with the Job:
9331 * Probably it's accessing the object if this is a
9332 * PREPARE_SWAP or DO_SWAP job.
9333 * If it's a LOAD job it may be reading from disk and
9334 * if we don't wait for the job to terminate before to
9335 * cancel it, maybe in a few microseconds data can be
9336 * corrupted in this pages. So the short story is:
9338 * Better to wait for the job to move into the
9339 * next queue (processed)... */
9341 /* We try again and again until the job is completed. */
9343 /* But let's wait some time for the I/O thread
9344 * to finish with this job. After all this condition
9345 * should be very rare. */
9348 case 2: /* io_processed */
9349 /* The job was already processed, that's easy...
9350 * just mark it as canceled so that we'll ignore it
9351 * when processing completed jobs. */
9355 /* Finally we have to adjust the storage type of the object
9356 * in order to "UNDO" the operaiton. */
9357 if (o
->storage
== REDIS_VM_LOADING
)
9358 o
->storage
= REDIS_VM_SWAPPED
;
9359 else if (o
->storage
== REDIS_VM_SWAPPING
)
9360 o
->storage
= REDIS_VM_MEMORY
;
9367 assert(1 != 1); /* We should never reach this */
9370 static void *IOThreadEntryPoint(void *arg
) {
9375 pthread_detach(pthread_self());
9377 /* Get a new job to process */
9379 if (listLength(server
.io_newjobs
) == 0) {
9380 /* No new jobs in queue, exit. */
9381 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9382 (long) pthread_self());
9383 server
.io_active_threads
--;
9387 ln
= listFirst(server
.io_newjobs
);
9389 listDelNode(server
.io_newjobs
,ln
);
9390 /* Add the job in the processing queue */
9391 j
->thread
= pthread_self();
9392 listAddNodeTail(server
.io_processing
,j
);
9393 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9395 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9396 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9398 /* Process the Job */
9399 if (j
->type
== REDIS_IOJOB_LOAD
) {
9400 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9401 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9402 FILE *fp
= fopen("/dev/null","w+");
9403 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9405 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9406 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9410 /* Done: insert the job into the processed queue */
9411 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9412 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9414 listDelNode(server
.io_processing
,ln
);
9415 listAddNodeTail(server
.io_processed
,j
);
9418 /* Signal the main thread there is new stuff to process */
9419 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9421 return NULL
; /* never reached */
9424 static void spawnIOThread(void) {
9426 sigset_t mask
, omask
;
9430 sigaddset(&mask
,SIGCHLD
);
9431 sigaddset(&mask
,SIGHUP
);
9432 sigaddset(&mask
,SIGPIPE
);
9433 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9434 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9435 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9439 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9440 server
.io_active_threads
++;
9443 /* We need to wait for the last thread to exit before we are able to
9444 * fork() in order to BGSAVE or BGREWRITEAOF. */
9445 static void waitEmptyIOJobsQueue(void) {
9447 int io_processed_len
;
9450 if (listLength(server
.io_newjobs
) == 0 &&
9451 listLength(server
.io_processing
) == 0 &&
9452 server
.io_active_threads
== 0)
9457 /* While waiting for empty jobs queue condition we post-process some
9458 * finshed job, as I/O threads may be hanging trying to write against
9459 * the io_ready_pipe_write FD but there are so much pending jobs that
9461 io_processed_len
= listLength(server
.io_processed
);
9463 if (io_processed_len
) {
9464 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9465 usleep(1000); /* 1 millisecond */
9467 usleep(10000); /* 10 milliseconds */
9472 static void vmReopenSwapFile(void) {
9473 /* Note: we don't close the old one as we are in the child process
9474 * and don't want to mess at all with the original file object. */
9475 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9476 if (server
.vm_fp
== NULL
) {
9477 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9478 server
.vm_swap_file
);
9481 server
.vm_fd
= fileno(server
.vm_fp
);
9484 /* This function must be called while with threaded IO locked */
9485 static void queueIOJob(iojob
*j
) {
9486 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9487 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9488 listAddNodeTail(server
.io_newjobs
,j
);
9489 if (server
.io_active_threads
< server
.vm_max_threads
)
9493 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9496 assert(key
->storage
== REDIS_VM_MEMORY
);
9497 assert(key
->refcount
== 1);
9499 j
= zmalloc(sizeof(*j
));
9500 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9506 j
->thread
= (pthread_t
) -1;
9507 key
->storage
= REDIS_VM_SWAPPING
;
9515 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9517 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9518 * If there is not already a job loading the key, it is craeted.
9519 * The key is added to the io_keys list in the client structure, and also
9520 * in the hash table mapping swapped keys to waiting clients, that is,
9521 * server.io_waited_keys. */
9522 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9523 struct dictEntry
*de
;
9527 /* If the key does not exist or is already in RAM we don't need to
9528 * block the client at all. */
9529 de
= dictFind(c
->db
->dict
,key
);
9530 if (de
== NULL
) return 0;
9531 o
= dictGetEntryKey(de
);
9532 if (o
->storage
== REDIS_VM_MEMORY
) {
9534 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9535 /* We were swapping the key, undo it! */
9536 vmCancelThreadedIOJob(o
);
9540 /* OK: the key is either swapped, or being loaded just now. */
9542 /* Add the key to the list of keys this client is waiting for.
9543 * This maps clients to keys they are waiting for. */
9544 listAddNodeTail(c
->io_keys
,key
);
9547 /* Add the client to the swapped keys => clients waiting map. */
9548 de
= dictFind(c
->db
->io_keys
,key
);
9552 /* For every key we take a list of clients blocked for it */
9554 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9556 assert(retval
== DICT_OK
);
9558 l
= dictGetEntryVal(de
);
9560 listAddNodeTail(l
,c
);
9562 /* Are we already loading the key from disk? If not create a job */
9563 if (o
->storage
== REDIS_VM_SWAPPED
) {
9566 o
->storage
= REDIS_VM_LOADING
;
9567 j
= zmalloc(sizeof(*j
));
9568 j
->type
= REDIS_IOJOB_LOAD
;
9571 j
->key
->vtype
= o
->vtype
;
9572 j
->page
= o
->vm
.page
;
9575 j
->thread
= (pthread_t
) -1;
9583 /* Preload keys needed for the ZUNION and ZINTER commands. */
9584 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9586 num
= atoi(c
->argv
[2]->ptr
);
9587 for (i
= 0; i
< num
; i
++) {
9588 waitForSwappedKey(c
,c
->argv
[3+i
]);
9592 /* Is this client attempting to run a command against swapped keys?
9593 * If so, block it ASAP, load the keys in background, then resume it.
9595 * The important idea about this function is that it can fail! If keys will
9596 * still be swapped when the client is resumed, this key lookups will
9597 * just block loading keys from disk. In practical terms this should only
9598 * happen with SORT BY command or if there is a bug in this function.
9600 * Return 1 if the client is marked as blocked, 0 if the client can
9601 * continue as the keys it is going to access appear to be in memory. */
9602 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9605 if (cmd
->vm_preload_proc
!= NULL
) {
9606 cmd
->vm_preload_proc(c
);
9608 if (cmd
->vm_firstkey
== 0) return 0;
9609 last
= cmd
->vm_lastkey
;
9610 if (last
< 0) last
= c
->argc
+last
;
9611 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9612 waitForSwappedKey(c
,c
->argv
[j
]);
9615 /* If the client was blocked for at least one key, mark it as blocked. */
9616 if (listLength(c
->io_keys
)) {
9617 c
->flags
|= REDIS_IO_WAIT
;
9618 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9619 server
.vm_blocked_clients
++;
9626 /* Remove the 'key' from the list of blocked keys for a given client.
9628 * The function returns 1 when there are no longer blocking keys after
9629 * the current one was removed (and the client can be unblocked). */
9630 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9634 struct dictEntry
*de
;
9636 /* Remove the key from the list of keys this client is waiting for. */
9637 listRewind(c
->io_keys
,&li
);
9638 while ((ln
= listNext(&li
)) != NULL
) {
9639 if (equalStringObjects(ln
->value
,key
)) {
9640 listDelNode(c
->io_keys
,ln
);
9646 /* Remove the client form the key => waiting clients map. */
9647 de
= dictFind(c
->db
->io_keys
,key
);
9649 l
= dictGetEntryVal(de
);
9650 ln
= listSearchKey(l
,c
);
9653 if (listLength(l
) == 0)
9654 dictDelete(c
->db
->io_keys
,key
);
9656 return listLength(c
->io_keys
) == 0;
9659 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9660 struct dictEntry
*de
;
9665 de
= dictFind(db
->io_keys
,key
);
9668 l
= dictGetEntryVal(de
);
9669 len
= listLength(l
);
9670 /* Note: we can't use something like while(listLength(l)) as the list
9671 * can be freed by the calling function when we remove the last element. */
9674 redisClient
*c
= ln
->value
;
9676 if (dontWaitForSwappedKey(c
,key
)) {
9677 /* Put the client in the list of clients ready to go as we
9678 * loaded all the keys about it. */
9679 listAddNodeTail(server
.io_ready_clients
,c
);
9684 /* =========================== Remote Configuration ========================= */
9686 static void configSetCommand(redisClient
*c
) {
9687 robj
*o
= getDecodedObject(c
->argv
[3]);
9688 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9689 zfree(server
.dbfilename
);
9690 server
.dbfilename
= zstrdup(o
->ptr
);
9691 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9692 zfree(server
.requirepass
);
9693 server
.requirepass
= zstrdup(o
->ptr
);
9694 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9695 zfree(server
.masterauth
);
9696 server
.masterauth
= zstrdup(o
->ptr
);
9697 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9698 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9699 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
9700 if (!strcasecmp(o
->ptr
,"no")) {
9701 server
.appendfsync
= APPENDFSYNC_NO
;
9702 } else if (!strcasecmp(o
->ptr
,"everysec")) {
9703 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
9704 } else if (!strcasecmp(o
->ptr
,"always")) {
9705 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
9709 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
9711 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
9713 /* Perform sanity check before setting the new config:
9714 * - Even number of args
9715 * - Seconds >= 1, changes >= 0 */
9717 sdsfreesplitres(v
,vlen
);
9720 for (j
= 0; j
< vlen
; j
++) {
9724 val
= strtoll(v
[j
], &eptr
, 10);
9725 if (eptr
[0] != '\0' ||
9726 ((j
& 1) == 0 && val
< 1) ||
9727 ((j
& 1) == 1 && val
< 0)) {
9728 sdsfreesplitres(v
,vlen
);
9732 /* Finally set the new config */
9733 resetServerSaveParams();
9734 for (j
= 0; j
< vlen
; j
+= 2) {
9738 seconds
= strtoll(v
[j
],NULL
,10);
9739 changes
= strtoll(v
[j
+1],NULL
,10);
9740 appendServerSaveParams(seconds
, changes
);
9742 sdsfreesplitres(v
,vlen
);
9744 addReplySds(c
,sdscatprintf(sdsempty(),
9745 "-ERR not supported CONFIG parameter %s\r\n",
9746 (char*)c
->argv
[2]->ptr
));
9751 addReply(c
,shared
.ok
);
9754 badfmt
: /* Bad format errors */
9755 addReplySds(c
,sdscatprintf(sdsempty(),
9756 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
9758 (char*)c
->argv
[2]->ptr
));
9762 static void configGetCommand(redisClient
*c
) {
9763 robj
*o
= getDecodedObject(c
->argv
[2]);
9764 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9765 char *pattern
= o
->ptr
;
9769 decrRefCount(lenobj
);
9771 if (stringmatch(pattern
,"dbfilename",0)) {
9772 addReplyBulkCString(c
,"dbfilename");
9773 addReplyBulkCString(c
,server
.dbfilename
);
9776 if (stringmatch(pattern
,"requirepass",0)) {
9777 addReplyBulkCString(c
,"requirepass");
9778 addReplyBulkCString(c
,server
.requirepass
);
9781 if (stringmatch(pattern
,"masterauth",0)) {
9782 addReplyBulkCString(c
,"masterauth");
9783 addReplyBulkCString(c
,server
.masterauth
);
9786 if (stringmatch(pattern
,"maxmemory",0)) {
9789 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9790 addReplyBulkCString(c
,"maxmemory");
9791 addReplyBulkCString(c
,buf
);
9794 if (stringmatch(pattern
,"appendfsync",0)) {
9797 switch(server
.appendfsync
) {
9798 case APPENDFSYNC_NO
: policy
= "no"; break;
9799 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
9800 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
9801 default: policy
= "unknown"; break; /* too harmless to panic */
9803 addReplyBulkCString(c
,"appendfsync");
9804 addReplyBulkCString(c
,policy
);
9807 if (stringmatch(pattern
,"save",0)) {
9808 sds buf
= sdsempty();
9811 for (j
= 0; j
< server
.saveparamslen
; j
++) {
9812 buf
= sdscatprintf(buf
,"%ld %d",
9813 server
.saveparams
[j
].seconds
,
9814 server
.saveparams
[j
].changes
);
9815 if (j
!= server
.saveparamslen
-1)
9816 buf
= sdscatlen(buf
," ",1);
9818 addReplyBulkCString(c
,"save");
9819 addReplyBulkCString(c
,buf
);
9824 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9827 static void configCommand(redisClient
*c
) {
9828 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9829 if (c
->argc
!= 4) goto badarity
;
9830 configSetCommand(c
);
9831 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9832 if (c
->argc
!= 3) goto badarity
;
9833 configGetCommand(c
);
9834 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9835 if (c
->argc
!= 2) goto badarity
;
9836 server
.stat_numcommands
= 0;
9837 server
.stat_numconnections
= 0;
9838 server
.stat_expiredkeys
= 0;
9839 server
.stat_starttime
= time(NULL
);
9840 addReply(c
,shared
.ok
);
9842 addReplySds(c
,sdscatprintf(sdsempty(),
9843 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9848 addReplySds(c
,sdscatprintf(sdsempty(),
9849 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9850 (char*) c
->argv
[1]->ptr
));
9853 /* =========================== Pubsub implementation ======================== */
9855 static void freePubsubPattern(void *p
) {
9856 pubsubPattern
*pat
= p
;
9858 decrRefCount(pat
->pattern
);
9862 static int listMatchPubsubPattern(void *a
, void *b
) {
9863 pubsubPattern
*pa
= a
, *pb
= b
;
9865 return (pa
->client
== pb
->client
) &&
9866 (equalStringObjects(pa
->pattern
,pb
->pattern
));
9869 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9870 * 0 if the client was already subscribed to that channel. */
9871 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9872 struct dictEntry
*de
;
9873 list
*clients
= NULL
;
9876 /* Add the channel to the client -> channels hash table */
9877 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9879 incrRefCount(channel
);
9880 /* Add the client to the channel -> list of clients hash table */
9881 de
= dictFind(server
.pubsub_channels
,channel
);
9883 clients
= listCreate();
9884 dictAdd(server
.pubsub_channels
,channel
,clients
);
9885 incrRefCount(channel
);
9887 clients
= dictGetEntryVal(de
);
9889 listAddNodeTail(clients
,c
);
9891 /* Notify the client */
9892 addReply(c
,shared
.mbulk3
);
9893 addReply(c
,shared
.subscribebulk
);
9894 addReplyBulk(c
,channel
);
9895 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9899 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9900 * 0 if the client was not subscribed to the specified channel. */
9901 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9902 struct dictEntry
*de
;
9907 /* Remove the channel from the client -> channels hash table */
9908 incrRefCount(channel
); /* channel may be just a pointer to the same object
9909 we have in the hash tables. Protect it... */
9910 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9912 /* Remove the client from the channel -> clients list hash table */
9913 de
= dictFind(server
.pubsub_channels
,channel
);
9915 clients
= dictGetEntryVal(de
);
9916 ln
= listSearchKey(clients
,c
);
9918 listDelNode(clients
,ln
);
9919 if (listLength(clients
) == 0) {
9920 /* Free the list and associated hash entry at all if this was
9921 * the latest client, so that it will be possible to abuse
9922 * Redis PUBSUB creating millions of channels. */
9923 dictDelete(server
.pubsub_channels
,channel
);
9926 /* Notify the client */
9928 addReply(c
,shared
.mbulk3
);
9929 addReply(c
,shared
.unsubscribebulk
);
9930 addReplyBulk(c
,channel
);
9931 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9932 listLength(c
->pubsub_patterns
));
9935 decrRefCount(channel
); /* it is finally safe to release it */
9939 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9940 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9943 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9946 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9947 incrRefCount(pattern
);
9948 pat
= zmalloc(sizeof(*pat
));
9949 pat
->pattern
= getDecodedObject(pattern
);
9951 listAddNodeTail(server
.pubsub_patterns
,pat
);
9953 /* Notify the client */
9954 addReply(c
,shared
.mbulk3
);
9955 addReply(c
,shared
.psubscribebulk
);
9956 addReplyBulk(c
,pattern
);
9957 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9961 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9962 * 0 if the client was not subscribed to the specified channel. */
9963 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9968 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9969 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9971 listDelNode(c
->pubsub_patterns
,ln
);
9973 pat
.pattern
= pattern
;
9974 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9975 listDelNode(server
.pubsub_patterns
,ln
);
9977 /* Notify the client */
9979 addReply(c
,shared
.mbulk3
);
9980 addReply(c
,shared
.punsubscribebulk
);
9981 addReplyBulk(c
,pattern
);
9982 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9983 listLength(c
->pubsub_patterns
));
9985 decrRefCount(pattern
);
9989 /* Unsubscribe from all the channels. Return the number of channels the
9990 * client was subscribed from. */
9991 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9992 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9996 while((de
= dictNext(di
)) != NULL
) {
9997 robj
*channel
= dictGetEntryKey(de
);
9999 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10001 dictReleaseIterator(di
);
10005 /* Unsubscribe from all the patterns. Return the number of patterns the
10006 * client was subscribed from. */
10007 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10012 listRewind(c
->pubsub_patterns
,&li
);
10013 while ((ln
= listNext(&li
)) != NULL
) {
10014 robj
*pattern
= ln
->value
;
10016 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10021 /* Publish a message */
10022 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10024 struct dictEntry
*de
;
10028 /* Send to clients listening for that channel */
10029 de
= dictFind(server
.pubsub_channels
,channel
);
10031 list
*list
= dictGetEntryVal(de
);
10035 listRewind(list
,&li
);
10036 while ((ln
= listNext(&li
)) != NULL
) {
10037 redisClient
*c
= ln
->value
;
10039 addReply(c
,shared
.mbulk3
);
10040 addReply(c
,shared
.messagebulk
);
10041 addReplyBulk(c
,channel
);
10042 addReplyBulk(c
,message
);
10046 /* Send to clients listening to matching channels */
10047 if (listLength(server
.pubsub_patterns
)) {
10048 listRewind(server
.pubsub_patterns
,&li
);
10049 channel
= getDecodedObject(channel
);
10050 while ((ln
= listNext(&li
)) != NULL
) {
10051 pubsubPattern
*pat
= ln
->value
;
10053 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10054 sdslen(pat
->pattern
->ptr
),
10055 (char*)channel
->ptr
,
10056 sdslen(channel
->ptr
),0)) {
10057 addReply(pat
->client
,shared
.mbulk4
);
10058 addReply(pat
->client
,shared
.pmessagebulk
);
10059 addReplyBulk(pat
->client
,pat
->pattern
);
10060 addReplyBulk(pat
->client
,channel
);
10061 addReplyBulk(pat
->client
,message
);
10065 decrRefCount(channel
);
10070 static void subscribeCommand(redisClient
*c
) {
10073 for (j
= 1; j
< c
->argc
; j
++)
10074 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10077 static void unsubscribeCommand(redisClient
*c
) {
10078 if (c
->argc
== 1) {
10079 pubsubUnsubscribeAllChannels(c
,1);
10084 for (j
= 1; j
< c
->argc
; j
++)
10085 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10089 static void psubscribeCommand(redisClient
*c
) {
10092 for (j
= 1; j
< c
->argc
; j
++)
10093 pubsubSubscribePattern(c
,c
->argv
[j
]);
10096 static void punsubscribeCommand(redisClient
*c
) {
10097 if (c
->argc
== 1) {
10098 pubsubUnsubscribeAllPatterns(c
,1);
10103 for (j
= 1; j
< c
->argc
; j
++)
10104 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10108 static void publishCommand(redisClient
*c
) {
10109 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10110 addReplyLong(c
,receivers
);
10113 /* ================================= Debugging ============================== */
10115 static void debugCommand(redisClient
*c
) {
10116 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10117 *((char*)-1) = 'x';
10118 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10119 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10120 addReply(c
,shared
.err
);
10124 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10125 addReply(c
,shared
.err
);
10128 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10129 addReply(c
,shared
.ok
);
10130 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10132 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10133 addReply(c
,shared
.err
);
10136 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10137 addReply(c
,shared
.ok
);
10138 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10139 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10143 addReply(c
,shared
.nokeyerr
);
10146 key
= dictGetEntryKey(de
);
10147 val
= dictGetEntryVal(de
);
10148 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10149 key
->storage
== REDIS_VM_SWAPPING
)) {
10153 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10154 strenc
= strencoding
[val
->encoding
];
10156 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10159 addReplySds(c
,sdscatprintf(sdsempty(),
10160 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10161 "encoding:%s serializedlength:%lld\r\n",
10162 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10163 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10165 addReplySds(c
,sdscatprintf(sdsempty(),
10166 "+Key at:%p refcount:%d, value swapped at: page %llu "
10167 "using %llu pages\r\n",
10168 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10169 (unsigned long long) key
->vm
.usedpages
));
10171 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10172 lookupKeyRead(c
->db
,c
->argv
[2]);
10173 addReply(c
,shared
.ok
);
10174 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10175 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10178 if (!server
.vm_enabled
) {
10179 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10183 addReply(c
,shared
.nokeyerr
);
10186 key
= dictGetEntryKey(de
);
10187 val
= dictGetEntryVal(de
);
10188 /* If the key is shared we want to create a copy */
10189 if (key
->refcount
> 1) {
10190 robj
*newkey
= dupStringObject(key
);
10192 key
= dictGetEntryKey(de
) = newkey
;
10195 if (key
->storage
!= REDIS_VM_MEMORY
) {
10196 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10197 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10198 dictGetEntryVal(de
) = NULL
;
10199 addReply(c
,shared
.ok
);
10201 addReply(c
,shared
.err
);
10203 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10208 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10210 for (j
= 0; j
< keys
; j
++) {
10211 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10212 key
= createStringObject(buf
,strlen(buf
));
10213 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10217 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10218 val
= createStringObject(buf
,strlen(buf
));
10219 dictAdd(c
->db
->dict
,key
,val
);
10221 addReply(c
,shared
.ok
);
10223 addReplySds(c
,sdsnew(
10224 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10228 static void _redisAssert(char *estr
, char *file
, int line
) {
10229 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10230 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
10231 #ifdef HAVE_BACKTRACE
10232 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10233 *((char*)-1) = 'x';
10237 static void _redisPanic(char *msg
, char *file
, int line
) {
10238 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10239 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10240 #ifdef HAVE_BACKTRACE
10241 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10242 *((char*)-1) = 'x';
10246 /* =================================== Main! ================================ */
/* Read the kernel overcommit policy from /proc/sys/vm/overcommit_memory.
 *
 * Returns the integer stored in the file, or -1 when the file cannot be
 * opened, cannot be read, or does not start with a number. Unparsable
 * content is reported as -1 rather than 0, so that it is never mistaken
 * for the "overcommit_memory = 0" setting the caller warns about. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    value = strtol(buf,&end,10);
    if (end == buf) return -1; /* no digits at all */
    return (int) value;
}
10263 void linuxOvercommitMemoryWarning(void) {
10264 if (linuxOvercommitMemoryValue() == 0) {
10265 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10268 #endif /* __linux__ */
10270 static void daemonize(void) {
10274 if (fork() != 0) exit(0); /* parent exits */
10275 setsid(); /* create a new session */
10277 /* Every output goes to /dev/null. If Redis is daemonized but
10278 * the 'logfile' is set to 'stdout' in the configuration file
10279 * it will not log at all. */
10280 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10281 dup2(fd
, STDIN_FILENO
);
10282 dup2(fd
, STDOUT_FILENO
);
10283 dup2(fd
, STDERR_FILENO
);
10284 if (fd
> STDERR_FILENO
) close(fd
);
10286 /* Try to write the pid file */
10287 fp
= fopen(server
.pidfile
,"w");
10289 fprintf(fp
,"%d\n",getpid());
10294 static void version() {
10295 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print a short usage message on stderr and terminate with status 1.
 * Declared with an explicit (void) prototype like the other no-argument
 * functions in this file. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
10305 int main(int argc
, char **argv
) {
10308 initServerConfig();
10310 if (strcmp(argv
[1], "-v") == 0 ||
10311 strcmp(argv
[1], "--version") == 0) version();
10312 if (strcmp(argv
[1], "--help") == 0) usage();
10313 resetServerSaveParams();
10314 loadServerConfig(argv
[1]);
10315 } else if ((argc
> 2)) {
10318 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10320 if (server
.daemonize
) daemonize();
10322 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10324 linuxOvercommitMemoryWarning();
10326 start
= time(NULL
);
10327 if (server
.appendonly
) {
10328 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10329 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10331 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10332 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10334 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10335 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10337 aeDeleteEventLoop(server
.el
);
10341 /* ============================= Backtrace support ========================= */
10343 #ifdef HAVE_BACKTRACE
10344 static char *findFuncName(void *pointer
, unsigned long *offset
);
10346 static void *getMcontextEip(ucontext_t
*uc
) {
10347 #if defined(__FreeBSD__)
10348 return (void*) uc
->uc_mcontext
.mc_eip
;
10349 #elif defined(__dietlibc__)
10350 return (void*) uc
->uc_mcontext
.eip
;
10351 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10353 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10355 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10357 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10358 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10359 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10361 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10363 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10364 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10365 #elif defined(__ia64__) /* Linux IA64 */
10366 return (void*) uc
->uc_mcontext
.sc_ip
;
10372 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10374 char **messages
= NULL
;
10375 int i
, trace_size
= 0;
10376 unsigned long offset
=0;
10377 ucontext_t
*uc
= (ucontext_t
*) secret
;
10379 REDIS_NOTUSED(info
);
10381 redisLog(REDIS_WARNING
,
10382 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10383 infostring
= genRedisInfoString();
10384 redisLog(REDIS_WARNING
, "%s",infostring
);
10385 /* It's not safe to sdsfree() the returned string under memory
10386 * corruption conditions. Let it leak as we are going to abort */
10388 trace_size
= backtrace(trace
, 100);
10389 /* overwrite sigaction with caller's address */
10390 if (getMcontextEip(uc
) != NULL
) {
10391 trace
[1] = getMcontextEip(uc
);
10393 messages
= backtrace_symbols(trace
, trace_size
);
10395 for (i
=1; i
<trace_size
; ++i
) {
10396 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
10398 p
= strchr(messages
[i
],'+');
10399 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10400 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
10402 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10405 /* free(messages); Don't call free() with possibly corrupted memory. */
10409 static void setupSigSegvAction(void) {
10410 struct sigaction act
;
10412 sigemptyset (&act
.sa_mask
);
10413 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10414 * is used. Otherwise, sa_handler is used */
10415 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10416 act
.sa_sigaction
= segvHandler
;
10417 sigaction (SIGSEGV
, &act
, NULL
);
10418 sigaction (SIGBUS
, &act
, NULL
);
10419 sigaction (SIGFPE
, &act
, NULL
);
10420 sigaction (SIGILL
, &act
, NULL
);
10421 sigaction (SIGBUS
, &act
, NULL
);
10425 #include "staticsymbols.h"
10426 /* This function try to convert a pointer into a function name. It's used in
10427 * oreder to provide a backtrace under segmentation fault that's able to
10428 * display functions declared as static (otherwise the backtrace is useless). */
10429 static char *findFuncName(void *pointer
, unsigned long *offset
){
10431 unsigned long off
, minoff
= 0;
10433 /* Try to match against the Symbol with the smallest offset */
10434 for (i
=0; symsTable
[i
].pointer
; i
++) {
10435 unsigned long lp
= (unsigned long) pointer
;
10437 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10438 off
=lp
-symsTable
[i
].pointer
;
10439 if (ret
< 0 || off
< minoff
) {
10445 if (ret
== -1) return NULL
;
10447 return symsTable
[ret
].name
;
10449 #else /* HAVE_BACKTRACE */
/* Build without HAVE_BACKTRACE: there is no crash handler to install, so
 * this is intentionally a no-op. */
static void setupSigSegvAction(void) {
}
10452 #endif /* HAVE_BACKTRACE */