2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.10"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
64 #include "solarisfixes.h"
68 #include "ae.h" /* Event driven programming library */
69 #include "sds.h" /* Dynamic safe strings */
70 #include "anet.h" /* Networking the easy way */
71 #include "dict.h" /* Hash tables */
72 #include "adlist.h" /* Linked lists */
73 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
74 #include "lzf.h" /* LZF compression library */
75 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
82 /* Static server configuration */
83 #define REDIS_SERVERPORT 6379 /* TCP port */
84 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
85 #define REDIS_IOBUF_LEN 1024
86 #define REDIS_LOADBUF_LEN 1024
87 #define REDIS_STATIC_ARGS 8
88 #define REDIS_DEFAULT_DBNUM 16
89 #define REDIS_CONFIGLINE_MAX 1024
90 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
91 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
92 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
93 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
94 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
96 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
97 #define REDIS_WRITEV_THRESHOLD 3
98 /* Max number of iovecs used for each writev call */
99 #define REDIS_WRITEV_IOVEC_COUNT 256
101 /* Hash table parameters */
102 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
105 #define REDIS_CMD_BULK 1 /* Bulk write command */
106 #define REDIS_CMD_INLINE 2 /* Inline command */
107 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
108 this flag will return an error when the 'maxmemory' option is set in the
109 config file and the server is using more than maxmemory bytes of memory.
110 In short these commands are denied on low memory conditions. */
111 #define REDIS_CMD_DENYOOM 4
112 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
115 #define REDIS_STRING 0
121 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
122 * internally represented in multiple ways. The 'encoding' field of the object
123 * is set to one of these values for this object. */
124 #define REDIS_ENCODING_RAW 0 /* Raw representation */
125 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
126 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
127 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
129 static char* strencoding
[] = {
130 "raw", "int", "zipmap", "hashtable"
133 /* Object types only used for dumping to disk */
134 #define REDIS_EXPIRETIME 253
135 #define REDIS_SELECTDB 254
136 #define REDIS_EOF 255
138 /* Defines related to the dump file format. To store 32 bits lengths for short
139 * keys requires a lot of space, so we check the most significant 2 bits of
140 * the first byte to interpret the length:
142 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
143 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
144 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
145 * 11|000000 this means: specially encoded object will follow. The six bits
146 * number specify the kind of object that follows.
147 * See the REDIS_RDB_ENC_* defines.
149 * Lengths up to 63 are stored using a single byte, most DB keys, and many
150 * values, will fit inside. */
151 #define REDIS_RDB_6BITLEN 0 /* MSBs 00: the length is the low 6 bits of this byte */
152 #define REDIS_RDB_14BITLEN 1 /* MSBs 01: 14 bit length, 6 bits here + 8 bits of the next byte */
153 #define REDIS_RDB_32BITLEN 2 /* MSBs 10: a full 32 bit length follows */
154 #define REDIS_RDB_ENCVAL 3 /* MSBs 11: a specially encoded object follows, see REDIS_RDB_ENC_* */
155 #define REDIS_RDB_LENERR UINT_MAX /* NOTE(review): presumably the value reported when a length cannot be read; confirm against the RDB loading code */
157 /* When a length of a string object stored on disk has the first two bits
158 * set, the remaining six bits specify a special encoding for the object
159 * according to the following defines: */
160 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
161 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
162 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
163 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF (see lzf.h) */
165 /* Virtual memory object->where field. */
166 #define REDIS_VM_MEMORY 0 /* The object is on memory */
167 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
168 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
169 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
171 /* Virtual memory static configuration stuff.
172 * Check vmFindContiguousPages() to know more about these magic numbers. */
173 #define REDIS_VM_MAX_NEAR_PAGES 65536
174 #define REDIS_VM_MAX_RANDOM_JUMP 4096
175 #define REDIS_VM_MAX_THREADS 32
176 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
177 /* The following is the *percentage* of completed I/O jobs to process when the
178 * handler is called. While Virtual Memory I/O operations are performed by
179 * threads, these operations must be processed by the main thread when completed
180 * in order to take effect. */
181 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
184 #define REDIS_SLAVE 1 /* This client is a slave server */
185 #define REDIS_MASTER 2 /* This client is a master server */
186 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
187 #define REDIS_MULTI 8 /* This client is in a MULTI context */
188 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
189 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
191 /* Slave replication state - slave side */
192 #define REDIS_REPL_NONE 0 /* No active replication */
193 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
194 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
196 /* Slave replication state - from the point of view of master
197 * Note that in SEND_BULK and ONLINE state the slave receives new updates
198 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
199 * to start the next background saving in order to send updates to it. */
200 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
201 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
202 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
203 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
205 /* List related stuff */
209 /* Sort operations */
210 #define REDIS_SORT_GET 0
211 #define REDIS_SORT_ASC 1
212 #define REDIS_SORT_DESC 2
213 #define REDIS_SORTKEY_MAX 1024
216 #define REDIS_DEBUG 0
217 #define REDIS_VERBOSE 1
218 #define REDIS_NOTICE 2
219 #define REDIS_WARNING 3
221 /* Anti-warning macro... */
222 #define REDIS_NOTUSED(V) ((void) V)
224 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
225 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
227 /* Append only defines */
228 #define APPENDFSYNC_NO 0
229 #define APPENDFSYNC_ALWAYS 1
230 #define APPENDFSYNC_EVERYSEC 2
232 /* Hashes related defaults */
233 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
234 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
236 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e): evaluates _e; when it is false, the stringified expression,
 * the source file and the line number are passed to _redisAssert() and the
 * process is then terminated with _exit(1). The whole expansion is a single
 * parenthesized void expression, so it can be used in any expression
 * context. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))

/* redisPanic(_e): unconditionally passes the stringified argument, the source
 * file and the line number to _redisPanic(), then terminates the process with
 * _exit(1).
 *
 * The replacement list is wrapped in parentheses (as redisAssert already is)
 * so that the comma operator cannot be split apart by the surrounding
 * expression. Without them, `c ? x : redisPanic(m)` would parse as
 * `(c ? x : _redisPanic(...)), _exit(1)` and always exit, and
 * `f(redisPanic(m))` would pass two arguments to f. */
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))

/* Reporting helpers invoked by the two macros above. They are only declared
 * here; being static, their definitions live elsewhere in this file. Each one
 * receives the stringified expression / panic message, the source file name
 * and the line number of the call site. */
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 sds aofbuf
; /* AOF buffer, written before entering the event loop */
374 struct saveparam
*saveparams
;
379 char *appendfilename
;
383 /* Replication related */
388 redisClient
*master
; /* client that is master for this slave */
390 unsigned int maxclients
;
391 unsigned long long maxmemory
;
392 unsigned int blpop_blocked_clients
;
393 unsigned int vm_blocked_clients
;
394 /* Sort parameters - qsort_r() is only available under BSD so we
395 * have to keep this state global, in order to pass it to sortCompare() */
399 /* Virtual memory configuration */
404 unsigned long long vm_max_memory
;
406 size_t hash_max_zipmap_entries
;
407 size_t hash_max_zipmap_value
;
408 /* Virtual memory state */
411 off_t vm_next_page
; /* Next probably empty page */
412 off_t vm_near_pages
; /* Number of pages allocated sequentially */
413 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
414 time_t unixtime
; /* Unix time sampled every second. */
415 /* Virtual memory I/O threads stuff */
416 /* An I/O thread processes an element taken from the io_newjobs queue and
417 * puts the result of the operation in the io_processed list. While the
418 * job is being processed, it's put on the io_processing queue. */
419 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
420 list
*io_processing
; /* List of VM I/O jobs being processed */
421 list
*io_processed
; /* List of VM I/O jobs already processed */
422 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
423 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
424 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
425 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
426 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
427 int io_active_threads
; /* Number of running I/O threads */
428 int vm_max_threads
; /* Max number of I/O threads running at the same time */
429 /* Our main thread is blocked on the event loop, looking for sockets ready
430 * to be read or written, so when a threaded I/O operation is ready to be
431 * processed by the main thread, the I/O thread will use a unix pipe to
432 * awake the main thread. The following are the two pipe FDs. */
433 int io_ready_pipe_read
;
434 int io_ready_pipe_write
;
435 /* Virtual memory stats */
436 unsigned long long vm_stats_used_pages
;
437 unsigned long long vm_stats_swapped_objects
;
438 unsigned long long vm_stats_swapouts
;
439 unsigned long long vm_stats_swapins
;
441 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
442 list
*pubsub_patterns
; /* A list of pubsub_patterns */
447 typedef struct pubsubPattern
{
452 typedef void redisCommandProc(redisClient
*c
);
453 struct redisCommand
{
455 redisCommandProc
*proc
;
458 /* Use a function to determine which keys need to be loaded
459 * in the background prior to executing this command. Takes precedence
460 * over vm_firstkey and others, ignored when NULL */
461 redisCommandProc
*vm_preload_proc
;
462 /* What keys should be loaded in background when calling this command? */
463 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
464 int vm_lastkey
; /* The last argument that's a key */
465 int vm_keystep
; /* The step between first and last key */
468 struct redisFunctionSym
{
470 unsigned long pointer
;
473 typedef struct _redisSortObject
{
481 typedef struct _redisSortOperation
{
484 } redisSortOperation
;
486 /* ZSETs use a specialized version of Skiplists */
488 typedef struct zskiplistNode
{
489 struct zskiplistNode
**forward
;
490 struct zskiplistNode
*backward
;
496 typedef struct zskiplist
{
497 struct zskiplistNode
*header
, *tail
;
498 unsigned long length
;
502 typedef struct zset
{
507 /* Our shared "common" objects */
509 #define REDIS_SHARED_INTEGERS 10000
510 struct sharedObjectsStruct
{
511 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
512 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
513 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
514 *outofrangeerr
, *plus
,
515 *select0
, *select1
, *select2
, *select3
, *select4
,
516 *select5
, *select6
, *select7
, *select8
, *select9
,
517 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
518 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
519 *integers
[REDIS_SHARED_INTEGERS
];
522 /* Global vars that are actually used as constants. The following double
523 * values are used for double on-disk serialization, and are initialized
524 * at runtime to avoid strange compiler optimizations. */
526 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
528 /* VM threaded I/O request message */
529 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
530 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
531 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
532 typedef struct iojob
{
533 int type
; /* Request type, REDIS_IOJOB_* */
534 redisDb
*db
;/* Redis database */
535 robj
*key
; /* This I/O request is about swapping this key */
536 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
537 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
538 off_t page
; /* Swap page where to read/write the object */
539 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
540 int canceled
; /* True if this command was canceled by blocking side of VM */
541 pthread_t thread
; /* ID of the thread processing this entry */
544 /*================================ Prototypes =============================== */
546 static void freeStringObject(robj
*o
);
547 static void freeListObject(robj
*o
);
548 static void freeSetObject(robj
*o
);
549 static void decrRefCount(void *o
);
550 static robj
*createObject(int type
, void *ptr
);
551 static void freeClient(redisClient
*c
);
552 static int rdbLoad(char *filename
);
553 static void addReply(redisClient
*c
, robj
*obj
);
554 static void addReplySds(redisClient
*c
, sds s
);
555 static void incrRefCount(robj
*o
);
556 static int rdbSaveBackground(char *filename
);
557 static robj
*createStringObject(char *ptr
, size_t len
);
558 static robj
*dupStringObject(robj
*o
);
559 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
560 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
561 static void flushAppendOnlyFile(void);
562 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
563 static int syncWithMaster(void);
564 static robj
*tryObjectEncoding(robj
*o
);
565 static robj
*getDecodedObject(robj
*o
);
566 static int removeExpire(redisDb
*db
, robj
*key
);
567 static int expireIfNeeded(redisDb
*db
, robj
*key
);
568 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
569 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
570 static int deleteKey(redisDb
*db
, robj
*key
);
571 static time_t getExpire(redisDb
*db
, robj
*key
);
572 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
573 static void updateSlavesWaitingBgsave(int bgsaveerr
);
574 static void freeMemoryIfNeeded(void);
575 static int processCommand(redisClient
*c
);
576 static void setupSigSegvAction(void);
577 static void rdbRemoveTempFile(pid_t childpid
);
578 static void aofRemoveTempFile(pid_t childpid
);
579 static size_t stringObjectLen(robj
*o
);
580 static void processInputBuffer(redisClient
*c
);
581 static zskiplist
*zslCreate(void);
582 static void zslFree(zskiplist
*zsl
);
583 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
584 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
585 static void initClientMultiState(redisClient
*c
);
586 static void freeClientMultiState(redisClient
*c
);
587 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
588 static void unblockClientWaitingData(redisClient
*c
);
589 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
590 static void vmInit(void);
591 static void vmMarkPagesFree(off_t page
, off_t count
);
592 static robj
*vmLoadObject(robj
*key
);
593 static robj
*vmPreviewObject(robj
*key
);
594 static int vmSwapOneObjectBlocking(void);
595 static int vmSwapOneObjectThreaded(void);
596 static int vmCanSwapOut(void);
597 static int tryFreeOneObjectFromFreelist(void);
598 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
599 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
600 static void vmCancelThreadedIOJob(robj
*o
);
601 static void lockThreadedIO(void);
602 static void unlockThreadedIO(void);
603 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
604 static void freeIOJob(iojob
*j
);
605 static void queueIOJob(iojob
*j
);
606 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
607 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
608 static void waitEmptyIOJobsQueue(void);
609 static void vmReopenSwapFile(void);
610 static int vmFreePage(off_t page
);
611 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
612 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
613 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
614 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
615 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
616 static struct redisCommand
*lookupCommand(char *name
);
617 static void call(redisClient
*c
, struct redisCommand
*cmd
);
618 static void resetClient(redisClient
*c
);
619 static void convertToRealHash(robj
*o
);
620 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
621 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
622 static void freePubsubPattern(void *p
);
623 static int listMatchPubsubPattern(void *a
, void *b
);
624 static int compareStringObjects(robj
*a
, robj
*b
);
626 static int rewriteAppendOnlyFileBackground(void);
628 static void authCommand(redisClient
*c
);
629 static void pingCommand(redisClient
*c
);
630 static void echoCommand(redisClient
*c
);
631 static void setCommand(redisClient
*c
);
632 static void setnxCommand(redisClient
*c
);
633 static void setexCommand(redisClient
*c
);
634 static void getCommand(redisClient
*c
);
635 static void delCommand(redisClient
*c
);
636 static void existsCommand(redisClient
*c
);
637 static void incrCommand(redisClient
*c
);
638 static void decrCommand(redisClient
*c
);
639 static void incrbyCommand(redisClient
*c
);
640 static void decrbyCommand(redisClient
*c
);
641 static void selectCommand(redisClient
*c
);
642 static void randomkeyCommand(redisClient
*c
);
643 static void keysCommand(redisClient
*c
);
644 static void dbsizeCommand(redisClient
*c
);
645 static void lastsaveCommand(redisClient
*c
);
646 static void saveCommand(redisClient
*c
);
647 static void bgsaveCommand(redisClient
*c
);
648 static void bgrewriteaofCommand(redisClient
*c
);
649 static void shutdownCommand(redisClient
*c
);
650 static void moveCommand(redisClient
*c
);
651 static void renameCommand(redisClient
*c
);
652 static void renamenxCommand(redisClient
*c
);
653 static void lpushCommand(redisClient
*c
);
654 static void rpushCommand(redisClient
*c
);
655 static void lpopCommand(redisClient
*c
);
656 static void rpopCommand(redisClient
*c
);
657 static void llenCommand(redisClient
*c
);
658 static void lindexCommand(redisClient
*c
);
659 static void lrangeCommand(redisClient
*c
);
660 static void ltrimCommand(redisClient
*c
);
661 static void typeCommand(redisClient
*c
);
662 static void lsetCommand(redisClient
*c
);
663 static void saddCommand(redisClient
*c
);
664 static void sremCommand(redisClient
*c
);
665 static void smoveCommand(redisClient
*c
);
666 static void sismemberCommand(redisClient
*c
);
667 static void scardCommand(redisClient
*c
);
668 static void spopCommand(redisClient
*c
);
669 static void srandmemberCommand(redisClient
*c
);
670 static void sinterCommand(redisClient
*c
);
671 static void sinterstoreCommand(redisClient
*c
);
672 static void sunionCommand(redisClient
*c
);
673 static void sunionstoreCommand(redisClient
*c
);
674 static void sdiffCommand(redisClient
*c
);
675 static void sdiffstoreCommand(redisClient
*c
);
676 static void syncCommand(redisClient
*c
);
677 static void flushdbCommand(redisClient
*c
);
678 static void flushallCommand(redisClient
*c
);
679 static void sortCommand(redisClient
*c
);
680 static void lremCommand(redisClient
*c
);
681 static void rpoplpushcommand(redisClient
*c
);
682 static void infoCommand(redisClient
*c
);
683 static void mgetCommand(redisClient
*c
);
684 static void monitorCommand(redisClient
*c
);
685 static void expireCommand(redisClient
*c
);
686 static void expireatCommand(redisClient
*c
);
687 static void getsetCommand(redisClient
*c
);
688 static void ttlCommand(redisClient
*c
);
689 static void slaveofCommand(redisClient
*c
);
690 static void debugCommand(redisClient
*c
);
691 static void msetCommand(redisClient
*c
);
692 static void msetnxCommand(redisClient
*c
);
693 static void zaddCommand(redisClient
*c
);
694 static void zincrbyCommand(redisClient
*c
);
695 static void zrangeCommand(redisClient
*c
);
696 static void zrangebyscoreCommand(redisClient
*c
);
697 static void zcountCommand(redisClient
*c
);
698 static void zrevrangeCommand(redisClient
*c
);
699 static void zcardCommand(redisClient
*c
);
700 static void zremCommand(redisClient
*c
);
701 static void zscoreCommand(redisClient
*c
);
702 static void zremrangebyscoreCommand(redisClient
*c
);
703 static void multiCommand(redisClient
*c
);
704 static void execCommand(redisClient
*c
);
705 static void discardCommand(redisClient
*c
);
706 static void blpopCommand(redisClient
*c
);
707 static void brpopCommand(redisClient
*c
);
708 static void appendCommand(redisClient
*c
);
709 static void substrCommand(redisClient
*c
);
710 static void zrankCommand(redisClient
*c
);
711 static void zrevrankCommand(redisClient
*c
);
712 static void hsetCommand(redisClient
*c
);
713 static void hsetnxCommand(redisClient
*c
);
714 static void hgetCommand(redisClient
*c
);
715 static void hmsetCommand(redisClient
*c
);
716 static void hmgetCommand(redisClient
*c
);
717 static void hdelCommand(redisClient
*c
);
718 static void hlenCommand(redisClient
*c
);
719 static void zremrangebyrankCommand(redisClient
*c
);
720 static void zunionCommand(redisClient
*c
);
721 static void zinterCommand(redisClient
*c
);
722 static void hkeysCommand(redisClient
*c
);
723 static void hvalsCommand(redisClient
*c
);
724 static void hgetallCommand(redisClient
*c
);
725 static void hexistsCommand(redisClient
*c
);
726 static void configCommand(redisClient
*c
);
727 static void hincrbyCommand(redisClient
*c
);
728 static void subscribeCommand(redisClient
*c
);
729 static void unsubscribeCommand(redisClient
*c
);
730 static void psubscribeCommand(redisClient
*c
);
731 static void punsubscribeCommand(redisClient
*c
);
732 static void publishCommand(redisClient
*c
);
734 /*================================= Globals ================================= */
737 static struct redisServer server
; /* server global state */
738 static struct redisCommand cmdTable
[] = {
739 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
740 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
741 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
742 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
743 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
746 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
748 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
749 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
750 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
751 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
752 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
757 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
759 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
761 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
762 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
763 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
764 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
765 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
766 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
767 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
768 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
769 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
770 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
771 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
772 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
773 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
774 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
775 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
776 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
778 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
780 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
781 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
782 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
783 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
784 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
790 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
791 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
792 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
795 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
797 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
799 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
802 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
804 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
805 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
806 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
807 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
808 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
809 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
812 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
813 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
814 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
821 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
827 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
829 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
834 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
837 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
840 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
843 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
844 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
845 {NULL
,NULL
,0,0,NULL
,0,0,0}
848 /*============================ Utility functions ============================ */
850 /* Glob-style pattern matching. */
851 static int stringmatchlen(const char *pattern
, int patternLen
,
852 const char *string
, int stringLen
, int nocase
)
857 while (pattern
[1] == '*') {
862 return 1; /* match */
864 if (stringmatchlen(pattern
+1, patternLen
-1,
865 string
, stringLen
, nocase
))
866 return 1; /* match */
870 return 0; /* no match */
874 return 0; /* no match */
884 not = pattern
[0] == '^';
891 if (pattern
[0] == '\\') {
894 if (pattern
[0] == string
[0])
896 } else if (pattern
[0] == ']') {
898 } else if (patternLen
== 0) {
902 } else if (pattern
[1] == '-' && patternLen
>= 3) {
903 int start
= pattern
[0];
904 int end
= pattern
[2];
912 start
= tolower(start
);
918 if (c
>= start
&& c
<= end
)
922 if (pattern
[0] == string
[0])
925 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
935 return 0; /* no match */
941 if (patternLen
>= 2) {
948 if (pattern
[0] != string
[0])
949 return 0; /* no match */
951 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
952 return 0; /* no match */
960 if (stringLen
== 0) {
961 while(*pattern
== '*') {
968 if (patternLen
== 0 && stringLen
== 0)
973 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
974 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
977 /* Convert a string representing an amount of memory into the number of
978 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
981 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
983 static long long memtoll(const char *p
, int *err
) {
986 long mul
; /* unit multiplier */
991 /* Search the first non digit character. */
994 while(*u
&& isdigit(*u
)) u
++;
995 if (*u
== '\0' || !strcasecmp(u
,"b")) {
997 } else if (!strcasecmp(u
,"k")) {
999 } else if (!strcasecmp(u
,"kb")) {
1001 } else if (!strcasecmp(u
,"m")) {
1003 } else if (!strcasecmp(u
,"mb")) {
1005 } else if (!strcasecmp(u
,"g")) {
1006 mul
= 1000L*1000*1000;
1007 } else if (!strcasecmp(u
,"gb")) {
1008 mul
= 1024L*1024*1024;
1014 if (digits
>= sizeof(buf
)) {
1018 memcpy(buf
,p
,digits
);
1020 val
= strtoll(buf
,NULL
,10);
1024 static void redisLog(int level
, const char *fmt
, ...) {
1028 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1032 if (level
>= server
.verbosity
) {
1038 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1039 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1040 vfprintf(fp
, fmt
, ap
);
1046 if (server
.logfile
) fclose(fp
);
1049 /*====================== Hash table type implementation ==================== */
1051 /* This is a hash table type that uses the SDS dynamic strings library as
1052 * keys and redis objects as values (objects can hold SDS strings,
1055 static void dictVanillaFree(void *privdata
, void *val
)
1057 DICT_NOTUSED(privdata
);
1061 static void dictListDestructor(void *privdata
, void *val
)
1063 DICT_NOTUSED(privdata
);
1064 listRelease((list
*)val
);
1067 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1071 DICT_NOTUSED(privdata
);
1073 l1
= sdslen((sds
)key1
);
1074 l2
= sdslen((sds
)key2
);
1075 if (l1
!= l2
) return 0;
1076 return memcmp(key1
, key2
, l1
) == 0;
1079 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1081 DICT_NOTUSED(privdata
);
1083 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1087 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1090 const robj
*o1
= key1
, *o2
= key2
;
1091 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1094 static unsigned int dictObjHash(const void *key
) {
1095 const robj
*o
= key
;
1096 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1099 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1102 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1105 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1106 o2
->encoding
== REDIS_ENCODING_INT
&&
1107 o1
->ptr
== o2
->ptr
) return 1;
1109 o1
= getDecodedObject(o1
);
1110 o2
= getDecodedObject(o2
);
1111 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1117 static unsigned int dictEncObjHash(const void *key
) {
1118 robj
*o
= (robj
*) key
;
1120 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1121 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1123 if (o
->encoding
== REDIS_ENCODING_INT
) {
1127 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1128 return dictGenHashFunction((unsigned char*)buf
, len
);
1132 o
= getDecodedObject(o
);
1133 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1140 /* Sets type and expires */
1141 static dictType setDictType
= {
1142 dictEncObjHash
, /* hash function */
1145 dictEncObjKeyCompare
, /* key compare */
1146 dictRedisObjectDestructor
, /* key destructor */
1147 NULL
/* val destructor */
1150 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1151 static dictType zsetDictType
= {
1152 dictEncObjHash
, /* hash function */
1155 dictEncObjKeyCompare
, /* key compare */
1156 dictRedisObjectDestructor
, /* key destructor */
1157 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1161 static dictType dbDictType
= {
1162 dictObjHash
, /* hash function */
1165 dictObjKeyCompare
, /* key compare */
1166 dictRedisObjectDestructor
, /* key destructor */
1167 dictRedisObjectDestructor
/* val destructor */
1171 static dictType keyptrDictType
= {
1172 dictObjHash
, /* hash function */
1175 dictObjKeyCompare
, /* key compare */
1176 dictRedisObjectDestructor
, /* key destructor */
1177 NULL
/* val destructor */
1180 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1181 static dictType hashDictType
= {
1182 dictEncObjHash
, /* hash function */
1185 dictEncObjKeyCompare
, /* key compare */
1186 dictRedisObjectDestructor
, /* key destructor */
1187 dictRedisObjectDestructor
/* val destructor */
1190 /* Keylist hash table type has unencoded redis objects as keys and
1191 * lists as values. It's used for blocking operations (BLPOP) and to
1192 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1193 static dictType keylistDictType
= {
1194 dictObjHash
, /* hash function */
1197 dictObjKeyCompare
, /* key compare */
1198 dictRedisObjectDestructor
, /* key destructor */
1199 dictListDestructor
/* val destructor */
1202 static void version();
1204 /* ========================= Random utility functions ======================= */
1206 /* Redis generally does not try to recover from out of memory conditions
1207 * when allocating objects or strings, it is not clear if it will be possible
1208 * to report this condition to the client since the networking layer itself
1209 * is based on heap allocation for send buffers, so we simply abort.
1210 * At least the code will be simpler to read... */
1211 static void oom(const char *msg
) {
1212 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1217 /* ====================== Redis server networking stuff ===================== */
1218 static void closeTimedoutClients(void) {
1221 time_t now
= time(NULL
);
1224 listRewind(server
.clients
,&li
);
1225 while ((ln
= listNext(&li
)) != NULL
) {
1226 c
= listNodeValue(ln
);
1227 if (server
.maxidletime
&&
1228 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1229 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1230 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1231 listLength(c
->pubsub_patterns
) == 0 &&
1232 (now
- c
->lastinteraction
> server
.maxidletime
))
1234 redisLog(REDIS_VERBOSE
,"Closing idle client");
1236 } else if (c
->flags
& REDIS_BLOCKED
) {
1237 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1238 addReply(c
,shared
.nullmultibulk
);
1239 unblockClientWaitingData(c
);
1245 static int htNeedsResize(dict
*dict
) {
1246 long long size
, used
;
1248 size
= dictSlots(dict
);
1249 used
= dictSize(dict
);
1250 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1251 (used
*100/size
< REDIS_HT_MINFILL
));
1254 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1255 * we resize the hash table to save memory */
1256 static void tryResizeHashTables(void) {
1259 for (j
= 0; j
< server
.dbnum
; j
++) {
1260 if (htNeedsResize(server
.db
[j
].dict
))
1261 dictResize(server
.db
[j
].dict
);
1262 if (htNeedsResize(server
.db
[j
].expires
))
1263 dictResize(server
.db
[j
].expires
);
1267 /* Our hash table implementation performs rehashing incrementally while
1268 * we write/read from the hash table. Still if the server is idle, the hash
1269 * table will use two tables for a long time. So we try to use 1 millisecond
1270 * of CPU time at every serverCron() loop in order to rehash some key. */
1271 static void incrementallyRehash(void) {
1274 for (j
= 0; j
< server
.dbnum
; j
++) {
1275 if (dictIsRehashing(server
.db
[j
].dict
)) {
1276 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1277 break; /* already used our millisecond for this loop... */
1282 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1283 void backgroundSaveDoneHandler(int statloc
) {
1284 int exitcode
= WEXITSTATUS(statloc
);
1285 int bysignal
= WIFSIGNALED(statloc
);
1287 if (!bysignal
&& exitcode
== 0) {
1288 redisLog(REDIS_NOTICE
,
1289 "Background saving terminated with success");
1291 server
.lastsave
= time(NULL
);
1292 } else if (!bysignal
&& exitcode
!= 0) {
1293 redisLog(REDIS_WARNING
, "Background saving error");
1295 redisLog(REDIS_WARNING
,
1296 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1297 rdbRemoveTempFile(server
.bgsavechildpid
);
1299 server
.bgsavechildpid
= -1;
1300 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1301 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1302 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1305 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1307 void backgroundRewriteDoneHandler(int statloc
) {
1308 int exitcode
= WEXITSTATUS(statloc
);
1309 int bysignal
= WIFSIGNALED(statloc
);
1311 if (!bysignal
&& exitcode
== 0) {
1315 redisLog(REDIS_NOTICE
,
1316 "Background append only file rewriting terminated with success");
1317 /* Now it's time to flush the differences accumulated by the parent */
1318 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1319 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1321 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1324 /* Flush our data... */
1325 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1326 (signed) sdslen(server
.bgrewritebuf
)) {
1327 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1331 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1332 /* Now our work is to rename the temp file into the stable file. And
1333 * switch the file descriptor used by the server for append only. */
1334 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1335 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1339 /* Mission completed... almost */
1340 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1341 if (server
.appendfd
!= -1) {
1342 /* If append only is actually enabled... */
1343 close(server
.appendfd
);
1344 server
.appendfd
= fd
;
1346 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1347 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1349 /* If append only is disabled we just generate a dump in this
1350 * format. Why not? */
1353 } else if (!bysignal
&& exitcode
!= 0) {
1354 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1356 redisLog(REDIS_WARNING
,
1357 "Background append only file rewriting terminated by signal %d",
1361 sdsfree(server
.bgrewritebuf
);
1362 server
.bgrewritebuf
= sdsempty();
1363 aofRemoveTempFile(server
.bgrewritechildpid
);
1364 server
.bgrewritechildpid
= -1;
1367 /* This function is called once a background process of some kind terminates,
1368 * as we want to avoid resizing the hash tables when there is a child in order
1369 * to play well with copy-on-write (otherwise when a resize happens lots of
1370 * memory pages are copied). The goal of this function is to update the ability
1371 * for dict.c to resize the hash tables according to whether or not we have
1372 * running children. */
1373 static void updateDictResizePolicy(void) {
1374 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1377 dictDisableResize();
1380 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1381 int j
, loops
= server
.cronloops
++;
1382 REDIS_NOTUSED(eventLoop
);
1384 REDIS_NOTUSED(clientData
);
1386 /* We take a cached value of the unix time in the global state because
1387 * with virtual memory and aging there is the need to store the current time
1388 * in objects at every object access, and accuracy is not needed.
1389 * To access a global var is faster than calling time(NULL) */
1390 server
.unixtime
= time(NULL
);
1392 /* Show some info about non-empty databases */
1393 for (j
= 0; j
< server
.dbnum
; j
++) {
1394 long long size
, used
, vkeys
;
1396 size
= dictSlots(server
.db
[j
].dict
);
1397 used
= dictSize(server
.db
[j
].dict
);
1398 vkeys
= dictSize(server
.db
[j
].expires
);
1399 if (!(loops
% 50) && (used
|| vkeys
)) {
1400 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1401 /* dictPrintStats(server.dict); */
1405 /* We don't want to resize the hash tables while a background saving
1406 * is in progress: the saving child is created using fork() that is
1407 * implemented with a copy-on-write semantic in most modern systems, so
1408 * if we resize the HT while there is the saving child at work actually
1409 * a lot of memory movements in the parent will cause a lot of pages
1411 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1412 if (!(loops
% 10)) tryResizeHashTables();
1413 if (server
.activerehashing
) incrementallyRehash();
1416 /* Show information about connected clients */
1417 if (!(loops
% 50)) {
1418 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1419 listLength(server
.clients
)-listLength(server
.slaves
),
1420 listLength(server
.slaves
),
1421 zmalloc_used_memory());
1424 /* Close connections of timedout clients */
1425 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1426 closeTimedoutClients();
1428 /* Check if a background saving or AOF rewrite in progress terminated */
1429 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1433 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1434 if (pid
== server
.bgsavechildpid
) {
1435 backgroundSaveDoneHandler(statloc
);
1437 backgroundRewriteDoneHandler(statloc
);
1439 updateDictResizePolicy();
1442 /* If there is not a background saving in progress check if
1443 * we have to save now */
1444 time_t now
= time(NULL
);
1445 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1446 struct saveparam
*sp
= server
.saveparams
+j
;
1448 if (server
.dirty
>= sp
->changes
&&
1449 now
-server
.lastsave
> sp
->seconds
) {
1450 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1451 sp
->changes
, sp
->seconds
);
1452 rdbSaveBackground(server
.dbfilename
);
1458 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1459 * will use few CPU cycles if there are few expiring keys, otherwise
1460 * it will get more aggressive to avoid that too much memory is used by
1461 * keys that can be removed from the keyspace. */
1462 for (j
= 0; j
< server
.dbnum
; j
++) {
1464 redisDb
*db
= server
.db
+j
;
1466 /* Continue to expire if at the end of the cycle more than 25%
1467 * of the keys were expired. */
1469 long num
= dictSize(db
->expires
);
1470 time_t now
= time(NULL
);
1473 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1474 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1479 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1480 t
= (time_t) dictGetEntryVal(de
);
1482 deleteKey(db
,dictGetEntryKey(de
));
1484 server
.stat_expiredkeys
++;
1487 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1490 /* Swap a few keys on disk if we are over the memory limit and VM
1491 * is enabled. Try to free objects from the free list first. */
1492 if (vmCanSwapOut()) {
1493 while (server
.vm_enabled
&& zmalloc_used_memory() >
1494 server
.vm_max_memory
)
1498 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1499 retval
= (server
.vm_max_threads
== 0) ?
1500 vmSwapOneObjectBlocking() :
1501 vmSwapOneObjectThreaded();
1502 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1503 zmalloc_used_memory() >
1504 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1506 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1508 /* Note that when using threaded I/O we free just one object,
1509 * because anyway when the I/O thread in charge to swap this
1510 * object out will finish, the handler of completed jobs
1511 * will try to swap more objects if we are still out of memory. */
1512 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1516 /* Check if we should connect to a MASTER */
1517 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1518 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1519 if (syncWithMaster() == REDIS_OK
) {
1520 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1521 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1527 /* This function gets called every time Redis is entering the
1528 * main loop of the event driven library, that is, before sleeping
1529 * for ready file descriptors. */
1530 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1531 REDIS_NOTUSED(eventLoop
);
1533 /* Awake clients that got all the swapped keys they requested */
1534 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1538 listRewind(server
.io_ready_clients
,&li
);
1539 while((ln
= listNext(&li
))) {
1540 redisClient
*c
= ln
->value
;
1541 struct redisCommand
*cmd
;
1543 /* Resume the client. */
1544 listDelNode(server
.io_ready_clients
,ln
);
1545 c
->flags
&= (~REDIS_IO_WAIT
);
1546 server
.vm_blocked_clients
--;
1547 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1548 readQueryFromClient
, c
);
1549 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1550 assert(cmd
!= NULL
);
1553 /* There may be more data to process in the input buffer. */
1554 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1555 processInputBuffer(c
);
1558 /* Write the AOF buffer on disk */
1559 flushAppendOnlyFile();
1562 static void createSharedObjects(void) {
1565 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1566 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1567 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1568 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1569 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1570 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1571 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1572 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1573 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1574 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1575 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1576 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1577 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1578 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1579 "-ERR no such key\r\n"));
1580 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1581 "-ERR syntax error\r\n"));
1582 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1583 "-ERR source and destination objects are the same\r\n"));
1584 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1585 "-ERR index out of range\r\n"));
1586 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1587 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1588 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1589 shared
.select0
= createStringObject("select 0\r\n",10);
1590 shared
.select1
= createStringObject("select 1\r\n",10);
1591 shared
.select2
= createStringObject("select 2\r\n",10);
1592 shared
.select3
= createStringObject("select 3\r\n",10);
1593 shared
.select4
= createStringObject("select 4\r\n",10);
1594 shared
.select5
= createStringObject("select 5\r\n",10);
1595 shared
.select6
= createStringObject("select 6\r\n",10);
1596 shared
.select7
= createStringObject("select 7\r\n",10);
1597 shared
.select8
= createStringObject("select 8\r\n",10);
1598 shared
.select9
= createStringObject("select 9\r\n",10);
1599 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1600 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1601 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1602 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1603 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1604 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1605 shared
.mbulk3
= createStringObject("*3\r\n",4);
1606 shared
.mbulk4
= createStringObject("*4\r\n",4);
1607 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1608 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1609 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1613 static void appendServerSaveParams(time_t seconds
, int changes
) {
1614 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1615 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1616 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1617 server
.saveparamslen
++;
1620 static void resetServerSaveParams() {
1621 zfree(server
.saveparams
);
1622 server
.saveparams
= NULL
;
1623 server
.saveparamslen
= 0;
1626 static void initServerConfig() {
1627 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1628 server
.port
= REDIS_SERVERPORT
;
1629 server
.verbosity
= REDIS_VERBOSE
;
1630 server
.maxidletime
= REDIS_MAXIDLETIME
;
1631 server
.saveparams
= NULL
;
1632 server
.logfile
= NULL
; /* NULL = log on standard output */
1633 server
.bindaddr
= NULL
;
1634 server
.glueoutputbuf
= 1;
1635 server
.daemonize
= 0;
1636 server
.appendonly
= 0;
1637 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1638 server
.lastfsync
= time(NULL
);
1639 server
.appendfd
= -1;
1640 server
.appendseldb
= -1; /* Make sure the first time will not match */
1641 server
.pidfile
= zstrdup("/var/run/redis.pid");
1642 server
.dbfilename
= zstrdup("dump.rdb");
1643 server
.appendfilename
= zstrdup("appendonly.aof");
1644 server
.requirepass
= NULL
;
1645 server
.rdbcompression
= 1;
1646 server
.activerehashing
= 1;
1647 server
.maxclients
= 0;
1648 server
.blpop_blocked_clients
= 0;
1649 server
.maxmemory
= 0;
1650 server
.vm_enabled
= 0;
1651 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1652 server
.vm_page_size
= 256; /* 256 bytes per page */
1653 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1654 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1655 server
.vm_max_threads
= 4;
1656 server
.vm_blocked_clients
= 0;
1657 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1658 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1660 resetServerSaveParams();
1662 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1663 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1664 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1665 /* Replication related */
1667 server
.masterauth
= NULL
;
1668 server
.masterhost
= NULL
;
1669 server
.masterport
= 6379;
1670 server
.master
= NULL
;
1671 server
.replstate
= REDIS_REPL_NONE
;
1673 /* Double constants initialization */
1675 R_PosInf
= 1.0/R_Zero
;
1676 R_NegInf
= -1.0/R_Zero
;
1677 R_Nan
= R_Zero
/R_Zero
;
1680 static void initServer() {
1683 signal(SIGHUP
, SIG_IGN
);
1684 signal(SIGPIPE
, SIG_IGN
);
1685 setupSigSegvAction();
1687 server
.devnull
= fopen("/dev/null","w");
1688 if (server
.devnull
== NULL
) {
1689 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1692 server
.clients
= listCreate();
1693 server
.slaves
= listCreate();
1694 server
.monitors
= listCreate();
1695 server
.objfreelist
= listCreate();
1696 createSharedObjects();
1697 server
.el
= aeCreateEventLoop();
1698 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1699 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1700 if (server
.fd
== -1) {
1701 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1704 for (j
= 0; j
< server
.dbnum
; j
++) {
1705 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1706 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1707 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1708 if (server
.vm_enabled
)
1709 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1710 server
.db
[j
].id
= j
;
1712 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1713 server
.pubsub_patterns
= listCreate();
1714 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1715 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1716 server
.cronloops
= 0;
1717 server
.bgsavechildpid
= -1;
1718 server
.bgrewritechildpid
= -1;
1719 server
.bgrewritebuf
= sdsempty();
1720 server
.aofbuf
= sdsempty();
1721 server
.lastsave
= time(NULL
);
1723 server
.stat_numcommands
= 0;
1724 server
.stat_numconnections
= 0;
1725 server
.stat_expiredkeys
= 0;
1726 server
.stat_starttime
= time(NULL
);
1727 server
.unixtime
= time(NULL
);
1728 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1729 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1730 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1732 if (server
.appendonly
) {
1733 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1734 if (server
.appendfd
== -1) {
1735 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1741 if (server
.vm_enabled
) vmInit();
1744 /* Empty the whole database */
1745 static long long emptyDb() {
1747 long long removed
= 0;
1749 for (j
= 0; j
< server
.dbnum
; j
++) {
1750 removed
+= dictSize(server
.db
[j
].dict
);
1751 dictEmpty(server
.db
[j
].dict
);
1752 dictEmpty(server
.db
[j
].expires
);
1757 static int yesnotoi(char *s
) {
1758 if (!strcasecmp(s
,"yes")) return 1;
1759 else if (!strcasecmp(s
,"no")) return 0;
1763 /* I agree, this is a very rudimentary way to load a configuration...
1764 will improve later if the config gets more complex */
1765 static void loadServerConfig(char *filename
) {
1767 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1771 if (filename
[0] == '-' && filename
[1] == '\0')
1774 if ((fp
= fopen(filename
,"r")) == NULL
) {
1775 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1780 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1786 line
= sdstrim(line
," \t\r\n");
1788 /* Skip comments and blank lines*/
1789 if (line
[0] == '#' || line
[0] == '\0') {
1794 /* Split into arguments */
1795 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1796 sdstolower(argv
[0]);
1798 /* Execute config directives */
1799 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1800 server
.maxidletime
= atoi(argv
[1]);
1801 if (server
.maxidletime
< 0) {
1802 err
= "Invalid timeout value"; goto loaderr
;
1804 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1805 server
.port
= atoi(argv
[1]);
1806 if (server
.port
< 1 || server
.port
> 65535) {
1807 err
= "Invalid port"; goto loaderr
;
1809 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1810 server
.bindaddr
= zstrdup(argv
[1]);
1811 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1812 int seconds
= atoi(argv
[1]);
1813 int changes
= atoi(argv
[2]);
1814 if (seconds
< 1 || changes
< 0) {
1815 err
= "Invalid save parameters"; goto loaderr
;
1817 appendServerSaveParams(seconds
,changes
);
1818 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1819 if (chdir(argv
[1]) == -1) {
1820 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1821 argv
[1], strerror(errno
));
1824 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1825 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1826 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1827 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1828 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1830 err
= "Invalid log level. Must be one of debug, notice, warning";
1833 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1836 server
.logfile
= zstrdup(argv
[1]);
1837 if (!strcasecmp(server
.logfile
,"stdout")) {
1838 zfree(server
.logfile
);
1839 server
.logfile
= NULL
;
1841 if (server
.logfile
) {
1842 /* Test if we are able to open the file. The server will not
1843 * be able to abort just for this problem later... */
1844 logfp
= fopen(server
.logfile
,"a");
1845 if (logfp
== NULL
) {
1846 err
= sdscatprintf(sdsempty(),
1847 "Can't open the log file: %s", strerror(errno
));
1852 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1853 server
.dbnum
= atoi(argv
[1]);
1854 if (server
.dbnum
< 1) {
1855 err
= "Invalid number of databases"; goto loaderr
;
1857 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1858 loadServerConfig(argv
[1]);
1859 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1860 server
.maxclients
= atoi(argv
[1]);
1861 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1862 server
.maxmemory
= memtoll(argv
[1],NULL
);
1863 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1864 server
.masterhost
= sdsnew(argv
[1]);
1865 server
.masterport
= atoi(argv
[2]);
1866 server
.replstate
= REDIS_REPL_CONNECT
;
1867 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1868 server
.masterauth
= zstrdup(argv
[1]);
1869 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1870 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1871 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1873 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1874 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1875 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1877 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1878 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1879 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1881 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1882 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1883 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1885 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1886 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1887 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1889 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1890 zfree(server
.appendfilename
);
1891 server
.appendfilename
= zstrdup(argv
[1]);
1892 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1893 if (!strcasecmp(argv
[1],"no")) {
1894 server
.appendfsync
= APPENDFSYNC_NO
;
1895 } else if (!strcasecmp(argv
[1],"always")) {
1896 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1897 } else if (!strcasecmp(argv
[1],"everysec")) {
1898 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1900 err
= "argument must be 'no', 'always' or 'everysec'";
1903 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1904 server
.requirepass
= zstrdup(argv
[1]);
1905 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1906 zfree(server
.pidfile
);
1907 server
.pidfile
= zstrdup(argv
[1]);
1908 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1909 zfree(server
.dbfilename
);
1910 server
.dbfilename
= zstrdup(argv
[1]);
1911 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1912 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1913 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1915 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1916 zfree(server
.vm_swap_file
);
1917 server
.vm_swap_file
= zstrdup(argv
[1]);
1918 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1919 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1920 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1921 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1922 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1923 server
.vm_pages
= memtoll(argv
[1], NULL
);
1924 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1925 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1926 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1927 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1928 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1929 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1931 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1933 for (j
= 0; j
< argc
; j
++)
1938 if (fp
!= stdin
) fclose(fp
);
1942 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1943 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1944 fprintf(stderr
, ">>> '%s'\n", line
);
1945 fprintf(stderr
, "%s\n", err
);
1949 static void freeClientArgv(redisClient
*c
) {
1952 for (j
= 0; j
< c
->argc
; j
++)
1953 decrRefCount(c
->argv
[j
]);
1954 for (j
= 0; j
< c
->mbargc
; j
++)
1955 decrRefCount(c
->mbargv
[j
]);
1960 static void freeClient(redisClient
*c
) {
1963 /* Note that if the client we are freeing is blocked in a blocking
1964 * call, we have to set querybuf to NULL *before* calling
1965 * unblockClientWaitingData() so that processInputBuffer() does not get
1966 * called. Also it is important to remove the file events after
1967 * this, because this call adds the READABLE event. */
1968 sdsfree(c
->querybuf
);
1970 if (c
->flags
& REDIS_BLOCKED
)
1971 unblockClientWaitingData(c
);
1973 /* Unsubscribe from all the pubsub channels */
1974 pubsubUnsubscribeAllChannels(c
,0);
1975 pubsubUnsubscribeAllPatterns(c
,0);
1976 dictRelease(c
->pubsub_channels
);
1977 listRelease(c
->pubsub_patterns
);
1978 /* Obvious cleanup */
1979 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1980 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1981 listRelease(c
->reply
);
1984 /* Remove from the list of clients */
1985 ln
= listSearchKey(server
.clients
,c
);
1986 redisAssert(ln
!= NULL
);
1987 listDelNode(server
.clients
,ln
);
1988 /* Remove from the list of clients waiting for swapped keys */
1989 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1990 ln
= listSearchKey(server
.io_ready_clients
,c
);
1992 listDelNode(server
.io_ready_clients
,ln
);
1993 server
.vm_blocked_clients
--;
1996 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1997 ln
= listFirst(c
->io_keys
);
1998 dontWaitForSwappedKey(c
,ln
->value
);
2000 listRelease(c
->io_keys
);
2001 /* Master/slave cleanup */
2002 if (c
->flags
& REDIS_SLAVE
) {
2003 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2005 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2006 ln
= listSearchKey(l
,c
);
2007 redisAssert(ln
!= NULL
);
2010 if (c
->flags
& REDIS_MASTER
) {
2011 server
.master
= NULL
;
2012 server
.replstate
= REDIS_REPL_CONNECT
;
2014 /* Release memory */
2017 freeClientMultiState(c
);
2021 #define GLUEREPLY_UP_TO (1024)
2022 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2024 char buf
[GLUEREPLY_UP_TO
];
2029 listRewind(c
->reply
,&li
);
2030 while((ln
= listNext(&li
))) {
2034 objlen
= sdslen(o
->ptr
);
2035 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2036 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2038 listDelNode(c
->reply
,ln
);
2040 if (copylen
== 0) return;
2044 /* Now the output buffer is empty, add the new single element */
2045 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2046 listAddNodeHead(c
->reply
,o
);
2049 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2050 redisClient
*c
= privdata
;
2051 int nwritten
= 0, totwritten
= 0, objlen
;
2054 REDIS_NOTUSED(mask
);
2056 /* Use writev() if we have enough buffers to send */
2057 if (!server
.glueoutputbuf
&&
2058 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2059 !(c
->flags
& REDIS_MASTER
))
2061 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2065 while(listLength(c
->reply
)) {
2066 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2067 glueReplyBuffersIfNeeded(c
);
2069 o
= listNodeValue(listFirst(c
->reply
));
2070 objlen
= sdslen(o
->ptr
);
2073 listDelNode(c
->reply
,listFirst(c
->reply
));
2077 if (c
->flags
& REDIS_MASTER
) {
2078 /* Don't reply to a master */
2079 nwritten
= objlen
- c
->sentlen
;
2081 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2082 if (nwritten
<= 0) break;
2084 c
->sentlen
+= nwritten
;
2085 totwritten
+= nwritten
;
2086 /* If we fully sent the object on head go to the next one */
2087 if (c
->sentlen
== objlen
) {
2088 listDelNode(c
->reply
,listFirst(c
->reply
));
2091 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2092 * bytes: in a single-threaded server it's a good idea to serve
2093 * other clients as well, even if a very large request comes from a
2094 * super fast link that is always able to accept data (in a real-world
2095 * scenario think about 'KEYS *' against the loopback interface) */
2096 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2098 if (nwritten
== -1) {
2099 if (errno
== EAGAIN
) {
2102 redisLog(REDIS_VERBOSE
,
2103 "Error writing to client: %s", strerror(errno
));
2108 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2109 if (listLength(c
->reply
) == 0) {
2111 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2115 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2117 redisClient
*c
= privdata
;
2118 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2120 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2121 int offset
, ion
= 0;
2123 REDIS_NOTUSED(mask
);
2126 while (listLength(c
->reply
)) {
2127 offset
= c
->sentlen
;
2131 /* fill-in the iov[] array */
2132 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2133 o
= listNodeValue(node
);
2134 objlen
= sdslen(o
->ptr
);
2136 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2139 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2140 break; /* no more iovecs */
2142 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2143 iov
[ion
].iov_len
= objlen
- offset
;
2144 willwrite
+= objlen
- offset
;
2145 offset
= 0; /* just for the first item */
2152 /* write all collected blocks at once */
2153 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2154 if (errno
!= EAGAIN
) {
2155 redisLog(REDIS_VERBOSE
,
2156 "Error writing to client: %s", strerror(errno
));
2163 totwritten
+= nwritten
;
2164 offset
= c
->sentlen
;
2166 /* remove written robjs from c->reply */
2167 while (nwritten
&& listLength(c
->reply
)) {
2168 o
= listNodeValue(listFirst(c
->reply
));
2169 objlen
= sdslen(o
->ptr
);
2171 if(nwritten
>= objlen
- offset
) {
2172 listDelNode(c
->reply
, listFirst(c
->reply
));
2173 nwritten
-= objlen
- offset
;
2177 c
->sentlen
+= nwritten
;
2185 c
->lastinteraction
= time(NULL
);
2187 if (listLength(c
->reply
) == 0) {
2189 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2193 static struct redisCommand
*lookupCommand(char *name
) {
2195 while(cmdTable
[j
].name
!= NULL
) {
2196 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2202 /* resetClient prepare the client to process the next command */
2203 static void resetClient(redisClient
*c
) {
2209 /* Call() is the core of Redis execution of a command */
2210 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2213 dirty
= server
.dirty
;
2215 dirty
= server
.dirty
-dirty
;
2217 if (server
.appendonly
&& dirty
)
2218 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2219 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2220 listLength(server
.slaves
))
2221 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2222 if (listLength(server
.monitors
))
2223 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2224 server
.stat_numcommands
++;
2227 /* If this function gets called we already read a whole
2228 * command, arguments are in the client argv/argc fields.
2229 * processCommand() executes the command or prepares the
2230 * server for a bulk read from the client.
2232 * If 1 is returned the client is still alive and valid and
2233 * other operations can be performed by the caller. Otherwise
2234 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2235 static int processCommand(redisClient
*c
) {
2236 struct redisCommand
*cmd
;
2238 /* Free some memory if needed (maxmemory setting) */
2239 if (server
.maxmemory
) freeMemoryIfNeeded();
2241 /* Handle the multi bulk command type. This is an alternative protocol
2242 * supported by Redis in order to receive commands that are composed of
2243 * multiple binary-safe "bulk" arguments. The latency of processing is
2244 * a bit higher but this allows things like multi-sets, so if this
2245 * protocol is used only for MSET and similar commands this is a big win. */
2246 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2247 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2248 if (c
->multibulk
<= 0) {
2252 decrRefCount(c
->argv
[c
->argc
-1]);
2256 } else if (c
->multibulk
) {
2257 if (c
->bulklen
== -1) {
2258 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2259 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2263 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2264 decrRefCount(c
->argv
[0]);
2265 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2267 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2272 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2276 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2277 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2281 if (c
->multibulk
== 0) {
2285 /* Here we need to swap the multi-bulk argc/argv with the
2286 * normal argc/argv of the client structure. */
2288 c
->argv
= c
->mbargv
;
2289 c
->mbargv
= auxargv
;
2292 c
->argc
= c
->mbargc
;
2293 c
->mbargc
= auxargc
;
2295 /* We need to set bulklen to something different than -1
2296 * in order for the code below to process the command without
2297 * trying to read the last argument of a bulk command as
2298 * a special argument. */
2300 /* continue below and process the command */
2307 /* -- end of multi bulk commands processing -- */
2309 /* The QUIT command is handled as a special case. Normal command
2310 * procs are unable to close the client connection safely */
2311 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2316 /* Now lookup the command and check ASAP about trivial error conditions
2317 * such wrong arity, bad command name and so forth. */
2318 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2321 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2322 (char*)c
->argv
[0]->ptr
));
2325 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2326 (c
->argc
< -cmd
->arity
)) {
2328 sdscatprintf(sdsempty(),
2329 "-ERR wrong number of arguments for '%s' command\r\n",
2333 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2334 /* This is a bulk command, we have to read the last argument yet. */
2335 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2337 decrRefCount(c
->argv
[c
->argc
-1]);
2338 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2340 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2345 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2346 /* It is possible that the bulk read is already in the
2347 * buffer. Check this condition and handle it accordingly.
2348 * This is just a fast path, alternative to call processInputBuffer().
2349 * It's a good idea since the code is small and this condition
2350 * happens most of the times. */
2351 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2352 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2354 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2356 /* Otherwise return... there is to read the last argument
2357 * from the socket. */
2361 /* Let's try to encode the bulk object to save space. */
2362 if (cmd
->flags
& REDIS_CMD_BULK
)
2363 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2365 /* Check if the user is authenticated */
2366 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2367 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2372 /* Handle the maxmemory directive */
2373 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2374 zmalloc_used_memory() > server
.maxmemory
)
2376 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2381 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2382 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2384 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2385 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2386 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2391 /* Exec the command */
2392 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2393 queueMultiCommand(c
,cmd
);
2394 addReply(c
,shared
.queued
);
2396 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2397 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2401 /* Prepare the client for the next command */
2406 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2411 /* We need 1+(ARGS*3) objects since commands are using the new protocol:
2412 * we need 1 object for the first "*<count>\r\n" multibulk count, then
2413 * for every argument we have "$<count>\r\n" + object + "\r\n". */
2414 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2417 if (argc
<= REDIS_STATIC_ARGS
) {
2420 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2423 lenobj
= createObject(REDIS_STRING
,
2424 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2425 lenobj
->refcount
= 0;
2426 outv
[outc
++] = lenobj
;
2427 for (j
= 0; j
< argc
; j
++) {
2428 lenobj
= createObject(REDIS_STRING
,
2429 sdscatprintf(sdsempty(),"$%lu\r\n",
2430 (unsigned long) stringObjectLen(argv
[j
])));
2431 lenobj
->refcount
= 0;
2432 outv
[outc
++] = lenobj
;
2433 outv
[outc
++] = argv
[j
];
2434 outv
[outc
++] = shared
.crlf
;
2437 /* Increment all the refcounts at start and decrement at end in order to
2438 * be sure to free objects if there is no slave in a replication state
2439 * able to be fed with commands */
2440 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2441 listRewind(slaves
,&li
);
2442 while((ln
= listNext(&li
))) {
2443 redisClient
*slave
= ln
->value
;
2445 /* Don't feed slaves that are still waiting for BGSAVE to start */
2446 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2448 /* Feed all the other slaves, MONITORs and so on */
2449 if (slave
->slaveseldb
!= dictid
) {
2453 case 0: selectcmd
= shared
.select0
; break;
2454 case 1: selectcmd
= shared
.select1
; break;
2455 case 2: selectcmd
= shared
.select2
; break;
2456 case 3: selectcmd
= shared
.select3
; break;
2457 case 4: selectcmd
= shared
.select4
; break;
2458 case 5: selectcmd
= shared
.select5
; break;
2459 case 6: selectcmd
= shared
.select6
; break;
2460 case 7: selectcmd
= shared
.select7
; break;
2461 case 8: selectcmd
= shared
.select8
; break;
2462 case 9: selectcmd
= shared
.select9
; break;
2464 selectcmd
= createObject(REDIS_STRING
,
2465 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2466 selectcmd
->refcount
= 0;
2469 addReply(slave
,selectcmd
);
2470 slave
->slaveseldb
= dictid
;
2472 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2474 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2475 if (outv
!= static_outv
) zfree(outv
);
2478 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2479 s
= sdscatlen(s
,"\"",1);
2484 s
= sdscatprintf(s
,"\\%c",*p
);
2486 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2487 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2488 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2489 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2490 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2493 s
= sdscatprintf(s
,"%c",*p
);
2495 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2500 return sdscatlen(s
,"\"",1);
2503 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2507 sds cmdrepr
= sdsnew("+");
2511 gettimeofday(&tv
,NULL
);
2512 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2513 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2515 for (j
= 0; j
< argc
; j
++) {
2516 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2517 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2519 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2520 sdslen(argv
[j
]->ptr
));
2523 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2525 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2526 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2528 listRewind(monitors
,&li
);
2529 while((ln
= listNext(&li
))) {
2530 redisClient
*monitor
= ln
->value
;
2531 addReply(monitor
,cmdobj
);
2533 decrRefCount(cmdobj
);
2536 static void processInputBuffer(redisClient
*c
) {
2538 /* Before processing the input buffer, make sure the client is not
2539 * waiting for a blocking operation such as BLPOP. Note that on the first
2540 * iteration the client is never blocked, otherwise processInputBuffer()
2541 * would not be called at all, but after the execution of the first commands
2542 * in the input buffer the client may be blocked, and the "goto again"
2543 * will try to reiterate. The following line will make it return asap. */
2544 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2545 if (c
->bulklen
== -1) {
2546 /* Read the first line of the query */
2547 char *p
= strchr(c
->querybuf
,'\n');
2554 query
= c
->querybuf
;
2555 c
->querybuf
= sdsempty();
2556 querylen
= 1+(p
-(query
));
2557 if (sdslen(query
) > querylen
) {
2558 /* leave data after the first line of the query in the buffer */
2559 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2561 *p
= '\0'; /* remove "\n" */
2562 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2563 sdsupdatelen(query
);
2565 /* Now we can split the query in arguments */
2566 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2569 if (c
->argv
) zfree(c
->argv
);
2570 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2572 for (j
= 0; j
< argc
; j
++) {
2573 if (sdslen(argv
[j
])) {
2574 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2582 /* Execute the command. If the client is still valid
2583 * after processCommand() return and there is something
2584 * on the query buffer try to process the next command. */
2585 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2587 /* Nothing to process, argc == 0. Just process the query
2588 * buffer if it's not empty or return to the caller */
2589 if (sdslen(c
->querybuf
)) goto again
;
2592 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2593 redisLog(REDIS_VERBOSE
, "Client protocol error");
2598 /* Bulk read handling. Note that if we are at this point
2599 the client already sent a command terminated with a newline,
2600 we are reading the bulk data that is actually the last
2601 argument of the command. */
2602 int qbl
= sdslen(c
->querybuf
);
2604 if (c
->bulklen
<= qbl
) {
2605 /* Copy everything but the final CRLF as final argument */
2606 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2608 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2609 /* Process the command. If the client is still valid after
2610 * the processing and there is more data in the buffer
2611 * try to parse it. */
2612 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2618 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2619 redisClient
*c
= (redisClient
*) privdata
;
2620 char buf
[REDIS_IOBUF_LEN
];
2623 REDIS_NOTUSED(mask
);
2625 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2627 if (errno
== EAGAIN
) {
2630 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2634 } else if (nread
== 0) {
2635 redisLog(REDIS_VERBOSE
, "Client closed connection");
2640 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2641 c
->lastinteraction
= time(NULL
);
2645 processInputBuffer(c
);
2648 static int selectDb(redisClient
*c
, int id
) {
2649 if (id
< 0 || id
>= server
.dbnum
)
2651 c
->db
= &server
.db
[id
];
2655 static void *dupClientReplyValue(void *o
) {
2656 incrRefCount((robj
*)o
);
2660 static int listMatchObjects(void *a
, void *b
) {
2661 return compareStringObjects(a
,b
) == 0;
2664 static redisClient
*createClient(int fd
) {
2665 redisClient
*c
= zmalloc(sizeof(*c
));
2667 anetNonBlock(NULL
,fd
);
2668 anetTcpNoDelay(NULL
,fd
);
2669 if (!c
) return NULL
;
2672 c
->querybuf
= sdsempty();
2681 c
->lastinteraction
= time(NULL
);
2682 c
->authenticated
= 0;
2683 c
->replstate
= REDIS_REPL_NONE
;
2684 c
->reply
= listCreate();
2685 listSetFreeMethod(c
->reply
,decrRefCount
);
2686 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2687 c
->blockingkeys
= NULL
;
2688 c
->blockingkeysnum
= 0;
2689 c
->io_keys
= listCreate();
2690 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2691 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2692 c
->pubsub_patterns
= listCreate();
2693 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2694 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2695 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2696 readQueryFromClient
, c
) == AE_ERR
) {
2700 listAddNodeTail(server
.clients
,c
);
2701 initClientMultiState(c
);
2705 static void addReply(redisClient
*c
, robj
*obj
) {
2706 if (listLength(c
->reply
) == 0 &&
2707 (c
->replstate
== REDIS_REPL_NONE
||
2708 c
->replstate
== REDIS_REPL_ONLINE
) &&
2709 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2710 sendReplyToClient
, c
) == AE_ERR
) return;
2712 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2713 obj
= dupStringObject(obj
);
2714 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2716 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2719 static void addReplySds(redisClient
*c
, sds s
) {
2720 robj
*o
= createObject(REDIS_STRING
,s
);
2725 static void addReplyDouble(redisClient
*c
, double d
) {
2728 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2729 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2730 (unsigned long) strlen(buf
),buf
));
2733 static void addReplyLong(redisClient
*c
, long l
) {
2738 addReply(c
,shared
.czero
);
2740 } else if (l
== 1) {
2741 addReply(c
,shared
.cone
);
2744 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2745 addReplySds(c
,sdsnewlen(buf
,len
));
2748 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2753 addReply(c
,shared
.czero
);
2755 } else if (ll
== 1) {
2756 addReply(c
,shared
.cone
);
2759 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2760 addReplySds(c
,sdsnewlen(buf
,len
));
2763 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2768 addReply(c
,shared
.czero
);
2770 } else if (ul
== 1) {
2771 addReply(c
,shared
.cone
);
2774 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2775 addReplySds(c
,sdsnewlen(buf
,len
));
2778 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2781 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2782 len
= sdslen(obj
->ptr
);
2784 long n
= (long)obj
->ptr
;
2786 /* Compute how many bytes will take this integer as a radix 10 string */
2792 while((n
= n
/10) != 0) {
2796 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2799 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2800 addReplyBulkLen(c
,obj
);
2802 addReply(c
,shared
.crlf
);
2805 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2806 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2808 addReply(c
,shared
.nullbulk
);
2810 robj
*o
= createStringObject(s
,strlen(s
));
2816 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2821 REDIS_NOTUSED(mask
);
2822 REDIS_NOTUSED(privdata
);
2824 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2825 if (cfd
== AE_ERR
) {
2826 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2829 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2830 if ((c
= createClient(cfd
)) == NULL
) {
2831 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2832 close(cfd
); /* May be already closed, just ingore errors */
2835 /* If maxclient directive is set and this is one client more... close the
2836 * connection. Note that we create the client instead of checking for this
2837 * condition beforehand, since now the socket is already set in nonblocking
2838 * mode and we can send an error for free using the Kernel I/O */
2839 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2840 char *err
= "-ERR max number of clients reached\r\n";
2842 /* That's a best effort error message, don't check write errors */
2843 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2844 /* Nothing to do, Just to avoid the warning... */
2849 server
.stat_numconnections
++;
2852 /* ======================= Redis objects implementation ===================== */
2854 static robj
*createObject(int type
, void *ptr
) {
2857 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2858 if (listLength(server
.objfreelist
)) {
2859 listNode
*head
= listFirst(server
.objfreelist
);
2860 o
= listNodeValue(head
);
2861 listDelNode(server
.objfreelist
,head
);
2862 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2864 if (server
.vm_enabled
) {
2865 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2866 o
= zmalloc(sizeof(*o
));
2868 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2872 o
->encoding
= REDIS_ENCODING_RAW
;
2875 if (server
.vm_enabled
) {
2876 /* Note that this code may run in the context of an I/O thread
2877 * and accessing server.unixtime in theory is an error
2878 * (no locks). But in practice this is safe, and even if we read
2879 * garbage Redis will not fail, as it's just a statistical info */
2880 o
->vm
.atime
= server
.unixtime
;
2881 o
->storage
= REDIS_VM_MEMORY
;
2886 static robj
*createStringObject(char *ptr
, size_t len
) {
2887 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2890 static robj
*createStringObjectFromLongLong(long long value
) {
2892 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2893 incrRefCount(shared
.integers
[value
]);
2894 o
= shared
.integers
[value
];
2896 o
= createObject(REDIS_STRING
, NULL
);
2897 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2898 o
->encoding
= REDIS_ENCODING_INT
;
2899 o
->ptr
= (void*)((long)value
);
2901 o
->ptr
= sdscatprintf(sdsempty(),"%lld",value
);
2907 static robj
*dupStringObject(robj
*o
) {
2908 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2909 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2912 static robj
*createListObject(void) {
2913 list
*l
= listCreate();
2915 listSetFreeMethod(l
,decrRefCount
);
2916 return createObject(REDIS_LIST
,l
);
2919 static robj
*createSetObject(void) {
2920 dict
*d
= dictCreate(&setDictType
,NULL
);
2921 return createObject(REDIS_SET
,d
);
2924 static robj
*createHashObject(void) {
2925 /* All the Hashes start as zipmaps. Will be automatically converted
2926 * into hash tables if there are enough elements or big elements
2928 unsigned char *zm
= zipmapNew();
2929 robj
*o
= createObject(REDIS_HASH
,zm
);
2930 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2934 static robj
*createZsetObject(void) {
2935 zset
*zs
= zmalloc(sizeof(*zs
));
2937 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2938 zs
->zsl
= zslCreate();
2939 return createObject(REDIS_ZSET
,zs
);
2942 static void freeStringObject(robj
*o
) {
2943 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2948 static void freeListObject(robj
*o
) {
2949 listRelease((list
*) o
->ptr
);
2952 static void freeSetObject(robj
*o
) {
2953 dictRelease((dict
*) o
->ptr
);
2956 static void freeZsetObject(robj
*o
) {
2959 dictRelease(zs
->dict
);
2964 static void freeHashObject(robj
*o
) {
2965 switch (o
->encoding
) {
2966 case REDIS_ENCODING_HT
:
2967 dictRelease((dict
*) o
->ptr
);
2969 case REDIS_ENCODING_ZIPMAP
:
2973 redisPanic("Unknown hash encoding type");
2978 static void incrRefCount(robj
*o
) {
2982 static void decrRefCount(void *obj
) {
2985 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
2986 /* Object is a key of a swapped out value, or in the process of being
2988 if (server
.vm_enabled
&&
2989 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2991 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2992 redisAssert(o
->type
== REDIS_STRING
);
2993 freeStringObject(o
);
2994 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2995 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2996 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2997 !listAddNodeHead(server
.objfreelist
,o
))
2999 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3000 server
.vm_stats_swapped_objects
--;
3003 /* Object is in memory, or in the process of being swapped out. */
3004 if (--(o
->refcount
) == 0) {
3005 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3006 vmCancelThreadedIOJob(obj
);
3008 case REDIS_STRING
: freeStringObject(o
); break;
3009 case REDIS_LIST
: freeListObject(o
); break;
3010 case REDIS_SET
: freeSetObject(o
); break;
3011 case REDIS_ZSET
: freeZsetObject(o
); break;
3012 case REDIS_HASH
: freeHashObject(o
); break;
3013 default: redisPanic("Unknown object type"); break;
3015 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3016 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3017 !listAddNodeHead(server
.objfreelist
,o
))
3019 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3023 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3024 dictEntry
*de
= dictFind(db
->dict
,key
);
3026 robj
*key
= dictGetEntryKey(de
);
3027 robj
*val
= dictGetEntryVal(de
);
3029 if (server
.vm_enabled
) {
3030 if (key
->storage
== REDIS_VM_MEMORY
||
3031 key
->storage
== REDIS_VM_SWAPPING
)
3033 /* If we were swapping the object out, stop it, this key
3035 if (key
->storage
== REDIS_VM_SWAPPING
)
3036 vmCancelThreadedIOJob(key
);
3037 /* Update the access time of the key for the aging algorithm. */
3038 key
->vm
.atime
= server
.unixtime
;
3040 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3042 /* Our value was swapped on disk. Bring it at home. */
3043 redisAssert(val
== NULL
);
3044 val
= vmLoadObject(key
);
3045 dictGetEntryVal(de
) = val
;
3047 /* Clients blocked by the VM subsystem may be waiting for
3049 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3058 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3059 expireIfNeeded(db
,key
);
3060 return lookupKey(db
,key
);
3063 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3064 deleteIfVolatile(db
,key
);
3065 return lookupKey(db
,key
);
3068 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3069 robj
*o
= lookupKeyRead(c
->db
, key
);
3070 if (!o
) addReply(c
,reply
);
3074 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3075 robj
*o
= lookupKeyWrite(c
->db
, key
);
3076 if (!o
) addReply(c
,reply
);
3080 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3081 if (o
->type
!= type
) {
3082 addReply(c
,shared
.wrongtypeerr
);
3088 static int deleteKey(redisDb
*db
, robj
*key
) {
3091 /* We need to protect key from destruction: after the first dictDelete()
3092 * it may happen that 'key' is no longer valid if we don't increment
3093 * it's count. This may happen when we get the object reference directly
3094 * from the hash table with dictRandomKey() or dict iterators */
3096 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3097 retval
= dictDelete(db
->dict
,key
);
3100 return retval
== DICT_OK
;
3103 /* Check if the nul-terminated string 's' can be represented by a long
3104 * (that is, is a number that fits into long without any other space or
3105 * character before or after the digits).
3107 * If so, the function returns REDIS_OK and *longval is set to the value
3108 * of the number. Otherwise REDIS_ERR is returned */
3109 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3110 char buf
[32], *endptr
;
3114 value
= strtol(s
, &endptr
, 10);
3115 if (endptr
[0] != '\0') return REDIS_ERR
;
3116 slen
= snprintf(buf
,32,"%ld",value
);
3118 /* If the number converted back into a string is not identical
3119 * then it's not possible to encode the string as integer */
3120 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3121 if (longval
) *longval
= value
;
3125 /* Try to encode a string object in order to save space */
3126 static robj
*tryObjectEncoding(robj
*o
) {
3130 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3131 return o
; /* Already encoded */
3133 /* It's not safe to encode shared objects: shared objects can be shared
3134 * everywhere in the "object space" of Redis. Encoded objects can only
3135 * appear as "values" (and not, for instance, as keys) */
3136 if (o
->refcount
> 1) return o
;
3138 /* Currently we try to encode only strings */
3139 redisAssert(o
->type
== REDIS_STRING
);
3141 /* Check if we can represent this string as a long integer */
3142 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3144 /* Ok, this object can be encoded */
3145 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3147 incrRefCount(shared
.integers
[value
]);
3148 return shared
.integers
[value
];
3150 o
->encoding
= REDIS_ENCODING_INT
;
3152 o
->ptr
= (void*) value
;
3157 /* Get a decoded version of an encoded object (returned as a new object).
3158 * If the object is already raw-encoded just increment the ref count. */
3159 static robj
*getDecodedObject(robj
*o
) {
3162 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3166 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3169 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3170 dec
= createStringObject(buf
,strlen(buf
));
3173 redisPanic("Unknown encoding type");
3177 /* Compare two string objects via strcmp() or alike.
3178 * Note that the objects may be integer-encoded. In such a case we
3179 * use snprintf() to get a string representation of the numbers on the stack
3180 * and compare the strings, it's much faster than calling getDecodedObject().
3182 * Important note: if objects are not integer encoded, but binary-safe strings,
3183 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3185 static int compareStringObjects(robj
*a
, robj
*b
) {
3186 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3187 char bufa
[128], bufb
[128], *astr
, *bstr
;
3190 if (a
== b
) return 0;
3191 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3192 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3198 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3199 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3205 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3208 static size_t stringObjectLen(robj
*o
) {
3209 redisAssert(o
->type
== REDIS_STRING
);
3210 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3211 return sdslen(o
->ptr
);
3215 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3219 static int getDoubleFromObject(robj
*o
, double *target
) {
3226 redisAssert(o
->type
== REDIS_STRING
);
3227 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3228 value
= strtod(o
->ptr
, &eptr
);
3229 if (eptr
[0] != '\0') return REDIS_ERR
;
3230 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3231 value
= (long)o
->ptr
;
3233 redisPanic("Unknown string encoding");
3241 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3243 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3245 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3247 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3256 static int getLongLongFromObject(robj
*o
, long long *target
) {
3263 redisAssert(o
->type
== REDIS_STRING
);
3264 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3265 value
= strtoll(o
->ptr
, &eptr
, 10);
3266 if (eptr
[0] != '\0') return REDIS_ERR
;
3267 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3268 value
= (long)o
->ptr
;
3270 redisPanic("Unknown string encoding");
3278 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3280 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3282 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3284 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3293 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3296 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3297 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3299 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3301 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3310 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' to 'fp'. Returns 0 on success, -1 on write
 * error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    if (fwrite(&type,1,1,fp) == 0) return -1;
    return 0;
}
/* Write the time 't' to 'fp' as a 32 bit signed integer (4 bytes, host byte
 * order, as the in-memory int32_t is written verbatim). Returns 0 on success,
 * -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;
    if (fwrite(&t32,4,1,fp) == 0) return -1;
    return 0;
}
3323 /* check rdbLoadLen() comments for more info */
3324 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3325 unsigned char buf
[2];
3328 /* Save a 6 bit len */
3329 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3330 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3331 } else if (len
< (1<<14)) {
3332 /* Save a 14 bit len */
3333 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3335 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3337 /* Save a 32 bit len */
3338 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3339 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3341 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3346 /* String objects in the form "2391" "-100" without any space and with a
3347 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3348 * encoded as integers to save space */
3349 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3351 char *endptr
, buf
[32];
3353 /* Check if it's possible to encode this value as a number */
3354 value
= strtoll(s
, &endptr
, 10);
3355 if (endptr
[0] != '\0') return 0;
3356 snprintf(buf
,32,"%lld",value
);
3358 /* If the number converted back into a string is not identical
3359 * then it's not possible to encode the string as integer */
3360 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3362 /* Finally check if it fits in our ranges */
3363 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3364 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3365 enc
[1] = value
&0xFF;
3367 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3368 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3369 enc
[1] = value
&0xFF;
3370 enc
[2] = (value
>>8)&0xFF;
3372 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3373 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3374 enc
[1] = value
&0xFF;
3375 enc
[2] = (value
>>8)&0xFF;
3376 enc
[3] = (value
>>16)&0xFF;
3377 enc
[4] = (value
>>24)&0xFF;
3384 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3385 size_t comprlen
, outlen
;
3389 /* We require at least four bytes compression for this to be worth it */
3390 if (len
<= 4) return 0;
3392 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3393 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3394 if (comprlen
== 0) {
3398 /* Data compressed! Let's save it on disk */
3399 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3400 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3401 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3402 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3403 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3412 /* Save a string object as [len][data] on disk. If the object is a string
3413 * representation of an integer value we try to save it in a special form */
3414 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3417 /* Try integer encoding */
3419 unsigned char buf
[5];
3420 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3421 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3426 /* Try LZF compression - under 20 bytes it's unable to compress even
3427 * aaaaaaaaaaaaaaaaaa so skip it */
3428 if (server
.rdbcompression
&& len
> 20) {
3431 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3432 if (retval
== -1) return -1;
3433 if (retval
> 0) return 0;
3434 /* retval == 0 means data can't be compressed, save the old way */
3437 /* Store verbatim */
3438 if (rdbSaveLen(fp
,len
) == -1) return -1;
3439 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3443 /* Like rdbSaveRawString() but handle encoded objects */
3444 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3447 /* Avoid incr/decr ref count business when possible.
3448 * This plays well with copy-on-write given that we are probably
3449 * in a child process (BGSAVE). Also this makes sure key objects
3450 * of swapped objects are not incRefCount-ed (an assert does not allow
3451 * this in order to avoid bugs) */
3452 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3453 obj
= getDecodedObject(obj
);
3454 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3457 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Finite values are printed with "%.17g". Returns 0 on success, -1 on write
 * error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        /* buf[0] is the length prefix, the text starts at buf+1. */
        snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3489 /* Save a Redis object. */
3490 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3491 if (o
->type
== REDIS_STRING
) {
3492 /* Save a string value */
3493 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3494 } else if (o
->type
== REDIS_LIST
) {
3495 /* Save a list value */
3496 list
*list
= o
->ptr
;
3500 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3501 listRewind(list
,&li
);
3502 while((ln
= listNext(&li
))) {
3503 robj
*eleobj
= listNodeValue(ln
);
3505 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3507 } else if (o
->type
== REDIS_SET
) {
3508 /* Save a set value */
3510 dictIterator
*di
= dictGetIterator(set
);
3513 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3514 while((de
= dictNext(di
)) != NULL
) {
3515 robj
*eleobj
= dictGetEntryKey(de
);
3517 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3519 dictReleaseIterator(di
);
3520 } else if (o
->type
== REDIS_ZSET
) {
3521 /* Save a sorted set value */
3523 dictIterator
*di
= dictGetIterator(zs
->dict
);
3526 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3527 while((de
= dictNext(di
)) != NULL
) {
3528 robj
*eleobj
= dictGetEntryKey(de
);
3529 double *score
= dictGetEntryVal(de
);
3531 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3532 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3534 dictReleaseIterator(di
);
3535 } else if (o
->type
== REDIS_HASH
) {
3536 /* Save a hash value */
3537 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3538 unsigned char *p
= zipmapRewind(o
->ptr
);
3539 unsigned int count
= zipmapLen(o
->ptr
);
3540 unsigned char *key
, *val
;
3541 unsigned int klen
, vlen
;
3543 if (rdbSaveLen(fp
,count
) == -1) return -1;
3544 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3545 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3546 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3549 dictIterator
*di
= dictGetIterator(o
->ptr
);
3552 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3553 while((de
= dictNext(di
)) != NULL
) {
3554 robj
*key
= dictGetEntryKey(de
);
3555 robj
*val
= dictGetEntryVal(de
);
3557 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3558 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3560 dictReleaseIterator(di
);
3563 redisPanic("Unknown object type");
3568 /* Return the length the object will have on disk if saved with
3569 * the rdbSaveObject() function. Currently we use a trick to get
3570 * this length with very little changes to the code. In the future
3571 * we could switch to a faster solution. */
3572 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3573 if (fp
== NULL
) fp
= server
.devnull
;
3575 assert(rdbSaveObject(fp
,o
) != 1);
3579 /* Return the number of pages required to save this object in the swap file */
3580 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3581 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3583 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3586 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3587 static int rdbSave(char *filename
) {
3588 dictIterator
*di
= NULL
;
3593 time_t now
= time(NULL
);
3595 /* Wait for I/O threads to terminate, just in case this is a
3596 * foreground-saving, to avoid seeking the swap file descriptor at the
3598 if (server
.vm_enabled
)
3599 waitEmptyIOJobsQueue();
3601 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3602 fp
= fopen(tmpfile
,"w");
3604 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3607 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3608 for (j
= 0; j
< server
.dbnum
; j
++) {
3609 redisDb
*db
= server
.db
+j
;
3611 if (dictSize(d
) == 0) continue;
3612 di
= dictGetIterator(d
);
3618 /* Write the SELECT DB opcode */
3619 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3620 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3622 /* Iterate this DB writing every entry */
3623 while((de
= dictNext(di
)) != NULL
) {
3624 robj
*key
= dictGetEntryKey(de
);
3625 robj
*o
= dictGetEntryVal(de
);
3626 time_t expiretime
= getExpire(db
,key
);
3628 /* Save the expire time */
3629 if (expiretime
!= -1) {
3630 /* If this key is already expired skip it */
3631 if (expiretime
< now
) continue;
3632 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3633 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3635 /* Save the key and associated value. This requires special
3636 * handling if the value is swapped out. */
3637 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3638 key
->storage
== REDIS_VM_SWAPPING
) {
3639 /* Save type, key, value */
3640 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3641 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3642 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3644 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3646 /* Get a preview of the object in memory */
3647 po
= vmPreviewObject(key
);
3648 /* Save type, key, value */
3649 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3650 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3651 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3652 /* Remove the loaded object from memory */
3656 dictReleaseIterator(di
);
3659 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3661 /* Make sure data will not remain on the OS's output buffers */
3666 /* Use RENAME to make sure the DB file is changed atomically only
3667 * if the generated DB file is ok. */
3668 if (rename(tmpfile
,filename
) == -1) {
3669 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3673 redisLog(REDIS_NOTICE
,"DB saved on disk");
3675 server
.lastsave
= time(NULL
);
3681 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3682 if (di
) dictReleaseIterator(di
);
3686 static int rdbSaveBackground(char *filename
) {
3689 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3690 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3691 if ((childpid
= fork()) == 0) {
3693 if (server
.vm_enabled
) vmReopenSwapFile();
3695 if (rdbSave(filename
) == REDIS_OK
) {
3702 if (childpid
== -1) {
3703 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3707 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3708 server
.bgsavechildpid
= childpid
;
3709 updateDictResizePolicy();
3712 return REDIS_OK
; /* unreached */
/* Remove the temporary file "temp-<childpid>.rdb" (relative to the current
 * working directory). Errors from unlink() are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char tmpfile[256];

    snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
    unlink(tmpfile);
}
/* Read a one byte type from 'fp'. Returns the byte value (0-255), or -1 if
 * nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;
    if (fread(&type,1,1,fp) == 0) return -1;
    return type;
}
/* Read a time saved as a 4 byte signed integer (see rdbSaveTime()) from
 * 'fp'. Returns the time, or -1 if the four bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;
    if (fread(&t32,4,1,fp) == 0) return -1;
    return (time_t) t32;
}
3734 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3735 * of this file for a description of how this are stored on disk.
3737 * isencoded is set to 1 if the readed length is not actually a length but
3738 * an "encoding type", check the above comments for more info */
3739 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3740 unsigned char buf
[2];
3744 if (isencoded
) *isencoded
= 0;
3745 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3746 type
= (buf
[0]&0xC0)>>6;
3747 if (type
== REDIS_RDB_6BITLEN
) {
3748 /* Read a 6 bit len */
3750 } else if (type
== REDIS_RDB_ENCVAL
) {
3751 /* Read a 6 bit len encoding type */
3752 if (isencoded
) *isencoded
= 1;
3754 } else if (type
== REDIS_RDB_14BITLEN
) {
3755 /* Read a 14 bit len */
3756 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3757 return ((buf
[0]&0x3F)<<8)|buf
[1];
3759 /* Read a 32 bit len */
3760 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3765 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3766 unsigned char enc
[4];
3769 if (enctype
== REDIS_RDB_ENC_INT8
) {
3770 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3771 val
= (signed char)enc
[0];
3772 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3774 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3775 v
= enc
[0]|(enc
[1]<<8);
3777 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3779 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3780 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3783 val
= 0; /* anti-warning */
3784 redisPanic("Unknown RDB integer encoding type");
3786 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3789 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3790 unsigned int len
, clen
;
3791 unsigned char *c
= NULL
;
3794 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3795 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3796 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3797 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3798 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3799 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3801 return createObject(REDIS_STRING
,val
);
3808 static robj
*rdbLoadStringObject(FILE*fp
) {
3813 len
= rdbLoadLen(fp
,&isencoded
);
3816 case REDIS_RDB_ENC_INT8
:
3817 case REDIS_RDB_ENC_INT16
:
3818 case REDIS_RDB_ENC_INT32
:
3819 return rdbLoadIntegerObject(fp
,len
);
3820 case REDIS_RDB_ENC_LZF
:
3821 return rdbLoadLzfStringObject(fp
);
3823 redisPanic("Unknown RDB encoding type");
3827 if (len
== REDIS_RDB_LENERR
) return NULL
;
3828 val
= sdsnewlen(NULL
,len
);
3829 if (len
&& fread(val
,len
,1,fp
) == 0) {
3833 return createObject(REDIS_STRING
,val
);
3836 /* For information about double serialization check rdbSaveDoubleValue() */
3837 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3841 if (fread(&len
,1,1,fp
) == 0) return -1;
3843 case 255: *val
= R_NegInf
; return 0;
3844 case 254: *val
= R_PosInf
; return 0;
3845 case 253: *val
= R_Nan
; return 0;
3847 if (fread(buf
,len
,1,fp
) == 0) return -1;
3849 sscanf(buf
, "%lg", val
);
3854 /* Load a Redis object of the specified type from the specified file.
3855 * On success a newly allocated object is returned, otherwise NULL. */
3856 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3859 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3860 if (type
== REDIS_STRING
) {
3861 /* Read string value */
3862 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3863 o
= tryObjectEncoding(o
);
3864 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3865 /* Read list/set value */
3868 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3869 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3870 /* It's faster to expand the dict to the right size asap in order
3871 * to avoid rehashing */
3872 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3873 dictExpand(o
->ptr
,listlen
);
3874 /* Load every single element of the list/set */
3878 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3879 ele
= tryObjectEncoding(ele
);
3880 if (type
== REDIS_LIST
) {
3881 listAddNodeTail((list
*)o
->ptr
,ele
);
3883 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3886 } else if (type
== REDIS_ZSET
) {
3887 /* Read sorted set value */
3891 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3892 o
= createZsetObject();
3894 /* Load every single element of the list/set */
3897 double *score
= zmalloc(sizeof(double));
3899 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3900 ele
= tryObjectEncoding(ele
);
3901 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3902 dictAdd(zs
->dict
,ele
,score
);
3903 zslInsert(zs
->zsl
,*score
,ele
);
3904 incrRefCount(ele
); /* added to skiplist */
3906 } else if (type
== REDIS_HASH
) {
3909 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3910 o
= createHashObject();
3911 /* Too many entries? Use an hash table. */
3912 if (hashlen
> server
.hash_max_zipmap_entries
)
3913 convertToRealHash(o
);
3914 /* Load every key/value, then set it into the zipmap or hash
3915 * table, as needed. */
3919 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3920 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3921 /* If we are using a zipmap and there are too big values
3922 * the object is converted to real hash table encoding. */
3923 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3924 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3925 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3927 convertToRealHash(o
);
3930 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3931 unsigned char *zm
= o
->ptr
;
3933 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3934 val
->ptr
,sdslen(val
->ptr
),NULL
);
3939 key
= tryObjectEncoding(key
);
3940 val
= tryObjectEncoding(val
);
3941 dictAdd((dict
*)o
->ptr
,key
,val
);
3945 redisPanic("Unknown object type");
3950 static int rdbLoad(char *filename
) {
3952 robj
*keyobj
= NULL
;
3954 int type
, retval
, rdbver
;
3955 dict
*d
= server
.db
[0].dict
;
3956 redisDb
*db
= server
.db
+0;
3958 time_t expiretime
= -1, now
= time(NULL
);
3959 long long loadedkeys
= 0;
3961 fp
= fopen(filename
,"r");
3962 if (!fp
) return REDIS_ERR
;
3963 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3965 if (memcmp(buf
,"REDIS",5) != 0) {
3967 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3970 rdbver
= atoi(buf
+5);
3973 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3980 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3981 if (type
== REDIS_EXPIRETIME
) {
3982 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3983 /* We read the time so we need to read the object type again */
3984 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3986 if (type
== REDIS_EOF
) break;
3987 /* Handle SELECT DB opcode as a special case */
3988 if (type
== REDIS_SELECTDB
) {
3989 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3991 if (dbid
>= (unsigned)server
.dbnum
) {
3992 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3995 db
= server
.db
+dbid
;
4000 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4002 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4003 /* Add the new object in the hash table */
4004 retval
= dictAdd(d
,keyobj
,o
);
4005 if (retval
== DICT_ERR
) {
4006 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
4009 /* Set the expire time if needed */
4010 if (expiretime
!= -1) {
4011 setExpire(db
,keyobj
,expiretime
);
4012 /* Delete this key if already expired */
4013 if (expiretime
< now
) deleteKey(db
,keyobj
);
4017 /* Handle swapping while loading big datasets when VM is on */
4019 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4020 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4021 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4028 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4029 if (keyobj
) decrRefCount(keyobj
);
4030 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4032 return REDIS_ERR
; /* Just to avoid warning */
4035 /*================================== Commands =============================== */
4037 static void authCommand(redisClient
*c
) {
4038 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4039 c
->authenticated
= 1;
4040 addReply(c
,shared
.ok
);
4042 c
->authenticated
= 0;
4043 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4047 static void pingCommand(redisClient
*c
) {
4048 addReply(c
,shared
.pong
);
4051 static void echoCommand(redisClient
*c
) {
4052 addReplyBulk(c
,c
->argv
[1]);
4055 /*=================================== Strings =============================== */
4057 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4059 long seconds
= 0; /* initialized to avoid a harmless warning */
4062 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4065 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4070 if (nx
) deleteIfVolatile(c
->db
,key
);
4071 retval
= dictAdd(c
->db
->dict
,key
,val
);
4072 if (retval
== DICT_ERR
) {
4074 /* If the key is about a swapped value, we want a new key object
4075 * to overwrite the old. So we delete the old key in the database.
4076 * This will also make sure that swap pages about the old object
4077 * will be marked as free. */
4078 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4080 dictReplace(c
->db
->dict
,key
,val
);
4083 addReply(c
,shared
.czero
);
4091 removeExpire(c
->db
,key
);
4092 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4093 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4096 static void setCommand(redisClient
*c
) {
4097 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4100 static void setnxCommand(redisClient
*c
) {
4101 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4104 static void setexCommand(redisClient
*c
) {
4105 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4108 static int getGenericCommand(redisClient
*c
) {
4111 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4114 if (o
->type
!= REDIS_STRING
) {
4115 addReply(c
,shared
.wrongtypeerr
);
4123 static void getCommand(redisClient
*c
) {
4124 getGenericCommand(c
);
4127 static void getsetCommand(redisClient
*c
) {
4128 if (getGenericCommand(c
) == REDIS_ERR
) return;
4129 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4130 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4132 incrRefCount(c
->argv
[1]);
4134 incrRefCount(c
->argv
[2]);
4136 removeExpire(c
->db
,c
->argv
[1]);
4139 static void mgetCommand(redisClient
*c
) {
4142 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4143 for (j
= 1; j
< c
->argc
; j
++) {
4144 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4146 addReply(c
,shared
.nullbulk
);
4148 if (o
->type
!= REDIS_STRING
) {
4149 addReply(c
,shared
.nullbulk
);
4157 static void msetGenericCommand(redisClient
*c
, int nx
) {
4158 int j
, busykeys
= 0;
4160 if ((c
->argc
% 2) == 0) {
4161 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4164 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4165 * set nothing at all if at least one already key exists. */
4167 for (j
= 1; j
< c
->argc
; j
+= 2) {
4168 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4174 addReply(c
, shared
.czero
);
4178 for (j
= 1; j
< c
->argc
; j
+= 2) {
4181 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4182 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4183 if (retval
== DICT_ERR
) {
4184 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4185 incrRefCount(c
->argv
[j
+1]);
4187 incrRefCount(c
->argv
[j
]);
4188 incrRefCount(c
->argv
[j
+1]);
4190 removeExpire(c
->db
,c
->argv
[j
]);
4192 server
.dirty
+= (c
->argc
-1)/2;
4193 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4196 static void msetCommand(redisClient
*c
) {
4197 msetGenericCommand(c
,0);
4200 static void msetnxCommand(redisClient
*c
) {
4201 msetGenericCommand(c
,1);
4204 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4209 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4211 if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return;
4214 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4215 o
= tryObjectEncoding(o
);
4216 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4217 if (retval
== DICT_ERR
) {
4218 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4219 removeExpire(c
->db
,c
->argv
[1]);
4221 incrRefCount(c
->argv
[1]);
4224 addReply(c
,shared
.colon
);
4226 addReply(c
,shared
.crlf
);
4229 static void incrCommand(redisClient
*c
) {
4230 incrDecrCommand(c
,1);
4233 static void decrCommand(redisClient
*c
) {
4234 incrDecrCommand(c
,-1);
4237 static void incrbyCommand(redisClient
*c
) {
4240 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4241 incrDecrCommand(c
,incr
);
4244 static void decrbyCommand(redisClient
*c
) {
4247 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4248 incrDecrCommand(c
,-incr
);
4251 static void appendCommand(redisClient
*c
) {
4256 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4258 /* Create the key */
4259 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4260 incrRefCount(c
->argv
[1]);
4261 incrRefCount(c
->argv
[2]);
4262 totlen
= stringObjectLen(c
->argv
[2]);
4266 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4269 o
= dictGetEntryVal(de
);
4270 if (o
->type
!= REDIS_STRING
) {
4271 addReply(c
,shared
.wrongtypeerr
);
4274 /* If the object is specially encoded or shared we have to make
4276 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4277 robj
*decoded
= getDecodedObject(o
);
4279 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4280 decrRefCount(decoded
);
4281 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4284 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4285 o
->ptr
= sdscatlen(o
->ptr
,
4286 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4288 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4289 (unsigned long) c
->argv
[2]->ptr
);
4291 totlen
= sdslen(o
->ptr
);
4294 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4297 static void substrCommand(redisClient
*c
) {
4299 long start
= atoi(c
->argv
[2]->ptr
);
4300 long end
= atoi(c
->argv
[3]->ptr
);
4301 size_t rangelen
, strlen
;
4304 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4305 checkType(c
,o
,REDIS_STRING
)) return;
4307 o
= getDecodedObject(o
);
4308 strlen
= sdslen(o
->ptr
);
4310 /* convert negative indexes */
4311 if (start
< 0) start
= strlen
+start
;
4312 if (end
< 0) end
= strlen
+end
;
4313 if (start
< 0) start
= 0;
4314 if (end
< 0) end
= 0;
4316 /* indexes sanity checks */
4317 if (start
> end
|| (size_t)start
>= strlen
) {
4318 /* Out of range start or start > end result in null reply */
4319 addReply(c
,shared
.nullbulk
);
4323 if ((size_t)end
>= strlen
) end
= strlen
-1;
4324 rangelen
= (end
-start
)+1;
4326 /* Return the result */
4327 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4328 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4329 addReplySds(c
,range
);
4330 addReply(c
,shared
.crlf
);
4334 /* ========================= Type agnostic commands ========================= */
4336 static void delCommand(redisClient
*c
) {
4339 for (j
= 1; j
< c
->argc
; j
++) {
4340 if (deleteKey(c
->db
,c
->argv
[j
])) {
4345 addReplyLong(c
,deleted
);
4348 static void existsCommand(redisClient
*c
) {
4349 expireIfNeeded(c
->db
,c
->argv
[1]);
4350 if (dictFind(c
->db
->dict
,c
->argv
[1])) {
4351 addReply(c
, shared
.cone
);
4353 addReply(c
, shared
.czero
);
4357 static void selectCommand(redisClient
*c
) {
4358 int id
= atoi(c
->argv
[1]->ptr
);
4360 if (selectDb(c
,id
) == REDIS_ERR
) {
4361 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4363 addReply(c
,shared
.ok
);
4367 static void randomkeyCommand(redisClient
*c
) {
4372 de
= dictGetRandomKey(c
->db
->dict
);
4373 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4377 addReply(c
,shared
.nullbulk
);
4381 key
= dictGetEntryKey(de
);
4382 if (server
.vm_enabled
) {
4383 key
= dupStringObject(key
);
4384 addReplyBulk(c
,key
);
4387 addReplyBulk(c
,key
);
4391 static void keysCommand(redisClient
*c
) {
4394 sds pattern
= c
->argv
[1]->ptr
;
4395 int plen
= sdslen(pattern
);
4396 unsigned long numkeys
= 0;
4397 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4399 di
= dictGetIterator(c
->db
->dict
);
4401 decrRefCount(lenobj
);
4402 while((de
= dictNext(di
)) != NULL
) {
4403 robj
*keyobj
= dictGetEntryKey(de
);
4405 sds key
= keyobj
->ptr
;
4406 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4407 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4408 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4409 addReplyBulk(c
,keyobj
);
4414 dictReleaseIterator(di
);
4415 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4418 static void dbsizeCommand(redisClient
*c
) {
4420 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4423 static void lastsaveCommand(redisClient
*c
) {
4425 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4428 static void typeCommand(redisClient
*c
) {
4432 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4437 case REDIS_STRING
: type
= "+string"; break;
4438 case REDIS_LIST
: type
= "+list"; break;
4439 case REDIS_SET
: type
= "+set"; break;
4440 case REDIS_ZSET
: type
= "+zset"; break;
4441 case REDIS_HASH
: type
= "+hash"; break;
4442 default: type
= "+unknown"; break;
4445 addReplySds(c
,sdsnew(type
));
4446 addReply(c
,shared
.crlf
);
4449 static void saveCommand(redisClient
*c
) {
4450 if (server
.bgsavechildpid
!= -1) {
4451 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4454 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4455 addReply(c
,shared
.ok
);
4457 addReply(c
,shared
.err
);
4461 static void bgsaveCommand(redisClient
*c
) {
4462 if (server
.bgsavechildpid
!= -1) {
4463 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4466 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4467 char *status
= "+Background saving started\r\n";
4468 addReplySds(c
,sdsnew(status
));
4470 addReply(c
,shared
.err
);
4474 static void shutdownCommand(redisClient
*c
) {
4475 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4476 /* Kill the saving child if there is a background saving in progress.
4477 We want to avoid race conditions, for instance our saving child may
4478 overwrite the synchronous saving did by SHUTDOWN. */
4479 if (server
.bgsavechildpid
!= -1) {
4480 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4481 kill(server
.bgsavechildpid
,SIGKILL
);
4482 rdbRemoveTempFile(server
.bgsavechildpid
);
4484 if (server
.appendonly
) {
4485 /* Append only file: fsync() the AOF and exit */
4486 fsync(server
.appendfd
);
4487 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4490 /* Snapshotting. Perform a SYNC SAVE and exit */
4491 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4492 if (server
.daemonize
)
4493 unlink(server
.pidfile
);
4494 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4495 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4498 /* Ooops.. error saving! The best we can do is to continue
4499 * operating. Note that if there was a background saving process,
4500 * in the next cron() Redis will be notified that the background
4501 * saving aborted, handling special stuff like slaves pending for
4502 * synchronization... */
4503 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4505 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4510 static void renameGenericCommand(redisClient
*c
, int nx
) {
4513 /* To use the same key as src and dst is probably an error */
4514 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4515 addReply(c
,shared
.sameobjecterr
);
4519 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4523 deleteIfVolatile(c
->db
,c
->argv
[2]);
4524 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4527 addReply(c
,shared
.czero
);
4530 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4532 incrRefCount(c
->argv
[2]);
4534 deleteKey(c
->db
,c
->argv
[1]);
4536 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4539 static void renameCommand(redisClient
*c
) {
4540 renameGenericCommand(c
,0);
4543 static void renamenxCommand(redisClient
*c
) {
4544 renameGenericCommand(c
,1);
4547 static void moveCommand(redisClient
*c
) {
4552 /* Obtain source and target DB pointers */
4555 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4556 addReply(c
,shared
.outofrangeerr
);
4560 selectDb(c
,srcid
); /* Back to the source DB */
4562 /* If the user is moving using as target the same
4563 * DB as the source DB it is probably an error. */
4565 addReply(c
,shared
.sameobjecterr
);
4569 /* Check if the element exists and get a reference */
4570 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4572 addReply(c
,shared
.czero
);
4576 /* Try to add the element to the target DB */
4577 deleteIfVolatile(dst
,c
->argv
[1]);
4578 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4579 addReply(c
,shared
.czero
);
4582 incrRefCount(c
->argv
[1]);
4585 /* OK! key moved, free the entry in the source DB */
4586 deleteKey(src
,c
->argv
[1]);
4588 addReply(c
,shared
.cone
);
4591 /* =================================== Lists ================================ */
4592 static void pushGenericCommand(redisClient
*c
, int where
) {
4596 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4598 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4599 addReply(c
,shared
.cone
);
4602 lobj
= createListObject();
4604 if (where
== REDIS_HEAD
) {
4605 listAddNodeHead(list
,c
->argv
[2]);
4607 listAddNodeTail(list
,c
->argv
[2]);
4609 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4610 incrRefCount(c
->argv
[1]);
4611 incrRefCount(c
->argv
[2]);
4613 if (lobj
->type
!= REDIS_LIST
) {
4614 addReply(c
,shared
.wrongtypeerr
);
4617 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4618 addReply(c
,shared
.cone
);
4622 if (where
== REDIS_HEAD
) {
4623 listAddNodeHead(list
,c
->argv
[2]);
4625 listAddNodeTail(list
,c
->argv
[2]);
4627 incrRefCount(c
->argv
[2]);
4630 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4633 static void lpushCommand(redisClient
*c
) {
4634 pushGenericCommand(c
,REDIS_HEAD
);
4637 static void rpushCommand(redisClient
*c
) {
4638 pushGenericCommand(c
,REDIS_TAIL
);
4641 static void llenCommand(redisClient
*c
) {
4645 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4646 checkType(c
,o
,REDIS_LIST
)) return;
4649 addReplyUlong(c
,listLength(l
));
4652 static void lindexCommand(redisClient
*c
) {
4654 int index
= atoi(c
->argv
[2]->ptr
);
4658 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4659 checkType(c
,o
,REDIS_LIST
)) return;
4662 ln
= listIndex(list
, index
);
4664 addReply(c
,shared
.nullbulk
);
4666 robj
*ele
= listNodeValue(ln
);
4667 addReplyBulk(c
,ele
);
4671 static void lsetCommand(redisClient
*c
) {
4673 int index
= atoi(c
->argv
[2]->ptr
);
4677 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4678 checkType(c
,o
,REDIS_LIST
)) return;
4681 ln
= listIndex(list
, index
);
4683 addReply(c
,shared
.outofrangeerr
);
4685 robj
*ele
= listNodeValue(ln
);
4688 listNodeValue(ln
) = c
->argv
[3];
4689 incrRefCount(c
->argv
[3]);
4690 addReply(c
,shared
.ok
);
4695 static void popGenericCommand(redisClient
*c
, int where
) {
4700 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4701 checkType(c
,o
,REDIS_LIST
)) return;
4704 if (where
== REDIS_HEAD
)
4705 ln
= listFirst(list
);
4707 ln
= listLast(list
);
4710 addReply(c
,shared
.nullbulk
);
4712 robj
*ele
= listNodeValue(ln
);
4713 addReplyBulk(c
,ele
);
4714 listDelNode(list
,ln
);
4715 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4720 static void lpopCommand(redisClient
*c
) {
4721 popGenericCommand(c
,REDIS_HEAD
);
4724 static void rpopCommand(redisClient
*c
) {
4725 popGenericCommand(c
,REDIS_TAIL
);
4728 static void lrangeCommand(redisClient
*c
) {
4730 int start
= atoi(c
->argv
[2]->ptr
);
4731 int end
= atoi(c
->argv
[3]->ptr
);
4738 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4739 || checkType(c
,o
,REDIS_LIST
)) return;
4741 llen
= listLength(list
);
4743 /* convert negative indexes */
4744 if (start
< 0) start
= llen
+start
;
4745 if (end
< 0) end
= llen
+end
;
4746 if (start
< 0) start
= 0;
4747 if (end
< 0) end
= 0;
4749 /* indexes sanity checks */
4750 if (start
> end
|| start
>= llen
) {
4751 /* Out of range start or start > end result in empty list */
4752 addReply(c
,shared
.emptymultibulk
);
4755 if (end
>= llen
) end
= llen
-1;
4756 rangelen
= (end
-start
)+1;
4758 /* Return the result in form of a multi-bulk reply */
4759 ln
= listIndex(list
, start
);
4760 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4761 for (j
= 0; j
< rangelen
; j
++) {
4762 ele
= listNodeValue(ln
);
4763 addReplyBulk(c
,ele
);
4768 static void ltrimCommand(redisClient
*c
) {
4770 int start
= atoi(c
->argv
[2]->ptr
);
4771 int end
= atoi(c
->argv
[3]->ptr
);
4773 int j
, ltrim
, rtrim
;
4777 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4778 checkType(c
,o
,REDIS_LIST
)) return;
4780 llen
= listLength(list
);
4782 /* convert negative indexes */
4783 if (start
< 0) start
= llen
+start
;
4784 if (end
< 0) end
= llen
+end
;
4785 if (start
< 0) start
= 0;
4786 if (end
< 0) end
= 0;
4788 /* indexes sanity checks */
4789 if (start
> end
|| start
>= llen
) {
4790 /* Out of range start or start > end result in empty list */
4794 if (end
>= llen
) end
= llen
-1;
4799 /* Remove list elements to perform the trim */
4800 for (j
= 0; j
< ltrim
; j
++) {
4801 ln
= listFirst(list
);
4802 listDelNode(list
,ln
);
4804 for (j
= 0; j
< rtrim
; j
++) {
4805 ln
= listLast(list
);
4806 listDelNode(list
,ln
);
4808 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4810 addReply(c
,shared
.ok
);
4813 static void lremCommand(redisClient
*c
) {
4816 listNode
*ln
, *next
;
4817 int toremove
= atoi(c
->argv
[2]->ptr
);
4821 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4822 checkType(c
,o
,REDIS_LIST
)) return;
4826 toremove
= -toremove
;
4829 ln
= fromtail
? list
->tail
: list
->head
;
4831 robj
*ele
= listNodeValue(ln
);
4833 next
= fromtail
? ln
->prev
: ln
->next
;
4834 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4835 listDelNode(list
,ln
);
4838 if (toremove
&& removed
== toremove
) break;
4842 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4843 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4846 /* This is the semantic of this command:
4847 * RPOPLPUSH srclist dstlist:
4848 * IF LLEN(srclist) > 0
4849 * element = RPOP srclist
4850 * LPUSH dstlist element
4857 * The idea is to be able to get an element from a list in a reliable way
4858 * since the element is not just returned but pushed against another list
4859 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4861 static void rpoplpushcommand(redisClient
*c
) {
4866 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4867 checkType(c
,sobj
,REDIS_LIST
)) return;
4868 srclist
= sobj
->ptr
;
4869 ln
= listLast(srclist
);
4872 addReply(c
,shared
.nullbulk
);
4874 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4875 robj
*ele
= listNodeValue(ln
);
4878 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4879 addReply(c
,shared
.wrongtypeerr
);
4883 /* Add the element to the target list (unless it's directly
4884 * passed to some BLPOP-ing client */
4885 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4887 /* Create the list if the key does not exist */
4888 dobj
= createListObject();
4889 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4890 incrRefCount(c
->argv
[2]);
4892 dstlist
= dobj
->ptr
;
4893 listAddNodeHead(dstlist
,ele
);
4897 /* Send the element to the client as reply as well */
4898 addReplyBulk(c
,ele
);
4900 /* Finally remove the element from the source list */
4901 listDelNode(srclist
,ln
);
4902 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4907 /* ==================================== Sets ================================ */
4909 static void saddCommand(redisClient
*c
) {
4912 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4914 set
= createSetObject();
4915 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4916 incrRefCount(c
->argv
[1]);
4918 if (set
->type
!= REDIS_SET
) {
4919 addReply(c
,shared
.wrongtypeerr
);
4923 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4924 incrRefCount(c
->argv
[2]);
4926 addReply(c
,shared
.cone
);
4928 addReply(c
,shared
.czero
);
4932 static void sremCommand(redisClient
*c
) {
4935 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4936 checkType(c
,set
,REDIS_SET
)) return;
4938 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4940 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4941 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4942 addReply(c
,shared
.cone
);
4944 addReply(c
,shared
.czero
);
4948 static void smoveCommand(redisClient
*c
) {
4949 robj
*srcset
, *dstset
;
4951 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4952 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4954 /* If the source key does not exist return 0, if it's of the wrong type
4956 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4957 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4960 /* Error if the destination key is not a set as well */
4961 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4962 addReply(c
,shared
.wrongtypeerr
);
4965 /* Remove the element from the source set */
4966 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4967 /* Key not found in the src set! return zero */
4968 addReply(c
,shared
.czero
);
4971 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4972 deleteKey(c
->db
,c
->argv
[1]);
4974 /* Add the element to the destination set */
4976 dstset
= createSetObject();
4977 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4978 incrRefCount(c
->argv
[2]);
4980 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4981 incrRefCount(c
->argv
[3]);
4982 addReply(c
,shared
.cone
);
4985 static void sismemberCommand(redisClient
*c
) {
4988 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4989 checkType(c
,set
,REDIS_SET
)) return;
4991 if (dictFind(set
->ptr
,c
->argv
[2]))
4992 addReply(c
,shared
.cone
);
4994 addReply(c
,shared
.czero
);
4997 static void scardCommand(redisClient
*c
) {
5001 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5002 checkType(c
,o
,REDIS_SET
)) return;
5005 addReplyUlong(c
,dictSize(s
));
5008 static void spopCommand(redisClient
*c
) {
5012 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5013 checkType(c
,set
,REDIS_SET
)) return;
5015 de
= dictGetRandomKey(set
->ptr
);
5017 addReply(c
,shared
.nullbulk
);
5019 robj
*ele
= dictGetEntryKey(de
);
5021 addReplyBulk(c
,ele
);
5022 dictDelete(set
->ptr
,ele
);
5023 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5024 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5029 static void srandmemberCommand(redisClient
*c
) {
5033 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5034 checkType(c
,set
,REDIS_SET
)) return;
5036 de
= dictGetRandomKey(set
->ptr
);
5038 addReply(c
,shared
.nullbulk
);
5040 robj
*ele
= dictGetEntryKey(de
);
5042 addReplyBulk(c
,ele
);
5046 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5047 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5049 return dictSize(*d1
)-dictSize(*d2
);
5052 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5053 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5056 robj
*lenobj
= NULL
, *dstset
= NULL
;
5057 unsigned long j
, cardinality
= 0;
5059 for (j
= 0; j
< setsnum
; j
++) {
5063 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5064 lookupKeyRead(c
->db
,setskeys
[j
]);
5068 if (deleteKey(c
->db
,dstkey
))
5070 addReply(c
,shared
.czero
);
5072 addReply(c
,shared
.emptymultibulk
);
5076 if (setobj
->type
!= REDIS_SET
) {
5078 addReply(c
,shared
.wrongtypeerr
);
5081 dv
[j
] = setobj
->ptr
;
5083 /* Sort sets from the smallest to largest, this will improve our
5084 * algorithm's performace */
5085 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5087 /* The first thing we should output is the total number of elements...
5088 * since this is a multi-bulk write, but at this stage we don't know
5089 * the intersection set size, so we use a trick, append an empty object
5090 * to the output list and save the pointer to later modify it with the
5093 lenobj
= createObject(REDIS_STRING
,NULL
);
5095 decrRefCount(lenobj
);
5097 /* If we have a target key where to store the resulting set
5098 * create this key with an empty set inside */
5099 dstset
= createSetObject();
5102 /* Iterate all the elements of the first (smallest) set, and test
5103 * the element against all the other sets, if at least one set does
5104 * not include the element it is discarded */
5105 di
= dictGetIterator(dv
[0]);
5107 while((de
= dictNext(di
)) != NULL
) {
5110 for (j
= 1; j
< setsnum
; j
++)
5111 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5113 continue; /* at least one set does not contain the member */
5114 ele
= dictGetEntryKey(de
);
5116 addReplyBulk(c
,ele
);
5119 dictAdd(dstset
->ptr
,ele
,NULL
);
5123 dictReleaseIterator(di
);
5126 /* Store the resulting set into the target, if the intersection
5127 * is not an empty set. */
5128 deleteKey(c
->db
,dstkey
);
5129 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5130 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5131 incrRefCount(dstkey
);
5132 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5134 decrRefCount(dstset
);
5135 addReply(c
,shared
.czero
);
5139 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5144 static void sinterCommand(redisClient
*c
) {
5145 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5148 static void sinterstoreCommand(redisClient
*c
) {
5149 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5152 #define REDIS_OP_UNION 0
5153 #define REDIS_OP_DIFF 1
5154 #define REDIS_OP_INTER 2
5156 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5157 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5160 robj
*dstset
= NULL
;
5161 int j
, cardinality
= 0;
5163 for (j
= 0; j
< setsnum
; j
++) {
5167 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5168 lookupKeyRead(c
->db
,setskeys
[j
]);
5173 if (setobj
->type
!= REDIS_SET
) {
5175 addReply(c
,shared
.wrongtypeerr
);
5178 dv
[j
] = setobj
->ptr
;
5181 /* We need a temp set object to store our union. If the dstkey
5182 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5183 * this set object will be the resulting object to set into the target key*/
5184 dstset
= createSetObject();
5186 /* Iterate all the elements of all the sets, add every element a single
5187 * time to the result set */
5188 for (j
= 0; j
< setsnum
; j
++) {
5189 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5190 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5192 di
= dictGetIterator(dv
[j
]);
5194 while((de
= dictNext(di
)) != NULL
) {
5197 /* dictAdd will not add the same element multiple times */
5198 ele
= dictGetEntryKey(de
);
5199 if (op
== REDIS_OP_UNION
|| j
== 0) {
5200 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5204 } else if (op
== REDIS_OP_DIFF
) {
5205 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5210 dictReleaseIterator(di
);
5212 /* result set is empty? Exit asap. */
5213 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5216 /* Output the content of the resulting set, if not in STORE mode */
5218 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5219 di
= dictGetIterator(dstset
->ptr
);
5220 while((de
= dictNext(di
)) != NULL
) {
5223 ele
= dictGetEntryKey(de
);
5224 addReplyBulk(c
,ele
);
5226 dictReleaseIterator(di
);
5227 decrRefCount(dstset
);
5229 /* If we have a target key where to store the resulting set
5230 * create this key with the result set inside */
5231 deleteKey(c
->db
,dstkey
);
5232 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5233 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5234 incrRefCount(dstkey
);
5235 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5237 decrRefCount(dstset
);
5238 addReply(c
,shared
.czero
);
5245 static void sunionCommand(redisClient
*c
) {
5246 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5249 static void sunionstoreCommand(redisClient
*c
) {
5250 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5253 static void sdiffCommand(redisClient
*c
) {
5254 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5257 static void sdiffstoreCommand(redisClient
*c
) {
5258 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5261 /* ==================================== ZSets =============================== */
5263 /* ZSETs are ordered sets using two data structures to hold the same elements
5264 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5267 * The elements are added to an hash table mapping Redis objects to scores.
5268 * At the same time the elements are added to a skip list mapping scores
5269 * to Redis objects (so objects are sorted by scores in this "view"). */
5271 /* This skiplist implementation is almost a C translation of the original
5272 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5273 * Alternative to Balanced Trees", modified in three ways:
5274 * a) this implementation allows for repeated values.
5275 * b) the comparison is not just by key (our 'score') but by satellite data.
5276 * c) there is a back pointer, so it's a doubly linked list with the back
5277 * pointers being only at "level 1". This allows to traverse the list
5278 * from tail to head, useful for ZREVRANGE. */
5280 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5281 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5283 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5285 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5291 static zskiplist
*zslCreate(void) {
5295 zsl
= zmalloc(sizeof(*zsl
));
5298 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5299 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5300 zsl
->header
->forward
[j
] = NULL
;
5302 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5303 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5304 zsl
->header
->span
[j
] = 0;
5306 zsl
->header
->backward
= NULL
;
5311 static void zslFreeNode(zskiplistNode
*node
) {
5312 decrRefCount(node
->obj
);
5313 zfree(node
->forward
);
5318 static void zslFree(zskiplist
*zsl
) {
5319 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5321 zfree(zsl
->header
->forward
);
5322 zfree(zsl
->header
->span
);
5325 next
= node
->forward
[0];
5332 static int zslRandomLevel(void) {
5334 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5336 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5339 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5340 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5341 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5345 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5346 /* store rank that is crossed to reach the insert position */
5347 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5349 while (x
->forward
[i
] &&
5350 (x
->forward
[i
]->score
< score
||
5351 (x
->forward
[i
]->score
== score
&&
5352 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5353 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5358 /* we assume the key is not already inside, since we allow duplicated
5359 * scores, and the re-insertion of score and redis object should never
5360 * happpen since the caller of zslInsert() should test in the hash table
5361 * if the element is already inside or not. */
5362 level
= zslRandomLevel();
5363 if (level
> zsl
->level
) {
5364 for (i
= zsl
->level
; i
< level
; i
++) {
5366 update
[i
] = zsl
->header
;
5367 update
[i
]->span
[i
-1] = zsl
->length
;
5371 x
= zslCreateNode(level
,score
,obj
);
5372 for (i
= 0; i
< level
; i
++) {
5373 x
->forward
[i
] = update
[i
]->forward
[i
];
5374 update
[i
]->forward
[i
] = x
;
5376 /* update span covered by update[i] as x is inserted here */
5378 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5379 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5383 /* increment span for untouched levels */
5384 for (i
= level
; i
< zsl
->level
; i
++) {
5385 update
[i
]->span
[i
-1]++;
5388 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5390 x
->forward
[0]->backward
= x
;
5396 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5397 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5399 for (i
= 0; i
< zsl
->level
; i
++) {
5400 if (update
[i
]->forward
[i
] == x
) {
5402 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5404 update
[i
]->forward
[i
] = x
->forward
[i
];
5406 /* invariant: i > 0, because update[0]->forward[0]
5407 * is always equal to x */
5408 update
[i
]->span
[i
-1] -= 1;
5411 if (x
->forward
[0]) {
5412 x
->forward
[0]->backward
= x
->backward
;
5414 zsl
->tail
= x
->backward
;
5416 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5421 /* Delete an element with matching score/object from the skiplist. */
5422 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5423 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5427 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5428 while (x
->forward
[i
] &&
5429 (x
->forward
[i
]->score
< score
||
5430 (x
->forward
[i
]->score
== score
&&
5431 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5435 /* We may have multiple elements with the same score, what we need
5436 * is to find the element with both the right score and object. */
5438 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5439 zslDeleteNode(zsl
, x
, update
);
5443 return 0; /* not found */
5445 return 0; /* not found */
5448 /* Delete all the elements with score between min and max from the skiplist.
5449 * Min and max are inclusive, so an element with score >= min && score <= max is deleted.
5450 * Note that this function takes the reference to the hash table view of the
5451 * sorted set, in order to remove the elements from the hash table too. */
5452 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5453 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5454 unsigned long removed
= 0;
5458 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5459 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5463 /* Delete nodes one after the other, starting from the first one with
5464 * score >= min, for as long as the score is <= max. */
5466 while (x
&& x
->score
<= max
) {
5467 zskiplistNode
*next
= x
->forward
[0];
5468 zslDeleteNode(zsl
, x
, update
);
5469 dictDelete(dict
,x
->obj
);
5474 return removed
; /* not found */
5477 /* Delete all the elements with rank between start and end from the skiplist.
5478 * Start and end are inclusive. Note that start and end need to be 1-based */
5479 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5480 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5481 unsigned long traversed
= 0, removed
= 0;
5485 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5486 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5487 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5495 while (x
&& traversed
<= end
) {
5496 zskiplistNode
*next
= x
->forward
[0];
5497 zslDeleteNode(zsl
, x
, update
);
5498 dictDelete(dict
,x
->obj
);
5507 /* Find the first node having a score equal or greater than the specified one.
5508 * Returns NULL if there is no match. */
5509 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5514 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5515 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5518 /* The node following x at the lowest level is the first one with a score
5519 * equal or greater than the specified one, or NULL if there is none. */
5520 return x
->forward
[0];
5523 /* Find the rank for an element by both score and key.
5524 * Returns 0 when the element cannot be found, rank otherwise.
5525 * Note that the rank is 1-based due to the span of zsl->header to the
5527 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5529 unsigned long rank
= 0;
5533 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5534 while (x
->forward
[i
] &&
5535 (x
->forward
[i
]->score
< score
||
5536 (x
->forward
[i
]->score
== score
&&
5537 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5538 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5542 /* x might be equal to zsl->header, so test if obj is non-NULL */
5543 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5550 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5551 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5553 unsigned long traversed
= 0;
5557 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5558 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5560 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5563 if (traversed
== rank
) {
5570 /* The actual Z-commands implementations */
5572 /* This generic command implements both ZADD and ZINCRBY.
5573 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5574 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5575 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5580 zsetobj
= lookupKeyWrite(c
->db
,key
);
5581 if (zsetobj
== NULL
) {
5582 zsetobj
= createZsetObject();
5583 dictAdd(c
->db
->dict
,key
,zsetobj
);
5586 if (zsetobj
->type
!= REDIS_ZSET
) {
5587 addReply(c
,shared
.wrongtypeerr
);
5593 /* Ok now since we implement both ZADD and ZINCRBY here the code
5594 * needs to handle the two different conditions. It's all about setting
5595 * '*score', that is, the new score to set, to the right value. */
5596 score
= zmalloc(sizeof(double));
5600 /* Read the old score. If the element was not present starts from 0 */
5601 de
= dictFind(zs
->dict
,ele
);
5603 double *oldscore
= dictGetEntryVal(de
);
5604 *score
= *oldscore
+ scoreval
;
5612 /* What follows is a simple remove and re-insert operation that is common
5613 * to both ZADD and ZINCRBY... */
5614 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5615 /* case 1: New element */
5616 incrRefCount(ele
); /* added to hash */
5617 zslInsert(zs
->zsl
,*score
,ele
);
5618 incrRefCount(ele
); /* added to skiplist */
5621 addReplyDouble(c
,*score
);
5623 addReply(c
,shared
.cone
);
5628 /* case 2: Score update operation */
5629 de
= dictFind(zs
->dict
,ele
);
5630 redisAssert(de
!= NULL
);
5631 oldscore
= dictGetEntryVal(de
);
5632 if (*score
!= *oldscore
) {
5635 /* Remove and insert the element in the skip list with new score */
5636 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5637 redisAssert(deleted
!= 0);
5638 zslInsert(zs
->zsl
,*score
,ele
);
5640 /* Update the score in the hash table */
5641 dictReplace(zs
->dict
,ele
,score
);
5647 addReplyDouble(c
,*score
);
5649 addReply(c
,shared
.czero
);
5653 static void zaddCommand(redisClient
*c
) {
5656 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5657 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5660 static void zincrbyCommand(redisClient
*c
) {
5663 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5664 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5667 static void zremCommand(redisClient
*c
) {
5674 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5675 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5678 de
= dictFind(zs
->dict
,c
->argv
[2]);
5680 addReply(c
,shared
.czero
);
5683 /* Delete from the skiplist */
5684 oldscore
= dictGetEntryVal(de
);
5685 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5686 redisAssert(deleted
!= 0);
5688 /* Delete from the hash table */
5689 dictDelete(zs
->dict
,c
->argv
[2]);
5690 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5691 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5693 addReply(c
,shared
.cone
);
5696 static void zremrangebyscoreCommand(redisClient
*c
) {
5703 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5704 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5706 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5707 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5710 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5711 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5712 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5713 server
.dirty
+= deleted
;
5714 addReplyLong(c
,deleted
);
5717 static void zremrangebyrankCommand(redisClient
*c
) {
5725 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5726 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5728 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5729 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5731 llen
= zs
->zsl
->length
;
5733 /* convert negative indexes */
5734 if (start
< 0) start
= llen
+start
;
5735 if (end
< 0) end
= llen
+end
;
5736 if (start
< 0) start
= 0;
5737 if (end
< 0) end
= 0;
5739 /* indexes sanity checks */
5740 if (start
> end
|| start
>= llen
) {
5741 addReply(c
,shared
.czero
);
5744 if (end
>= llen
) end
= llen
-1;
5746 /* increment start and end because zsl*Rank functions
5747 * use 1-based rank */
5748 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5749 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5750 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5751 server
.dirty
+= deleted
;
5752 addReplyLong(c
, deleted
);
5760 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5761 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5762 unsigned long size1
, size2
;
5763 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5764 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5765 return size1
- size2
;
5768 #define REDIS_AGGR_SUM 1
5769 #define REDIS_AGGR_MIN 2
5770 #define REDIS_AGGR_MAX 3
5772 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5773 if (aggregate
== REDIS_AGGR_SUM
) {
5774 *target
= *target
+ val
;
5775 } else if (aggregate
== REDIS_AGGR_MIN
) {
5776 *target
= val
< *target
? val
: *target
;
5777 } else if (aggregate
== REDIS_AGGR_MAX
) {
5778 *target
= val
> *target
? val
: *target
;
5781 redisPanic("Unknown ZUNION/INTER aggregate type");
5785 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5787 int aggregate
= REDIS_AGGR_SUM
;
5794 /* expect zsetnum input keys to be given */
5795 zsetnum
= atoi(c
->argv
[2]->ptr
);
5797 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5801 /* test if the expected number of keys would overflow */
5802 if (3+zsetnum
> c
->argc
) {
5803 addReply(c
,shared
.syntaxerr
);
5807 /* read keys to be used for input */
5808 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5809 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5810 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5814 if (zsetobj
->type
!= REDIS_ZSET
) {
5816 addReply(c
,shared
.wrongtypeerr
);
5819 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5822 /* default all weights to 1 */
5823 src
[i
].weight
= 1.0;
5826 /* parse optional extra arguments */
5828 int remaining
= c
->argc
- j
;
5831 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5833 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5834 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5837 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5839 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5840 aggregate
= REDIS_AGGR_SUM
;
5841 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5842 aggregate
= REDIS_AGGR_MIN
;
5843 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5844 aggregate
= REDIS_AGGR_MAX
;
5847 addReply(c
,shared
.syntaxerr
);
5853 addReply(c
,shared
.syntaxerr
);
5859 /* sort sets from the smallest to largest, this will improve our
5860 * algorithm's performance */
5861 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5863 dstobj
= createZsetObject();
5864 dstzset
= dstobj
->ptr
;
5866 if (op
== REDIS_OP_INTER
) {
5867 /* skip going over all entries if the smallest zset is NULL or empty */
5868 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5869 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5870 * from small to large, all src[i > 0].dict are non-empty too */
5871 di
= dictGetIterator(src
[0].dict
);
5872 while((de
= dictNext(di
)) != NULL
) {
5873 double *score
= zmalloc(sizeof(double)), value
;
5874 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5876 for (j
= 1; j
< zsetnum
; j
++) {
5877 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5879 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5880 zunionInterAggregate(score
, value
, aggregate
);
5886 /* skip entry when not present in every source dict */
5890 robj
*o
= dictGetEntryKey(de
);
5891 dictAdd(dstzset
->dict
,o
,score
);
5892 incrRefCount(o
); /* added to dictionary */
5893 zslInsert(dstzset
->zsl
,*score
,o
);
5894 incrRefCount(o
); /* added to skiplist */
5897 dictReleaseIterator(di
);
5899 } else if (op
== REDIS_OP_UNION
) {
5900 for (i
= 0; i
< zsetnum
; i
++) {
5901 if (!src
[i
].dict
) continue;
5903 di
= dictGetIterator(src
[i
].dict
);
5904 while((de
= dictNext(di
)) != NULL
) {
5905 /* skip key when already processed */
5906 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5908 double *score
= zmalloc(sizeof(double)), value
;
5909 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5911 /* because the zsets are sorted by size, it's only possible
5912 * for sets at larger indices to hold this entry */
5913 for (j
= (i
+1); j
< zsetnum
; j
++) {
5914 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5916 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5917 zunionInterAggregate(score
, value
, aggregate
);
5921 robj
*o
= dictGetEntryKey(de
);
5922 dictAdd(dstzset
->dict
,o
,score
);
5923 incrRefCount(o
); /* added to dictionary */
5924 zslInsert(dstzset
->zsl
,*score
,o
);
5925 incrRefCount(o
); /* added to skiplist */
5927 dictReleaseIterator(di
);
5930 /* unknown operator */
5931 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5934 deleteKey(c
->db
,dstkey
);
5935 if (dstzset
->zsl
->length
) {
5936 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5937 incrRefCount(dstkey
);
5938 addReplyLong(c
, dstzset
->zsl
->length
);
5941 decrRefCount(dstobj
);
5942 addReply(c
, shared
.czero
);
5947 static void zunionCommand(redisClient
*c
) {
5948 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5951 static void zinterCommand(redisClient
*c
) {
5952 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5955 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5967 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5968 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5970 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5972 } else if (c
->argc
>= 5) {
5973 addReply(c
,shared
.syntaxerr
);
5977 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5978 || checkType(c
,o
,REDIS_ZSET
)) return;
5983 /* convert negative indexes */
5984 if (start
< 0) start
= llen
+start
;
5985 if (end
< 0) end
= llen
+end
;
5986 if (start
< 0) start
= 0;
5987 if (end
< 0) end
= 0;
5989 /* indexes sanity checks */
5990 if (start
> end
|| start
>= llen
) {
5991 /* Out of range start or start > end result in empty list */
5992 addReply(c
,shared
.emptymultibulk
);
5995 if (end
>= llen
) end
= llen
-1;
5996 rangelen
= (end
-start
)+1;
5998 /* check if starting point is trivial, before searching
5999 * the element in log(N) time */
6001 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6004 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6007 /* Return the result in form of a multi-bulk reply */
6008 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6009 withscores
? (rangelen
*2) : rangelen
));
6010 for (j
= 0; j
< rangelen
; j
++) {
6012 addReplyBulk(c
,ele
);
6014 addReplyDouble(c
,ln
->score
);
6015 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6019 static void zrangeCommand(redisClient
*c
) {
6020 zrangeGenericCommand(c
,0);
6023 static void zrevrangeCommand(redisClient
*c
) {
6024 zrangeGenericCommand(c
,1);
6027 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6028 * If justcount is non-zero, just the count is returned. */
6029 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6032 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6033 int offset
= 0, limit
= -1;
6037 /* Parse the min-max interval. If one of the values is prefixed
6038 * by the "(" character, it's considered "open". For instance
6039 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6040 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6041 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6042 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6045 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6047 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6048 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6051 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6054 /* Parse "WITHSCORES": note that if the command was called with
6055 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6056 * enter the following paths to parse WITHSCORES and LIMIT. */
6057 if (c
->argc
== 5 || c
->argc
== 8) {
6058 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6063 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6067 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6072 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6073 addReply(c
,shared
.syntaxerr
);
6075 } else if (c
->argc
== (7 + withscores
)) {
6076 offset
= atoi(c
->argv
[5]->ptr
);
6077 limit
= atoi(c
->argv
[6]->ptr
);
6078 if (offset
< 0) offset
= 0;
6081 /* Ok, lookup the key and get the range */
6082 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6084 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6086 if (o
->type
!= REDIS_ZSET
) {
6087 addReply(c
,shared
.wrongtypeerr
);
6089 zset
*zsetobj
= o
->ptr
;
6090 zskiplist
*zsl
= zsetobj
->zsl
;
6092 robj
*ele
, *lenobj
= NULL
;
6093 unsigned long rangelen
= 0;
6095 /* Get the first node with the score >= min, or with
6096 * score > min if 'minex' is true. */
6097 ln
= zslFirstWithScore(zsl
,min
);
6098 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6101 /* No element matching the specified interval */
6102 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6106 /* We don't know in advance how many matching elements there
6107 * are in the list, so we push this object that will represent
6108 * the multi-bulk length in the output buffer, and will "fix"
6111 lenobj
= createObject(REDIS_STRING
,NULL
);
6113 decrRefCount(lenobj
);
6116 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6119 ln
= ln
->forward
[0];
6122 if (limit
== 0) break;
6125 addReplyBulk(c
,ele
);
6127 addReplyDouble(c
,ln
->score
);
6129 ln
= ln
->forward
[0];
6131 if (limit
> 0) limit
--;
6134 addReplyLong(c
,(long)rangelen
);
6136 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6137 withscores
? (rangelen
*2) : rangelen
);
6143 static void zrangebyscoreCommand(redisClient
*c
) {
6144 genericZrangebyscoreCommand(c
,0);
6147 static void zcountCommand(redisClient
*c
) {
6148 genericZrangebyscoreCommand(c
,1);
6151 static void zcardCommand(redisClient
*c
) {
6155 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6156 checkType(c
,o
,REDIS_ZSET
)) return;
6159 addReplyUlong(c
,zs
->zsl
->length
);
6162 static void zscoreCommand(redisClient
*c
) {
6167 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6168 checkType(c
,o
,REDIS_ZSET
)) return;
6171 de
= dictFind(zs
->dict
,c
->argv
[2]);
6173 addReply(c
,shared
.nullbulk
);
6175 double *score
= dictGetEntryVal(de
);
6177 addReplyDouble(c
,*score
);
6181 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6189 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6190 checkType(c
,o
,REDIS_ZSET
)) return;
6194 de
= dictFind(zs
->dict
,c
->argv
[2]);
6196 addReply(c
,shared
.nullbulk
);
6200 score
= dictGetEntryVal(de
);
6201 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6204 addReplyLong(c
, zsl
->length
- rank
);
6206 addReplyLong(c
, rank
-1);
6209 addReply(c
,shared
.nullbulk
);
6213 static void zrankCommand(redisClient
*c
) {
6214 zrankGenericCommand(c
, 0);
6217 static void zrevrankCommand(redisClient
*c
) {
6218 zrankGenericCommand(c
, 1);
6221 /* ========================= Hashes utility functions ======================= */
6222 #define REDIS_HASH_KEY 1
6223 #define REDIS_HASH_VALUE 2
6225 /* Check the length of a number of objects to see if we need to convert a
6226 * zipmap to a real hash. Note that we only check string encoded objects
6227 * as their string length can be queried in constant time. */
6228 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6230 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6232 for (i
= start
; i
<= end
; i
++) {
6233 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6234 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6236 convertToRealHash(subject
);
6242 /* Encode given objects in-place when the hash uses a dict. */
6243 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6244 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6245 if (o1
) *o1
= tryObjectEncoding(*o1
);
6246 if (o2
) *o2
= tryObjectEncoding(*o2
);
6250 /* Get the value from a hash identified by key. Returns either a string
6251 * object or NULL if the value cannot be found. The refcount of the object
6252 * is always increased by 1 when the value was found. */
6253 static robj
*hashGet(robj
*o
, robj
*key
) {
6255 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6258 key
= getDecodedObject(key
);
6259 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6260 value
= createStringObject((char*)v
,vlen
);
6264 dictEntry
*de
= dictFind(o
->ptr
,key
);
6266 value
= dictGetEntryVal(de
);
6267 incrRefCount(value
);
6273 /* Test if the key exists in the given hash. Returns 1 if the key
6274 * exists and 0 when it doesn't. */
6275 static int hashExists(robj
*o
, robj
*key
) {
6276 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6277 key
= getDecodedObject(key
);
6278 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6284 if (dictFind(o
->ptr
,key
) != NULL
) {
6291 /* Add an element, discard the old if the key already exists.
6292 * Return 0 on insert and 1 on update. */
6293 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6295 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6296 key
= getDecodedObject(key
);
6297 value
= getDecodedObject(value
);
6298 o
->ptr
= zipmapSet(o
->ptr
,
6299 key
->ptr
,sdslen(key
->ptr
),
6300 value
->ptr
,sdslen(value
->ptr
), &update
);
6302 decrRefCount(value
);
6304 /* Check if the zipmap needs to be upgraded to a real hash table */
6305 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6306 convertToRealHash(o
);
6308 if (dictReplace(o
->ptr
,key
,value
)) {
6315 incrRefCount(value
);
6320 /* Delete an element from a hash.
6321 * Return 1 on deleted and 0 on not found. */
6322 static int hashDelete(robj
*o
, robj
*key
) {
6324 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6325 key
= getDecodedObject(key
);
6326 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6329 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6330 /* Always check if the dictionary needs a resize after a delete. */
6331 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6336 /* Return the number of elements in a hash. */
6337 static unsigned long hashLength(robj
*o
) {
6338 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6339 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6342 /* Structure to hold hash iteration abstraction. Note that iteration over
6343 * hashes involves both fields and values. Because it is possible that
6344 * not both are required, store pointers in the iterator to avoid
6345 * unnecessary memory allocation for fields/values. */
6349 unsigned char *zk
, *zv
;
6350 unsigned int zklen
, zvlen
;
6356 static hashIterator
*hashInitIterator(robj
*subject
) {
6357 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6358 hi
->encoding
= subject
->encoding
;
6359 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6360 hi
->zi
= zipmapRewind(subject
->ptr
);
6361 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6362 hi
->di
= dictGetIterator(subject
->ptr
);
6369 static void hashReleaseIterator(hashIterator
*hi
) {
6370 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6371 dictReleaseIterator(hi
->di
);
6376 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6377 * could be found and REDIS_ERR when the iterator reaches the end. */
6378 static int hashNext(hashIterator
*hi
) {
6379 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6380 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6381 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6383 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6388 /* Get key or value object at current iteration position.
6389 * This increases the refcount of the field object by 1. */
6390 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6392 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6393 if (what
& REDIS_HASH_KEY
) {
6394 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6396 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6399 if (what
& REDIS_HASH_KEY
) {
6400 o
= dictGetEntryKey(hi
->de
);
6402 o
= dictGetEntryVal(hi
->de
);
6409 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6410 robj
*o
= lookupKeyWrite(c
->db
,key
);
6412 o
= createHashObject();
6413 dictAdd(c
->db
->dict
,key
,o
);
6416 if (o
->type
!= REDIS_HASH
) {
6417 addReply(c
,shared
.wrongtypeerr
);
6424 /* ============================= Hash commands ============================== */
6425 static void hsetCommand(redisClient
*c
) {
6429 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6430 hashTryConversion(o
,c
->argv
,2,3);
6431 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6432 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6433 addReply(c
, update
? shared
.czero
: shared
.cone
);
6437 static void hsetnxCommand(redisClient
*c
) {
6439 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6440 hashTryConversion(o
,c
->argv
,2,3);
6442 if (hashExists(o
, c
->argv
[2])) {
6443 addReply(c
, shared
.czero
);
6445 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6446 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6447 addReply(c
, shared
.cone
);
6452 static void hmsetCommand(redisClient
*c
) {
6456 if ((c
->argc
% 2) == 1) {
6457 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6461 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6462 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6463 for (i
= 2; i
< c
->argc
; i
+= 2) {
6464 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6465 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6467 addReply(c
, shared
.ok
);
6471 static void hincrbyCommand(redisClient
*c
) {
6472 long long value
, incr
;
6473 robj
*o
, *current
, *new;
6475 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6476 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6477 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6478 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6479 "hash value is not an integer") != REDIS_OK
) {
6480 decrRefCount(current
);
6483 decrRefCount(current
);
6489 new = createStringObjectFromLongLong(value
);
6490 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6491 hashSet(o
,c
->argv
[2],new);
6493 addReplyLongLong(c
,value
);
6497 static void hgetCommand(redisClient
*c
) {
6499 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6500 checkType(c
,o
,REDIS_HASH
)) return;
6502 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6503 addReplyBulk(c
,value
);
6504 decrRefCount(value
);
6506 addReply(c
,shared
.nullbulk
);
6510 static void hmgetCommand(redisClient
*c
) {
6513 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6514 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6515 addReply(c
,shared
.wrongtypeerr
);
6518 /* Note the check for o != NULL happens inside the loop. This is
6519 * done because objects that cannot be found are considered to be
6520 * an empty hash. The reply should then be a series of NULLs. */
6521 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6522 for (i
= 2; i
< c
->argc
; i
++) {
6523 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6524 addReplyBulk(c
,value
);
6525 decrRefCount(value
);
6527 addReply(c
,shared
.nullbulk
);
6532 static void hdelCommand(redisClient
*c
) {
6534 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6535 checkType(c
,o
,REDIS_HASH
)) return;
6537 if (hashDelete(o
,c
->argv
[2])) {
6538 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6539 addReply(c
,shared
.cone
);
6542 addReply(c
,shared
.czero
);
6546 static void hlenCommand(redisClient
*c
) {
6548 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6549 checkType(c
,o
,REDIS_HASH
)) return;
6551 addReplyUlong(c
,hashLength(o
));
6554 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6555 robj
*o
, *lenobj
, *obj
;
6556 unsigned long count
= 0;
6559 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6560 || checkType(c
,o
,REDIS_HASH
)) return;
6562 lenobj
= createObject(REDIS_STRING
,NULL
);
6564 decrRefCount(lenobj
);
6566 hi
= hashInitIterator(o
);
6567 while (hashNext(hi
) != REDIS_ERR
) {
6568 if (flags
& REDIS_HASH_KEY
) {
6569 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6570 addReplyBulk(c
,obj
);
6574 if (flags
& REDIS_HASH_VALUE
) {
6575 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6576 addReplyBulk(c
,obj
);
6581 hashReleaseIterator(hi
);
6583 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6586 static void hkeysCommand(redisClient
*c
) {
6587 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6590 static void hvalsCommand(redisClient
*c
) {
6591 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6594 static void hgetallCommand(redisClient
*c
) {
6595 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6598 static void hexistsCommand(redisClient
*c
) {
6600 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6601 checkType(c
,o
,REDIS_HASH
)) return;
6603 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6606 static void convertToRealHash(robj
*o
) {
6607 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6608 unsigned int klen
, vlen
;
6609 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6611 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6612 p
= zipmapRewind(zm
);
6613 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6614 robj
*keyobj
, *valobj
;
6616 keyobj
= createStringObject((char*)key
,klen
);
6617 valobj
= createStringObject((char*)val
,vlen
);
6618 keyobj
= tryObjectEncoding(keyobj
);
6619 valobj
= tryObjectEncoding(valobj
);
6620 dictAdd(dict
,keyobj
,valobj
);
6622 o
->encoding
= REDIS_ENCODING_HT
;
6627 /* ========================= Non type-specific commands ==================== */
6629 static void flushdbCommand(redisClient
*c
) {
6630 server
.dirty
+= dictSize(c
->db
->dict
);
6631 dictEmpty(c
->db
->dict
);
6632 dictEmpty(c
->db
->expires
);
6633 addReply(c
,shared
.ok
);
/* flushallCommand: calls emptyDb(), adds its return value to server.dirty,
 * replies shared.ok, kills a running background-save child (if any) and then
 * calls rdbSave() on server.dbfilename.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): embedded lines 6642 and 6644 are missing from this listing;
 * the return value of rdbSave() is not used in the visible call. */
6636 static void flushallCommand(redisClient
*c
) {
6637 server
.dirty
+= emptyDb();
6638 addReply(c
,shared
.ok
);
/* bgsavechildpid != -1 is the condition used throughout this file to detect
 * a background save in progress. */
6639 if (server
.bgsavechildpid
!= -1) {
6640 kill(server
.bgsavechildpid
,SIGKILL
);
6641 rdbRemoveTempFile(server
.bgsavechildpid
);
6643 rdbSave(server
.dbfilename
);
/* createSortOperation: allocates a redisSortOperation with zmalloc() and
 * stores 'pattern' in it. The pattern pointer is stored as-is; no reference
 * count change is visible here.
 *
 * type:    operation type (sortCommand passes REDIS_SORT_GET).
 * pattern: pattern object associated with the operation.
 *
 * NOTE(review): embedded lines 6649 and 6651-6652 are missing from this
 * listing; storing 'type' and returning 'so' presumably happen there. */
6647 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6648 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6650 so
->pattern
= pattern
;
6654 /* Return the value associated to the key with a name obtained
6655 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6656 * The returned object will always have its refcount increased by 1
6657 * when it is non-NULL. */
/* lookupKeyByPattern: builds a key name from 'pattern' and 'subst' on the
 * stack, looks it up with lookupKeyRead() and, when the pattern contains a
 * "->" part after the '*', fetches that field from the hash found.
 *
 * db:      database to search.
 * pattern: pattern object; pattern->ptr is read as an sds string.
 * subst:   object substituted into the pattern.
 *
 * Returns NULL on the failure paths visible below.
 *
 * NOTE(review): many short lines (declarations of spat/ssub/p/f, several
 * returns and braces) are missing from this listing, so the exact control
 * flow between the visible statements must be confirmed in the full file. */
6658 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6661 robj keyobj
, fieldobj
, *o
;
6662 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6663 /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */
6667 char buf
[REDIS_SORTKEY_MAX
+1];
6668 } keyname
, fieldname
;
6670 /* If the pattern is "#" return the substitution object itself in order
6671 * to implement the "SORT ... GET #" feature. */
6672 spat
= pattern
->ptr
;
6673 if (spat
[0] == '#' && spat
[1] == '\0') {
6674 incrRefCount(subst
);
6678 /* The substitution object may be specially encoded. If so we create
6679 * a decoded object on the fly. Otherwise getDecodedObject will just
6680 * increment the ref count, that we'll decrement later. */
6681 subst
= getDecodedObject(subst
);
/* NOTE(review): this early return comes after getDecodedObject(subst) and no
 * decrRefCount(subst) is visible on this path - possible reference leak;
 * confirm against the full file. */
6684 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6685 p
= strchr(spat
,'*');
6687 decrRefCount(subst
);
6691 /* Find out if we're dealing with a hash dereference. */
6692 if ((f
= strstr(p
+1, "->")) != NULL
) {
/* fieldlen counts everything from the "->" marker to the end of the
 * pattern, marker included. */
6693 fieldlen
= sdslen(spat
)-(f
-spat
);
6694 /* this also copies \0 character */
6695 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6696 fieldname
.len
= fieldlen
-2;
/* Assemble <prefix><subst><postfix> into keyname.buf and terminate it. */
6702 sublen
= sdslen(ssub
);
6703 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6704 memcpy(keyname
.buf
,spat
,prefixlen
);
6705 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6706 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6707 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6708 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6709 decrRefCount(subst
);
6711 /* Lookup substituted key */
/* The pointer handed to initStaticStringObject skips two longs from the
 * start of the stack struct, i.e. it points at its 'buf' member provided
 * the struct starts with two long fields (not visible here - confirm). */
6712 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6713 o
= lookupKeyRead(db
,&keyobj
);
6714 if (o
== NULL
) return NULL
;
6717 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6719 /* Retrieve value from hash by the field name. This operation
6720 * already increases the refcount of the returned object. */
6721 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6722 o
= hashGet(o
, &fieldobj
);
6724 if (o
->type
!= REDIS_STRING
) return NULL
;
6726 /* Every object that this function returns needs to have its refcount
6727 * increased. sortCommand decreases it again. */
6734 /* sortCompare() is used by qsort in sortCommand(). Since qsort_r, which
6735 * takes an additional parameter, is not standard but BSD-specific, we have to
6736 * pass sorting parameters via the global 'server' structure */
/* sortCompare: comparison callback handed to qsort()/pqsort() by
 * sortCommand(). Its behaviour is driven by server.sort_alpha,
 * server.sort_bypattern and server.sort_desc.
 *
 * s1, s2: pointers to the two redisSortObject elements being compared.
 *
 * Returns cmp, negated when server.sort_desc is set.
 *
 * NOTE(review): the lines assigning 'cmp' in the numeric and NULL-object
 * branches are missing from this listing. */
6737 static int sortCompare(const void *s1
, const void *s2
) {
6738 const redisSortObject
*so1
= s1
, *so2
= s2
;
6741 if (!server
.sort_alpha
) {
6742 /* Numeric sorting. Here it's trivial as we precomputed scores */
6743 if (so1
->u
.score
> so2
->u
.score
) {
6745 } else if (so1
->u
.score
< so2
->u
.score
) {
6751 /* Alphanumeric sorting */
6752 if (server
.sort_bypattern
) {
6753 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6754 /* At least one compare object is NULL */
6755 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6757 else if (so1
->u
.cmpobj
== NULL
)
6762 /* We have both the objects, use strcoll */
6763 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6766 /* Compare elements directly. */
6767 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
/* Descending order is obtained by flipping the sign of the result. */
6770 return server
.sort_desc
? -cmp
: cmp
;
6773 /* The SORT command is the most complex command in Redis. Warning: this code
6774 * is optimized for speed and a bit less for readability */
/* sortCommand: implementation of SORT. The visible phases are:
 *   1. look up c->argv[1] and reject anything that is not a set, list or
 *      sorted set;
 *   2. parse the options ASC / DESC / ALPHA / LIMIT / STORE / BY / GET;
 *   3. load every element into 'vector';
 *   4. unless 'dontsort' is set, compute per-element scores or compare
 *      objects and sort with qsort() or pqsort();
 *   5. either stream the result to the client or, when STORE was given,
 *      build a list object and install it under 'storekey';
 *   6. release 'sortval', the operations list and the compare objects.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): this listing omits many short lines (returns, braces,
 * counters such as j++ / getop++, several declarations). The comments below
 * describe only the statements that are visible. */
6775 static void sortCommand(redisClient
*c
) {
6778 int desc
= 0, alpha
= 0;
6779 int limit_start
= 0, limit_count
= -1, start
, end
;
6780 int j
, dontsort
= 0, vectorlen
;
6781 int getop
= 0; /* GET operation counter */
6782 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6783 redisSortObject
*vector
; /* Resulting vector to sort */
6785 /* Lookup the key to sort. It must be of the right types */
6786 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6787 if (sortval
== NULL
) {
6788 addReply(c
,shared
.emptymultibulk
);
6791 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6792 sortval
->type
!= REDIS_ZSET
)
6794 addReply(c
,shared
.wrongtypeerr
);
6798 /* Create a list of operations to perform for every sorted element.
6799 * Operations can be GET/DEL/INCR/DECR */
6800 operations
= listCreate();
6801 listSetFreeMethod(operations
,zfree
);
6804 /* Now we need to protect sortval incrementing its count, in the future
6805 * SORT may have options able to overwrite/delete keys during the sorting
6806 * and the sorted key itself may get destroyed */
6807 incrRefCount(sortval
);
6809 /* The SORT command has an SQL-alike syntax, parse it */
6810 while(j
< c
->argc
) {
/* leftargs: number of arguments that follow the current one. */
6811 int leftargs
= c
->argc
-j
-1;
6812 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6814 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6816 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6818 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
/* NOTE(review): atoi() offers no error reporting, so non-numeric LIMIT
 * arguments are not detected here. */
6819 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6820 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6822 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6823 storekey
= c
->argv
[j
+1];
6825 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6826 sortby
= c
->argv
[j
+1];
6827 /* If the BY pattern does not contain '*', i.e. it is constant,
6828 * we don't need to sort nor to lookup the weight keys. */
6829 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6831 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6832 listAddNodeTail(operations
,createSortOperation(
6833 REDIS_SORT_GET
,c
->argv
[j
+1]));
/* Syntax-error path: undo the protection taken on sortval, drop the
 * operations list and report the error. */
6837 decrRefCount(sortval
);
6838 listRelease(operations
);
6839 addReply(c
,shared
.syntaxerr
);
6845 /* Load the sorting vector with all the objects to sort */
6846 switch(sortval
->type
) {
6847 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6848 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6849 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6850 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
6852 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6855 if (sortval
->type
== REDIS_LIST
) {
6856 list
*list
= sortval
->ptr
;
6860 listRewind(list
,&li
);
6861 while((ln
= listNext(&li
))) {
6862 robj
*ele
= ln
->value
;
6863 vector
[j
].obj
= ele
;
6864 vector
[j
].u
.score
= 0;
6865 vector
[j
].u
.cmpobj
= NULL
;
6873 if (sortval
->type
== REDIS_SET
) {
6876 zset
*zs
= sortval
->ptr
;
6880 di
= dictGetIterator(set
);
6881 while((setele
= dictNext(di
)) != NULL
) {
6882 vector
[j
].obj
= dictGetEntryKey(setele
);
6883 vector
[j
].u
.score
= 0;
6884 vector
[j
].u
.cmpobj
= NULL
;
6887 dictReleaseIterator(di
);
/* Sanity check: the number of loaded elements must match the size computed
 * by the switch above. */
6889 redisAssert(j
== vectorlen
);
6891 /* Now it's time to load the right scores in the sorting vector */
6892 if (dontsort
== 0) {
6893 for (j
= 0; j
< vectorlen
; j
++) {
6896 /* lookup value to sort by */
6897 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6898 if (!byval
) continue;
6900 /* use object itself to sort by */
6901 byval
= vector
[j
].obj
;
6905 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6907 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6908 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6909 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
6910 /* Don't need to decode the object if it's
6911 * integer-encoded (the only encoding supported) so
6912 * far. We can just cast it */
6913 vector
[j
].u
.score
= (long)byval
->ptr
;
/* An assertion that can never hold: reaching it means an unexpected
 * encoding. */
6915 redisAssert(1 != 1);
6919 /* when the object was retrieved using lookupKeyByPattern,
6920 * its refcount needs to be decreased. */
6922 decrRefCount(byval
);
6927 /* We are ready to sort the vector... perform a bit of sanity check
6928 * on the LIMIT option too. We'll use a partial version of quicksort. */
6929 start
= (limit_start
< 0) ? 0 : limit_start
;
6930 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6931 if (start
>= vectorlen
) {
6932 start
= vectorlen
-1;
6935 if (end
>= vectorlen
) end
= vectorlen
-1;
6937 if (dontsort
== 0) {
/* Sorting parameters are handed to sortCompare() through the global
 * 'server' structure. */
6938 server
.sort_desc
= desc
;
6939 server
.sort_alpha
= alpha
;
6940 server
.sort_bypattern
= sortby
? 1 : 0;
6941 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6942 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6944 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6947 /* Send command output to the output buffer, performing the specified
6948 * GET/DEL/INCR/DECR operations if any. */
/* With GET operations the reply carries getop items per selected element,
 * otherwise exactly one. */
6949 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6950 if (storekey
== NULL
) {
6951 /* STORE option not specified, send the sorting result to client */
6952 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6953 for (j
= start
; j
<= end
; j
++) {
6957 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6958 listRewind(operations
,&li
);
6959 while((ln
= listNext(&li
))) {
6960 redisSortOperation
*sop
= ln
->value
;
6961 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6964 if (sop
->type
== REDIS_SORT_GET
) {
6966 addReply(c
,shared
.nullbulk
);
6968 addReplyBulk(c
,val
);
6972 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6977 robj
*listObject
= createListObject();
6978 list
*listPtr
= (list
*) listObject
->ptr
;
6980 /* STORE option specified, set the sorting result as a List object */
6981 for (j
= start
; j
<= end
; j
++) {
6986 listAddNodeTail(listPtr
,vector
[j
].obj
);
6987 incrRefCount(vector
[j
].obj
);
6989 listRewind(operations
,&li
);
6990 while((ln
= listNext(&li
))) {
6991 redisSortOperation
*sop
= ln
->value
;
6992 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6995 if (sop
->type
== REDIS_SORT_GET
) {
6997 listAddNodeTail(listPtr
,createStringObject("",0));
6999 /* We should do a incrRefCount on val because it is
7000 * added to the list, but also a decrRefCount because
7001 * it is returned by lookupKeyByPattern. This results
7002 * in doing nothing at all. */
7003 listAddNodeTail(listPtr
,val
);
7006 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
/* dictReplace() returning non-zero is followed by taking a reference on
 * storekey. */
7010 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
7011 incrRefCount(storekey
);
7013 /* Note: we add 1 because the DB is dirty anyway since even if the
7014 * SORT result is empty a new key is set and maybe the old content
7016 server
.dirty
+= 1+outputlen
;
7017 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7021 decrRefCount(sortval
);
7022 listRelease(operations
);
7023 for (j
= 0; j
< vectorlen
; j
++) {
7024 if (alpha
&& vector
[j
].u
.cmpobj
)
7025 decrRefCount(vector
[j
].u
.cmpobj
);
7030 /* Convert an amount of bytes into a human readable string in the form
7031 * of 100B, 2G, 100M, 4K, and so forth. */
/* bytesToHuman: formats the byte count 'n' into 's' using a B / K / M / G
 * suffix; the K, M and G forms are printed with two decimals.
 *
 * s: destination buffer. sprintf() is used, so the caller must supply a
 *    buffer large enough for the longest output; no size is passed in.
 * n: number of bytes.
 *
 * NOTE(review): the first condition (embedded line 7036) is missing from this
 * listing, and no branch for values of 1024^4 bytes or more is visible;
 * whether 's' is written in that case should be confirmed. */
7032 static void bytesToHuman(char *s
, unsigned long long n
) {
7037 sprintf(s
,"%lluB",n
);
7039 } else if (n
< (1024*1024)) {
7040 d
= (double)n
/(1024);
7041 sprintf(s
,"%.2fK",d
);
/* The LL suffix keeps the products from being evaluated as plain int. */
7042 } else if (n
< (1024LL*1024*1024)) {
7043 d
= (double)n
/(1024*1024);
7044 sprintf(s
,"%.2fM",d
);
7045 } else if (n
< (1024LL*1024*1024*1024)) {
7046 d
= (double)n
/(1024LL*1024*1024);
7047 sprintf(s
,"%.2fG",d
);
7051 /* Create the string returned by the INFO command. This is decoupled
7052 * from the INFO command itself as we need to report the same information
7053 * on memory corruption problems. */
/* genRedisInfoString: builds, with sdscatprintf(), the multi-line
 * "name:value\r\n" report made of a general section, a replication section
 * (only when server.masterhost is set), a VM section (only when
 * server.vm_enabled is set) and one "dbN:keys=...,expires=..." line for each
 * database that has keys or expires.
 *
 * NOTE(review): several format lines and arguments (embedded lines 7063,
 * 7084-7086, 7088-7091, 7096-7097, 7099, ...) are missing from this listing,
 * so the pairing between format specifiers and arguments cannot be checked
 * from here. */
7054 static sds
genRedisInfoString(void) {
7056 time_t uptime
= time(NULL
)-server
.stat_starttime
;
/* Human readable rendering of the memory currently reported by
 * zmalloc_used_memory(). */
7060 bytesToHuman(hmem
,zmalloc_used_memory());
7061 info
= sdscatprintf(sdsempty(),
7062 "redis_version:%s\r\n"
7064 "multiplexing_api:%s\r\n"
7065 "process_id:%ld\r\n"
7066 "uptime_in_seconds:%ld\r\n"
7067 "uptime_in_days:%ld\r\n"
7068 "connected_clients:%d\r\n"
7069 "connected_slaves:%d\r\n"
7070 "blocked_clients:%d\r\n"
7071 "used_memory:%zu\r\n"
7072 "used_memory_human:%s\r\n"
7073 "changes_since_last_save:%lld\r\n"
7074 "bgsave_in_progress:%d\r\n"
7075 "last_save_time:%ld\r\n"
7076 "bgrewriteaof_in_progress:%d\r\n"
7077 "total_connections_received:%lld\r\n"
7078 "total_commands_processed:%lld\r\n"
7079 "expired_keys:%lld\r\n"
7080 "hash_max_zipmap_entries:%ld\r\n"
7081 "hash_max_zipmap_value:%ld\r\n"
7082 "pubsub_channels:%ld\r\n"
7083 "pubsub_patterns:%u\r\n"
7087 (sizeof(long) == 8) ? "64" : "32",
7092 listLength(server
.clients
)-listLength(server
.slaves
),
7093 listLength(server
.slaves
),
7094 server
.blpop_blocked_clients
,
7095 zmalloc_used_memory(),
7098 server
.bgsavechildpid
!= -1,
7100 server
.bgrewritechildpid
!= -1,
7101 server
.stat_numconnections
,
7102 server
.stat_numcommands
,
7103 server
.stat_expiredkeys
,
7104 server
.hash_max_zipmap_entries
,
7105 server
.hash_max_zipmap_value
,
7106 dictSize(server
.pubsub_channels
),
7107 listLength(server
.pubsub_patterns
),
7108 server
.vm_enabled
!= 0,
7109 server
.masterhost
== NULL
? "master" : "slave"
/* Replication details are only appended when a master host is configured. */
7111 if (server
.masterhost
) {
7112 info
= sdscatprintf(info
,
7113 "master_host:%s\r\n"
7114 "master_port:%d\r\n"
7115 "master_link_status:%s\r\n"
7116 "master_last_io_seconds_ago:%d\r\n"
7119 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7121 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
/* Virtual memory statistics; every argument is cast explicitly to the type
 * named by its format specifier. */
7124 if (server
.vm_enabled
) {
7126 info
= sdscatprintf(info
,
7127 "vm_conf_max_memory:%llu\r\n"
7128 "vm_conf_page_size:%llu\r\n"
7129 "vm_conf_pages:%llu\r\n"
7130 "vm_stats_used_pages:%llu\r\n"
7131 "vm_stats_swapped_objects:%llu\r\n"
7132 "vm_stats_swappin_count:%llu\r\n"
7133 "vm_stats_swappout_count:%llu\r\n"
7134 "vm_stats_io_newjobs_len:%lu\r\n"
7135 "vm_stats_io_processing_len:%lu\r\n"
7136 "vm_stats_io_processed_len:%lu\r\n"
7137 "vm_stats_io_active_threads:%lu\r\n"
7138 "vm_stats_blocked_clients:%lu\r\n"
7139 ,(unsigned long long) server
.vm_max_memory
,
7140 (unsigned long long) server
.vm_page_size
,
7141 (unsigned long long) server
.vm_pages
,
7142 (unsigned long long) server
.vm_stats_used_pages
,
7143 (unsigned long long) server
.vm_stats_swapped_objects
,
7144 (unsigned long long) server
.vm_stats_swapins
,
7145 (unsigned long long) server
.vm_stats_swapouts
,
7146 (unsigned long) listLength(server
.io_newjobs
),
7147 (unsigned long) listLength(server
.io_processing
),
7148 (unsigned long) listLength(server
.io_processed
),
7149 (unsigned long) server
.io_active_threads
,
7150 (unsigned long) server
.vm_blocked_clients
/* Per-database summary: databases with neither keys nor expires are
 * skipped. */
7154 for (j
= 0; j
< server
.dbnum
; j
++) {
7155 long long keys
, vkeys
;
7157 keys
= dictSize(server
.db
[j
].dict
);
7158 vkeys
= dictSize(server
.db
[j
].expires
);
7159 if (keys
|| vkeys
) {
7160 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
/* infoCommand: sends the string produced by genRedisInfoString() to the
 * client as three replies: a "$<length>\r\n" header, the string itself and
 * shared.crlf.
 *
 * c: the client whose request is being served. */
7167 static void infoCommand(redisClient
*c
) {
7168 sds info
= genRedisInfoString();
/* The header is built first, while 'info' is still untouched by
 * addReplySds(). */
7169 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7170 (unsigned long)sdslen(info
)));
7171 addReplySds(c
,info
);
7172 addReply(c
,shared
.crlf
);
/* monitorCommand: flags the client as REDIS_SLAVE|REDIS_MONITOR, appends it
 * to server.monitors and replies shared.ok. Clients that already carry the
 * REDIS_SLAVE flag are ignored without any reply.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): embedded lines 7178 and 7180 are missing from this listing. */
7175 static void monitorCommand(redisClient
*c
) {
7176 /* ignore MONITOR if already slave or in monitor mode */
7177 if (c
->flags
& REDIS_SLAVE
) return;
7179 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7181 listAddNodeTail(server
.monitors
,c
);
7182 addReply(c
,shared
.ok
);
7185 /* ================================= Expire ================================= */
/* removeExpire: deletes 'key' from db->expires and branches on whether
 * dictDelete() reported DICT_OK.
 *
 * db:  database whose expires dictionary is modified.
 * key: key whose expire entry is removed.
 *
 * NOTE(review): both return statements are missing from this listing, so the
 * exact return values must be confirmed in the full file. */
7186 static int removeExpire(redisDb
*db
, robj
*key
) {
7187 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* setExpire: adds 'key' to db->expires with the expire time stored directly
 * in the value pointer (the time_t is cast to void*), and branches on
 * dictAdd() reporting DICT_ERR.
 *
 * db:   database whose expires dictionary is modified.
 * key:  key to associate the expire with.
 * when: expire time.
 *
 * NOTE(review): the bodies of both branches (return values, any reference
 * counting on 'key') are missing from this listing. */
7194 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7195 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7203 /* Return the expire time of the specified key, or -1 if no expire
7204 * is associated with this key (i.e. the key is non volatile) */
/* getExpire: returns the value stored for 'key' in db->expires, cast back to
 * time_t, or -1 when the dictionary is empty or has no entry for the key.
 *
 * db:  database to inspect.
 * key: key to look up. */
7205 static time_t getExpire(redisDb
*db
, robj
*key
) {
7208 /* No expire? return ASAP */
/* The dictSize() test short-circuits the lookup when no key in this
 * database has an expire. */
7209 if (dictSize(db
->expires
) == 0 ||
7210 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7212 return (time_t) dictGetEntryVal(de
);
/* expireIfNeeded: deletes 'key' from both db->expires and db->dict when its
 * stored expire time is strictly in the past.
 *
 * db:  database to inspect.
 * key: key to check.
 *
 * Returns 0 when the key has no expire or the expire time has not passed
 * yet; otherwise returns whether dictDelete() on db->dict reported DICT_OK.
 * server.stat_expiredkeys is incremented on the deletion path. */
7215 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7219 /* No expire? return ASAP */
7220 if (dictSize(db
->expires
) == 0 ||
7221 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7223 /* Lookup the expire */
7224 when
= (time_t) dictGetEntryVal(de
);
/* A key whose expire time equals the current second is still kept. */
7225 if (time(NULL
) <= when
) return 0;
7227 /* Delete the key */
7228 dictDelete(db
->expires
,key
);
7229 server
.stat_expiredkeys
++;
7230 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* deleteIfVolatile: deletes 'key' from both db->expires and db->dict whenever
 * it has an entry in db->expires; in the visible code the stored expire time
 * is not compared against the clock.
 *
 * db:  database to inspect.
 * key: key to check.
 *
 * Returns 0 when the key has no expire; otherwise returns whether
 * dictDelete() on db->dict reported DICT_OK. server.stat_expiredkeys is
 * incremented on the deletion path.
 *
 * NOTE(review): embedded line 7241 is missing from this listing. */
7233 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7236 /* No expire? return ASAP */
7237 if (dictSize(db
->expires
) == 0 ||
7238 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7240 /* Delete the key */
7242 server
.stat_expiredkeys
++;
7243 dictDelete(db
->expires
,key
);
7244 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* expireGenericCommand: shared implementation behind expireCommand() and
 * expireatCommand().
 *
 * c:      the client whose request is being served.
 * key:    key to set the expire on.
 * param:  object parsed into the long 'seconds' by
 *         getLongFromObjectOrReply(); when that call does not return
 *         REDIS_OK the function returns without doing anything else.
 * offset: 0 from expireCommand(), time(NULL) from expireatCommand().
 *
 * Visible replies: shared.czero and shared.cone.
 *
 * NOTE(review): the conditions guarding each reply and the place where
 * 'offset' is applied are missing from this listing (embedded lines
 * 7252-7254, 7256, 7258-7260, 7263-7264, 7268-7269); confirm the exact
 * control flow in the full file. */
7247 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7251 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7255 de
= dictFind(c
->db
->dict
,key
);
7257 addReply(c
,shared
.czero
);
/* Deletion path: the dataset is marked dirty only when deleteKey() reports
 * that something was removed. */
7261 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7262 addReply(c
, shared
.cone
);
/* Expire path: the absolute expire time is the current time plus
 * 'seconds'. */
7265 time_t when
= time(NULL
)+seconds
;
7266 if (setExpire(c
->db
,key
,when
)) {
7267 addReply(c
,shared
.cone
);
7270 addReply(c
,shared
.czero
);
/* expireCommand: calls expireGenericCommand() with key c->argv[1], parameter
 * c->argv[2] and an offset of 0.
 *
 * c: the client whose request is being served. */
7276 static void expireCommand(redisClient
*c
) {
7277 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
/* expireatCommand: calls expireGenericCommand() with key c->argv[1],
 * parameter c->argv[2] and the current time as offset.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): the offset parameter is a long while time(NULL) yields a
 * time_t, so an implicit conversion happens at the call. */
7280 static void expireatCommand(redisClient
*c
) {
7281 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
/* ttlCommand: replies ":<ttl>\r\n", where ttl is derived from the expire time
 * of key c->argv[1] as returned by getExpire().
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): embedded lines 7285-7287, 7289 and 7292 are missing from this
 * listing; the declarations of expire/ttl and a guard around the subtraction
 * presumably live there. */
7284 static void ttlCommand(redisClient
*c
) {
7288 expire
= getExpire(c
->db
,c
->argv
[1]);
/* Remaining lifetime in seconds, truncated to int; negative results are
 * normalised to -1. */
7290 ttl
= (int) (expire
-time(NULL
));
7291 if (ttl
< 0) ttl
= -1;
7293 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7296 /* ================================ MULTI/EXEC ============================== */
7298 /* Client state initialization for MULTI/EXEC */
/* initClientMultiState: resets the client's MULTI queue to the empty state,
 * i.e. a NULL commands array and a count of zero. Nothing is freed here.
 *
 * c: client whose mstate is reset. */
7299 static void initClientMultiState(redisClient
*c
) {
7300 c
->mstate
.commands
= NULL
;
7301 c
->mstate
.count
= 0;
7304 /* Release all the resources associated with MULTI/EXEC state */
/* freeClientMultiState: drops one reference on every argument of every
 * queued command and frees the c->mstate.commands array itself.
 *
 * c: client whose queued commands are released. The mstate fields are not
 *    reset in the visible code; the callers in this file follow up with
 *    initClientMultiState().
 *
 * NOTE(review): embedded lines 7314-7315 are missing from this listing; each
 * command's argv array is presumably freed there - confirm. */
7305 static void freeClientMultiState(redisClient
*c
) {
7308 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7310 multiCmd
*mc
= c
->mstate
.commands
+j
;
7312 for (i
= 0; i
< mc
->argc
; i
++)
7313 decrRefCount(mc
->argv
[i
]);
7316 zfree(c
->mstate
.commands
);
7319 /* Add a new command into the MULTI commands queue */
/* queueMultiCommand: grows c->mstate.commands by one slot and stores in it a
 * private copy of the client's current argument vector, taking one extra
 * reference on every argument.
 *
 * c:   client issuing the command inside MULTI.
 * cmd: command being queued.
 *
 * NOTE(review): embedded lines 7327-7328 and 7333 are missing from this
 * listing; storing cmd/argc into the slot and incrementing mstate.count
 * presumably happen there. */
7320 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
/* Make room for one more multiCmd; 'mc' then points at the new last slot. */
7324 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7325 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7326 mc
= c
->mstate
.commands
+c
->mstate
.count
;
/* Copy the pointers only; the objects are shared and kept alive by the
 * incrRefCount() calls below. */
7329 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7330 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7331 for (j
= 0; j
< c
->argc
; j
++)
7332 incrRefCount(mc
->argv
[j
]);
/* multiCommand: sets the REDIS_MULTI flag on the client and replies
 * shared.ok. No check for an already set flag is visible.
 *
 * c: the client whose request is being served. */
7336 static void multiCommand(redisClient
*c
) {
7337 c
->flags
|= REDIS_MULTI
;
7338 addReply(c
,shared
.ok
);
/* discardCommand: drops the queued MULTI commands, clears REDIS_MULTI and
 * replies shared.ok. When the client is not in MULTI state an error reply is
 * queued instead.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): embedded lines 7344-7346 are missing from this listing; the
 * error branch presumably returns there - confirm. */
7341 static void discardCommand(redisClient
*c
) {
7342 if (!(c
->flags
& REDIS_MULTI
)) {
7343 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
/* Release the queue, put it back into the pristine state and leave MULTI
 * mode. */
7347 freeClientMultiState(c
);
7348 initClientMultiState(c
);
7349 c
->flags
&= (~REDIS_MULTI
);
7350 addReply(c
,shared
.ok
);
7353 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7354 * implementation for more information. */
/* execCommandReplicateMulti: builds a temporary "MULTI" string object and
 * feeds it, as a one-argument command, to the append only file (when
 * server.appendonly is set) and to the slaves (when server.slaves is not
 * empty), using the id of the client's current database.
 *
 * c: client executing the transaction; only c->db->id is read. */
7355 static void execCommandReplicateMulti(redisClient
*c
) {
7356 struct redisCommand
*cmd
;
7357 robj
*multistring
= createStringObject("MULTI",5);
7359 cmd
= lookupCommand("multi");
7360 if (server
.appendonly
)
7361 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7362 if (listLength(server
.slaves
))
7363 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
/* Drop the reference obtained from createStringObject() above. */
7364 decrRefCount(multistring
);
/* execCommand: runs every command queued in c->mstate, after replying with a
 * "*<count>\r\n" header. The client's argc/argv are temporarily swapped with
 * those of each queued command, and restored afterwards; finally the queue
 * is released and REDIS_MULTI is cleared.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): embedded lines 7368-7371, 7374-7376, 7391 and 7400-7401 are
 * missing from this listing (declarations, the return of the error branch
 * and the statement announced by the trailing comment). */
7367 static void execCommand(redisClient
*c
) {
7372 if (!(c
->flags
& REDIS_MULTI
)) {
7373 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7377 /* Replicate a MULTI request now that we are sure the block is executed.
7378 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7379 * both the AOF and the replication link will have the same consistency
7380 * and atomicity guarantees. */
7381 execCommandReplicateMulti(c
);
7383 /* Exec all the queued commands */
7384 orig_argv
= c
->argv
;
7385 orig_argc
= c
->argc
;
7386 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7387 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7388 c
->argc
= c
->mstate
.commands
[j
].argc
;
7389 c
->argv
= c
->mstate
.commands
[j
].argv
;
7390 call(c
,c
->mstate
.commands
[j
].cmd
);
/* Restore the EXEC command's own arguments before the queue is freed. */
7392 c
->argv
= orig_argv
;
7393 c
->argc
= orig_argc
;
7394 freeClientMultiState(c
);
7395 initClientMultiState(c
);
7396 c
->flags
&= (~REDIS_MULTI
);
7397 /* Make sure the EXEC command is always replicated / AOF, since we
7398 * always send the MULTI command (we can't know beforehand if the
7399 * next operations will contain at least a modification to the DB). */
7403 /* =========================== Blocking Operations ========================= */
7405 /* Currently Redis blocking operations support is limited to list POP ops,
7406 * so the current implementation is not fully generic, but it is also not
7407 * completely specific so it will not require a rewrite to support new
7408 * kind of blocking operations in the future.
7410 * Still it's important to note that list blocking operations can be already
7411 * used as a notification mechanism in order to implement other blocking
7412 * operations at application level, so there must be a very strong evidence
7413 * of usefulness and generality before new blocking operations are implemented.
7415 * This is how the current blocking POP works, we use BLPOP as example:
7416 * - If the user calls BLPOP and the key exists and contains a non empty list
7417 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7418 * if there is no need to block.
7419 * - If instead BLPOP is called and the key does not exist or the list is
7420 * empty we need to block. In order to do so we remove the notification for
7421 * new data to read in the client socket (so that we'll not serve new
7422 * requests if the blocking request is not served). Also we put the client
7423 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7424 * blocking for these keys.
7425 * - If a PUSH operation against a key with blocked clients waiting is
7426 * performed, we serve the first in the list: basically instead of pushing
7427 * the new element inside the list we return it to the (first / oldest)
7428 * blocking client, unblock the client, and remove it from the list.
7430 * The above comment and the source code should be enough in order to understand
7431 * the implementation and modify / fix it later.
7434 /* Set a client in blocking mode for the specified keys, with the specified timeout. */
/* blockForKeys: records in the client the keys it is blocked on and the
 * timeout, registers the client in c->db->blockingkeys under every key, and
 * marks it REDIS_BLOCKED.
 *
 * c:       client to block.
 * keys:    array of key objects.
 * numkeys: number of entries in 'keys'.
 * timeout: stored as-is in c->blockingto.
 *
 * NOTE(review): embedded lines 7451-7453, 7455, 7459 and 7461 are missing
 * from this listing; the test on 'de' and the creation of the list 'l'
 * presumably happen there. */
7436 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7441 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7442 c
->blockingkeysnum
= numkeys
;
7443 c
->blockingto
= timeout
;
7444 for (j
= 0; j
< numkeys
; j
++) {
7445 /* Add the key in the client structure, to map clients -> keys */
7446 c
->blockingkeys
[j
] = keys
[j
];
7447 incrRefCount(keys
[j
]);
7449 /* And in the other "side", to map keys -> clients */
7450 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7454 /* For every key we take a list of clients blocked for it */
/* The key is referenced a second time because it is now also stored as a
 * dictionary key. */
7456 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7457 incrRefCount(keys
[j
]);
7458 assert(retval
== DICT_OK
);
7460 l
= dictGetEntryVal(de
);
7462 listAddNodeTail(l
,c
);
7464 /* Mark the client as a blocked client */
7465 c
->flags
|= REDIS_BLOCKED
;
7466 server
.blpop_blocked_clients
++;
7469 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
/* unblockClientWaitingData: removes the client from the waiting list of every
 * key it is blocked on, releases c->blockingkeys, clears REDIS_BLOCKED and
 * decrements server.blpop_blocked_clients. If input is pending in
 * c->querybuf it is processed right away.
 *
 * c: client to unblock; c->blockingkeys must be non-NULL (asserted).
 *
 * NOTE(review): embedded line 7480 is missing from this listing; 'de' is
 * dereferenced right after the lookup, so a check presumably sits there. */
7470 static void unblockClientWaitingData(redisClient
*c
) {
7475 assert(c
->blockingkeys
!= NULL
);
7476 /* The client may wait for multiple keys, so unblock it for every key. */
7477 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7478 /* Remove this client from the list of clients waiting for this key. */
7479 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7481 l
= dictGetEntryVal(de
);
7482 listDelNode(l
,listSearchKey(l
,c
));
7483 /* If the list is empty we need to remove it to avoid wasting memory */
7484 if (listLength(l
) == 0)
7485 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
/* Release the reference taken on the key when the client was blocked. */
7486 decrRefCount(c
->blockingkeys
[j
]);
7488 /* Cleanup the client structure */
7489 zfree(c
->blockingkeys
);
7490 c
->blockingkeys
= NULL
;
7491 c
->flags
&= (~REDIS_BLOCKED
);
7492 server
.blpop_blocked_clients
--;
7493 /* We want to process data if there is some command waiting
7494 * in the input buffer. Note that this is safe even if
7495 * unblockClientWaitingData() gets called from freeClient() because
7496 * freeClient() will be smart enough to call this function
7497 * *after* c->querybuf was set to NULL. */
7498 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7501 /* This should be called from any function PUSHing into lists.
7502 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7503 * 'ele' is the element pushed.
7505 * If the function returns 0 there was no client waiting for a list push
7508 * If the function returns 1 there was a client waiting for a list push
7509 * against this key, the element was passed to this client thus it's not
7510 * needed to actually add it to the list and the caller should return asap. */
/* handleClientsWaitingListPush: if some client is blocked on 'key' in the
 * pushing client's database, hands 'ele' to one waiting client as a
 * two-element multi bulk reply (key, element) and unblocks that client.
 *
 * c:   the pushing client; only c->db is used.
 * key: key being pushed against.
 * ele: element being pushed.
 *
 * Returns 0 when no entry for 'key' exists in c->db->blockingkeys.
 *
 * NOTE(review): embedded lines 7514-7516, 7520-7521, 7523 and 7528-7529 are
 * missing from this listing; picking the list node 'ln' and the final return
 * presumably happen there. */
7511 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7512 struct dictEntry
*de
;
7513 redisClient
*receiver
;
7517 de
= dictFind(c
->db
->blockingkeys
,key
);
7518 if (de
== NULL
) return 0;
7519 l
= dictGetEntryVal(de
);
7522 receiver
= ln
->value
;
/* Reply to the blocked client, not to the pushing one. */
7524 addReplySds(receiver
,sdsnew("*2\r\n"));
7525 addReplyBulk(receiver
,key
);
7526 addReplyBulk(receiver
,ele
);
7527 unblockClientWaitingData(receiver
);
7531 /* Blocking RPOP/LPOP */
/* blockingPopGenericCommand: shared implementation behind blpopCommand() and
 * brpopCommand(). Scans the keys c->argv[1] .. c->argv[argc-2]; for a
 * non-empty list it emits a "*2" header plus the key name and delegates to
 * popGenericCommand() with a temporarily rewritten argument vector. After
 * the loop it parses the last argument as the timeout and calls
 * blockForKeys().
 *
 * c:     the client whose request is being served.
 * where: REDIS_HEAD or REDIS_TAIL, passed through to popGenericCommand().
 *
 * NOTE(review): many short lines are missing from this listing (embedded
 * lines 7533-7536, 7539, 7542-7543, 7549-7550, 7556-7558, 7567,
 * 7571-7575), including the NULL check on 'o', the returns and the lines
 * that install the temporary argv into the client. */
7532 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7537 for (j
= 1; j
< c
->argc
-1; j
++) {
7538 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7540 if (o
->type
!= REDIS_LIST
) {
7541 addReply(c
,shared
.wrongtypeerr
);
7544 list
*list
= o
->ptr
;
7545 if (listLength(list
) != 0) {
7546 /* If the list contains elements fall back to the usual
7547 * non-blocking POP operation */
7548 robj
*argv
[2], **orig_argv
;
7551 /* We need to alter the command arguments before to call
7552 * popGenericCommand() as the command takes a single key. */
7553 orig_argv
= c
->argv
;
7554 orig_argc
= c
->argc
;
7555 argv
[1] = c
->argv
[j
];
7559 /* Also the return value is different, we need to output
7560 * the multi bulk reply header and the key name. The
7561 * "real" command will add the last element (the value)
7562 * for us. If this sounds like a hack to you it's just
7563 * because it is... */
7564 addReplySds(c
,sdsnew("*2\r\n"));
7565 addReplyBulk(c
,argv
[1]);
7566 popGenericCommand(c
,where
);
7568 /* Fix the client structure with the original stuff */
7569 c
->argv
= orig_argv
;
7570 c
->argc
= orig_argc
;
7576 /* If the list is empty or the key does not exist we must block */
/* A positive timeout is relative and gets converted to an absolute time;
 * other values are passed on unchanged. strtol() is called without an end
 * pointer, so trailing garbage is not detected. */
7577 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7578 if (timeout
> 0) timeout
+= time(NULL
);
/* Keys are argv[1..argc-2]: the command name and the trailing timeout are
 * excluded. */
7579 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
/* blpopCommand: calls blockingPopGenericCommand() with REDIS_HEAD.
 *
 * c: the client whose request is being served. */
7582 static void blpopCommand(redisClient
*c
) {
7583 blockingPopGenericCommand(c
,REDIS_HEAD
);
/* brpopCommand: calls blockingPopGenericCommand() with REDIS_TAIL.
 *
 * c: the client whose request is being served. */
7586 static void brpopCommand(redisClient
*c
) {
7587 blockingPopGenericCommand(c
,REDIS_TAIL
);
7590 /* =============================== Replication ============================= */
/* syncWrite: writes to 'fd', waiting for writability with aeWait() in slices
 * of 1000 (presumably milliseconds - confirm against aeWait) and checking
 * the elapsed wall-clock time against 'timeout' seconds.
 *
 * fd:      descriptor to write to.
 * ptr:     data to write.
 * size:    number of bytes to write; also the initial value of 'ret'.
 * timeout: maximum number of seconds, compared against time(NULL)-start.
 *
 * Returns -1 when write() fails.
 *
 * NOTE(review): the loop header, the pointer/size bookkeeping and the other
 * return statements are missing from this listing. */
7592 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7593 ssize_t nwritten
, ret
= size
;
7594 time_t start
= time(NULL
);
7598 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7599 nwritten
= write(fd
,ptr
,size
);
7600 if (nwritten
== -1) return -1;
7604 if ((time(NULL
)-start
) > timeout
) {
/* syncRead: reads from 'fd', waiting for readability with aeWait() in slices
 * of 1000 (presumably milliseconds - confirm against aeWait) and checking
 * the elapsed wall-clock time against 'timeout' seconds.
 *
 * fd:      descriptor to read from.
 * ptr:     destination buffer.
 * size:    number of bytes requested from read().
 * timeout: maximum number of seconds, compared against time(NULL)-start.
 *
 * Returns -1 when read() fails.
 *
 * NOTE(review): the loop header, the handling of 'totread' and the other
 * return statements are missing from this listing. */
7612 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7613 ssize_t nread
, totread
= 0;
7614 time_t start
= time(NULL
);
7618 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7619 nread
= read(fd
,ptr
,size
);
7620 if (nread
== -1) return -1;
7625 if ((time(NULL
)-start
) > timeout
) {
/* syncReadLine: reads from 'fd' one byte at a time through syncRead().
 *
 * fd:      descriptor to read from.
 * ptr:     destination buffer.
 * size:    capacity of the destination buffer.
 * timeout: forwarded to every syncRead() call.
 *
 * Returns -1 as soon as syncRead() returns -1.
 *
 * NOTE(review): most of the body (embedded lines 7634-7639, 7641-7642,
 * 7644-7652) is missing from this listing, including the loop and the
 * end-of-line test. */
7633 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7640 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
/* Strip a carriage return stored just before the current position, but only
 * when at least one byte has been stored (nread != 0). */
7643 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* syncCommand: turns the requesting client into a slave waiting for an RDB
 * transfer. Depending on server.bgsavechildpid it attaches the client to a
 * background save already in progress (copying the reply buffer of another
 * slave in REDIS_REPL_WAIT_BGSAVE_END state), defers it to the next
 * background save (REDIS_REPL_WAIT_BGSAVE_START), or starts a new background
 * save. Finally the client is flagged REDIS_SLAVE and appended to
 * server.slaves.
 *
 * c: the client whose request is being served.
 *
 * NOTE(review): several short lines are missing from this listing (embedded
 * lines 7664-7665, 7674-7677, 7680, 7682-7683, 7690, 7695-7696, 7702-7703,
 * 7705-7706, 7708), including returns and the branch conditions around the
 * slave search. */
7654 static void syncCommand(redisClient
*c
) {
7655 /* ignore SYNC if already slave or in monitor mode */
7656 if (c
->flags
& REDIS_SLAVE
) return;
7658 /* SYNC can't be issued when the server has pending data to send to
7659 * the client about already issued commands. We need a fresh reply
7660 * buffer registering the differences between the BGSAVE and the current
7661 * dataset, so that we can copy to other slaves if needed. */
7662 if (listLength(c
->reply
) != 0) {
7663 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7667 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7668 /* Here we need to check if there is a background saving operation
7669 * in progress, or if it is required to start one */
7670 if (server
.bgsavechildpid
!= -1) {
7671 /* Ok a background save is in progress. Let's check if it is a good
7672 * one for replication, i.e. if there is another slave that is
7673 * registering differences since the server forked to save */
7678 listRewind(server
.slaves
,&li
);
7679 while((ln
= listNext(&li
))) {
7681 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7684 /* Perfect, the server is already registering differences for
7685 * another slave. Set the right state, and copy the buffer. */
7686 listRelease(c
->reply
);
7687 c
->reply
= listDup(slave
->reply
);
7688 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7689 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7691 /* No way, we need to wait for the next BGSAVE in order to
7692 * register differences */
7693 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7694 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7697 /* Ok we don't have a BGSAVE in progress, let's start one */
7698 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7699 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7700 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
/* NOTE(review): the reply text below spells "Unalbe"; it is left untouched
 * here because it is sent to clients at runtime. */
7701 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7704 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7707 c
->flags
|= REDIS_SLAVE
;
7709 listAddNodeTail(server
.slaves
,c
);
/* sendBulkToSlave: writable-event handler that streams the dump file open on
 * slave->repldbfd to the slave socket, one chunk of at most REDIS_IOBUF_LEN
 * bytes per invocation. On the first call (repldboff == 0) a "$<count>\r\n"
 * header is written first. When the whole file has been sent the file is
 * closed, the slave becomes REDIS_REPL_ONLINE and the write handler is
 * replaced with sendReplyToClient.
 *
 * el:       event loop (not referenced in the visible code).
 * fd:       slave socket.
 * privdata: the slave's redisClient.
 * mask:     unused (see REDIS_NOTUSED).
 *
 * NOTE(review): the error branches (embedded lines 7730-7736, 7739,
 * 7742-7744, 7747-7750, 7759-7761) are missing from this listing. */
7713 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7714 redisClient
*slave
= privdata
;
7716 REDIS_NOTUSED(mask
);
7717 char buf
[REDIS_IOBUF_LEN
];
7718 ssize_t nwritten
, buflen
;
7720 if (slave
->repldboff
== 0) {
7721 /* Write the bulk write count before to transfer the DB. In theory here
7722 * we don't know how much room there is in the output buffer of the
7723 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7724 * operations) will never be smaller than the few bytes we need. */
/* NOTE(review): the format uses %lld while the visible cast is to unsigned
 * long long. */
7727 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7729 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
/* Position the dump file at the current offset and read the next chunk. */
7737 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7738 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7740 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7741 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7745 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7746 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
/* Only the bytes actually written advance the offset; a short write is
 * retried from there on the next event. */
7751 slave
->repldboff
+= nwritten
;
7752 if (slave
->repldboff
== slave
->repldbsize
) {
7753 close(slave
->repldbfd
);
7754 slave
->repldbfd
= -1;
7755 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7756 slave
->replstate
= REDIS_REPL_ONLINE
;
7757 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7758 sendReplyToClient
, slave
) == AE_ERR
) {
7762 addReplySds(slave
,sdsempty());
7763 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7767 /* This function is called at the end of every backgrond saving.
7768 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7769 * otherwise REDIS_ERR is passed to the function.
7771 * The goal of this function is to handle slaves waiting for a successful
7772 * background saving in order to perform non-blocking synchronization. */
7773 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7775 int startbgsave
= 0;
7778 listRewind(server
.slaves
,&li
);
7779 while((ln
= listNext(&li
))) {
7780 redisClient
*slave
= ln
->value
;
7782 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7784 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7785 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7786 struct redis_stat buf
;
7788 if (bgsaveerr
!= REDIS_OK
) {
7790 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7793 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7794 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7796 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7799 slave
->repldboff
= 0;
7800 slave
->repldbsize
= buf
.st_size
;
7801 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7802 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7803 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7810 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7813 listRewind(server
.slaves
,&li
);
7814 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7815 while((ln
= listNext(&li
))) {
7816 redisClient
*slave
= ln
->value
;
7818 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7825 static int syncWithMaster(void) {
7826 char buf
[1024], tmpfile
[256], authcmd
[1024];
7828 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7829 int dfd
, maxtries
= 5;
7832 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7837 /* AUTH with the master if required. */
7838 if(server
.masterauth
) {
7839 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7840 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7842 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7846 /* Read the AUTH result. */
7847 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7849 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7853 if (buf
[0] != '+') {
7855 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7860 /* Issue the SYNC command */
7861 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7863 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7867 /* Read the bulk write count */
7868 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7870 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7874 if (buf
[0] != '$') {
7876 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7879 dumpsize
= strtol(buf
+1,NULL
,10);
7880 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7881 /* Read the bulk write data on a temp file */
7883 snprintf(tmpfile
,256,
7884 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7885 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7886 if (dfd
!= -1) break;
7891 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7895 int nread
, nwritten
;
7897 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7899 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7905 nwritten
= write(dfd
,buf
,nread
);
7906 if (nwritten
== -1) {
7907 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7915 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7916 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7922 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7923 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7927 server
.master
= createClient(fd
);
7928 server
.master
->flags
|= REDIS_MASTER
;
7929 server
.master
->authenticated
= 1;
7930 server
.replstate
= REDIS_REPL_CONNECTED
;
7934 static void slaveofCommand(redisClient
*c
) {
7935 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7936 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7937 if (server
.masterhost
) {
7938 sdsfree(server
.masterhost
);
7939 server
.masterhost
= NULL
;
7940 if (server
.master
) freeClient(server
.master
);
7941 server
.replstate
= REDIS_REPL_NONE
;
7942 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7945 sdsfree(server
.masterhost
);
7946 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7947 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7948 if (server
.master
) freeClient(server
.master
);
7949 server
.replstate
= REDIS_REPL_CONNECT
;
7950 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7951 server
.masterhost
, server
.masterport
);
7953 addReply(c
,shared
.ok
);
7956 /* ============================ Maxmemory directive ======================== */
7958 /* Try to free one object form the pre-allocated objects free list.
7959 * This is useful under low mem conditions as by default we take 1 million
7960 * free objects allocated. On success REDIS_OK is returned, otherwise
7962 static int tryFreeOneObjectFromFreelist(void) {
7965 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7966 if (listLength(server
.objfreelist
)) {
7967 listNode
*head
= listFirst(server
.objfreelist
);
7968 o
= listNodeValue(head
);
7969 listDelNode(server
.objfreelist
,head
);
7970 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7974 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7979 /* This function gets called when 'maxmemory' is set on the config file to limit
7980 * the max memory used by the server, and we are out of memory.
7981 * This function will try to, in order:
7983 * - Free objects from the free list
7984 * - Try to remove keys with an EXPIRE set
7986 * It is not possible to free enough memory to reach used-memory < maxmemory
7987 * the server will start refusing commands that will enlarge even more the
7990 static void freeMemoryIfNeeded(void) {
7991 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7992 int j
, k
, freed
= 0;
7994 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7995 for (j
= 0; j
< server
.dbnum
; j
++) {
7997 robj
*minkey
= NULL
;
7998 struct dictEntry
*de
;
8000 if (dictSize(server
.db
[j
].expires
)) {
8002 /* From a sample of three keys drop the one nearest to
8003 * the natural expire */
8004 for (k
= 0; k
< 3; k
++) {
8007 de
= dictGetRandomKey(server
.db
[j
].expires
);
8008 t
= (time_t) dictGetEntryVal(de
);
8009 if (minttl
== -1 || t
< minttl
) {
8010 minkey
= dictGetEntryKey(de
);
8014 deleteKey(server
.db
+j
,minkey
);
8017 if (!freed
) return; /* nothing to free... */
8021 /* ============================== Append Only file ========================== */
8023 /* Write the append only file buffer on disk.
8025 * Since we are required to write the AOF before replying to the client,
8026 * and the only way the client socket can get a write is entering when the
8027 * the event loop, we accumulate all the AOF writes in a memory
8028 * buffer and write it on disk using this function just before entering
8029 * the event loop again. */
8030 static void flushAppendOnlyFile(void) {
8034 if (sdslen(server
.aofbuf
) == 0) return;
8036 /* We want to perform a single write. This should be guaranteed atomic
8037 * at least if the filesystem we are writing is a real physical one.
8038 * While this will save us against the server being killed I don't think
8039 * there is much to do about the whole server stopping for power problems
8041 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8042 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8043 /* Ooops, we are in troubles. The best thing to do for now is
8044 * aborting instead of giving the illusion that everything is
8045 * working as expected. */
8046 if (nwritten
== -1) {
8047 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8049 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8053 sdsfree(server
.aofbuf
);
8054 server
.aofbuf
= sdsempty();
8056 /* Fsync if needed */
8058 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8059 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8060 now
-server
.lastfsync
> 1))
8062 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8063 * flushing metadata. */
8064 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8065 server
.lastfsync
= now
;
8069 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8070 sds buf
= sdsempty();
8074 /* The DB this command was targetting is not the same as the last command
8075 * we appendend. To issue a SELECT command is needed. */
8076 if (dictid
!= server
.appendseldb
) {
8079 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8080 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8081 (unsigned long)strlen(seldb
),seldb
);
8082 server
.appendseldb
= dictid
;
8085 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
8086 * EXPIREs into EXPIREATs calls */
8087 if (cmd
->proc
== expireCommand
) {
8090 tmpargv
[0] = createStringObject("EXPIREAT",8);
8091 tmpargv
[1] = argv
[1];
8092 incrRefCount(argv
[1]);
8093 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
8094 tmpargv
[2] = createObject(REDIS_STRING
,
8095 sdscatprintf(sdsempty(),"%ld",when
));
8099 /* Append the actual command */
8100 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8101 for (j
= 0; j
< argc
; j
++) {
8104 o
= getDecodedObject(o
);
8105 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8106 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8107 buf
= sdscatlen(buf
,"\r\n",2);
8111 /* Free the objects from the modified argv for EXPIREAT */
8112 if (cmd
->proc
== expireCommand
) {
8113 for (j
= 0; j
< 3; j
++)
8114 decrRefCount(argv
[j
]);
8117 /* Append to the AOF buffer. This will be flushed on disk just before
8118 * of re-entering the event loop, so before the client will get a
8119 * positive reply about the operation performed. */
8120 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8122 /* If a background append only file rewriting is in progress we want to
8123 * accumulate the differences between the child DB and the current one
8124 * in a buffer, so that when the child process will do its work we
8125 * can append the differences to the new append only file. */
8126 if (server
.bgrewritechildpid
!= -1)
8127 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8132 /* In Redis commands are always executed in the context of a client, so in
8133 * order to load the append only file we need to create a fake client. */
8134 static struct redisClient
*createFakeClient(void) {
8135 struct redisClient
*c
= zmalloc(sizeof(*c
));
8139 c
->querybuf
= sdsempty();
8143 /* We set the fake client as a slave waiting for the synchronization
8144 * so that Redis will not try to send replies to this client. */
8145 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8146 c
->reply
= listCreate();
8147 listSetFreeMethod(c
->reply
,decrRefCount
);
8148 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8149 initClientMultiState(c
);
8153 static void freeFakeClient(struct redisClient
*c
) {
8154 sdsfree(c
->querybuf
);
8155 listRelease(c
->reply
);
8156 freeClientMultiState(c
);
8160 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8161 * error (the append only file is zero-length) REDIS_ERR is returned. On
8162 * fatal error an error message is logged and the program exists. */
8163 int loadAppendOnlyFile(char *filename
) {
8164 struct redisClient
*fakeClient
;
8165 FILE *fp
= fopen(filename
,"r");
8166 struct redis_stat sb
;
8167 unsigned long long loadedkeys
= 0;
8168 int appendonly
= server
.appendonly
;
8170 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8174 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8178 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8179 * to the same file we're about to read. */
8180 server
.appendonly
= 0;
8182 fakeClient
= createFakeClient();
8189 struct redisCommand
*cmd
;
8191 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8197 if (buf
[0] != '*') goto fmterr
;
8199 argv
= zmalloc(sizeof(robj
*)*argc
);
8200 for (j
= 0; j
< argc
; j
++) {
8201 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8202 if (buf
[0] != '$') goto fmterr
;
8203 len
= strtol(buf
+1,NULL
,10);
8204 argsds
= sdsnewlen(NULL
,len
);
8205 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8206 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8207 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8210 /* Command lookup */
8211 cmd
= lookupCommand(argv
[0]->ptr
);
8213 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8216 /* Try object encoding */
8217 if (cmd
->flags
& REDIS_CMD_BULK
)
8218 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8219 /* Run the command in the context of a fake client */
8220 fakeClient
->argc
= argc
;
8221 fakeClient
->argv
= argv
;
8222 cmd
->proc(fakeClient
);
8223 /* Discard the reply objects list from the fake client */
8224 while(listLength(fakeClient
->reply
))
8225 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8226 /* Clean up, ready for the next command */
8227 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8229 /* Handle swapping while loading big datasets when VM is on */
8231 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8232 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8233 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8238 /* This point can only be reached when EOF is reached without errors.
8239 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8240 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8243 freeFakeClient(fakeClient
);
8244 server
.appendonly
= appendonly
;
8249 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8251 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8255 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8259 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8260 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8264 /* Avoid the incr/decr ref count business if possible to help
8265 * copy-on-write (we are often in a child process when this function
8267 * Also makes sure that key objects don't get incrRefCount-ed when VM
8269 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8270 obj
= getDecodedObject(obj
);
8273 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8274 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8275 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8277 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8278 if (decrrc
) decrRefCount(obj
);
8281 if (decrrc
) decrRefCount(obj
);
/* Write binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (it may contain NUL bytes).
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it needs the matching %lu conversion. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted with "%.17g". Returns 1 on success, 0 on write
 * error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char buf[128], dbuf[128];

    /* dbuf already carries the trailing CRLF, which is not part of the
     * payload: hence the -2 when computing the announced length. */
    snprintf(dbuf,sizeof(dbuf),"%.17g\r\n",d);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(dbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(dbuf,strlen(dbuf),1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char buf[128], lbuf[128];

    /* lbuf already carries the trailing CRLF, which is not part of the
     * payload: hence the -2 when computing the announced length. */
    snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(lbuf)-2);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (fwrite(lbuf,strlen(lbuf),1,fp) == 0) return 0;
    return 1;
}
8319 /* Write a sequence of commands able to fully rebuild the dataset into
8320 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8321 static int rewriteAppendOnlyFile(char *filename
) {
8322 dictIterator
*di
= NULL
;
8327 time_t now
= time(NULL
);
8329 /* Note that we have to use a different temp name here compared to the
8330 * one used by rewriteAppendOnlyFileBackground() function. */
8331 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8332 fp
= fopen(tmpfile
,"w");
8334 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8337 for (j
= 0; j
< server
.dbnum
; j
++) {
8338 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8339 redisDb
*db
= server
.db
+j
;
8341 if (dictSize(d
) == 0) continue;
8342 di
= dictGetIterator(d
);
8348 /* SELECT the new DB */
8349 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8350 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8352 /* Iterate this DB writing every entry */
8353 while((de
= dictNext(di
)) != NULL
) {
8358 key
= dictGetEntryKey(de
);
8359 /* If the value for this key is swapped, load a preview in memory.
8360 * We use a "swapped" flag to remember if we need to free the
8361 * value object instead to just increment the ref count anyway
8362 * in order to avoid copy-on-write of pages if we are forked() */
8363 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8364 key
->storage
== REDIS_VM_SWAPPING
) {
8365 o
= dictGetEntryVal(de
);
8368 o
= vmPreviewObject(key
);
8371 expiretime
= getExpire(db
,key
);
8373 /* Save the key and associated value */
8374 if (o
->type
== REDIS_STRING
) {
8375 /* Emit a SET command */
8376 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8377 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8379 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8380 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8381 } else if (o
->type
== REDIS_LIST
) {
8382 /* Emit the RPUSHes needed to rebuild the list */
8383 list
*list
= o
->ptr
;
8387 listRewind(list
,&li
);
8388 while((ln
= listNext(&li
))) {
8389 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8390 robj
*eleobj
= listNodeValue(ln
);
8392 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8393 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8394 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8396 } else if (o
->type
== REDIS_SET
) {
8397 /* Emit the SADDs needed to rebuild the set */
8399 dictIterator
*di
= dictGetIterator(set
);
8402 while((de
= dictNext(di
)) != NULL
) {
8403 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8404 robj
*eleobj
= dictGetEntryKey(de
);
8406 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8407 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8408 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8410 dictReleaseIterator(di
);
8411 } else if (o
->type
== REDIS_ZSET
) {
8412 /* Emit the ZADDs needed to rebuild the sorted set */
8414 dictIterator
*di
= dictGetIterator(zs
->dict
);
8417 while((de
= dictNext(di
)) != NULL
) {
8418 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8419 robj
*eleobj
= dictGetEntryKey(de
);
8420 double *score
= dictGetEntryVal(de
);
8422 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8423 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8424 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8425 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8427 dictReleaseIterator(di
);
8428 } else if (o
->type
== REDIS_HASH
) {
8429 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8431 /* Emit the HSETs needed to rebuild the hash */
8432 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8433 unsigned char *p
= zipmapRewind(o
->ptr
);
8434 unsigned char *field
, *val
;
8435 unsigned int flen
, vlen
;
8437 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8438 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8439 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8440 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8442 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8446 dictIterator
*di
= dictGetIterator(o
->ptr
);
8449 while((de
= dictNext(di
)) != NULL
) {
8450 robj
*field
= dictGetEntryKey(de
);
8451 robj
*val
= dictGetEntryVal(de
);
8453 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8454 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8455 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8456 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8458 dictReleaseIterator(di
);
8461 redisPanic("Unknown object type");
8463 /* Save the expire time */
8464 if (expiretime
!= -1) {
8465 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8466 /* If this key is already expired skip it */
8467 if (expiretime
< now
) continue;
8468 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8469 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8470 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8472 if (swapped
) decrRefCount(o
);
8474 dictReleaseIterator(di
);
8477 /* Make sure data will not remain on the OS's output buffers */
8482 /* Use RENAME to make sure the DB file is changed atomically only
8483 * if the generate DB file is ok. */
8484 if (rename(tmpfile
,filename
) == -1) {
8485 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8489 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8495 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8496 if (di
) dictReleaseIterator(di
);
8500 /* This is how rewriting of the append only file in background works:
8502 * 1) The user calls BGREWRITEAOF
8503 * 2) Redis calls this function, that forks():
8504 * 2a) the child rewrite the append only file in a temp file.
8505 * 2b) the parent accumulates differences in server.bgrewritebuf.
8506 * 3) When the child finished '2a' exists.
8507 * 4) The parent will trap the exit code, if it's OK, will append the
8508 * data accumulated into server.bgrewritebuf into the temp file, and
8509 * finally will rename(2) the temp file in the actual file name.
8510 * The the new file is reopened as the new append only file. Profit!
8512 static int rewriteAppendOnlyFileBackground(void) {
8515 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8516 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8517 if ((childpid
= fork()) == 0) {
8521 if (server
.vm_enabled
) vmReopenSwapFile();
8523 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8524 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8531 if (childpid
== -1) {
8532 redisLog(REDIS_WARNING
,
8533 "Can't rewrite append only file in background: fork: %s",
8537 redisLog(REDIS_NOTICE
,
8538 "Background append only file rewriting started by pid %d",childpid
);
8539 server
.bgrewritechildpid
= childpid
;
8540 updateDictResizePolicy();
8541 /* We set appendseldb to -1 in order to force the next call to the
8542 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8543 * accumulated by the parent into server.bgrewritebuf will start
8544 * with a SELECT statement and it will be safe to merge. */
8545 server
.appendseldb
= -1;
8548 return REDIS_OK
; /* unreached */
8551 static void bgrewriteaofCommand(redisClient
*c
) {
8552 if (server
.bgrewritechildpid
!= -1) {
8553 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8556 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8557 char *status
= "+Background append only file rewriting started\r\n";
8558 addReplySds(c
,sdsnew(status
));
8560 addReply(c
,shared
.err
);
/* Remove the temp file used by the background append only file rewrite
 * performed by the child with pid 'childpid'. The name is built with the
 * same pattern used by rewriteAppendOnlyFileBackground(). */
static void aofRemoveTempFile(pid_t childpid) {
    char tmpfile[256];

    snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(tmpfile);
}
/* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
 * - Threaded Virtual Memory I/O
 * The two parts are not fully decoupled, but functions are split among two
 * different sections of the source code (delimited by comments) in order to
 * make more clear what functionality is about the blocking VM and what about
 * the threaded (not blocking) VM.
 *
 * Redis VM is a blocking VM (one that blocks reading swapped values from
 * disk into memory when a value swapped out is needed in memory) that is made
 * unblocking by trying to examine the command argument vector in order to
 * load in background values that will likely be needed in order to exec
 * the command. The command is executed only once all the relevant keys
 * are loaded into memory.
 *
 * This basically is almost as simple as a blocking VM, but almost as parallel
 * as a fully non-blocking VM.
 */
8592 /* =================== Virtual Memory - Blocking Side ====================== */
8594 static void vmInit(void) {
8600 if (server
.vm_max_threads
!= 0)
8601 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8603 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8604 /* Try to open the old swap file, otherwise create it */
8605 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8606 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8608 if (server
.vm_fp
== NULL
) {
8609 redisLog(REDIS_WARNING
,
8610 "Can't open the swap file: %s. Exiting.",
8614 server
.vm_fd
= fileno(server
.vm_fp
);
8615 /* Lock the swap file for writing, this is useful in order to avoid
8616 * another instance to use the same swap file for a config error. */
8617 fl
.l_type
= F_WRLCK
;
8618 fl
.l_whence
= SEEK_SET
;
8619 fl
.l_start
= fl
.l_len
= 0;
8620 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8621 redisLog(REDIS_WARNING
,
8622 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8626 server
.vm_next_page
= 0;
8627 server
.vm_near_pages
= 0;
8628 server
.vm_stats_used_pages
= 0;
8629 server
.vm_stats_swapped_objects
= 0;
8630 server
.vm_stats_swapouts
= 0;
8631 server
.vm_stats_swapins
= 0;
8632 totsize
= server
.vm_pages
*server
.vm_page_size
;
8633 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8634 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8635 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8639 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8641 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8642 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8643 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8644 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8646 /* Initialize threaded I/O (used by Virtual Memory) */
8647 server
.io_newjobs
= listCreate();
8648 server
.io_processing
= listCreate();
8649 server
.io_processed
= listCreate();
8650 server
.io_ready_clients
= listCreate();
8651 pthread_mutex_init(&server
.io_mutex
,NULL
);
8652 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8653 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8654 server
.io_active_threads
= 0;
8655 if (pipe(pipefds
) == -1) {
8656 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8660 server
.io_ready_pipe_read
= pipefds
[0];
8661 server
.io_ready_pipe_write
= pipefds
[1];
8662 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8663 /* LZF requires a lot of stack */
8664 pthread_attr_init(&server
.io_threads_attr
);
8665 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8666 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8667 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8668 /* Listen for events in the threaded I/O pipe */
8669 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8670 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8671 oom("creating file event");
8674 /* Mark the page as used */
8675 static void vmMarkPageUsed(off_t page
) {
8676 off_t byte
= page
/8;
8678 redisAssert(vmFreePage(page
) == 1);
8679 server
.vm_bitmap
[byte
] |= 1<<bit
;
8682 /* Mark N contiguous pages as used, with 'page' being the first. */
8683 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8686 for (j
= 0; j
< count
; j
++)
8687 vmMarkPageUsed(page
+j
);
8688 server
.vm_stats_used_pages
+= count
;
8689 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8690 (long long)count
, (long long)page
);
8693 /* Mark the page as free */
8694 static void vmMarkPageFree(off_t page
) {
8695 off_t byte
= page
/8;
8697 redisAssert(vmFreePage(page
) == 0);
8698 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8701 /* Mark N contiguous pages as free, with 'page' being the first. */
8702 static void vmMarkPagesFree(off_t page
, off_t count
) {
8705 for (j
= 0; j
< count
; j
++)
8706 vmMarkPageFree(page
+j
);
8707 server
.vm_stats_used_pages
-= count
;
8708 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8709 (long long)count
, (long long)page
);
8712 /* Test if the page is free */
8713 static int vmFreePage(off_t page
) {
8714 off_t byte
= page
/8;
8716 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8719 /* Find N contiguous free pages storing the first page of the cluster in *first.
8720 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8721 * REDIS_ERR is returned.
8723 * This function uses a simple algorithm: we try to allocate
8724 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8725 * again from the start of the swap file searching for free spaces.
8727 * If it looks pretty clear that there are no free pages near our offset
8728 * we try to find less populated places doing a forward jump of
8729 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8730 * without hurry, and then we jump again and so forth...
8732 * This function can be improved using a free list to avoid to guess
8733 * too much, since we could collect data about freed pages.
8735 * note: I implemented this function just after watching an episode of
8736 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8738 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8739 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8741 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8742 server
.vm_near_pages
= 0;
8743 server
.vm_next_page
= 0;
8745 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8746 base
= server
.vm_next_page
;
8748 while(offset
< server
.vm_pages
) {
8749 off_t
this = base
+offset
;
8751 /* If we overflow, restart from page zero */
8752 if (this >= server
.vm_pages
) {
8753 this -= server
.vm_pages
;
8755 /* Just overflowed, what we found on tail is no longer
8756 * interesting, as it's no longer contiguous. */
8760 if (vmFreePage(this)) {
8761 /* This is a free page */
8763 /* Already got N free pages? Return to the caller, with success */
8765 *first
= this-(n
-1);
8766 server
.vm_next_page
= this+1;
8767 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8771 /* The current one is not a free page */
8775 /* Fast-forward if the current page is not free and we already
8776 * searched enough near this place. */
8778 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8779 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8781 /* Note that even if we rewind after the jump, we are don't need
8782 * to make sure numfree is set to zero as we only jump *if* it
8783 * is set to zero. */
8785 /* Otherwise just check the next page */
8792 /* Write the specified object at the specified page of the swap file */
8793 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8794 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8795 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8796 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8797 redisLog(REDIS_WARNING
,
8798 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8802 rdbSaveObject(server
.vm_fp
,o
);
8803 fflush(server
.vm_fp
);
8804 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8808 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8809 * needed to later retrieve the object into the key object.
8810 * If we can't find enough contiguous empty pages to swap the object on disk
8811 * REDIS_ERR is returned. */
8812 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8813 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8816 assert(key
->storage
== REDIS_VM_MEMORY
);
8817 assert(key
->refcount
== 1);
8818 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8819 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8820 key
->vm
.page
= page
;
8821 key
->vm
.usedpages
= pages
;
8822 key
->storage
= REDIS_VM_SWAPPED
;
8823 key
->vtype
= val
->type
;
8824 decrRefCount(val
); /* Deallocate the object from memory. */
8825 vmMarkPagesUsed(page
,pages
);
8826 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8827 (unsigned char*) key
->ptr
,
8828 (unsigned long long) page
, (unsigned long long) pages
);
8829 server
.vm_stats_swapped_objects
++;
8830 server
.vm_stats_swapouts
++;
8834 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8837 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8838 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8839 redisLog(REDIS_WARNING
,
8840 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8844 o
= rdbLoadObject(type
,server
.vm_fp
);
8846 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8849 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8853 /* Load the value object relative to the 'key' object from swap to memory.
8854 * The newly allocated object is returned.
8856 * If preview is true the unserialized object is returned to the caller but
8857 * no changes are made to the key object, nor the pages are marked as freed */
8858 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8861 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8862 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8864 key
->storage
= REDIS_VM_MEMORY
;
8865 key
->vm
.atime
= server
.unixtime
;
8866 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8867 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8868 (unsigned char*) key
->ptr
);
8869 server
.vm_stats_swapped_objects
--;
8871 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8872 (unsigned char*) key
->ptr
);
8874 server
.vm_stats_swapins
++;
8878 /* Plain object loading, from swap to memory */
8879 static robj
*vmLoadObject(robj
*key
) {
8880 /* If we are loading the object in background, stop it, we
8881 * need to load this object synchronously ASAP. */
8882 if (key
->storage
== REDIS_VM_LOADING
)
8883 vmCancelThreadedIOJob(key
);
8884 return vmGenericLoadObject(key
,0);
8887 /* Just load the value on disk, without to modify the key.
8888 * This is useful when we want to perform some operation on the value
8889 * without to really bring it from swap to memory, like while saving the
8890 * dataset or rewriting the append only log. */
8891 static robj
*vmPreviewObject(robj
*key
) {
8892 return vmGenericLoadObject(key
,1);
8895 /* How a good candidate is this object for swapping?
8896 * The better candidate it is, the greater the returned value.
8898 * Currently we try to perform a fast estimation of the object size in
8899 * memory, and combine it with aging informations.
8901 * Basically swappability = idle-time * log(estimated size)
8903 * Bigger objects are preferred over smaller objects, but not
8904 * proportionally, this is why we use the logarithm. This algorithm is
8905 * just a first try and will probably be tuned later. */
8906 static double computeObjectSwappability(robj
*o
) {
8907 time_t age
= server
.unixtime
- o
->vm
.atime
;
8911 struct dictEntry
*de
;
8914 if (age
<= 0) return 0;
8917 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8920 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8925 listNode
*ln
= listFirst(l
);
8927 asize
= sizeof(list
);
8929 robj
*ele
= ln
->value
;
8932 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8933 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8935 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8940 z
= (o
->type
== REDIS_ZSET
);
8941 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8943 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8944 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8949 de
= dictGetRandomKey(d
);
8950 ele
= dictGetEntryKey(de
);
8951 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8952 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8954 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8955 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8959 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8960 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8961 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8962 unsigned int klen
, vlen
;
8963 unsigned char *key
, *val
;
8965 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8969 asize
= len
*(klen
+vlen
+3);
8970 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8972 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8977 de
= dictGetRandomKey(d
);
8978 ele
= dictGetEntryKey(de
);
8979 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8980 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8982 ele
= dictGetEntryVal(de
);
8983 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8984 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8986 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8991 return (double)age
*log(1+asize
);
8994 /* Try to swap an object that's a good candidate for swapping.
8995 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8996 * to swap any object at all.
8998 * If 'usethreaded' is true, Redis will try to swap the object in background
8999 * using I/O threads. */
9000 static int vmSwapOneObject(int usethreads
) {
9002 struct dictEntry
*best
= NULL
;
9003 double best_swappability
= 0;
9004 redisDb
*best_db
= NULL
;
9007 for (j
= 0; j
< server
.dbnum
; j
++) {
9008 redisDb
*db
= server
.db
+j
;
9009 /* Why maxtries is set to 100?
9010 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9011 * are swappable objects */
9014 if (dictSize(db
->dict
) == 0) continue;
9015 for (i
= 0; i
< 5; i
++) {
9017 double swappability
;
9019 if (maxtries
) maxtries
--;
9020 de
= dictGetRandomKey(db
->dict
);
9021 key
= dictGetEntryKey(de
);
9022 val
= dictGetEntryVal(de
);
9023 /* Only swap objects that are currently in memory.
9025 * Also don't swap shared objects if threaded VM is on, as we
9026 * try to ensure that the main thread does not touch the
9027 * object while the I/O thread is using it, but we can't
9028 * control other keys without adding additional mutex. */
9029 if (key
->storage
!= REDIS_VM_MEMORY
||
9030 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
9031 if (maxtries
) i
--; /* don't count this try */
9034 swappability
= computeObjectSwappability(val
);
9035 if (!best
|| swappability
> best_swappability
) {
9037 best_swappability
= swappability
;
9042 if (best
== NULL
) return REDIS_ERR
;
9043 key
= dictGetEntryKey(best
);
9044 val
= dictGetEntryVal(best
);
9046 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9047 key
->ptr
, best_swappability
);
9049 /* Unshare the key if needed */
9050 if (key
->refcount
> 1) {
9051 robj
*newkey
= dupStringObject(key
);
9053 key
= dictGetEntryKey(best
) = newkey
;
9057 vmSwapObjectThreaded(key
,val
,best_db
);
9060 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9061 dictGetEntryVal(best
) = NULL
;
/* Swap one object using the blocking code path of vmSwapOneObject(). */
static int vmSwapOneObjectBlocking() {
    const int usethreads = 0;

    return vmSwapOneObject(usethreads);
}
/* Swap one object using the threaded code path of vmSwapOneObject(). */
static int vmSwapOneObjectThreaded() {
    const int usethreads = 1;

    return vmSwapOneObject(usethreads);
}
9077 /* Return true if it's safe to swap out objects in a given moment.
9078 * Basically we don't want to swap objects out while there is a BGSAVE
9079 * or a BGAEOREWRITE running in backgroud. */
9080 static int vmCanSwapOut(void) {
9081 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9084 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9085 * and was deleted. Otherwise 0 is returned. */
9086 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9090 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9091 foundkey
= dictGetEntryKey(de
);
9092 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9097 /* =================== Virtual Memory - Threaded I/O ======================= */
9099 static void freeIOJob(iojob
*j
) {
9100 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9101 j
->type
== REDIS_IOJOB_DO_SWAP
||
9102 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9103 decrRefCount(j
->val
);
9104 /* We don't decrRefCount the j->key field as we did't incremented
9105 * the count creating IO Jobs. This is because the key field here is
9106 * just used as an indentifier and if a key is removed the Job should
9107 * never be touched again. */
9111 /* Every time a thread finished a Job, it writes a byte into the write side
9112 * of an unix pipe in order to "awake" the main thread, and this function
9114 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9118 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9120 REDIS_NOTUSED(mask
);
9121 REDIS_NOTUSED(privdata
);
9123 /* For every byte we read in the read side of the pipe, there is one
9124 * I/O job completed to process. */
9125 while((retval
= read(fd
,buf
,1)) == 1) {
9129 struct dictEntry
*de
;
9131 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9133 /* Get the processed element (the oldest one) */
9135 assert(listLength(server
.io_processed
) != 0);
9136 if (toprocess
== -1) {
9137 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9138 if (toprocess
<= 0) toprocess
= 1;
9140 ln
= listFirst(server
.io_processed
);
9142 listDelNode(server
.io_processed
,ln
);
9144 /* If this job is marked as canceled, just ignore it */
9149 /* Post process it in the main thread, as there are things we
9150 * can do just here to avoid race conditions and/or invasive locks */
9151 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9152 de
= dictFind(j
->db
->dict
,j
->key
);
9154 key
= dictGetEntryKey(de
);
9155 if (j
->type
== REDIS_IOJOB_LOAD
) {
9158 /* Key loaded, bring it at home */
9159 key
->storage
= REDIS_VM_MEMORY
;
9160 key
->vm
.atime
= server
.unixtime
;
9161 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9162 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9163 (unsigned char*) key
->ptr
);
9164 server
.vm_stats_swapped_objects
--;
9165 server
.vm_stats_swapins
++;
9166 dictGetEntryVal(de
) = j
->val
;
9167 incrRefCount(j
->val
);
9170 /* Handle clients waiting for this key to be loaded. */
9171 handleClientsBlockedOnSwappedKey(db
,key
);
9172 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9173 /* Now we know the amount of pages required to swap this object.
9174 * Let's find some space for it, and queue this task again
9175 * rebranded as REDIS_IOJOB_DO_SWAP. */
9176 if (!vmCanSwapOut() ||
9177 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9179 /* Ooops... no space or we can't swap as there is
9180 * a fork()ed Redis trying to save stuff on disk. */
9182 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9184 /* Note that we need to mark this pages as used now,
9185 * if the job will be canceled, we'll mark them as freed
9187 vmMarkPagesUsed(j
->page
,j
->pages
);
9188 j
->type
= REDIS_IOJOB_DO_SWAP
;
9193 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9196 /* Key swapped. We can finally free some memory. */
9197 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9198 printf("key->storage: %d\n",key
->storage
);
9199 printf("key->name: %s\n",(char*)key
->ptr
);
9200 printf("key->refcount: %d\n",key
->refcount
);
9201 printf("val: %p\n",(void*)j
->val
);
9202 printf("val->type: %d\n",j
->val
->type
);
9203 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9205 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9206 val
= dictGetEntryVal(de
);
9207 key
->vm
.page
= j
->page
;
9208 key
->vm
.usedpages
= j
->pages
;
9209 key
->storage
= REDIS_VM_SWAPPED
;
9210 key
->vtype
= j
->val
->type
;
9211 decrRefCount(val
); /* Deallocate the object from memory. */
9212 dictGetEntryVal(de
) = NULL
;
9213 redisLog(REDIS_DEBUG
,
9214 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9215 (unsigned char*) key
->ptr
,
9216 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9217 server
.vm_stats_swapped_objects
++;
9218 server
.vm_stats_swapouts
++;
9220 /* Put a few more swap requests in queue if we are still
9222 if (trytoswap
&& vmCanSwapOut() &&
9223 zmalloc_used_memory() > server
.vm_max_memory
)
9228 more
= listLength(server
.io_newjobs
) <
9229 (unsigned) server
.vm_max_threads
;
9231 /* Don't waste CPU time if swappable objects are rare. */
9232 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9240 if (processed
== toprocess
) return;
9242 if (retval
< 0 && errno
!= EAGAIN
) {
9243 redisLog(REDIS_WARNING
,
9244 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9249 static void lockThreadedIO(void) {
9250 pthread_mutex_lock(&server
.io_mutex
);
9253 static void unlockThreadedIO(void) {
9254 pthread_mutex_unlock(&server
.io_mutex
);
9257 /* Remove the specified object from the threaded I/O queue if still not
9258 * processed, otherwise make sure to flag it as canceled. */
9259 static void vmCancelThreadedIOJob(robj
*o
) {
9261 server
.io_newjobs
, /* 0 */
9262 server
.io_processing
, /* 1 */
9263 server
.io_processed
/* 2 */
9267 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9270 /* Search for a matching key in one of the queues */
9271 for (i
= 0; i
< 3; i
++) {
9275 listRewind(lists
[i
],&li
);
9276 while ((ln
= listNext(&li
)) != NULL
) {
9277 iojob
*job
= ln
->value
;
9279 if (job
->canceled
) continue; /* Skip this, already canceled. */
9280 if (job
->key
== o
) {
9281 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9282 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9283 /* Mark the pages as free since the swap didn't happened
9284 * or happened but is now discarded. */
9285 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9286 vmMarkPagesFree(job
->page
,job
->pages
);
9287 /* Cancel the job. It depends on the list the job is
9290 case 0: /* io_newjobs */
9291 /* If the job was yet not processed the best thing to do
9292 * is to remove it from the queue at all */
9294 listDelNode(lists
[i
],ln
);
9296 case 1: /* io_processing */
9297 /* Oh Shi- the thread is messing with the Job:
9299 * Probably it's accessing the object if this is a
9300 * PREPARE_SWAP or DO_SWAP job.
9301 * If it's a LOAD job it may be reading from disk and
9302 * if we don't wait for the job to terminate before to
9303 * cancel it, maybe in a few microseconds data can be
9304 * corrupted in this pages. So the short story is:
9306 * Better to wait for the job to move into the
9307 * next queue (processed)... */
9309 /* We try again and again until the job is completed. */
9311 /* But let's wait some time for the I/O thread
9312 * to finish with this job. After all this condition
9313 * should be very rare. */
9316 case 2: /* io_processed */
9317 /* The job was already processed, that's easy...
9318 * just mark it as canceled so that we'll ignore it
9319 * when processing completed jobs. */
9323 /* Finally we have to adjust the storage type of the object
9324 * in order to "UNDO" the operaiton. */
9325 if (o
->storage
== REDIS_VM_LOADING
)
9326 o
->storage
= REDIS_VM_SWAPPED
;
9327 else if (o
->storage
== REDIS_VM_SWAPPING
)
9328 o
->storage
= REDIS_VM_MEMORY
;
9335 assert(1 != 1); /* We should never reach this */
9338 static void *IOThreadEntryPoint(void *arg
) {
9343 pthread_detach(pthread_self());
9345 /* Get a new job to process */
9347 if (listLength(server
.io_newjobs
) == 0) {
9348 /* No new jobs in queue, exit. */
9349 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9350 (long) pthread_self());
9351 server
.io_active_threads
--;
9355 ln
= listFirst(server
.io_newjobs
);
9357 listDelNode(server
.io_newjobs
,ln
);
9358 /* Add the job in the processing queue */
9359 j
->thread
= pthread_self();
9360 listAddNodeTail(server
.io_processing
,j
);
9361 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9363 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9364 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9366 /* Process the Job */
9367 if (j
->type
== REDIS_IOJOB_LOAD
) {
9368 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9369 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9370 FILE *fp
= fopen("/dev/null","w+");
9371 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9373 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9374 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9378 /* Done: insert the job into the processed queue */
9379 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9380 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9382 listDelNode(server
.io_processing
,ln
);
9383 listAddNodeTail(server
.io_processed
,j
);
9386 /* Signal the main thread there is new stuff to process */
9387 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9389 return NULL
; /* never reached */
9392 static void spawnIOThread(void) {
9394 sigset_t mask
, omask
;
9398 sigaddset(&mask
,SIGCHLD
);
9399 sigaddset(&mask
,SIGHUP
);
9400 sigaddset(&mask
,SIGPIPE
);
9401 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9402 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9403 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9407 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9408 server
.io_active_threads
++;
9411 /* We need to wait for the last thread to exit before we are able to
9412 * fork() in order to BGSAVE or BGREWRITEAOF. */
9413 static void waitEmptyIOJobsQueue(void) {
9415 int io_processed_len
;
9418 if (listLength(server
.io_newjobs
) == 0 &&
9419 listLength(server
.io_processing
) == 0 &&
9420 server
.io_active_threads
== 0)
9425 /* While waiting for empty jobs queue condition we post-process some
9426 * finshed job, as I/O threads may be hanging trying to write against
9427 * the io_ready_pipe_write FD but there are so much pending jobs that
9429 io_processed_len
= listLength(server
.io_processed
);
9431 if (io_processed_len
) {
9432 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9433 usleep(1000); /* 1 millisecond */
9435 usleep(10000); /* 10 milliseconds */
9440 static void vmReopenSwapFile(void) {
9441 /* Note: we don't close the old one as we are in the child process
9442 * and don't want to mess at all with the original file object. */
9443 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9444 if (server
.vm_fp
== NULL
) {
9445 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9446 server
.vm_swap_file
);
9449 server
.vm_fd
= fileno(server
.vm_fp
);
9452 /* This function must be called while with threaded IO locked */
9453 static void queueIOJob(iojob
*j
) {
9454 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9455 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9456 listAddNodeTail(server
.io_newjobs
,j
);
9457 if (server
.io_active_threads
< server
.vm_max_threads
)
9461 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9464 assert(key
->storage
== REDIS_VM_MEMORY
);
9465 assert(key
->refcount
== 1);
9467 j
= zmalloc(sizeof(*j
));
9468 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9474 j
->thread
= (pthread_t
) -1;
9475 key
->storage
= REDIS_VM_SWAPPING
;
9483 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9485 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9486 * If there is not already a job loading the key, it is craeted.
9487 * The key is added to the io_keys list in the client structure, and also
9488 * in the hash table mapping swapped keys to waiting clients, that is,
9489 * server.io_waited_keys. */
9490 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9491 struct dictEntry
*de
;
9495 /* If the key does not exist or is already in RAM we don't need to
9496 * block the client at all. */
9497 de
= dictFind(c
->db
->dict
,key
);
9498 if (de
== NULL
) return 0;
9499 o
= dictGetEntryKey(de
);
9500 if (o
->storage
== REDIS_VM_MEMORY
) {
9502 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9503 /* We were swapping the key, undo it! */
9504 vmCancelThreadedIOJob(o
);
9508 /* OK: the key is either swapped, or being loaded just now. */
9510 /* Add the key to the list of keys this client is waiting for.
9511 * This maps clients to keys they are waiting for. */
9512 listAddNodeTail(c
->io_keys
,key
);
9515 /* Add the client to the swapped keys => clients waiting map. */
9516 de
= dictFind(c
->db
->io_keys
,key
);
9520 /* For every key we take a list of clients blocked for it */
9522 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9524 assert(retval
== DICT_OK
);
9526 l
= dictGetEntryVal(de
);
9528 listAddNodeTail(l
,c
);
9530 /* Are we already loading the key from disk? If not create a job */
9531 if (o
->storage
== REDIS_VM_SWAPPED
) {
9534 o
->storage
= REDIS_VM_LOADING
;
9535 j
= zmalloc(sizeof(*j
));
9536 j
->type
= REDIS_IOJOB_LOAD
;
9539 j
->key
->vtype
= o
->vtype
;
9540 j
->page
= o
->vm
.page
;
9543 j
->thread
= (pthread_t
) -1;
9551 /* Preload keys for any command with first, last and step values for
9552 * the command keys prototype, as defined in the command table. */
9553 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9555 if (cmd
->vm_firstkey
== 0) return;
9556 last
= cmd
->vm_lastkey
;
9557 if (last
< 0) last
= argc
+last
;
9558 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9559 redisAssert(j
< argc
);
9560 waitForSwappedKey(c
,argv
[j
]);
9564 /* Preload keys needed for the ZUNION and ZINTER commands. */
9565 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9567 num
= atoi(c
->argv
[2]->ptr
);
9568 for (i
= 0; i
< num
; i
++) {
9569 waitForSwappedKey(c
,c
->argv
[3+i
]);
9573 /* Is this client attempting to run a command against swapped keys?
9574 * If so, block it ASAP, load the keys in background, then resume it.
9576 * The important idea about this function is that it can fail! If keys will
9577 * still be swapped when the client is resumed, this key lookups will
9578 * just block loading keys from disk. In practical terms this should only
9579 * happen with SORT BY command or if there is a bug in this function.
9581 * Return 1 if the client is marked as blocked, 0 if the client can
9582 * continue as the keys it is going to access appear to be in memory. */
9583 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9584 if (cmd
->vm_preload_proc
!= NULL
) {
9585 cmd
->vm_preload_proc(c
);
9587 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9590 /* If the client was blocked for at least one key, mark it as blocked. */
9591 if (listLength(c
->io_keys
)) {
9592 c
->flags
|= REDIS_IO_WAIT
;
9593 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9594 server
.vm_blocked_clients
++;
9601 /* Remove the 'key' from the list of blocked keys for a given client.
9603 * The function returns 1 when there are no longer blocking keys after
9604 * the current one was removed (and the client can be unblocked). */
9605 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9609 struct dictEntry
*de
;
9611 /* Remove the key from the list of keys this client is waiting for. */
9612 listRewind(c
->io_keys
,&li
);
9613 while ((ln
= listNext(&li
)) != NULL
) {
9614 if (compareStringObjects(ln
->value
,key
) == 0) {
9615 listDelNode(c
->io_keys
,ln
);
9621 /* Remove the client form the key => waiting clients map. */
9622 de
= dictFind(c
->db
->io_keys
,key
);
9624 l
= dictGetEntryVal(de
);
9625 ln
= listSearchKey(l
,c
);
9628 if (listLength(l
) == 0)
9629 dictDelete(c
->db
->io_keys
,key
);
9631 return listLength(c
->io_keys
) == 0;
9634 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9635 struct dictEntry
*de
;
9640 de
= dictFind(db
->io_keys
,key
);
9643 l
= dictGetEntryVal(de
);
9644 len
= listLength(l
);
9645 /* Note: we can't use something like while(listLength(l)) as the list
9646 * can be freed by the calling function when we remove the last element. */
9649 redisClient
*c
= ln
->value
;
9651 if (dontWaitForSwappedKey(c
,key
)) {
9652 /* Put the client in the list of clients ready to go as we
9653 * loaded all the keys about it. */
9654 listAddNodeTail(server
.io_ready_clients
,c
);
9659 /* =========================== Remote Configuration ========================= */
9661 static void configSetCommand(redisClient
*c
) {
9662 robj
*o
= getDecodedObject(c
->argv
[3]);
9663 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9664 zfree(server
.dbfilename
);
9665 server
.dbfilename
= zstrdup(o
->ptr
);
9666 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9667 zfree(server
.requirepass
);
9668 server
.requirepass
= zstrdup(o
->ptr
);
9669 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9670 zfree(server
.masterauth
);
9671 server
.masterauth
= zstrdup(o
->ptr
);
9672 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9673 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9675 addReplySds(c
,sdscatprintf(sdsempty(),
9676 "-ERR not supported CONFIG parameter %s\r\n",
9677 (char*)c
->argv
[2]->ptr
));
9682 addReply(c
,shared
.ok
);
9685 static void configGetCommand(redisClient
*c
) {
9686 robj
*o
= getDecodedObject(c
->argv
[2]);
9687 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9688 char *pattern
= o
->ptr
;
9692 decrRefCount(lenobj
);
9694 if (stringmatch(pattern
,"dbfilename",0)) {
9695 addReplyBulkCString(c
,"dbfilename");
9696 addReplyBulkCString(c
,server
.dbfilename
);
9699 if (stringmatch(pattern
,"requirepass",0)) {
9700 addReplyBulkCString(c
,"requirepass");
9701 addReplyBulkCString(c
,server
.requirepass
);
9704 if (stringmatch(pattern
,"masterauth",0)) {
9705 addReplyBulkCString(c
,"masterauth");
9706 addReplyBulkCString(c
,server
.masterauth
);
9709 if (stringmatch(pattern
,"maxmemory",0)) {
9712 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9713 addReplyBulkCString(c
,"maxmemory");
9714 addReplyBulkCString(c
,buf
);
9718 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9721 static void configCommand(redisClient
*c
) {
9722 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9723 if (c
->argc
!= 4) goto badarity
;
9724 configSetCommand(c
);
9725 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9726 if (c
->argc
!= 3) goto badarity
;
9727 configGetCommand(c
);
9728 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9729 if (c
->argc
!= 2) goto badarity
;
9730 server
.stat_numcommands
= 0;
9731 server
.stat_numconnections
= 0;
9732 server
.stat_expiredkeys
= 0;
9733 server
.stat_starttime
= time(NULL
);
9734 addReply(c
,shared
.ok
);
9736 addReplySds(c
,sdscatprintf(sdsempty(),
9737 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9742 addReplySds(c
,sdscatprintf(sdsempty(),
9743 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9744 (char*) c
->argv
[1]->ptr
));
9747 /* =========================== Pubsub implementation ======================== */
9749 static void freePubsubPattern(void *p
) {
9750 pubsubPattern
*pat
= p
;
9752 decrRefCount(pat
->pattern
);
9756 static int listMatchPubsubPattern(void *a
, void *b
) {
9757 pubsubPattern
*pa
= a
, *pb
= b
;
9759 return (pa
->client
== pb
->client
) &&
9760 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9763 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9764 * 0 if the client was already subscribed to that channel. */
9765 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9766 struct dictEntry
*de
;
9767 list
*clients
= NULL
;
9770 /* Add the channel to the client -> channels hash table */
9771 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9773 incrRefCount(channel
);
9774 /* Add the client to the channel -> list of clients hash table */
9775 de
= dictFind(server
.pubsub_channels
,channel
);
9777 clients
= listCreate();
9778 dictAdd(server
.pubsub_channels
,channel
,clients
);
9779 incrRefCount(channel
);
9781 clients
= dictGetEntryVal(de
);
9783 listAddNodeTail(clients
,c
);
9785 /* Notify the client */
9786 addReply(c
,shared
.mbulk3
);
9787 addReply(c
,shared
.subscribebulk
);
9788 addReplyBulk(c
,channel
);
9789 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9793 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9794 * 0 if the client was not subscribed to the specified channel. */
9795 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9796 struct dictEntry
*de
;
9801 /* Remove the channel from the client -> channels hash table */
9802 incrRefCount(channel
); /* channel may be just a pointer to the same object
9803 we have in the hash tables. Protect it... */
9804 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9806 /* Remove the client from the channel -> clients list hash table */
9807 de
= dictFind(server
.pubsub_channels
,channel
);
9809 clients
= dictGetEntryVal(de
);
9810 ln
= listSearchKey(clients
,c
);
9812 listDelNode(clients
,ln
);
9813 if (listLength(clients
) == 0) {
9814 /* Free the list and associated hash entry at all if this was
9815 * the latest client, so that it will be possible to abuse
9816 * Redis PUBSUB creating millions of channels. */
9817 dictDelete(server
.pubsub_channels
,channel
);
9820 /* Notify the client */
9822 addReply(c
,shared
.mbulk3
);
9823 addReply(c
,shared
.unsubscribebulk
);
9824 addReplyBulk(c
,channel
);
9825 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9826 listLength(c
->pubsub_patterns
));
9829 decrRefCount(channel
); /* it is finally safe to release it */
9833 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9834 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9837 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9840 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9841 incrRefCount(pattern
);
9842 pat
= zmalloc(sizeof(*pat
));
9843 pat
->pattern
= getDecodedObject(pattern
);
9845 listAddNodeTail(server
.pubsub_patterns
,pat
);
9847 /* Notify the client */
9848 addReply(c
,shared
.mbulk3
);
9849 addReply(c
,shared
.psubscribebulk
);
9850 addReplyBulk(c
,pattern
);
9851 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9855 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9856 * 0 if the client was not subscribed to the specified channel. */
9857 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9862 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9863 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9865 listDelNode(c
->pubsub_patterns
,ln
);
9867 pat
.pattern
= pattern
;
9868 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9869 listDelNode(server
.pubsub_patterns
,ln
);
9871 /* Notify the client */
9873 addReply(c
,shared
.mbulk3
);
9874 addReply(c
,shared
.punsubscribebulk
);
9875 addReplyBulk(c
,pattern
);
9876 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9877 listLength(c
->pubsub_patterns
));
9879 decrRefCount(pattern
);
9883 /* Unsubscribe from all the channels. Return the number of channels the
9884 * client was subscribed from. */
9885 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9886 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9890 while((de
= dictNext(di
)) != NULL
) {
9891 robj
*channel
= dictGetEntryKey(de
);
9893 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9895 dictReleaseIterator(di
);
9899 /* Unsubscribe from all the patterns. Return the number of patterns the
9900 * client was subscribed from. */
9901 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9906 listRewind(c
->pubsub_patterns
,&li
);
9907 while ((ln
= listNext(&li
)) != NULL
) {
9908 robj
*pattern
= ln
->value
;
9910 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9915 /* Publish a message */
9916 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9918 struct dictEntry
*de
;
9922 /* Send to clients listening for that channel */
9923 de
= dictFind(server
.pubsub_channels
,channel
);
9925 list
*list
= dictGetEntryVal(de
);
9929 listRewind(list
,&li
);
9930 while ((ln
= listNext(&li
)) != NULL
) {
9931 redisClient
*c
= ln
->value
;
9933 addReply(c
,shared
.mbulk3
);
9934 addReply(c
,shared
.messagebulk
);
9935 addReplyBulk(c
,channel
);
9936 addReplyBulk(c
,message
);
9940 /* Send to clients listening to matching channels */
9941 if (listLength(server
.pubsub_patterns
)) {
9942 listRewind(server
.pubsub_patterns
,&li
);
9943 channel
= getDecodedObject(channel
);
9944 while ((ln
= listNext(&li
)) != NULL
) {
9945 pubsubPattern
*pat
= ln
->value
;
9947 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9948 sdslen(pat
->pattern
->ptr
),
9949 (char*)channel
->ptr
,
9950 sdslen(channel
->ptr
),0)) {
9951 addReply(pat
->client
,shared
.mbulk4
);
9952 addReply(pat
->client
,shared
.pmessagebulk
);
9953 addReplyBulk(pat
->client
,pat
->pattern
);
9954 addReplyBulk(pat
->client
,channel
);
9955 addReplyBulk(pat
->client
,message
);
9959 decrRefCount(channel
);
9964 static void subscribeCommand(redisClient
*c
) {
9967 for (j
= 1; j
< c
->argc
; j
++)
9968 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9971 static void unsubscribeCommand(redisClient
*c
) {
9973 pubsubUnsubscribeAllChannels(c
,1);
9978 for (j
= 1; j
< c
->argc
; j
++)
9979 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9983 static void psubscribeCommand(redisClient
*c
) {
9986 for (j
= 1; j
< c
->argc
; j
++)
9987 pubsubSubscribePattern(c
,c
->argv
[j
]);
9990 static void punsubscribeCommand(redisClient
*c
) {
9992 pubsubUnsubscribeAllPatterns(c
,1);
9997 for (j
= 1; j
< c
->argc
; j
++)
9998 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10002 static void publishCommand(redisClient
*c
) {
10003 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10004 addReplyLong(c
,receivers
);
10007 /* ================================= Debugging ============================== */
10009 static void debugCommand(redisClient
*c
) {
10010 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10011 *((char*)-1) = 'x';
10012 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10013 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10014 addReply(c
,shared
.err
);
10018 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10019 addReply(c
,shared
.err
);
10022 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10023 addReply(c
,shared
.ok
);
10024 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10026 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10027 addReply(c
,shared
.err
);
10030 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10031 addReply(c
,shared
.ok
);
10032 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10033 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10037 addReply(c
,shared
.nokeyerr
);
10040 key
= dictGetEntryKey(de
);
10041 val
= dictGetEntryVal(de
);
10042 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
10043 key
->storage
== REDIS_VM_SWAPPING
)) {
10047 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10048 strenc
= strencoding
[val
->encoding
];
10050 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10053 addReplySds(c
,sdscatprintf(sdsempty(),
10054 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10055 "encoding:%s serializedlength:%lld\r\n",
10056 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10057 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10059 addReplySds(c
,sdscatprintf(sdsempty(),
10060 "+Key at:%p refcount:%d, value swapped at: page %llu "
10061 "using %llu pages\r\n",
10062 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10063 (unsigned long long) key
->vm
.usedpages
));
10065 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10066 lookupKeyRead(c
->db
,c
->argv
[2]);
10067 addReply(c
,shared
.ok
);
10068 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10069 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10072 if (!server
.vm_enabled
) {
10073 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10077 addReply(c
,shared
.nokeyerr
);
10080 key
= dictGetEntryKey(de
);
10081 val
= dictGetEntryVal(de
);
10082 /* If the key is shared we want to create a copy */
10083 if (key
->refcount
> 1) {
10084 robj
*newkey
= dupStringObject(key
);
10086 key
= dictGetEntryKey(de
) = newkey
;
10089 if (key
->storage
!= REDIS_VM_MEMORY
) {
10090 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10091 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
10092 dictGetEntryVal(de
) = NULL
;
10093 addReply(c
,shared
.ok
);
10095 addReply(c
,shared
.err
);
10098 addReplySds(c
,sdsnew(
10099 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10103 static void _redisAssert(char *estr
, char *file
, int line
) {
10104 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10105 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
10106 #ifdef HAVE_BACKTRACE
10107 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10108 *((char*)-1) = 'x';
10112 static void _redisPanic(char *msg
, char *file
, int line
) {
10113 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10114 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10115 #ifdef HAVE_BACKTRACE
10116 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10117 *((char*)-1) = 'x';
10121 /* =================================== Main! ================================ */
/* Read the overcommit policy from /proc/sys/vm/overcommit_memory.
 *
 * Returns the non-negative integer found in the file, or -1 if the file
 * cannot be opened or read, or if its content does not start with a
 * non-negative number that fits in an int. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;
    int result;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    /* strtol() lets us tell "no digits at all" apart from a real 0, so
     * unparsable content is never mistaken for overcommit mode 0. */
    value = strtol(buf,&end,10);
    if (end == buf || value < 0) return -1;
    result = (int) value;
    if ((long) result != value) return -1; /* does not fit in an int */
    return result;
}
10138 void linuxOvercommitMemoryWarning(void) {
10139 if (linuxOvercommitMemoryValue() == 0) {
10140 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10143 #endif /* __linux__ */
10145 static void daemonize(void) {
10149 if (fork() != 0) exit(0); /* parent exits */
10150 setsid(); /* create a new session */
10152 /* Every output goes to /dev/null. If Redis is daemonized but
10153 * the 'logfile' is set to 'stdout' in the configuration file
10154 * it will not log at all. */
10155 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10156 dup2(fd
, STDIN_FILENO
);
10157 dup2(fd
, STDOUT_FILENO
);
10158 dup2(fd
, STDERR_FILENO
);
10159 if (fd
> STDERR_FILENO
) close(fd
);
10161 /* Try to write the pid file */
10162 fp
= fopen(server
.pidfile
,"w");
10164 fprintf(fp
,"%d\n",getpid());
10169 static void version() {
10170 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on standard error and terminate the
 * process with exit status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n"
          " ./redis-server - (read config from stdin)\n", stderr);
    exit(1);
}
10180 int main(int argc
, char **argv
) {
10183 initServerConfig();
10185 if (strcmp(argv
[1], "-v") == 0 ||
10186 strcmp(argv
[1], "--version") == 0) version();
10187 if (strcmp(argv
[1], "--help") == 0) usage();
10188 resetServerSaveParams();
10189 loadServerConfig(argv
[1]);
10190 } else if ((argc
> 2)) {
10193 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10195 if (server
.daemonize
) daemonize();
10197 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10199 linuxOvercommitMemoryWarning();
10201 start
= time(NULL
);
10202 if (server
.appendonly
) {
10203 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10204 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10206 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10207 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10209 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10210 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10212 aeDeleteEventLoop(server
.el
);
10216 /* ============================= Backtrace support ========================= */
10218 #ifdef HAVE_BACKTRACE
10219 static char *findFuncName(void *pointer
, unsigned long *offset
);
10221 static void *getMcontextEip(ucontext_t
*uc
) {
10222 #if defined(__FreeBSD__)
10223 return (void*) uc
->uc_mcontext
.mc_eip
;
10224 #elif defined(__dietlibc__)
10225 return (void*) uc
->uc_mcontext
.eip
;
10226 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10228 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10230 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10232 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10233 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10234 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10236 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10238 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10239 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10240 #elif defined(__ia64__) /* Linux IA64 */
10241 return (void*) uc
->uc_mcontext
.sc_ip
;
10247 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10249 char **messages
= NULL
;
10250 int i
, trace_size
= 0;
10251 unsigned long offset
=0;
10252 ucontext_t
*uc
= (ucontext_t
*) secret
;
10254 REDIS_NOTUSED(info
);
10256 redisLog(REDIS_WARNING
,
10257 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10258 infostring
= genRedisInfoString();
10259 redisLog(REDIS_WARNING
, "%s",infostring
);
10260 /* It's not safe to sdsfree() the returned string under memory
10261 * corruption conditions. Let it leak as we are going to abort */
10263 trace_size
= backtrace(trace
, 100);
10264 /* overwrite sigaction with caller's address */
10265 if (getMcontextEip(uc
) != NULL
) {
10266 trace
[1] = getMcontextEip(uc
);
10268 messages
= backtrace_symbols(trace
, trace_size
);
10270 for (i
=1; i
<trace_size
; ++i
) {
10271 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
10273 p
= strchr(messages
[i
],'+');
10274 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10275 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
10277 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10280 /* free(messages); Don't call free() with possibly corrupted memory. */
10284 static void setupSigSegvAction(void) {
10285 struct sigaction act
;
10287 sigemptyset (&act
.sa_mask
);
10288 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10289 * is used. Otherwise, sa_handler is used */
10290 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10291 act
.sa_sigaction
= segvHandler
;
10292 sigaction (SIGSEGV
, &act
, NULL
);
10293 sigaction (SIGBUS
, &act
, NULL
);
10294 sigaction (SIGFPE
, &act
, NULL
);
10295 sigaction (SIGILL
, &act
, NULL
);
10296 sigaction (SIGBUS
, &act
, NULL
);
10300 #include "staticsymbols.h"
10301 /* This function try to convert a pointer into a function name. It's used in
10302 * oreder to provide a backtrace under segmentation fault that's able to
10303 * display functions declared as static (otherwise the backtrace is useless). */
10304 static char *findFuncName(void *pointer
, unsigned long *offset
){
10306 unsigned long off
, minoff
= 0;
10308 /* Try to match against the Symbol with the smallest offset */
10309 for (i
=0; symsTable
[i
].pointer
; i
++) {
10310 unsigned long lp
= (unsigned long) pointer
;
10312 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10313 off
=lp
-symsTable
[i
].pointer
;
10314 if (ret
< 0 || off
< minoff
) {
10320 if (ret
== -1) return NULL
;
10322 return symsTable
[ret
].name
;
10324 #else /* HAVE_BACKTRACE */
static void setupSigSegvAction(void) {
    /* Built without HAVE_BACKTRACE: there is no crash handler to install. */
}
10327 #endif /* HAVE_BACKTRACE */