2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
79 #include "release.h" /* Release and/or git repository information */
85 /* Static server configuration */
86 #define REDIS_SERVERPORT 6379 /* TCP port */
87 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
88 #define REDIS_IOBUF_LEN 1024
89 #define REDIS_LOADBUF_LEN 1024
90 #define REDIS_STATIC_ARGS 8
91 #define REDIS_DEFAULT_DBNUM 16
92 #define REDIS_CONFIGLINE_MAX 1024
93 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
94 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
95 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
96 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
97 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
99 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
100 #define REDIS_WRITEV_THRESHOLD 3
101 /* Max number of iovecs used for each writev call */
102 #define REDIS_WRITEV_IOVEC_COUNT 256
104 /* Hash table parameters */
105 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
108 #define REDIS_CMD_BULK 1 /* Bulk write command */
109 #define REDIS_CMD_INLINE 2 /* Inline command */
110 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
111 this flag will return an error when the 'maxmemory' option is set in the
112 config file and the server is using more than maxmemory bytes of memory.
113 In short these commands are denied on low memory conditions. */
114 #define REDIS_CMD_DENYOOM 4
115 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
118 #define REDIS_STRING 0
123 #define REDIS_VMPOINTER 8
125 /* Objects encoding. Some kind of objects like Strings and Hashes can be
126 * internally represented in multiple ways. The 'encoding' field of the object
127 * is set to one of these values for this object. */
128 #define REDIS_ENCODING_RAW 0 /* Raw representation */
129 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
130 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
131 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
133 static char* strencoding
[] = {
134 "raw", "int", "zipmap", "hashtable"
137 /* Object types only used for dumping to disk */
138 #define REDIS_EXPIRETIME 253
139 #define REDIS_SELECTDB 254
140 #define REDIS_EOF 255
142 /* Defines related to the dump file format. To store 32 bits lengths for short
143 * keys requires a lot of space, so we check the most significant 2 bits of
144 * the first byte to interpret the length:
146 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
147 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
148 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
149 * 11|000000 this means: specially encoded object will follow. The six bits
150 * number specify the kind of object that follows.
151 * See the REDIS_RDB_ENC_* defines.
153 * Lengths up to 63 are stored using a single byte, most DB keys, and many
154 * values, will fit inside. */
155 #define REDIS_RDB_6BITLEN 0
156 #define REDIS_RDB_14BITLEN 1
157 #define REDIS_RDB_32BITLEN 2
158 #define REDIS_RDB_ENCVAL 3
159 #define REDIS_RDB_LENERR UINT_MAX
161 /* When a length of a string object stored on disk has the first two bits
162 * set, the remaining six bits specify a special encoding for the object
163 * according to the following defines: */
164 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
165 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
166 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
167 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
169 /* Virtual memory object->where field. */
170 #define REDIS_VM_MEMORY 0 /* The object is on memory */
171 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
172 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
173 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
175 /* Virtual memory static configuration stuff.
176 * Check vmFindContiguousPages() to know more about these magic numbers. */
177 #define REDIS_VM_MAX_NEAR_PAGES 65536
178 #define REDIS_VM_MAX_RANDOM_JUMP 4096
179 #define REDIS_VM_MAX_THREADS 32
180 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
181 /* The following is the *percentage* of completed I/O jobs to process when the
182 * handler is called. While Virtual Memory I/O operations are performed by
183 * threads, these operations must be processed by the main thread when completed
184 * in order to take effect. */
185 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
188 #define REDIS_SLAVE 1 /* This client is a slave server */
189 #define REDIS_MASTER 2 /* This client is a master server */
190 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
191 #define REDIS_MULTI 8 /* This client is in a MULTI context */
192 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
193 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
194 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
196 /* Slave replication state - slave side */
197 #define REDIS_REPL_NONE 0 /* No active replication */
198 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
199 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
201 /* Slave replication state - from the point of view of master
202 * Note that in SEND_BULK and ONLINE state the slave receives new updates
203 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
204 * to start the next background saving in order to send updates to it. */
205 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
206 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
207 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
208 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
210 /* List related stuff */
214 /* Sort operations */
215 #define REDIS_SORT_GET 0
216 #define REDIS_SORT_ASC 1
217 #define REDIS_SORT_DESC 2
218 #define REDIS_SORTKEY_MAX 1024
221 #define REDIS_DEBUG 0
222 #define REDIS_VERBOSE 1
223 #define REDIS_NOTICE 2
224 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so the compiler
 * does not complain about an otherwise unused variable or parameter.
 * The argument is parenthesized so that a compound expression such as
 * REDIS_NOTUSED(a + b) is cast to void as a whole. */
#define REDIS_NOTUSED(V) ((void)(V))
229 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
230 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
232 /* Append only defines */
233 #define APPENDFSYNC_NO 0
234 #define APPENDFSYNC_ALWAYS 1
235 #define APPENDFSYNC_EVERYSEC 2
237 /* Hashes related defaults */
238 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
239 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): when the expression is false, calls _redisAssert() with
 * the stringified expression, the source file and the line, then terminates
 * the process with _exit(1). When the expression is true it is a no-op.
 *
 * redisPanic(_e): unconditionally calls _redisPanic() with the stringified
 * argument, the source file and the line, then terminates the process with
 * _exit(1). The argument is stringified with '#', so a string literal is
 * passed on together with its quotes.
 *
 * Both expansions are fully parenthesized so that each macro behaves as a
 * single expression. Without the outer parentheses, something like
 * "cond ? (void)0 : redisPanic(x)" would run _exit(1) unconditionally,
 * because the comma operator binds looser than the conditional operator. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
247 /*================================= Data types ============================== */
249 /* A redis object, that is a type able to hold a string / list / set */
251 /* The actual Redis Object */
252 typedef struct redisObject
{
254 unsigned storage
:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
256 unsigned lru
:22; /* lru time (relative to server.lruclock) */
259 /* VM fields, these are only allocated if VM is active, otherwise the
260 * object allocation function will just allocate
261 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
262 * Redis without VM active will not have any overhead. */
265 /* The VM pointer structure - identifies an object in the swap file.
267 * This object is stored in place of the value
268 * object in the main key->value hash table representing a database.
269 * Note that the first fields (type, storage) are the same as the redisObject
270 * structure so that vmPointer structures can be accessed even when cast
271 * as redisObject structures.
273 * This is useful as we don't know if a value object is on disk or not, but we
274 * are always able to read obj->storage to check this. For vmPointer
275 * structures "type" is set to REDIS_VMPOINTER (even if without this field
276 * it is still possible to check the kind of object from the value of 'storage').*/
277 typedef struct vmPointer
{
279 unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
281 unsigned int vtype
; /* type of the object stored in the swap file */
282 off_t page
; /* the page at witch the object is stored on disk */
283 off_t usedpages
; /* number of pages used on disk */
286 /* Macro used to initialize a Redis object allocated on the stack.
287 * Note that this macro is kept near the structure definition to make sure
288 * we'll update it when the structure is changed, to avoid bugs like
289 * bug #85 introduced exactly in this way. */
290 #define initStaticStringObject(_var,_ptr) do { \
292 _var.type = REDIS_STRING; \
293 _var.encoding = REDIS_ENCODING_RAW; \
295 _var.storage = REDIS_VM_MEMORY; \
298 typedef struct redisDb
{
299 dict
*dict
; /* The keyspace for this DB */
300 dict
*expires
; /* Timeout of keys with a timeout set */
301 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
302 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
303 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
307 /* Client MULTI/EXEC state */
308 typedef struct multiCmd
{
311 struct redisCommand
*cmd
;
314 typedef struct multiState
{
315 multiCmd
*commands
; /* Array of MULTI commands */
316 int count
; /* Total number of MULTI commands */
319 /* With multiplexing we need to keep per-client state.
320 * Clients are kept in a linked list. */
321 typedef struct redisClient
{
326 robj
**argv
, **mbargv
;
328 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
329 int multibulk
; /* multi bulk command format active */
332 time_t lastinteraction
; /* time of the last interaction, used for timeout */
333 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
334 int slaveseldb
; /* slave selected db, if this client is a slave */
335 int authenticated
; /* when requirepass is non-NULL */
336 int replstate
; /* replication state if this is a slave */
337 int repldbfd
; /* replication DB file descriptor */
338 long repldboff
; /* replication DB file offset */
339 off_t repldbsize
; /* replication DB file size */
340 multiState mstate
; /* MULTI/EXEC state */
341 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
342 * operation such as BLPOP. Otherwise NULL. */
343 int blocking_keys_num
; /* Number of blocking keys */
344 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
345 * is >= blockingto then the operation timed out. */
346 list
*io_keys
; /* Keys this client is waiting to be loaded from the
347 * swap file in order to continue. */
348 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
349 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
350 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
358 /* Global server state structure */
363 long long dirty
; /* changes to DB from the last save */
365 list
*slaves
, *monitors
;
366 char neterr
[ANET_ERR_LEN
];
368 int cronloops
; /* number of times the cron function run */
369 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
370 time_t lastsave
; /* Unix time of last save succeeede */
371 /* Fields used only for stats */
372 time_t stat_starttime
; /* server start time */
373 long long stat_numcommands
; /* number of processed commands */
374 long long stat_numconnections
; /* number of connections received */
375 long long stat_expiredkeys
; /* number of expired keys */
384 int no_appendfsync_on_rewrite
;
390 pid_t bgsavechildpid
;
391 pid_t bgrewritechildpid
;
392 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
393 sds aofbuf
; /* AOF buffer, written before entering the event loop */
394 struct saveparam
*saveparams
;
399 char *appendfilename
;
403 /* Replication related */
408 redisClient
*master
; /* client that is master for this slave */
410 unsigned int maxclients
;
411 unsigned long long maxmemory
;
412 unsigned int blpop_blocked_clients
;
413 unsigned int vm_blocked_clients
;
414 /* Sort parameters - qsort_r() is only available under BSD so we
415 * have to take this state global, in order to pass it to sortCompare() */
419 /* Virtual memory configuration */
424 unsigned long long vm_max_memory
;
426 size_t hash_max_zipmap_entries
;
427 size_t hash_max_zipmap_value
;
428 /* Virtual memory state */
431 off_t vm_next_page
; /* Next probably empty page */
432 off_t vm_near_pages
; /* Number of pages allocated sequentially */
433 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
434 time_t unixtime
; /* Unix time sampled every second. */
435 /* Virtual memory I/O threads stuff */
436 /* An I/O thread processes an element taken from the io_newjobs queue and
437 * puts the result of the operation in the io_processed list. While the
438 * job is being processed, it's put on the io_processing queue. */
439 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
440 list
*io_processing
; /* List of VM I/O jobs being processed */
441 list
*io_processed
; /* List of VM I/O jobs already processed */
442 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
443 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
444 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
445 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
446 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
447 int io_active_threads
; /* Number of running I/O threads */
448 int vm_max_threads
; /* Max number of I/O threads running at the same time */
449 /* Our main thread is blocked on the event loop, looking for sockets ready
450 * to be read or written, so when a threaded I/O operation is ready to be
451 * processed by the main thread, the I/O thread will use a unix pipe to
452 * awake the main thread. The following are the two pipe FDs. */
453 int io_ready_pipe_read
;
454 int io_ready_pipe_write
;
455 /* Virtual memory stats */
456 unsigned long long vm_stats_used_pages
;
457 unsigned long long vm_stats_swapped_objects
;
458 unsigned long long vm_stats_swapouts
;
459 unsigned long long vm_stats_swapins
;
461 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
462 list
*pubsub_patterns
; /* A list of pubsub_patterns */
465 unsigned lruclock
:22; /* clock incrementing every minute, for LRU */
466 unsigned lruclock_padding
:10;
469 typedef struct pubsubPattern
{
474 typedef void redisCommandProc(redisClient
*c
);
475 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
476 struct redisCommand
{
478 redisCommandProc
*proc
;
481 /* Use a function to determine which keys need to be loaded
482 * in the background prior to executing this command. Takes precedence
483 * over vm_firstkey and others, ignored when NULL */
484 redisVmPreloadProc
*vm_preload_proc
;
485 /* What keys should be loaded in background when calling this command? */
486 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
487 int vm_lastkey
; /* THe last argument that's a key */
488 int vm_keystep
; /* The step between first and last key */
491 struct redisFunctionSym
{
493 unsigned long pointer
;
496 typedef struct _redisSortObject
{
504 typedef struct _redisSortOperation
{
507 } redisSortOperation
;
509 /* ZSETs use a specialized version of Skiplists */
511 typedef struct zskiplistNode
{
512 struct zskiplistNode
**forward
;
513 struct zskiplistNode
*backward
;
519 typedef struct zskiplist
{
520 struct zskiplistNode
*header
, *tail
;
521 unsigned long length
;
525 typedef struct zset
{
530 /* Our shared "common" objects */
532 #define REDIS_SHARED_INTEGERS 10000
533 struct sharedObjectsStruct
{
534 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
535 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
536 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
537 *outofrangeerr
, *plus
,
538 *select0
, *select1
, *select2
, *select3
, *select4
,
539 *select5
, *select6
, *select7
, *select8
, *select9
,
540 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
541 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
542 *integers
[REDIS_SHARED_INTEGERS
];
/* Global variables that are actually used as constants. These doubles are
 * used for the on-disk serialization of double values, and they are
 * initialized at runtime to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
551 /* VM threaded I/O request message */
552 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
553 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
554 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
555 typedef struct iojob
{
556 int type
; /* Request type, REDIS_IOJOB_* */
557 redisDb
*db
;/* Redis database */
558 robj
*key
; /* This I/O request is about swapping this key */
559 robj
*id
; /* Unique identifier of this job:
560 this is the object to swap for REDIS_IOREQ_*_SWAP, or the
561 vmpointer objct for REDIS_IOREQ_LOAD. */
562 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
563 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
564 off_t page
; /* Swap page where to read/write the object */
565 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
566 int canceled
; /* True if this command was canceled by blocking side of VM */
567 pthread_t thread
; /* ID of the thread processing this entry */
570 /*================================ Prototypes =============================== */
572 static void freeStringObject(robj
*o
);
573 static void freeListObject(robj
*o
);
574 static void freeSetObject(robj
*o
);
575 static void decrRefCount(void *o
);
576 static robj
*createObject(int type
, void *ptr
);
577 static void freeClient(redisClient
*c
);
578 static int rdbLoad(char *filename
);
579 static void addReply(redisClient
*c
, robj
*obj
);
580 static void addReplySds(redisClient
*c
, sds s
);
581 static void incrRefCount(robj
*o
);
582 static int rdbSaveBackground(char *filename
);
583 static robj
*createStringObject(char *ptr
, size_t len
);
584 static robj
*dupStringObject(robj
*o
);
585 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
586 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
587 static void flushAppendOnlyFile(void);
588 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
589 static int syncWithMaster(void);
590 static robj
*tryObjectEncoding(robj
*o
);
591 static robj
*getDecodedObject(robj
*o
);
592 static int removeExpire(redisDb
*db
, robj
*key
);
593 static int expireIfNeeded(redisDb
*db
, robj
*key
);
594 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
595 static int dbDelete(redisDb
*db
, robj
*key
);
596 static time_t getExpire(redisDb
*db
, robj
*key
);
597 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
598 static void updateSlavesWaitingBgsave(int bgsaveerr
);
599 static void freeMemoryIfNeeded(void);
600 static int processCommand(redisClient
*c
);
601 static void setupSigSegvAction(void);
602 static void rdbRemoveTempFile(pid_t childpid
);
603 static void aofRemoveTempFile(pid_t childpid
);
604 static size_t stringObjectLen(robj
*o
);
605 static void processInputBuffer(redisClient
*c
);
606 static zskiplist
*zslCreate(void);
607 static void zslFree(zskiplist
*zsl
);
608 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
609 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
610 static void initClientMultiState(redisClient
*c
);
611 static void freeClientMultiState(redisClient
*c
);
612 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
613 static void unblockClientWaitingData(redisClient
*c
);
614 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
615 static void vmInit(void);
616 static void vmMarkPagesFree(off_t page
, off_t count
);
617 static robj
*vmLoadObject(robj
*o
);
618 static robj
*vmPreviewObject(robj
*o
);
619 static int vmSwapOneObjectBlocking(void);
620 static int vmSwapOneObjectThreaded(void);
621 static int vmCanSwapOut(void);
622 static int tryFreeOneObjectFromFreelist(void);
623 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
624 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
625 static void vmCancelThreadedIOJob(robj
*o
);
626 static void lockThreadedIO(void);
627 static void unlockThreadedIO(void);
628 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
629 static void freeIOJob(iojob
*j
);
630 static void queueIOJob(iojob
*j
);
631 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
632 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
633 static void waitEmptyIOJobsQueue(void);
634 static void vmReopenSwapFile(void);
635 static int vmFreePage(off_t page
);
636 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
637 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
638 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
639 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
640 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
641 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
642 static struct redisCommand
*lookupCommand(char *name
);
643 static void call(redisClient
*c
, struct redisCommand
*cmd
);
644 static void resetClient(redisClient
*c
);
645 static void convertToRealHash(robj
*o
);
646 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
647 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
648 static void freePubsubPattern(void *p
);
649 static int listMatchPubsubPattern(void *a
, void *b
);
650 static int compareStringObjects(robj
*a
, robj
*b
);
651 static int equalStringObjects(robj
*a
, robj
*b
);
653 static int rewriteAppendOnlyFileBackground(void);
654 static vmpointer
*vmSwapObjectBlocking(robj
*val
);
655 static int prepareForShutdown();
656 static void touchWatchedKey(redisDb
*db
, robj
*key
);
657 static void touchWatchedKeysOnFlush(int dbid
);
658 static void unwatchAllKeys(redisClient
*c
);
660 static void authCommand(redisClient
*c
);
661 static void pingCommand(redisClient
*c
);
662 static void echoCommand(redisClient
*c
);
663 static void setCommand(redisClient
*c
);
664 static void setnxCommand(redisClient
*c
);
665 static void setexCommand(redisClient
*c
);
666 static void getCommand(redisClient
*c
);
667 static void delCommand(redisClient
*c
);
668 static void existsCommand(redisClient
*c
);
669 static void incrCommand(redisClient
*c
);
670 static void decrCommand(redisClient
*c
);
671 static void incrbyCommand(redisClient
*c
);
672 static void decrbyCommand(redisClient
*c
);
673 static void selectCommand(redisClient
*c
);
674 static void randomkeyCommand(redisClient
*c
);
675 static void keysCommand(redisClient
*c
);
676 static void dbsizeCommand(redisClient
*c
);
677 static void lastsaveCommand(redisClient
*c
);
678 static void saveCommand(redisClient
*c
);
679 static void bgsaveCommand(redisClient
*c
);
680 static void bgrewriteaofCommand(redisClient
*c
);
681 static void shutdownCommand(redisClient
*c
);
682 static void moveCommand(redisClient
*c
);
683 static void renameCommand(redisClient
*c
);
684 static void renamenxCommand(redisClient
*c
);
685 static void lpushCommand(redisClient
*c
);
686 static void rpushCommand(redisClient
*c
);
687 static void lpopCommand(redisClient
*c
);
688 static void rpopCommand(redisClient
*c
);
689 static void llenCommand(redisClient
*c
);
690 static void lindexCommand(redisClient
*c
);
691 static void lrangeCommand(redisClient
*c
);
692 static void ltrimCommand(redisClient
*c
);
693 static void typeCommand(redisClient
*c
);
694 static void lsetCommand(redisClient
*c
);
695 static void saddCommand(redisClient
*c
);
696 static void sremCommand(redisClient
*c
);
697 static void smoveCommand(redisClient
*c
);
698 static void sismemberCommand(redisClient
*c
);
699 static void scardCommand(redisClient
*c
);
700 static void spopCommand(redisClient
*c
);
701 static void srandmemberCommand(redisClient
*c
);
702 static void sinterCommand(redisClient
*c
);
703 static void sinterstoreCommand(redisClient
*c
);
704 static void sunionCommand(redisClient
*c
);
705 static void sunionstoreCommand(redisClient
*c
);
706 static void sdiffCommand(redisClient
*c
);
707 static void sdiffstoreCommand(redisClient
*c
);
708 static void syncCommand(redisClient
*c
);
709 static void flushdbCommand(redisClient
*c
);
710 static void flushallCommand(redisClient
*c
);
711 static void sortCommand(redisClient
*c
);
712 static void lremCommand(redisClient
*c
);
713 static void rpoplpushcommand(redisClient
*c
);
714 static void infoCommand(redisClient
*c
);
715 static void mgetCommand(redisClient
*c
);
716 static void monitorCommand(redisClient
*c
);
717 static void expireCommand(redisClient
*c
);
718 static void expireatCommand(redisClient
*c
);
719 static void getsetCommand(redisClient
*c
);
720 static void ttlCommand(redisClient
*c
);
721 static void slaveofCommand(redisClient
*c
);
722 static void debugCommand(redisClient
*c
);
723 static void msetCommand(redisClient
*c
);
724 static void msetnxCommand(redisClient
*c
);
725 static void zaddCommand(redisClient
*c
);
726 static void zincrbyCommand(redisClient
*c
);
727 static void zrangeCommand(redisClient
*c
);
728 static void zrangebyscoreCommand(redisClient
*c
);
729 static void zcountCommand(redisClient
*c
);
730 static void zrevrangeCommand(redisClient
*c
);
731 static void zcardCommand(redisClient
*c
);
732 static void zremCommand(redisClient
*c
);
733 static void zscoreCommand(redisClient
*c
);
734 static void zremrangebyscoreCommand(redisClient
*c
);
735 static void multiCommand(redisClient
*c
);
736 static void execCommand(redisClient
*c
);
737 static void discardCommand(redisClient
*c
);
738 static void blpopCommand(redisClient
*c
);
739 static void brpopCommand(redisClient
*c
);
740 static void appendCommand(redisClient
*c
);
741 static void substrCommand(redisClient
*c
);
742 static void zrankCommand(redisClient
*c
);
743 static void zrevrankCommand(redisClient
*c
);
744 static void hsetCommand(redisClient
*c
);
745 static void hsetnxCommand(redisClient
*c
);
746 static void hgetCommand(redisClient
*c
);
747 static void hmsetCommand(redisClient
*c
);
748 static void hmgetCommand(redisClient
*c
);
749 static void hdelCommand(redisClient
*c
);
750 static void hlenCommand(redisClient
*c
);
751 static void zremrangebyrankCommand(redisClient
*c
);
752 static void zunionstoreCommand(redisClient
*c
);
753 static void zinterstoreCommand(redisClient
*c
);
754 static void hkeysCommand(redisClient
*c
);
755 static void hvalsCommand(redisClient
*c
);
756 static void hgetallCommand(redisClient
*c
);
757 static void hexistsCommand(redisClient
*c
);
758 static void configCommand(redisClient
*c
);
759 static void hincrbyCommand(redisClient
*c
);
760 static void subscribeCommand(redisClient
*c
);
761 static void unsubscribeCommand(redisClient
*c
);
762 static void psubscribeCommand(redisClient
*c
);
763 static void punsubscribeCommand(redisClient
*c
);
764 static void publishCommand(redisClient
*c
);
765 static void watchCommand(redisClient
*c
);
766 static void unwatchCommand(redisClient
*c
);
768 /*================================= Globals ================================= */
771 static struct redisServer server
; /* server global state */
772 static struct redisCommand
*commandTable
;
773 static struct redisCommand readonlyCommandTable
[] = {
774 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
776 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
777 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
778 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
779 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
781 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
782 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
783 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
784 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
785 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
786 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
789 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
790 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
792 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
797 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
798 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
799 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
800 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
801 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
802 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
806 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
807 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
808 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
809 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
810 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
811 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
812 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
813 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
814 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
815 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
816 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
817 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
818 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
819 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
821 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
822 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
823 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
824 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
826 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
827 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
828 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
829 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
830 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
831 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
832 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
833 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
834 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
835 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
836 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
837 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
838 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
839 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
840 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
841 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
842 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
843 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
844 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
845 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
846 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
847 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
848 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
849 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
850 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
851 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
852 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
856 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
862 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
864 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
866 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
867 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
868 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
869 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
870 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
871 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
872 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
873 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
874 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
875 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
876 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
877 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
878 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
879 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
880 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
881 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
884 /*============================ Utility functions ============================ */
886 /* Glob-style pattern matching. */
887 static int stringmatchlen(const char *pattern
, int patternLen
,
888 const char *string
, int stringLen
, int nocase
)
893 while (pattern
[1] == '*') {
898 return 1; /* match */
900 if (stringmatchlen(pattern
+1, patternLen
-1,
901 string
, stringLen
, nocase
))
902 return 1; /* match */
906 return 0; /* no match */
910 return 0; /* no match */
920 not = pattern
[0] == '^';
927 if (pattern
[0] == '\\') {
930 if (pattern
[0] == string
[0])
932 } else if (pattern
[0] == ']') {
934 } else if (patternLen
== 0) {
938 } else if (pattern
[1] == '-' && patternLen
>= 3) {
939 int start
= pattern
[0];
940 int end
= pattern
[2];
948 start
= tolower(start
);
954 if (c
>= start
&& c
<= end
)
958 if (pattern
[0] == string
[0])
961 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
971 return 0; /* no match */
977 if (patternLen
>= 2) {
984 if (pattern
[0] != string
[0])
985 return 0; /* no match */
987 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
988 return 0; /* no match */
996 if (stringLen
== 0) {
997 while(*pattern
== '*') {
1004 if (patternLen
== 0 && stringLen
== 0)
1009 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
1010 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
1013 /* Convert a string representing an amount of memory into the number of
1014 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
1017 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1019 static long long memtoll(const char *p
, int *err
) {
1022 long mul
; /* unit multiplier */
1024 unsigned int digits
;
1027 /* Search the first non digit character. */
1030 while(*u
&& isdigit(*u
)) u
++;
1031 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1033 } else if (!strcasecmp(u
,"k")) {
1035 } else if (!strcasecmp(u
,"kb")) {
1037 } else if (!strcasecmp(u
,"m")) {
1039 } else if (!strcasecmp(u
,"mb")) {
1041 } else if (!strcasecmp(u
,"g")) {
1042 mul
= 1000L*1000*1000;
1043 } else if (!strcasecmp(u
,"gb")) {
1044 mul
= 1024L*1024*1024;
1050 if (digits
>= sizeof(buf
)) {
1054 memcpy(buf
,p
,digits
);
1056 val
= strtoll(buf
,NULL
,10);
1060 /* Convert a long long into a string. Returns the number of
1061 * characters needed to represent the number, that can be shorter if passed
1062 * buffer length is not enough to store the whole number. */
1063 static int ll2string(char *s
, size_t len
, long long value
) {
1065 unsigned long long v
;
1068 if (len
== 0) return 0;
1069 v
= (value
< 0) ? -value
: value
;
1070 p
= buf
+31; /* point to the last character */
1075 if (value
< 0) *p
-- = '-';
1078 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1084 static void redisLog(int level
, const char *fmt
, ...) {
1088 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1092 if (level
>= server
.verbosity
) {
1098 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1099 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1100 vfprintf(fp
, fmt
, ap
);
1106 if (server
.logfile
) fclose(fp
);
1109 /*====================== Hash table type implementation ==================== */
1111 /* This is a hash table type that uses the SDS dynamic strings library as
1112 * keys and redis objects as values (objects can hold SDS strings,
1115 static void dictVanillaFree(void *privdata
, void *val
)
1117 DICT_NOTUSED(privdata
);
1121 static void dictListDestructor(void *privdata
, void *val
)
1123 DICT_NOTUSED(privdata
);
1124 listRelease((list
*)val
);
1127 static int dictSdsKeyCompare(void *privdata
, const void *key1
,
1131 DICT_NOTUSED(privdata
);
1133 l1
= sdslen((sds
)key1
);
1134 l2
= sdslen((sds
)key2
);
1135 if (l1
!= l2
) return 0;
1136 return memcmp(key1
, key2
, l1
) == 0;
1139 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1141 DICT_NOTUSED(privdata
);
1143 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1147 static void dictSdsDestructor(void *privdata
, void *val
)
1149 DICT_NOTUSED(privdata
);
1154 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1157 const robj
*o1
= key1
, *o2
= key2
;
1158 return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1161 static unsigned int dictObjHash(const void *key
) {
1162 const robj
*o
= key
;
1163 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1166 static unsigned int dictSdsHash(const void *key
) {
1167 return dictGenHashFunction((unsigned char*)key
, sdslen((char*)key
));
1170 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1173 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1176 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1177 o2
->encoding
== REDIS_ENCODING_INT
)
1178 return o1
->ptr
== o2
->ptr
;
1180 o1
= getDecodedObject(o1
);
1181 o2
= getDecodedObject(o2
);
1182 cmp
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1188 static unsigned int dictEncObjHash(const void *key
) {
1189 robj
*o
= (robj
*) key
;
1191 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1192 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1194 if (o
->encoding
== REDIS_ENCODING_INT
) {
1198 len
= ll2string(buf
,32,(long)o
->ptr
);
1199 return dictGenHashFunction((unsigned char*)buf
, len
);
1203 o
= getDecodedObject(o
);
1204 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1212 static dictType setDictType
= {
1213 dictEncObjHash
, /* hash function */
1216 dictEncObjKeyCompare
, /* key compare */
1217 dictRedisObjectDestructor
, /* key destructor */
1218 NULL
/* val destructor */
1221 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1222 static dictType zsetDictType
= {
1223 dictEncObjHash
, /* hash function */
1226 dictEncObjKeyCompare
, /* key compare */
1227 dictRedisObjectDestructor
, /* key destructor */
1228 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1231 /* Db->dict, keys are sds strings, vals are Redis objects. */
1232 static dictType dbDictType
= {
1233 dictSdsHash
, /* hash function */
1236 dictSdsKeyCompare
, /* key compare */
1237 dictSdsDestructor
, /* key destructor */
1238 dictRedisObjectDestructor
/* val destructor */
1242 static dictType keyptrDictType
= {
1243 dictSdsHash
, /* hash function */
1246 dictSdsKeyCompare
, /* key compare */
1247 dictSdsDestructor
, /* key destructor */
1248 NULL
/* val destructor */
1251 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1252 static dictType hashDictType
= {
1253 dictEncObjHash
, /* hash function */
1256 dictEncObjKeyCompare
, /* key compare */
1257 dictRedisObjectDestructor
, /* key destructor */
1258 dictRedisObjectDestructor
/* val destructor */
1261 /* Keylist hash table type has unencoded redis objects as keys and
1262 * lists as values. It's used for blocking operations (BLPOP) and to
1263 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1264 static dictType keylistDictType
= {
1265 dictObjHash
, /* hash function */
1268 dictObjKeyCompare
, /* key compare */
1269 dictRedisObjectDestructor
, /* key destructor */
1270 dictListDestructor
/* val destructor */
1273 static void version();
1275 /* ========================= Random utility functions ======================= */
1277 /* Redis generally does not try to recover from out of memory conditions
1278 * when allocating objects or strings, it is not clear if it will be possible
1279 * to report this condition to the client since the networking layer itself
1280 * is based on heap allocation for send buffers, so we simply abort.
1281 * At least the code will be simpler to read... */
1282 static void oom(const char *msg
) {
1283 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1288 /* ====================== Redis server networking stuff ===================== */
1289 static void closeTimedoutClients(void) {
1292 time_t now
= time(NULL
);
1295 listRewind(server
.clients
,&li
);
1296 while ((ln
= listNext(&li
)) != NULL
) {
1297 c
= listNodeValue(ln
);
1298 if (server
.maxidletime
&&
1299 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1300 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1301 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1302 listLength(c
->pubsub_patterns
) == 0 &&
1303 (now
- c
->lastinteraction
> server
.maxidletime
))
1305 redisLog(REDIS_VERBOSE
,"Closing idle client");
1307 } else if (c
->flags
& REDIS_BLOCKED
) {
1308 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1309 addReply(c
,shared
.nullmultibulk
);
1310 unblockClientWaitingData(c
);
1316 static int htNeedsResize(dict
*dict
) {
1317 long long size
, used
;
1319 size
= dictSlots(dict
);
1320 used
= dictSize(dict
);
1321 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1322 (used
*100/size
< REDIS_HT_MINFILL
));
1325 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1326 * we resize the hash table to save memory */
1327 static void tryResizeHashTables(void) {
1330 for (j
= 0; j
< server
.dbnum
; j
++) {
1331 if (htNeedsResize(server
.db
[j
].dict
))
1332 dictResize(server
.db
[j
].dict
);
1333 if (htNeedsResize(server
.db
[j
].expires
))
1334 dictResize(server
.db
[j
].expires
);
1338 /* Our hash table implementation performs rehashing incrementally while
1339 * we write/read from the hash table. Still if the server is idle, the hash
1340 * table will use two tables for a long time. So we try to use 1 millisecond
1341 * of CPU time at every serverCron() loop in order to rehash some key. */
1342 static void incrementallyRehash(void) {
1345 for (j
= 0; j
< server
.dbnum
; j
++) {
1346 if (dictIsRehashing(server
.db
[j
].dict
)) {
1347 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1348 break; /* already used our millisecond for this loop... */
1353 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1354 void backgroundSaveDoneHandler(int statloc
) {
1355 int exitcode
= WEXITSTATUS(statloc
);
1356 int bysignal
= WIFSIGNALED(statloc
);
1358 if (!bysignal
&& exitcode
== 0) {
1359 redisLog(REDIS_NOTICE
,
1360 "Background saving terminated with success");
1362 server
.lastsave
= time(NULL
);
1363 } else if (!bysignal
&& exitcode
!= 0) {
1364 redisLog(REDIS_WARNING
, "Background saving error");
1366 redisLog(REDIS_WARNING
,
1367 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1368 rdbRemoveTempFile(server
.bgsavechildpid
);
1370 server
.bgsavechildpid
= -1;
1371 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1372 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1373 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1376 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1378 void backgroundRewriteDoneHandler(int statloc
) {
1379 int exitcode
= WEXITSTATUS(statloc
);
1380 int bysignal
= WIFSIGNALED(statloc
);
1382 if (!bysignal
&& exitcode
== 0) {
1386 redisLog(REDIS_NOTICE
,
1387 "Background append only file rewriting terminated with success");
1388 /* Now it's time to flush the differences accumulated by the parent */
1389 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1390 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1392 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1395 /* Flush our data... */
1396 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1397 (signed) sdslen(server
.bgrewritebuf
)) {
1398 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1402 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1403 /* Now our work is to rename the temp file into the stable file. And
1404 * switch the file descriptor used by the server for append only. */
1405 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1406 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1410 /* Mission completed... almost */
1411 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1412 if (server
.appendfd
!= -1) {
1413 /* If append only is actually enabled... */
1414 close(server
.appendfd
);
1415 server
.appendfd
= fd
;
1416 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1417 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1418 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1420 /* If append only is disabled we just generate a dump in this
1421 * format. Why not? */
1424 } else if (!bysignal
&& exitcode
!= 0) {
1425 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1427 redisLog(REDIS_WARNING
,
1428 "Background append only file rewriting terminated by signal %d",
1432 sdsfree(server
.bgrewritebuf
);
1433 server
.bgrewritebuf
= sdsempty();
1434 aofRemoveTempFile(server
.bgrewritechildpid
);
1435 server
.bgrewritechildpid
= -1;
1438 /* This function is called once a background process of some kind terminates,
1439 * as we want to avoid resizing the hash tables when there is a child in order
1440 * to play well with copy-on-write (otherwise when a resize happens lots of
1441 * memory pages are copied). The goal of this function is to update the ability
1442 * for dict.c to resize the hash tables according to whether or not we have
1443 * running children. */
1444 static void updateDictResizePolicy(void) {
1445 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1448 dictDisableResize();
1451 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1452 int j
, loops
= server
.cronloops
++;
1453 REDIS_NOTUSED(eventLoop
);
1455 REDIS_NOTUSED(clientData
);
1457 /* We take a cached value of the unix time in the global state because
1458 * with virtual memory and aging we need to store the current time
1459 * in objects at every object access, and accuracy is not needed.
1460 * To access a global var is faster than calling time(NULL) */
1461 server
.unixtime
= time(NULL
);
1462 /* We have just 21 bits per object for LRU information.
1463 * So we use an (eventually wrapping) LRU clock with minutes resolution.
1465 * When we need to select what object to swap, we compute the minimum
1466 * time distance between the current lruclock and the object last access
1467 * lruclock info. Even if clocks will wrap on overflow, there is
1468 * the interesting property that we are sure that at least
1469 * ABS(A-B) minutes passed between current time and timestamp B.
1471 * This is not precise but we don't need precision at all, just
1472 * something statistically reasonable.
1474 server
.lruclock
= (time(NULL
)/60)&((1<<21)-1);
1476 /* We received a SIGTERM, shutting down here in a safe way, as it is
1477 * not ok doing so inside the signal handler. */
1478 if (server
.shutdown_asap
) {
1479 if (prepareForShutdown() == REDIS_OK
) exit(0);
1480 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1483 /* Show some info about non-empty databases */
1484 for (j
= 0; j
< server
.dbnum
; j
++) {
1485 long long size
, used
, vkeys
;
1487 size
= dictSlots(server
.db
[j
].dict
);
1488 used
= dictSize(server
.db
[j
].dict
);
1489 vkeys
= dictSize(server
.db
[j
].expires
);
1490 if (!(loops
% 50) && (used
|| vkeys
)) {
1491 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1492 /* dictPrintStats(server.dict); */
1496 /* We don't want to resize the hash tables while a background saving
1497 * is in progress: the saving child is created using fork() that is
1498 * implemented with a copy-on-write semantic in most modern systems, so
1499 * if we resize the HT while there is the saving child at work actually
1500 * a lot of memory movements in the parent will cause a lot of pages
1502 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1503 if (!(loops
% 10)) tryResizeHashTables();
1504 if (server
.activerehashing
) incrementallyRehash();
1507 /* Show information about connected clients */
1508 if (!(loops
% 50)) {
1509 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1510 listLength(server
.clients
)-listLength(server
.slaves
),
1511 listLength(server
.slaves
),
1512 zmalloc_used_memory());
1515 /* Close connections of timedout clients */
1516 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1517 closeTimedoutClients();
1519 /* Check if a background saving or AOF rewrite in progress terminated */
1520 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1524 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1525 if (pid
== server
.bgsavechildpid
) {
1526 backgroundSaveDoneHandler(statloc
);
1528 backgroundRewriteDoneHandler(statloc
);
1530 updateDictResizePolicy();
1533 /* If there is not a background saving in progress check if
1534 * we have to save now */
1535 time_t now
= time(NULL
);
1536 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1537 struct saveparam
*sp
= server
.saveparams
+j
;
1539 if (server
.dirty
>= sp
->changes
&&
1540 now
-server
.lastsave
> sp
->seconds
) {
1541 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1542 sp
->changes
, sp
->seconds
);
1543 rdbSaveBackground(server
.dbfilename
);
1549 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1550 * will use few CPU cycles if there are few expiring keys, otherwise
1551 * it will get more aggressive to avoid that too much memory is used by
1552 * keys that can be removed from the keyspace. */
1553 for (j
= 0; j
< server
.dbnum
; j
++) {
1555 redisDb
*db
= server
.db
+j
;
1557 /* Continue to expire if at the end of the cycle more than 25%
1558 * of the keys were expired. */
1560 long num
= dictSize(db
->expires
);
1561 time_t now
= time(NULL
);
1564 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1565 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1570 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1571 t
= (time_t) dictGetEntryVal(de
);
1573 sds key
= dictGetEntryKey(de
);
1574 robj
*keyobj
= createStringObject(key
,sdslen(key
));
1576 dbDelete(db
,keyobj
);
1577 decrRefCount(keyobj
);
1579 server
.stat_expiredkeys
++;
1582 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1585 /* Swap a few keys on disk if we are over the memory limit and VM
1586 * is enabled. Try to free objects from the free list first. */
1587 if (vmCanSwapOut()) {
1588 while (server
.vm_enabled
&& zmalloc_used_memory() >
1589 server
.vm_max_memory
)
1593 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1594 retval
= (server
.vm_max_threads
== 0) ?
1595 vmSwapOneObjectBlocking() :
1596 vmSwapOneObjectThreaded();
1597 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1598 zmalloc_used_memory() >
1599 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1601 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1603 /* Note that when using threaded I/O we free just one object,
1604 * because anyway when the I/O thread in charge to swap this
1605 * object out will finish, the handler of completed jobs
1606 * will try to swap more objects if we are still out of memory. */
1607 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1611 /* Check if we should connect to a MASTER */
1612 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1613 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1614 if (syncWithMaster() == REDIS_OK
) {
1615 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1616 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1622 /* This function gets called every time Redis is entering the
1623 * main loop of the event driven library, that is, before sleeping
1624 * for ready file descriptors. */
1625 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1626 REDIS_NOTUSED(eventLoop
);
1628 /* Awake clients that got all the swapped keys they requested */
1629 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1633 listRewind(server
.io_ready_clients
,&li
);
1634 while((ln
= listNext(&li
))) {
1635 redisClient
*c
= ln
->value
;
1636 struct redisCommand
*cmd
;
1638 /* Resume the client. */
1639 listDelNode(server
.io_ready_clients
,ln
);
1640 c
->flags
&= (~REDIS_IO_WAIT
);
1641 server
.vm_blocked_clients
--;
1642 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1643 readQueryFromClient
, c
);
1644 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1645 assert(cmd
!= NULL
);
1648 /* There may be more data to process in the input buffer. */
1649 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1650 processInputBuffer(c
);
1653 /* Write the AOF buffer on disk */
1654 flushAppendOnlyFile();
1657 static void createSharedObjects(void) {
1660 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1661 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1662 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1663 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1664 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1665 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1666 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1667 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1668 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1669 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1670 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1671 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1672 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1673 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1674 "-ERR no such key\r\n"));
1675 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1676 "-ERR syntax error\r\n"));
1677 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1678 "-ERR source and destination objects are the same\r\n"));
1679 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1680 "-ERR index out of range\r\n"));
1681 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1682 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1683 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1684 shared
.select0
= createStringObject("select 0\r\n",10);
1685 shared
.select1
= createStringObject("select 1\r\n",10);
1686 shared
.select2
= createStringObject("select 2\r\n",10);
1687 shared
.select3
= createStringObject("select 3\r\n",10);
1688 shared
.select4
= createStringObject("select 4\r\n",10);
1689 shared
.select5
= createStringObject("select 5\r\n",10);
1690 shared
.select6
= createStringObject("select 6\r\n",10);
1691 shared
.select7
= createStringObject("select 7\r\n",10);
1692 shared
.select8
= createStringObject("select 8\r\n",10);
1693 shared
.select9
= createStringObject("select 9\r\n",10);
1694 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1695 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1696 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1697 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1698 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1699 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1700 shared
.mbulk3
= createStringObject("*3\r\n",4);
1701 shared
.mbulk4
= createStringObject("*4\r\n",4);
1702 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1703 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1704 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1708 static void appendServerSaveParams(time_t seconds
, int changes
) {
1709 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1710 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1711 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1712 server
.saveparamslen
++;
1715 static void resetServerSaveParams() {
1716 zfree(server
.saveparams
);
1717 server
.saveparams
= NULL
;
1718 server
.saveparamslen
= 0;
1721 static void initServerConfig() {
1722 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1723 server
.port
= REDIS_SERVERPORT
;
1724 server
.verbosity
= REDIS_VERBOSE
;
1725 server
.maxidletime
= REDIS_MAXIDLETIME
;
1726 server
.saveparams
= NULL
;
1727 server
.logfile
= NULL
; /* NULL = log on standard output */
1728 server
.bindaddr
= NULL
;
1729 server
.glueoutputbuf
= 1;
1730 server
.daemonize
= 0;
1731 server
.appendonly
= 0;
1732 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1733 server
.no_appendfsync_on_rewrite
= 0;
1734 server
.lastfsync
= time(NULL
);
1735 server
.appendfd
= -1;
1736 server
.appendseldb
= -1; /* Make sure the first time will not match */
1737 server
.pidfile
= zstrdup("/var/run/redis.pid");
1738 server
.dbfilename
= zstrdup("dump.rdb");
1739 server
.appendfilename
= zstrdup("appendonly.aof");
1740 server
.requirepass
= NULL
;
1741 server
.rdbcompression
= 1;
1742 server
.activerehashing
= 1;
1743 server
.maxclients
= 0;
1744 server
.blpop_blocked_clients
= 0;
1745 server
.maxmemory
= 0;
1746 server
.vm_enabled
= 0;
1747 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1748 server
.vm_page_size
= 256; /* 256 bytes per page */
1749 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1750 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1751 server
.vm_max_threads
= 4;
1752 server
.vm_blocked_clients
= 0;
1753 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1754 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1755 server
.shutdown_asap
= 0;
1757 resetServerSaveParams();
1759 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1760 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1761 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1762 /* Replication related */
1764 server
.masterauth
= NULL
;
1765 server
.masterhost
= NULL
;
1766 server
.masterport
= 6379;
1767 server
.master
= NULL
;
1768 server
.replstate
= REDIS_REPL_NONE
;
1770 /* Double constants initialization */
1772 R_PosInf
= 1.0/R_Zero
;
1773 R_NegInf
= -1.0/R_Zero
;
1774 R_Nan
= R_Zero
/R_Zero
;
1777 static void initServer() {
1780 signal(SIGHUP
, SIG_IGN
);
1781 signal(SIGPIPE
, SIG_IGN
);
1782 setupSigSegvAction();
1784 server
.devnull
= fopen("/dev/null","w");
1785 if (server
.devnull
== NULL
) {
1786 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1789 server
.clients
= listCreate();
1790 server
.slaves
= listCreate();
1791 server
.monitors
= listCreate();
1792 server
.objfreelist
= listCreate();
1793 createSharedObjects();
1794 server
.el
= aeCreateEventLoop();
1795 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1796 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1797 if (server
.fd
== -1) {
1798 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1801 for (j
= 0; j
< server
.dbnum
; j
++) {
1802 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1803 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1804 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1805 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1806 if (server
.vm_enabled
)
1807 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1808 server
.db
[j
].id
= j
;
1810 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1811 server
.pubsub_patterns
= listCreate();
1812 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1813 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1814 server
.cronloops
= 0;
1815 server
.bgsavechildpid
= -1;
1816 server
.bgrewritechildpid
= -1;
1817 server
.bgrewritebuf
= sdsempty();
1818 server
.aofbuf
= sdsempty();
1819 server
.lastsave
= time(NULL
);
1821 server
.stat_numcommands
= 0;
1822 server
.stat_numconnections
= 0;
1823 server
.stat_expiredkeys
= 0;
1824 server
.stat_starttime
= time(NULL
);
1825 server
.unixtime
= time(NULL
);
1826 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1827 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1828 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1830 if (server
.appendonly
) {
1831 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1832 if (server
.appendfd
== -1) {
1833 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1839 if (server
.vm_enabled
) vmInit();
1842 /* Empty the whole database */
1843 static long long emptyDb() {
1845 long long removed
= 0;
1847 for (j
= 0; j
< server
.dbnum
; j
++) {
1848 removed
+= dictSize(server
.db
[j
].dict
);
1849 dictEmpty(server
.db
[j
].dict
);
1850 dictEmpty(server
.db
[j
].expires
);
/* Map a case-insensitive "yes" / "no" string to 1 / 0.
 * Any other string yields -1, which callers treat as a syntax error. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1861 /* I agree, this is a very rudimental way to load a configuration...
1862 will improve later if the config gets more complex */
1863 static void loadServerConfig(char *filename
) {
1865 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1869 if (filename
[0] == '-' && filename
[1] == '\0')
1872 if ((fp
= fopen(filename
,"r")) == NULL
) {
1873 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1878 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1884 line
= sdstrim(line
," \t\r\n");
1886 /* Skip comments and blank lines*/
1887 if (line
[0] == '#' || line
[0] == '\0') {
1892 /* Split into arguments */
1893 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1894 sdstolower(argv
[0]);
1896 /* Execute config directives */
1897 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1898 server
.maxidletime
= atoi(argv
[1]);
1899 if (server
.maxidletime
< 0) {
1900 err
= "Invalid timeout value"; goto loaderr
;
1902 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1903 server
.port
= atoi(argv
[1]);
1904 if (server
.port
< 1 || server
.port
> 65535) {
1905 err
= "Invalid port"; goto loaderr
;
1907 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1908 server
.bindaddr
= zstrdup(argv
[1]);
1909 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1910 int seconds
= atoi(argv
[1]);
1911 int changes
= atoi(argv
[2]);
1912 if (seconds
< 1 || changes
< 0) {
1913 err
= "Invalid save parameters"; goto loaderr
;
1915 appendServerSaveParams(seconds
,changes
);
1916 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1917 if (chdir(argv
[1]) == -1) {
1918 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1919 argv
[1], strerror(errno
));
1922 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1923 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1924 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1925 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1926 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1928 err
= "Invalid log level. Must be one of debug, notice, warning";
1931 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1934 server
.logfile
= zstrdup(argv
[1]);
1935 if (!strcasecmp(server
.logfile
,"stdout")) {
1936 zfree(server
.logfile
);
1937 server
.logfile
= NULL
;
1939 if (server
.logfile
) {
1940 /* Test if we are able to open the file. The server will not
1941 * be able to abort just for this problem later... */
1942 logfp
= fopen(server
.logfile
,"a");
1943 if (logfp
== NULL
) {
1944 err
= sdscatprintf(sdsempty(),
1945 "Can't open the log file: %s", strerror(errno
));
1950 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1951 server
.dbnum
= atoi(argv
[1]);
1952 if (server
.dbnum
< 1) {
1953 err
= "Invalid number of databases"; goto loaderr
;
1955 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1956 loadServerConfig(argv
[1]);
1957 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1958 server
.maxclients
= atoi(argv
[1]);
1959 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1960 server
.maxmemory
= memtoll(argv
[1],NULL
);
1961 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1962 server
.masterhost
= sdsnew(argv
[1]);
1963 server
.masterport
= atoi(argv
[2]);
1964 server
.replstate
= REDIS_REPL_CONNECT
;
1965 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1966 server
.masterauth
= zstrdup(argv
[1]);
1967 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1968 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1969 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1971 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1972 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1973 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1975 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1976 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1977 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1979 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1980 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1981 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1983 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1984 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1985 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1987 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1988 zfree(server
.appendfilename
);
1989 server
.appendfilename
= zstrdup(argv
[1]);
1990 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
1992 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
1993 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1995 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1996 if (!strcasecmp(argv
[1],"no")) {
1997 server
.appendfsync
= APPENDFSYNC_NO
;
1998 } else if (!strcasecmp(argv
[1],"always")) {
1999 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
2000 } else if (!strcasecmp(argv
[1],"everysec")) {
2001 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
2003 err
= "argument must be 'no', 'always' or 'everysec'";
2006 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
2007 server
.requirepass
= zstrdup(argv
[1]);
2008 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
2009 zfree(server
.pidfile
);
2010 server
.pidfile
= zstrdup(argv
[1]);
2011 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
2012 zfree(server
.dbfilename
);
2013 server
.dbfilename
= zstrdup(argv
[1]);
2014 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
2015 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
2016 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2018 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
2019 zfree(server
.vm_swap_file
);
2020 server
.vm_swap_file
= zstrdup(argv
[1]);
2021 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
2022 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
2023 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
2024 server
.vm_page_size
= memtoll(argv
[1], NULL
);
2025 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
2026 server
.vm_pages
= memtoll(argv
[1], NULL
);
2027 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
2028 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
2029 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
2030 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
2031 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
2032 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
2034 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
2036 for (j
= 0; j
< argc
; j
++)
2041 if (fp
!= stdin
) fclose(fp
);
2045 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2046 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2047 fprintf(stderr
, ">>> '%s'\n", line
);
2048 fprintf(stderr
, "%s\n", err
);
2052 static void freeClientArgv(redisClient
*c
) {
2055 for (j
= 0; j
< c
->argc
; j
++)
2056 decrRefCount(c
->argv
[j
]);
2057 for (j
= 0; j
< c
->mbargc
; j
++)
2058 decrRefCount(c
->mbargv
[j
]);
2063 static void freeClient(redisClient
*c
) {
2066 /* Note that if the client we are freeing is blocked into a blocking
2067 * call, we have to set querybuf to NULL *before* to call
2068 * unblockClientWaitingData() to avoid processInputBuffer() will get
2069 * called. Also it is important to remove the file events after
2070 * this, because this call adds the READABLE event. */
2071 sdsfree(c
->querybuf
);
2073 if (c
->flags
& REDIS_BLOCKED
)
2074 unblockClientWaitingData(c
);
2076 /* UNWATCH all the keys */
2078 listRelease(c
->watched_keys
);
2079 /* Unsubscribe from all the pubsub channels */
2080 pubsubUnsubscribeAllChannels(c
,0);
2081 pubsubUnsubscribeAllPatterns(c
,0);
2082 dictRelease(c
->pubsub_channels
);
2083 listRelease(c
->pubsub_patterns
);
2084 /* Obvious cleanup */
2085 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2086 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2087 listRelease(c
->reply
);
2090 /* Remove from the list of clients */
2091 ln
= listSearchKey(server
.clients
,c
);
2092 redisAssert(ln
!= NULL
);
2093 listDelNode(server
.clients
,ln
);
2094 /* Remove from the list of clients that are now ready to be restarted
2095 * after waiting for swapped keys */
2096 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2097 ln
= listSearchKey(server
.io_ready_clients
,c
);
2099 listDelNode(server
.io_ready_clients
,ln
);
2100 server
.vm_blocked_clients
--;
2103 /* Remove from the list of clients waiting for swapped keys */
2104 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2105 ln
= listFirst(c
->io_keys
);
2106 dontWaitForSwappedKey(c
,ln
->value
);
2108 listRelease(c
->io_keys
);
2109 /* Master/slave cleanup */
2110 if (c
->flags
& REDIS_SLAVE
) {
2111 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2113 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2114 ln
= listSearchKey(l
,c
);
2115 redisAssert(ln
!= NULL
);
2118 if (c
->flags
& REDIS_MASTER
) {
2119 server
.master
= NULL
;
2120 server
.replstate
= REDIS_REPL_CONNECT
;
2122 /* Release memory */
2125 freeClientMultiState(c
);
2129 #define GLUEREPLY_UP_TO (1024)
2130 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2132 char buf
[GLUEREPLY_UP_TO
];
2137 listRewind(c
->reply
,&li
);
2138 while((ln
= listNext(&li
))) {
2142 objlen
= sdslen(o
->ptr
);
2143 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2144 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2146 listDelNode(c
->reply
,ln
);
2148 if (copylen
== 0) return;
2152 /* Now the output buffer is empty, add the new single element */
2153 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2154 listAddNodeHead(c
->reply
,o
);
2157 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2158 redisClient
*c
= privdata
;
2159 int nwritten
= 0, totwritten
= 0, objlen
;
2162 REDIS_NOTUSED(mask
);
2164 /* Use writev() if we have enough buffers to send */
2165 if (!server
.glueoutputbuf
&&
2166 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2167 !(c
->flags
& REDIS_MASTER
))
2169 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2173 while(listLength(c
->reply
)) {
2174 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2175 glueReplyBuffersIfNeeded(c
);
2177 o
= listNodeValue(listFirst(c
->reply
));
2178 objlen
= sdslen(o
->ptr
);
2181 listDelNode(c
->reply
,listFirst(c
->reply
));
2185 if (c
->flags
& REDIS_MASTER
) {
2186 /* Don't reply to a master */
2187 nwritten
= objlen
- c
->sentlen
;
2189 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2190 if (nwritten
<= 0) break;
2192 c
->sentlen
+= nwritten
;
2193 totwritten
+= nwritten
;
2194 /* If we fully sent the object on head go to the next one */
2195 if (c
->sentlen
== objlen
) {
2196 listDelNode(c
->reply
,listFirst(c
->reply
));
2199 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2200 * bytes, in a single threaded server it's a good idea to serve
2201 * other clients as well, even if a very large request comes from
2202 * super fast link that is always able to accept data (in real world
2203 * scenario think about 'KEYS *' against the loopback interfae) */
2204 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2206 if (nwritten
== -1) {
2207 if (errno
== EAGAIN
) {
2210 redisLog(REDIS_VERBOSE
,
2211 "Error writing to client: %s", strerror(errno
));
2216 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2217 if (listLength(c
->reply
) == 0) {
2219 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2223 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2225 redisClient
*c
= privdata
;
2226 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2228 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2229 int offset
, ion
= 0;
2231 REDIS_NOTUSED(mask
);
2234 while (listLength(c
->reply
)) {
2235 offset
= c
->sentlen
;
2239 /* fill-in the iov[] array */
2240 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2241 o
= listNodeValue(node
);
2242 objlen
= sdslen(o
->ptr
);
2244 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2247 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2248 break; /* no more iovecs */
2250 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2251 iov
[ion
].iov_len
= objlen
- offset
;
2252 willwrite
+= objlen
- offset
;
2253 offset
= 0; /* just for the first item */
2260 /* write all collected blocks at once */
2261 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2262 if (errno
!= EAGAIN
) {
2263 redisLog(REDIS_VERBOSE
,
2264 "Error writing to client: %s", strerror(errno
));
2271 totwritten
+= nwritten
;
2272 offset
= c
->sentlen
;
2274 /* remove written robjs from c->reply */
2275 while (nwritten
&& listLength(c
->reply
)) {
2276 o
= listNodeValue(listFirst(c
->reply
));
2277 objlen
= sdslen(o
->ptr
);
2279 if(nwritten
>= objlen
- offset
) {
2280 listDelNode(c
->reply
, listFirst(c
->reply
));
2281 nwritten
-= objlen
- offset
;
2285 c
->sentlen
+= nwritten
;
2293 c
->lastinteraction
= time(NULL
);
2295 if (listLength(c
->reply
) == 0) {
2297 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2301 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2303 ((struct redisCommand
*)r1
)->name
,
2304 ((struct redisCommand
*)r2
)->name
);
2307 static void sortCommandTable() {
2308 /* Copy and sort the read-only version of the command table */
2309 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2310 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2312 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2313 sizeof(struct redisCommand
),qsortRedisCommands
);
2316 static struct redisCommand
*lookupCommand(char *name
) {
2317 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2321 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2322 sizeof(struct redisCommand
),
2323 qsortRedisCommands
);
2326 /* resetClient() prepares the client to process the next command.
 * NOTE(review): only the signature is visible in this excerpt; the
 * statements performing the reset are not shown here, so nothing more
 * specific is documented. */
2327 static void resetClient(redisClient
*c
) {
2333 /* Call() is the core of Redis execution of a command */
2334 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2337 dirty
= server
.dirty
;
2339 dirty
= server
.dirty
-dirty
;
2341 if (server
.appendonly
&& dirty
)
2342 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2343 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2344 listLength(server
.slaves
))
2345 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2346 if (listLength(server
.monitors
))
2347 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2348 server
.stat_numcommands
++;
2351 /* If this function gets called we already read a whole
2352 * command, argments are in the client argv/argc fields.
2353 * processCommand() execute the command or prepare the
2354 * server for a bulk read from the client.
2356 * If 1 is returned the client is still alive and valid and
2357 * and other operations can be performed by the caller. Otherwise
2358 * if 0 is returned the client was destroied (i.e. after QUIT). */
2359 static int processCommand(redisClient
*c
) {
2360 struct redisCommand
*cmd
;
2362 /* Free some memory if needed (maxmemory setting) */
2363 if (server
.maxmemory
) freeMemoryIfNeeded();
2365 /* Handle the multi bulk command type. This is an alternative protocol
2366 * supported by Redis in order to receive commands that are composed of
2367 * multiple binary-safe "bulk" arguments. The latency of processing is
2368 * a bit higher but this allows things like multi-sets, so if this
2369 * protocol is used only for MSET and similar commands this is a big win. */
2370 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2371 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2372 if (c
->multibulk
<= 0) {
2376 decrRefCount(c
->argv
[c
->argc
-1]);
2380 } else if (c
->multibulk
) {
2381 if (c
->bulklen
== -1) {
2382 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2383 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2387 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2388 decrRefCount(c
->argv
[0]);
2389 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2391 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2396 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2400 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2401 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2405 if (c
->multibulk
== 0) {
2409 /* Here we need to swap the multi-bulk argc/argv with the
2410 * normal argc/argv of the client structure. */
2412 c
->argv
= c
->mbargv
;
2413 c
->mbargv
= auxargv
;
2416 c
->argc
= c
->mbargc
;
2417 c
->mbargc
= auxargc
;
2419 /* We need to set bulklen to something different than -1
2420 * in order for the code below to process the command without
2421 * to try to read the last argument of a bulk command as
2422 * a special argument. */
2424 /* continue below and process the command */
2431 /* -- end of multi bulk commands processing -- */
2433 /* The QUIT command is handled as a special case. Normal command
2434 * procs are unable to close the client connection safely */
2435 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2440 /* Now lookup the command and check ASAP about trivial error conditions
2441 * such wrong arity, bad command name and so forth. */
2442 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2445 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2446 (char*)c
->argv
[0]->ptr
));
2449 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2450 (c
->argc
< -cmd
->arity
)) {
2452 sdscatprintf(sdsempty(),
2453 "-ERR wrong number of arguments for '%s' command\r\n",
2457 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2458 /* This is a bulk command, we have to read the last argument yet. */
2459 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2461 decrRefCount(c
->argv
[c
->argc
-1]);
2462 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2464 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2469 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2470 /* It is possible that the bulk read is already in the
2471 * buffer. Check this condition and handle it accordingly.
2472 * This is just a fast path, alternative to call processInputBuffer().
2473 * It's a good idea since the code is small and this condition
2474 * happens most of the times. */
2475 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2476 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2478 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2480 /* Otherwise return... there is to read the last argument
2481 * from the socket. */
2485 /* Let's try to encode the bulk object to save space. */
2486 if (cmd
->flags
& REDIS_CMD_BULK
)
2487 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2489 /* Check if the user is authenticated */
2490 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2491 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2496 /* Handle the maxmemory directive */
2497 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2498 zmalloc_used_memory() > server
.maxmemory
)
2500 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2505 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2506 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2508 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2509 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2510 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2515 /* Exec the command */
2516 if (c
->flags
& REDIS_MULTI
&&
2517 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2518 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2520 queueMultiCommand(c
,cmd
);
2521 addReply(c
,shared
.queued
);
2523 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2524 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2528 /* Prepare the client for the next command */
2533 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2538 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2539 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2540 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2541 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2544 if (argc
<= REDIS_STATIC_ARGS
) {
2547 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2550 lenobj
= createObject(REDIS_STRING
,
2551 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2552 lenobj
->refcount
= 0;
2553 outv
[outc
++] = lenobj
;
2554 for (j
= 0; j
< argc
; j
++) {
2555 lenobj
= createObject(REDIS_STRING
,
2556 sdscatprintf(sdsempty(),"$%lu\r\n",
2557 (unsigned long) stringObjectLen(argv
[j
])));
2558 lenobj
->refcount
= 0;
2559 outv
[outc
++] = lenobj
;
2560 outv
[outc
++] = argv
[j
];
2561 outv
[outc
++] = shared
.crlf
;
2564 /* Increment all the refcounts at start and decrement at end in order to
2565 * be sure to free objects if there is no slave in a replication state
2566 * able to be feed with commands */
2567 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2568 listRewind(slaves
,&li
);
2569 while((ln
= listNext(&li
))) {
2570 redisClient
*slave
= ln
->value
;
2572 /* Don't feed slaves that are still waiting for BGSAVE to start */
2573 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2575 /* Feed all the other slaves, MONITORs and so on */
2576 if (slave
->slaveseldb
!= dictid
) {
2580 case 0: selectcmd
= shared
.select0
; break;
2581 case 1: selectcmd
= shared
.select1
; break;
2582 case 2: selectcmd
= shared
.select2
; break;
2583 case 3: selectcmd
= shared
.select3
; break;
2584 case 4: selectcmd
= shared
.select4
; break;
2585 case 5: selectcmd
= shared
.select5
; break;
2586 case 6: selectcmd
= shared
.select6
; break;
2587 case 7: selectcmd
= shared
.select7
; break;
2588 case 8: selectcmd
= shared
.select8
; break;
2589 case 9: selectcmd
= shared
.select9
; break;
2591 selectcmd
= createObject(REDIS_STRING
,
2592 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2593 selectcmd
->refcount
= 0;
2596 addReply(slave
,selectcmd
);
2597 slave
->slaveseldb
= dictid
;
2599 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2601 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2602 if (outv
!= static_outv
) zfree(outv
);
2605 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2606 s
= sdscatlen(s
,"\"",1);
2611 s
= sdscatprintf(s
,"\\%c",*p
);
2613 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2614 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2615 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2616 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2617 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2620 s
= sdscatprintf(s
,"%c",*p
);
2622 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2627 return sdscatlen(s
,"\"",1);
2630 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2634 sds cmdrepr
= sdsnew("+");
2638 gettimeofday(&tv
,NULL
);
2639 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2640 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2642 for (j
= 0; j
< argc
; j
++) {
2643 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2644 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2646 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2647 sdslen(argv
[j
]->ptr
));
2650 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2652 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2653 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2655 listRewind(monitors
,&li
);
2656 while((ln
= listNext(&li
))) {
2657 redisClient
*monitor
= ln
->value
;
2658 addReply(monitor
,cmdobj
);
2660 decrRefCount(cmdobj
);
2663 static void processInputBuffer(redisClient
*c
) {
2665 /* Before to process the input buffer, make sure the client is not
2666 * waitig for a blocking operation such as BLPOP. Note that the first
2667 * iteration the client is never blocked, otherwise the processInputBuffer
2668 * would not be called at all, but after the execution of the first commands
2669 * in the input buffer the client may be blocked, and the "goto again"
2670 * will try to reiterate. The following line will make it return asap. */
2671 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2672 if (c
->bulklen
== -1) {
2673 /* Read the first line of the query */
2674 char *p
= strchr(c
->querybuf
,'\n');
2681 query
= c
->querybuf
;
2682 c
->querybuf
= sdsempty();
2683 querylen
= 1+(p
-(query
));
2684 if (sdslen(query
) > querylen
) {
2685 /* leave data after the first line of the query in the buffer */
2686 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2688 *p
= '\0'; /* remove "\n" */
2689 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2690 sdsupdatelen(query
);
2692 /* Now we can split the query in arguments */
2693 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2696 if (c
->argv
) zfree(c
->argv
);
2697 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2699 for (j
= 0; j
< argc
; j
++) {
2700 if (sdslen(argv
[j
])) {
2701 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2709 /* Execute the command. If the client is still valid
2710 * after processCommand() return and there is something
2711 * on the query buffer try to process the next command. */
2712 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2714 /* Nothing to process, argc == 0. Just process the query
2715 * buffer if it's not empty or return to the caller */
2716 if (sdslen(c
->querybuf
)) goto again
;
2719 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2720 redisLog(REDIS_VERBOSE
, "Client protocol error");
2725 /* Bulk read handling. Note that if we are at this point
2726 the client already sent a command terminated with a newline,
2727 we are reading the bulk data that is actually the last
2728 argument of the command. */
2729 int qbl
= sdslen(c
->querybuf
);
2731 if (c
->bulklen
<= qbl
) {
2732 /* Copy everything but the final CRLF as final argument */
2733 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2735 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2736 /* Process the command. If the client is still valid after
2737 * the processing and there is more data in the buffer
2738 * try to parse it. */
2739 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2745 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2746 redisClient
*c
= (redisClient
*) privdata
;
2747 char buf
[REDIS_IOBUF_LEN
];
2750 REDIS_NOTUSED(mask
);
2752 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2754 if (errno
== EAGAIN
) {
2757 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2761 } else if (nread
== 0) {
2762 redisLog(REDIS_VERBOSE
, "Client closed connection");
2767 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2768 c
->lastinteraction
= time(NULL
);
2772 processInputBuffer(c
);
2775 static int selectDb(redisClient
*c
, int id
) {
2776 if (id
< 0 || id
>= server
.dbnum
)
2778 c
->db
= &server
.db
[id
];
2782 static void *dupClientReplyValue(void *o
) {
2783 incrRefCount((robj
*)o
);
/* Match callback for lists of objects: forwards the two values to
 * equalStringObjects() and returns its result. */
static int listMatchObjects(void *a, void *b) {
    int same = equalStringObjects(a,b);

    return same;
}
2791 static redisClient
*createClient(int fd
) {
2792 redisClient
*c
= zmalloc(sizeof(*c
));
2794 anetNonBlock(NULL
,fd
);
2795 anetTcpNoDelay(NULL
,fd
);
2796 if (!c
) return NULL
;
2799 c
->querybuf
= sdsempty();
2808 c
->lastinteraction
= time(NULL
);
2809 c
->authenticated
= 0;
2810 c
->replstate
= REDIS_REPL_NONE
;
2811 c
->reply
= listCreate();
2812 listSetFreeMethod(c
->reply
,decrRefCount
);
2813 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2814 c
->blocking_keys
= NULL
;
2815 c
->blocking_keys_num
= 0;
2816 c
->io_keys
= listCreate();
2817 c
->watched_keys
= listCreate();
2818 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2819 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2820 c
->pubsub_patterns
= listCreate();
2821 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2822 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2823 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2824 readQueryFromClient
, c
) == AE_ERR
) {
2828 listAddNodeTail(server
.clients
,c
);
2829 initClientMultiState(c
);
2833 static void addReply(redisClient
*c
, robj
*obj
) {
2834 if (listLength(c
->reply
) == 0 &&
2835 (c
->replstate
== REDIS_REPL_NONE
||
2836 c
->replstate
== REDIS_REPL_ONLINE
) &&
2837 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2838 sendReplyToClient
, c
) == AE_ERR
) return;
2840 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2841 obj
= dupStringObject(obj
);
2842 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2844 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2847 static void addReplySds(redisClient
*c
, sds s
) {
2848 robj
*o
= createObject(REDIS_STRING
,s
);
2853 static void addReplyDouble(redisClient
*c
, double d
) {
2856 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2857 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2858 (unsigned long) strlen(buf
),buf
));
2861 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2866 addReply(c
,shared
.czero
);
2868 } else if (ll
== 1) {
2869 addReply(c
,shared
.cone
);
2873 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2876 addReplySds(c
,sdsnewlen(buf
,len
+3));
2879 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2884 addReply(c
,shared
.czero
);
2886 } else if (ul
== 1) {
2887 addReply(c
,shared
.cone
);
2890 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2891 addReplySds(c
,sdsnewlen(buf
,len
));
2894 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2898 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2899 len
= sdslen(obj
->ptr
);
2901 long n
= (long)obj
->ptr
;
2903 /* Compute how many bytes this integer will take as a radix 10 string */
2909 while((n
= n
/10) != 0) {
2914 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2915 buf
[intlen
+1] = '\r';
2916 buf
[intlen
+2] = '\n';
2917 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2920 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2921 addReplyBulkLen(c
,obj
);
2923 addReply(c
,shared
.crlf
);
2926 static void addReplyBulkSds(redisClient
*c
, sds s
) {
2927 robj
*o
= createStringObject(s
, sdslen(s
));
2932 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2933 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2935 addReply(c
,shared
.nullbulk
);
2937 robj
*o
= createStringObject(s
,strlen(s
));
2943 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2948 REDIS_NOTUSED(mask
);
2949 REDIS_NOTUSED(privdata
);
2951 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2952 if (cfd
== AE_ERR
) {
2953 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2956 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2957 if ((c
= createClient(cfd
)) == NULL
) {
2958 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2959 close(cfd
); /* May be already closed, just ingore errors */
2962 /* If the maxclients directive is set and this client exceeds the limit, close
2963 * the connection. Note that we create the client first instead of checking
2964 * for this condition beforehand, since now the socket is already set in
2965 * nonblocking mode and we can send an error for free using the Kernel I/O */
2966 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2967 char *err
= "-ERR max number of clients reached\r\n";
2969 /* That's a best effort error message, don't check write errors */
2970 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2971 /* Nothing to do, Just to avoid the warning... */
2976 server
.stat_numconnections
++;
2979 /* ======================= Redis objects implementation ===================== */
2981 static robj
*createObject(int type
, void *ptr
) {
2984 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2985 if (listLength(server
.objfreelist
)) {
2986 listNode
*head
= listFirst(server
.objfreelist
);
2987 o
= listNodeValue(head
);
2988 listDelNode(server
.objfreelist
,head
);
2989 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2991 if (server
.vm_enabled
)
2992 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2993 o
= zmalloc(sizeof(*o
));
2996 o
->encoding
= REDIS_ENCODING_RAW
;
2999 if (server
.vm_enabled
) {
3000 /* Note that this code may run in the context of an I/O thread
3001 * and accessing server.lruclock in theory is an error
3002 * (no locks). But in practice this is safe, and even if we read
3003 * garbage Redis will not fail. */
3004 o
->lru
= server
.lruclock
;
3005 o
->storage
= REDIS_VM_MEMORY
;
3010 static robj
*createStringObject(char *ptr
, size_t len
) {
3011 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
3014 static robj
*createStringObjectFromLongLong(long long value
) {
3016 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3017 incrRefCount(shared
.integers
[value
]);
3018 o
= shared
.integers
[value
];
3020 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
3021 o
= createObject(REDIS_STRING
, NULL
);
3022 o
->encoding
= REDIS_ENCODING_INT
;
3023 o
->ptr
= (void*)((long)value
);
3025 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
3031 static robj
*dupStringObject(robj
*o
) {
3032 assert(o
->encoding
== REDIS_ENCODING_RAW
);
3033 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
3036 static robj
*createListObject(void) {
3037 list
*l
= listCreate();
3039 listSetFreeMethod(l
,decrRefCount
);
3040 return createObject(REDIS_LIST
,l
);
3043 static robj
*createSetObject(void) {
3044 dict
*d
= dictCreate(&setDictType
,NULL
);
3045 return createObject(REDIS_SET
,d
);
3048 static robj
*createHashObject(void) {
3049 /* All the Hashes start as zipmaps. Will be automatically converted
3050 * into hash tables if there are enough elements or big elements
3052 unsigned char *zm
= zipmapNew();
3053 robj
*o
= createObject(REDIS_HASH
,zm
);
3054 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3058 static robj
*createZsetObject(void) {
3059 zset
*zs
= zmalloc(sizeof(*zs
));
3061 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3062 zs
->zsl
= zslCreate();
3063 return createObject(REDIS_ZSET
,zs
);
3066 static void freeStringObject(robj
*o
) {
3067 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3072 static void freeListObject(robj
*o
) {
3073 listRelease((list
*) o
->ptr
);
3076 static void freeSetObject(robj
*o
) {
3077 dictRelease((dict
*) o
->ptr
);
3080 static void freeZsetObject(robj
*o
) {
3083 dictRelease(zs
->dict
);
3088 static void freeHashObject(robj
*o
) {
3089 switch (o
->encoding
) {
3090 case REDIS_ENCODING_HT
:
3091 dictRelease((dict
*) o
->ptr
);
3093 case REDIS_ENCODING_ZIPMAP
:
3097 redisPanic("Unknown hash encoding type");
3102 static void incrRefCount(robj
*o
) {
3106 static void decrRefCount(void *obj
) {
3109 /* Object is a swapped out value, or in the process of being loaded. */
3110 if (server
.vm_enabled
&&
3111 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3113 vmpointer
*vp
= obj
;
3114 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
);
3115 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
3116 server
.vm_stats_swapped_objects
--;
3121 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3122 /* Object is in memory, or in the process of being swapped out.
3124 * If the object is being swapped out, abort the operation on
3125 * decrRefCount even if the refcount does not drop to 0: the object
3126 * is referenced at least two times, as value of the key AND as
3127 * job->val in the iojob. So if we don't invalidate the iojob, when it is
3128 * done but the relevant key was removed in the meantime, the
3129 * complete jobs handler will not find the key about the job and the
3130 * assert will fail. */
3131 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3132 vmCancelThreadedIOJob(o
);
3133 if (--(o
->refcount
) == 0) {
3135 case REDIS_STRING
: freeStringObject(o
); break;
3136 case REDIS_LIST
: freeListObject(o
); break;
3137 case REDIS_SET
: freeSetObject(o
); break;
3138 case REDIS_ZSET
: freeZsetObject(o
); break;
3139 case REDIS_HASH
: freeHashObject(o
); break;
3140 default: redisPanic("Unknown object type"); break;
3142 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3143 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3144 !listAddNodeHead(server
.objfreelist
,o
))
3146 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3150 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3151 if (o
->type
!= type
) {
3152 addReply(c
,shared
.wrongtypeerr
);
3158 /* Check if the nul-terminated string 's' can be represented by a long
3159 * (that is, is a number that fits into long without any other space or
3160 * character before or after the digits).
3162 * If so, the function returns REDIS_OK and *longval is set to the value
3163 * of the number. Otherwise REDIS_ERR is returned */
3164 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3165 char buf
[32], *endptr
;
3169 value
= strtol(s
, &endptr
, 10);
3170 if (endptr
[0] != '\0') return REDIS_ERR
;
3171 slen
= ll2string(buf
,32,value
);
3173 /* If the number converted back into a string is not identical
3174 * then it's not possible to encode the string as integer */
3175 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3176 if (longval
) *longval
= value
;
3180 /* Try to encode a string object in order to save space */
3181 static robj
*tryObjectEncoding(robj
*o
) {
3185 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3186 return o
; /* Already encoded */
3188 /* It's not safe to encode shared objects: shared objects can be shared
3189 * everywhere in the "object space" of Redis. Encoded objects can only
3190 * appear as "values" (and not, for instance, as keys) */
3191 if (o
->refcount
> 1) return o
;
3193 /* Currently we try to encode only strings */
3194 redisAssert(o
->type
== REDIS_STRING
);
3196 /* Check if we can represent this string as a long integer */
3197 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3199 /* Ok, this object can be encoded */
3200 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3202 incrRefCount(shared
.integers
[value
]);
3203 return shared
.integers
[value
];
3205 o
->encoding
= REDIS_ENCODING_INT
;
3207 o
->ptr
= (void*) value
;
3212 /* Get a decoded version of an encoded object (returned as a new object).
3213 * If the object is already raw-encoded just increment the ref count. */
3214 static robj
*getDecodedObject(robj
*o
) {
3217 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3221 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3224 ll2string(buf
,32,(long)o
->ptr
);
3225 dec
= createStringObject(buf
,strlen(buf
));
3228 redisPanic("Unknown encoding type");
3232 /* Compare two string objects via strcmp() or alike.
3233 * Note that the objects may be integer-encoded. In such a case we
3234 * use ll2string() to get a string representation of the numbers on the stack
3235 * and compare the strings, it's much faster than calling getDecodedObject().
3237 * Important note: if objects are not integer encoded, but binary-safe strings,
3238 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3240 static int compareStringObjects(robj
*a
, robj
*b
) {
3241 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3242 char bufa
[128], bufb
[128], *astr
, *bstr
;
3245 if (a
== b
) return 0;
3246 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3247 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3253 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3254 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3260 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3263 /* Equal string objects return 1 if the two objects are the same from the
3264 * point of view of a string comparison, otherwise 0 is returned. Note that
3265 * this function is faster than checking for (compareStringObjects(a,b) == 0)
3266 * because it can perform some more optimization. */
3267 static int equalStringObjects(robj
*a
, robj
*b
) {
3268 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3269 return a
->ptr
== b
->ptr
;
3271 return compareStringObjects(a
,b
) == 0;
3275 static size_t stringObjectLen(robj
*o
) {
3276 redisAssert(o
->type
== REDIS_STRING
);
3277 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3278 return sdslen(o
->ptr
);
3282 return ll2string(buf
,32,(long)o
->ptr
);
3286 static int getDoubleFromObject(robj
*o
, double *target
) {
3293 redisAssert(o
->type
== REDIS_STRING
);
3294 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3295 value
= strtod(o
->ptr
, &eptr
);
3296 if (eptr
[0] != '\0') return REDIS_ERR
;
3297 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3298 value
= (long)o
->ptr
;
3300 redisPanic("Unknown string encoding");
3308 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3310 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3312 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3314 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3323 static int getLongLongFromObject(robj
*o
, long long *target
) {
3330 redisAssert(o
->type
== REDIS_STRING
);
3331 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3332 value
= strtoll(o
->ptr
, &eptr
, 10);
3333 if (eptr
[0] != '\0') return REDIS_ERR
;
3334 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3335 value
= (long)o
->ptr
;
3337 redisPanic("Unknown string encoding");
3345 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3347 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3349 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3351 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3360 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3363 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3364 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3366 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3368 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3377 /* =========================== Keyspace access API ========================== */
3379 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3380 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
3382 robj
*val
= dictGetEntryVal(de
);
3384 if (server
.vm_enabled
) {
3385 if (val
->storage
== REDIS_VM_MEMORY
||
3386 val
->storage
== REDIS_VM_SWAPPING
)
3388 /* If we were swapping the object out, cancel the operation */
3389 if (val
->storage
== REDIS_VM_SWAPPING
)
3390 vmCancelThreadedIOJob(val
);
3391 /* Update the access time for the aging algorithm. */
3392 val
->lru
= server
.lruclock
;
3394 int notify
= (val
->storage
== REDIS_VM_LOADING
);
3396 /* Our value was swapped on disk. Bring it at home. */
3397 redisAssert(val
->type
== REDIS_VMPOINTER
);
3398 val
= vmLoadObject(val
);
3399 dictGetEntryVal(de
) = val
;
3401 /* Clients blocked by the VM subsystem may be waiting for
3403 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3412 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3413 expireIfNeeded(db
,key
);
3414 return lookupKey(db
,key
);
3417 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3418 deleteIfVolatile(db
,key
);
3419 touchWatchedKey(db
,key
);
3420 return lookupKey(db
,key
);
3423 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3424 robj
*o
= lookupKeyRead(c
->db
, key
);
3425 if (!o
) addReply(c
,reply
);
3429 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3430 robj
*o
= lookupKeyWrite(c
->db
, key
);
3431 if (!o
) addReply(c
,reply
);
3435 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
3436 * otherwise REDIS_OK is returned, and the caller should increment the
3437 * refcount of 'val'. */
3438 static int dbAdd(redisDb
*db
, robj
*key
, robj
*val
) {
3439 /* Perform a lookup before adding the key, as we need to copy the
3441 if (dictFind(db
->dict
, key
->ptr
) != NULL
) {
3444 sds copy
= sdsdup(key
->ptr
);
3445 dictAdd(db
->dict
, copy
, val
);
3450 /* If the key does not exist, this is just like dbAdd(). Otherwise
3451 * the value associated to the key is replaced with the new one.
3453 * On update (key already existed) 0 is returned. Otherwise 1. */
3454 static int dbReplace(redisDb
*db
, robj
*key
, robj
*val
) {
3455 if (dictFind(db
->dict
,key
->ptr
) == NULL
) {
3456 sds copy
= sdsdup(key
->ptr
);
3457 dictAdd(db
->dict
, copy
, val
);
3460 dictReplace(db
->dict
, key
->ptr
, val
);
3465 static int dbExists(redisDb
*db
, robj
*key
) {
3466 return dictFind(db
->dict
,key
->ptr
) != NULL
;
3469 /* Return a random key, in form of a Redis object.
3470 * If there are no keys, NULL is returned.
3472 * The function makes sure to return keys not already expired. */
3473 static robj
*dbRandomKey(redisDb
*db
) {
3474 struct dictEntry
*de
;
3480 de
= dictGetRandomKey(db
->dict
);
3481 if (de
== NULL
) return NULL
;
3483 key
= dictGetEntryKey(de
);
3484 keyobj
= createStringObject(key
,sdslen(key
));
3485 if (dictFind(db
->expires
,key
)) {
3486 if (expireIfNeeded(db
,keyobj
)) {
3487 decrRefCount(keyobj
);
3488 continue; /* search for another key. This expired. */
3495 /* Delete a key, value, and associated expiration entry if any, from the DB */
3496 static int dbDelete(redisDb
*db
, robj
*key
) {
3499 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
);
3500 retval
= dictDelete(db
->dict
,key
->ptr
);
3502 return retval
== DICT_OK
;
3505 /*============================ RDB saving/loading =========================== */
3507 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3508 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3512 static int rdbSaveTime(FILE *fp
, time_t t
) {
3513 int32_t t32
= (int32_t) t
;
3514 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3518 /* check rdbLoadLen() comments for more info */
3519 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3520 unsigned char buf
[2];
3523 /* Save a 6 bit len */
3524 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3525 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3526 } else if (len
< (1<<14)) {
3527 /* Save a 14 bit len */
3528 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3530 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3532 /* Save a 32 bit len */
3533 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3534 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3536 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3541 /* Encode 'value' as an integer if possible (if integer will fit the
3542 * supported range). If the function successfully encoded the integer
3543 * then the (up to 5 bytes) encoded representation is written in the
3544 * string pointed by 'enc' and the length is returned. Otherwise
3546 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3547 /* Finally check if it fits in our ranges */
3548 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3549 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3550 enc
[1] = value
&0xFF;
3552 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3553 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3554 enc
[1] = value
&0xFF;
3555 enc
[2] = (value
>>8)&0xFF;
3557 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3558 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3559 enc
[1] = value
&0xFF;
3560 enc
[2] = (value
>>8)&0xFF;
3561 enc
[3] = (value
>>16)&0xFF;
3562 enc
[4] = (value
>>24)&0xFF;
3569 /* String objects in the form "2391" "-100" without any space and with a
3570 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3571 * encoded as integers to save space */
3572 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3574 char *endptr
, buf
[32];
3576 /* Check if it's possible to encode this value as a number */
3577 value
= strtoll(s
, &endptr
, 10);
3578 if (endptr
[0] != '\0') return 0;
3579 ll2string(buf
,32,value
);
3581 /* If the number converted back into a string is not identical
3582 * then it's not possible to encode the string as integer */
3583 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3585 return rdbEncodeInteger(value
,enc
);
3588 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3589 size_t comprlen
, outlen
;
3593 /* We require at least four bytes compression for this to be worth it */
3594 if (len
<= 4) return 0;
3596 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3597 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3598 if (comprlen
== 0) {
3602 /* Data compressed! Let's save it on disk */
3603 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3604 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3605 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3606 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3607 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3616 /* Save a string object as [len][data] on disk. If the object is a string
3617 * representation of an integer value we try to save it in a special form */
3618 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3621 /* Try integer encoding */
3623 unsigned char buf
[5];
3624 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3625 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3630 /* Try LZF compression - under 20 bytes it's unable to compress even
3631 * aaaaaaaaaaaaaaaaaa so skip it */
3632 if (server
.rdbcompression
&& len
> 20) {
3635 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3636 if (retval
== -1) return -1;
3637 if (retval
> 0) return 0;
3638 /* retval == 0 means data can't be compressed, save the old way */
3641 /* Store verbatim */
3642 if (rdbSaveLen(fp
,len
) == -1) return -1;
3643 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3647 /* Like rdbSaveRawString() but handles encoded objects */
3648 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3651 /* Avoid to decode the object, then encode it again, if the
3652 * object is already integer encoded. */
3653 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3654 long val
= (long) obj
->ptr
;
3655 unsigned char buf
[5];
3658 if ((enclen
= rdbEncodeInteger(val
,buf
)) > 0) {
3659 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3662 /* otherwise... fall through and continue with the usual
3666 /* Avoid incr/decr ref count business when possible.
3667 * This plays well with copy-on-write given that we are probably
3668 * in a child process (BGSAVE). Also this makes sure key objects
3669 * of swapped objects are not incRefCount-ed (an assert does not allow
3670 * this in order to avoid bugs) */
3671 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3672 obj
= getDecodedObject(obj
);
3673 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3676 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3681 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3682 * 8 bit integer specifying the length of the representation.
3683 * This 8 bit integer has special values in order to specify the following
3689 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3690 unsigned char buf
[128];
3696 } else if (!isfinite(val
)) {
3698 buf
[0] = (val
< 0) ? 255 : 254;
3700 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3701 /* Check if the float is in a safe range to be casted into a
3702 * long long. We are assuming that long long is 64 bit here.
3703 * Also we are assuming that there are no implementations around where
3704 * double has precision < 52 bit.
3706 * Under these assumptions we test if a double is inside an interval
3707 * where casting to long long is safe. Then using two castings we
3708 * make sure the decimal part is zero. If all this is true we use
3709 * integer printing function that is much faster. */
3710 double min
= -4503599627370495; /* (2^52)-1 */
3711 double max
= 4503599627370496; /* -(2^52) */
3712 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3713 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3716 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3717 buf
[0] = strlen((char*)buf
+1);
3720 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3724 /* Save a Redis object. */
3725 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3726 if (o
->type
== REDIS_STRING
) {
3727 /* Save a string value */
3728 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3729 } else if (o
->type
== REDIS_LIST
) {
3730 /* Save a list value */
3731 list
*list
= o
->ptr
;
3735 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3736 listRewind(list
,&li
);
3737 while((ln
= listNext(&li
))) {
3738 robj
*eleobj
= listNodeValue(ln
);
3740 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3742 } else if (o
->type
== REDIS_SET
) {
3743 /* Save a set value */
3745 dictIterator
*di
= dictGetIterator(set
);
3748 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3749 while((de
= dictNext(di
)) != NULL
) {
3750 robj
*eleobj
= dictGetEntryKey(de
);
3752 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3754 dictReleaseIterator(di
);
3755 } else if (o
->type
== REDIS_ZSET
) {
3756 /* Save a sorted set value */
3758 dictIterator
*di
= dictGetIterator(zs
->dict
);
3761 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3762 while((de
= dictNext(di
)) != NULL
) {
3763 robj
*eleobj
= dictGetEntryKey(de
);
3764 double *score
= dictGetEntryVal(de
);
3766 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3767 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3769 dictReleaseIterator(di
);
3770 } else if (o
->type
== REDIS_HASH
) {
3771 /* Save a hash value */
3772 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3773 unsigned char *p
= zipmapRewind(o
->ptr
);
3774 unsigned int count
= zipmapLen(o
->ptr
);
3775 unsigned char *key
, *val
;
3776 unsigned int klen
, vlen
;
3778 if (rdbSaveLen(fp
,count
) == -1) return -1;
3779 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3780 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3781 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3784 dictIterator
*di
= dictGetIterator(o
->ptr
);
3787 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3788 while((de
= dictNext(di
)) != NULL
) {
3789 robj
*key
= dictGetEntryKey(de
);
3790 robj
*val
= dictGetEntryVal(de
);
3792 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3793 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3795 dictReleaseIterator(di
);
3798 redisPanic("Unknown object type");
3803 /* Return the length the object will have on disk if saved with
3804 * the rdbSaveObject() function. Currently we use a trick to get
3805 * this length with very little changes to the code. In the future
3806 * we could switch to a faster solution. */
3807 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3808 if (fp
== NULL
) fp
= server
.devnull
;
3810 assert(rdbSaveObject(fp
,o
) != 1);
3814 /* Return the number of pages required to save this object in the swap file */
3815 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3816 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3818 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3821 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3822 static int rdbSave(char *filename
) {
3823 dictIterator
*di
= NULL
;
3828 time_t now
= time(NULL
);
3830 /* Wait for I/O threads to terminate, just in case this is a
3831 * foreground-saving, to avoid seeking the swap file descriptor at the
3833 if (server
.vm_enabled
)
3834 waitEmptyIOJobsQueue();
3836 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3837 fp
= fopen(tmpfile
,"w");
3839 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3842 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3843 for (j
= 0; j
< server
.dbnum
; j
++) {
3844 redisDb
*db
= server
.db
+j
;
3846 if (dictSize(d
) == 0) continue;
3847 di
= dictGetIterator(d
);
3853 /* Write the SELECT DB opcode */
3854 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3855 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3857 /* Iterate this DB writing every entry */
3858 while((de
= dictNext(di
)) != NULL
) {
3859 sds keystr
= dictGetEntryKey(de
);
3860 robj key
, *o
= dictGetEntryVal(de
);
3863 initStaticStringObject(key
,keystr
);
3864 expiretime
= getExpire(db
,&key
);
3866 /* Save the expire time */
3867 if (expiretime
!= -1) {
3868 /* If this key is already expired skip it */
3869 if (expiretime
< now
) continue;
3870 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3871 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3873 /* Save the key and associated value. This requires special
3874 * handling if the value is swapped out. */
3875 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
3876 o
->storage
== REDIS_VM_SWAPPING
) {
3877 /* Save type, key, value */
3878 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3879 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3880 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3882 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3884 /* Get a preview of the object in memory */
3885 po
= vmPreviewObject(o
);
3886 /* Save type, key, value */
3887 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
;
3888 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3889 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3890 /* Remove the loaded object from memory */
3894 dictReleaseIterator(di
);
3897 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3899 /* Make sure data will not remain on the OS's output buffers */
3904 /* Use RENAME to make sure the DB file is changed atomically only
3905 * if the generated DB file is ok. */
3906 if (rename(tmpfile
,filename
) == -1) {
3907 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3911 redisLog(REDIS_NOTICE
,"DB saved on disk");
3913 server
.lastsave
= time(NULL
);
3919 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3920 if (di
) dictReleaseIterator(di
);
3924 static int rdbSaveBackground(char *filename
) {
3927 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3928 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3929 if ((childpid
= fork()) == 0) {
3931 if (server
.vm_enabled
) vmReopenSwapFile();
3933 if (rdbSave(filename
) == REDIS_OK
) {
3940 if (childpid
== -1) {
3941 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3945 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3946 server
.bgsavechildpid
= childpid
;
3947 updateDictResizePolicy();
3950 return REDIS_OK
; /* unreached */
3953 static void rdbRemoveTempFile(pid_t childpid
) {
3956 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
3960 static int rdbLoadType(FILE *fp
) {
3962 if (fread(&type
,1,1,fp
) == 0) return -1;
3966 static time_t rdbLoadTime(FILE *fp
) {
3968 if (fread(&t32
,4,1,fp
) == 0) return -1;
3969 return (time_t) t32
;
3972 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3973 * of this file for a description of how these are stored on disk.
3975 * isencoded is set to 1 if the value read is not actually a length but
3976 * an "encoding type", check the above comments for more info */
3977 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3978 unsigned char buf
[2];
3982 if (isencoded
) *isencoded
= 0;
3983 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3984 type
= (buf
[0]&0xC0)>>6;
3985 if (type
== REDIS_RDB_6BITLEN
) {
3986 /* Read a 6 bit len */
3988 } else if (type
== REDIS_RDB_ENCVAL
) {
3989 /* Read a 6 bit len encoding type */
3990 if (isencoded
) *isencoded
= 1;
3992 } else if (type
== REDIS_RDB_14BITLEN
) {
3993 /* Read a 14 bit len */
3994 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3995 return ((buf
[0]&0x3F)<<8)|buf
[1];
3997 /* Read a 32 bit len */
3998 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
4003 /* Load an integer-encoded object from file 'fp', with the specified
4004 * encoding type 'enctype'. If encode is true the function may return
4005 * an integer-encoded object as reply, otherwise the returned object
4006 * will always be encoded as a raw string.
 *
 * 1, 2 or 4 bytes are read depending on 'enctype' and multi-byte values are
 * assembled least significant byte first. NULL is returned when the read
 * fails; an unknown 'enctype' ends in redisPanic(). */
4007 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
4008 unsigned char enc
[4];
4011 if (enctype
== REDIS_RDB_ENC_INT8
) {
4012 if (fread(enc
,1,1,fp
) == 0) return NULL
;
/* The cast to signed char sign-extends the single byte. */
4013 val
= (signed char)enc
[0];
4014 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
4016 if (fread(enc
,2,1,fp
) == 0) return NULL
;
4017 v
= enc
[0]|(enc
[1]<<8);
4019 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
4021 if (fread(enc
,4,1,fp
) == 0) return NULL
;
/* NOTE(review): enc[3] is promoted to int before the shift, so a byte of
 * 0x80 or more shifted left by 24 overflows a 32 bit int; consider casting
 * to uint32_t before shifting. */
4022 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
4025 val
= 0; /* anti-warning */
4026 redisPanic("Unknown RDB integer encoding type");
4029 return createStringObjectFromLongLong(val
);
4031 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
/* Load an LZF compressed string from 'fp'.
 *
 * Two lengths are read with rdbLoadLen(): first the compressed length
 * ('clen'), then the uncompressed length ('len'). 'clen' bytes are read into
 * a temporary buffer and decompressed into a new sds string of 'len' bytes,
 * which becomes the payload of the returned REDIS_STRING object.
 *
 * Returns NULL when either length cannot be read. Allocation, read and
 * decompression failures jump to the 'err' label, which is not visible in
 * this chunk. */
4034 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
4035 unsigned int len
, clen
;
4036 unsigned char *c
= NULL
;
4039 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4040 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4041 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
4042 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
4043 if (fread(c
,clen
,1,fp
) == 0) goto err
;
4044 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
4046 return createObject(REDIS_STRING
,val
);
/* Load a string object from 'fp'.
 *
 * rdbLoadLen() yields either a plain length or, when it sets 'isencoded',
 * an encoding type. Integer encodings are delegated to
 * rdbLoadIntegerObject() (forwarding 'encode'), LZF to
 * rdbLoadLzfStringObject(), and any other encoding ends in redisPanic().
 * For a plain length, 'len' bytes are read into a new sds string that is
 * wrapped in a REDIS_STRING object.
 *
 * Returns NULL when the length cannot be read. */
4053 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
4058 len
= rdbLoadLen(fp
,&isencoded
);
4061 case REDIS_RDB_ENC_INT8
:
4062 case REDIS_RDB_ENC_INT16
:
4063 case REDIS_RDB_ENC_INT32
:
4064 return rdbLoadIntegerObject(fp
,len
,encode
);
4065 case REDIS_RDB_ENC_LZF
:
4066 return rdbLoadLzfStringObject(fp
);
4068 redisPanic("Unknown RDB encoding type");
4072 if (len
== REDIS_RDB_LENERR
) return NULL
;
4073 val
= sdsnewlen(NULL
,len
);
/* A zero length string has no payload, so the read is skipped. */
4074 if (len
&& fread(val
,len
,1,fp
) == 0) {
4078 return createObject(REDIS_STRING
,val
);
/* Load a string object with 'encode' set to 0, so that (as documented on
 * rdbLoadIntegerObject) integer-encoded values come back as raw strings. */
4081 static robj
*rdbLoadStringObject(FILE *fp
) {
4082 return rdbGenericLoadStringObject(fp
,0);
/* Load a string object with 'encode' set to 1, allowing the returned object
 * to stay integer-encoded when the on-disk value was stored that way. */
4085 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
4086 return rdbGenericLoadStringObject(fp
,1);
4089 /* For information about double serialization check rdbSaveDoubleValue()
 *
 * The first byte read is either a special marker (255 = negative infinity,
 * 254 = positive infinity, 253 = NaN) or the length of a textual
 * representation that follows and is parsed with sscanf("%lg").
 * The result is stored in '*val'. Returns 0 for the special markers and -1
 * when a read fails; the return after parsing is not visible in this chunk. */
4090 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
4094 if (fread(&len
,1,1,fp
) == 0) return -1;
4096 case 255: *val
= R_NegInf
; return 0;
4097 case 254: *val
= R_PosInf
; return 0;
4098 case 253: *val
= R_Nan
; return 0;
4100 if (fread(buf
,len
,1,fp
) == 0) return -1;
/* NOTE(review): the sscanf() result is not checked, so text that does not
 * parse as a number leaves '*val' unchanged. */
4102 sscanf(buf
, "%lg", val
);
4107 /* Load a Redis object of the specified type from the specified file.
4108 * On success a newly allocated object is returned, otherwise NULL. */
4109 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4112 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4113 if (type
== REDIS_STRING
) {
4114 /* Read string value */
4115 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4116 o
= tryObjectEncoding(o
);
4117 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
4118 /* Read list/set value */
4121 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4122 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
4123 /* It's faster to expand the dict to the right size asap in order
4124 * to avoid rehashing */
4125 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
4126 dictExpand(o
->ptr
,listlen
);
4127 /* Load every single element of the list/set */
4131 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4132 ele
= tryObjectEncoding(ele
);
4133 if (type
== REDIS_LIST
) {
4134 listAddNodeTail((list
*)o
->ptr
,ele
);
4136 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4139 } else if (type
== REDIS_ZSET
) {
4140 /* Read sorted set value */
4144 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4145 o
= createZsetObject();
4147 /* Load every single element of the sorted set */
4150 double *score
= zmalloc(sizeof(double));
4152 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4153 ele
= tryObjectEncoding(ele
);
4154 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4155 dictAdd(zs
->dict
,ele
,score
);
4156 zslInsert(zs
->zsl
,*score
,ele
);
4157 incrRefCount(ele
); /* added to skiplist */
4159 } else if (type
== REDIS_HASH
) {
4162 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4163 o
= createHashObject();
4164 /* Too many entries? Use a hash table. */
4165 if (hashlen
> server
.hash_max_zipmap_entries
)
4166 convertToRealHash(o
);
4167 /* Load every key/value, then set it into the zipmap or hash
4168 * table, as needed. */
4172 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4173 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4174 /* If we are using a zipmap and there are too big values
4175 * the object is converted to real hash table encoding. */
4176 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4177 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4178 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4180 convertToRealHash(o
);
4183 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4184 unsigned char *zm
= o
->ptr
;
4186 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4187 val
->ptr
,sdslen(val
->ptr
),NULL
);
4192 key
= tryObjectEncoding(key
);
4193 val
= tryObjectEncoding(val
);
4194 dictAdd((dict
*)o
->ptr
,key
,val
);
4198 redisPanic("Unknown object type");
4203 static int rdbLoad(char *filename
) {
4206 int type
, retval
, rdbver
;
4207 int swap_all_values
= 0;
4208 redisDb
*db
= server
.db
+0;
4210 time_t expiretime
, now
= time(NULL
);
4212 fp
= fopen(filename
,"r");
4213 if (!fp
) return REDIS_ERR
;
4214 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4216 if (memcmp(buf
,"REDIS",5) != 0) {
4218 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4221 rdbver
= atoi(buf
+5);
4224 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4233 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4234 if (type
== REDIS_EXPIRETIME
) {
4235 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4236 /* We read the time so we need to read the object type again */
4237 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4239 if (type
== REDIS_EOF
) break;
4240 /* Handle SELECT DB opcode as a special case */
4241 if (type
== REDIS_SELECTDB
) {
4242 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4244 if (dbid
>= (unsigned)server
.dbnum
) {
4245 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4248 db
= server
.db
+dbid
;
4252 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4254 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4255 /* Check if the key already expired */
4256 if (expiretime
!= -1 && expiretime
< now
) {
4261 /* Add the new object in the hash table */
4262 retval
= dbAdd(db
,key
,val
);
4263 if (retval
== REDIS_ERR
) {
4264 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4267 /* Set the expire time if needed */
4268 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4270 /* Handle swapping while loading big datasets when VM is on */
4272 /* If we detected we are hopeless about fitting something in memory
4273 * we just swap every new key on disk. Directly...
4274 * Note that it's important to check for this condition before resorting
4275 * to random sampling, otherwise we may try to swap already
4277 if (swap_all_values
) {
4278 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
4280 /* de may be NULL since the key already expired */
4283 val
= dictGetEntryVal(de
);
4285 if (val
->refcount
== 1 &&
4286 (vp
= vmSwapObjectBlocking(val
)) != NULL
)
4287 dictGetEntryVal(de
) = vp
;
4294 /* Flush data on disk once 32 MB of additional RAM are used... */
4296 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
4299 /* If we have still some hope of having some value fitting memory
4300 * then we try random sampling. */
4301 if (!swap_all_values
&& server
.vm_enabled
&& force_swapout
) {
4302 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4303 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4305 if (zmalloc_used_memory() > server
.vm_max_memory
)
4306 swap_all_values
= 1; /* We are already using too much mem */
4312 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4313 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4315 return REDIS_ERR
; /* Just to avoid warning */
4318 /*================================== Shutdown =============================== */
/* Bring the server to a state where the process can exit.
 *
 * A live background saving child is killed with SIGKILL and its temporary
 * file handled by rdbRemoveTempFile(). Then, with append only mode on, the
 * AOF is fsync()ed and the VM swap file is unlinked when VM is enabled;
 * otherwise a synchronous rdbSave() is performed and, on success, the pid
 * file is unlinked when running daemonized.
 *
 * The return statements are not visible in this chunk; shutdownCommand()
 * compares the result with REDIS_OK.
 * NOTE(review): the pid file removal is only visible on the snapshot path;
 * confirm the append only path removes it as well. */
4319 static int prepareForShutdown() {
4320 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4321 /* Kill the saving child if there is a background saving in progress.
4322 We want to avoid race conditions, for instance our saving child may
4323 overwrite the synchronous save done by SHUTDOWN. */
4324 if (server
.bgsavechildpid
!= -1) {
4325 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4326 kill(server
.bgsavechildpid
,SIGKILL
);
4327 rdbRemoveTempFile(server
.bgsavechildpid
);
4329 if (server
.appendonly
) {
4330 /* Append only file: fsync() the AOF and exit */
4331 aof_fsync(server
.appendfd
);
4332 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4334 /* Snapshotting. Perform a SYNC SAVE and exit */
4335 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4336 if (server
.daemonize
)
4337 unlink(server
.pidfile
);
4338 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4340 /* Ooops.. error saving! The best we can do is to continue
4341 * operating. Note that if there was a background saving process,
4342 * in the next cron() Redis will be notified that the background
4343 * saving aborted, handling special stuff like slaves pending for
4344 * synchronization... */
4345 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4349 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4353 /*================================== Commands =============================== */
/* AUTH <password>: mark the client as authenticated when no password is
 * configured (server.requirepass is NULL) or when argv[1] equals it;
 * otherwise clear the flag and reply with an "invalid password" error.
 * NOTE(review): strcmp() stops at the first differing byte, so the
 * comparison time depends on the supplied password. */
4355 static void authCommand(redisClient
*c
) {
4356 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4357 c
->authenticated
= 1;
4358 addReply(c
,shared
.ok
);
4360 c
->authenticated
= 0;
4361 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
/* PING: reply with the preallocated shared.pong object. */
4365 static void pingCommand(redisClient
*c
) {
4366 addReply(c
,shared
.pong
);
/* ECHO <message>: send argv[1] back to the client as a bulk reply. */
4369 static void echoCommand(redisClient
*c
) {
4370 addReplyBulk(c
,c
->argv
[1]);
4373 /*=================================== Strings =============================== */
4375 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4377 long seconds
= 0; /* initialized to avoid an harmness warning */
4380 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4383 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4388 touchWatchedKey(c
->db
,key
);
4389 if (nx
) deleteIfVolatile(c
->db
,key
);
4390 retval
= dbAdd(c
->db
,key
,val
);
4391 if (retval
== REDIS_ERR
) {
4393 dbReplace(c
->db
,key
,val
);
4396 addReply(c
,shared
.czero
);
4403 removeExpire(c
->db
,key
);
4404 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4405 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* SET <key> <value>: setGenericCommand() with nx = 0 and no expire. */
4408 static void setCommand(redisClient
*c
) {
4409 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
/* SETNX <key> <value>: setGenericCommand() with nx = 1 and no expire. */
4412 static void setnxCommand(redisClient
*c
) {
4413 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
/* SETEX <key> <seconds> <value>: note the argument order, the value is
 * argv[3] while argv[2] is passed as the 'expire' argument. */
4416 static void setexCommand(redisClient
*c
) {
4417 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4420 static int getGenericCommand(redisClient
*c
) {
4423 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4426 if (o
->type
!= REDIS_STRING
) {
4427 addReply(c
,shared
.wrongtypeerr
);
4435 static void getCommand(redisClient
*c
) {
4436 getGenericCommand(c
);
/* GETSET <key> <value>: reply with the current value through
 * getGenericCommand(), then store argv[2] under argv[1].
 * Nothing is written when getGenericCommand() returns REDIS_ERR. */
4439 static void getsetCommand(redisClient
*c
) {
4440 if (getGenericCommand(c
) == REDIS_ERR
) return;
4441 dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]);
/* The database now references the value object too. */
4442 incrRefCount(c
->argv
[2]);
/* Any expire set on the key is removed. */
4444 removeExpire(c
->db
,c
->argv
[1]);
4447 static void mgetCommand(redisClient
*c
) {
4450 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4451 for (j
= 1; j
< c
->argc
; j
++) {
4452 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4454 addReply(c
,shared
.nullbulk
);
4456 if (o
->type
!= REDIS_STRING
) {
4457 addReply(c
,shared
.nullbulk
);
4465 static void msetGenericCommand(redisClient
*c
, int nx
) {
4466 int j
, busykeys
= 0;
4468 if ((c
->argc
% 2) == 0) {
4469 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4472 /* Handle the NX flag. The MSETNX semantic is to return zero and set
4473 * nothing at all if at least one key already exists. */
4475 for (j
= 1; j
< c
->argc
; j
+= 2) {
4476 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4482 addReply(c
, shared
.czero
);
4486 for (j
= 1; j
< c
->argc
; j
+= 2) {
4487 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4488 dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]);
4489 incrRefCount(c
->argv
[j
+1]);
4490 removeExpire(c
->db
,c
->argv
[j
]);
4492 server
.dirty
+= (c
->argc
-1)/2;
4493 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4496 static void msetCommand(redisClient
*c
) {
4497 msetGenericCommand(c
,0);
4500 static void msetnxCommand(redisClient
*c
) {
4501 msetGenericCommand(c
,1);
4504 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4508 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4509 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4510 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4513 o
= createStringObjectFromLongLong(value
);
4514 dbReplace(c
->db
,c
->argv
[1],o
);
4516 addReply(c
,shared
.colon
);
4518 addReply(c
,shared
.crlf
);
4521 static void incrCommand(redisClient
*c
) {
4522 incrDecrCommand(c
,1);
4525 static void decrCommand(redisClient
*c
) {
4526 incrDecrCommand(c
,-1);
/* INCRBY <key> <increment>: parse argv[2] into 'incr' and apply it with
 * incrDecrCommand(). Returns without further action when the parse helper
 * does not return REDIS_OK (presumably it has already replied -- confirm). */
4529 static void incrbyCommand(redisClient
*c
) {
4532 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4533 incrDecrCommand(c
,incr
);
/* DECRBY <key> <decrement>: parse argv[2] into 'incr' and apply its
 * negation with incrDecrCommand().
 * NOTE(review): assuming 'incr' is a long long (incrDecrCommand() takes
 * one), negating the most negative value overflows; consider rejecting it
 * before the call. */
4536 static void decrbyCommand(redisClient
*c
) {
4539 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4540 incrDecrCommand(c
,-incr
);
/* APPEND <key> <value>: append argv[2] to the string stored at argv[1] and
 * reply with the resulting length as an integer.
 *
 * When the key has to be created, argv[2] itself is added to the database
 * (with an extra reference) and the length comes from stringObjectLen().
 * An existing value that is not a string gets the wrong type error. An
 * existing string that is shared (refcount != 1) or not RAW encoded is first
 * replaced in the database by a fresh raw copy, then argv[2] is appended:
 * with sdscatlen() when it is RAW encoded, otherwise by formatting its 'ptr'
 * field as a number.
 *
 * NOTE(review): the non-RAW branch formats with "%ld" but casts the argument
 * to unsigned long; the specifier and the cast should agree. */
4543 static void appendCommand(redisClient
*c
) {
4548 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4550 /* Create the key */
4551 retval
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]);
4552 incrRefCount(c
->argv
[2]);
4553 totlen
= stringObjectLen(c
->argv
[2]);
4555 if (o
->type
!= REDIS_STRING
) {
4556 addReply(c
,shared
.wrongtypeerr
);
4559 /* If the object is specially encoded or shared we have to make
4561 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4562 robj
*decoded
= getDecodedObject(o
);
4564 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4565 decrRefCount(decoded
);
4566 dbReplace(c
->db
,c
->argv
[1],o
);
4569 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4570 o
->ptr
= sdscatlen(o
->ptr
,
4571 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4573 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4574 (unsigned long) c
->argv
[2]->ptr
);
4576 totlen
= sdslen(o
->ptr
);
4579 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4582 static void substrCommand(redisClient
*c
) {
4584 long start
= atoi(c
->argv
[2]->ptr
);
4585 long end
= atoi(c
->argv
[3]->ptr
);
4586 size_t rangelen
, strlen
;
4589 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4590 checkType(c
,o
,REDIS_STRING
)) return;
4592 o
= getDecodedObject(o
);
4593 strlen
= sdslen(o
->ptr
);
4595 /* convert negative indexes */
4596 if (start
< 0) start
= strlen
+start
;
4597 if (end
< 0) end
= strlen
+end
;
4598 if (start
< 0) start
= 0;
4599 if (end
< 0) end
= 0;
4601 /* indexes sanity checks */
4602 if (start
> end
|| (size_t)start
>= strlen
) {
4603 /* Out of range start or start > end result in null reply */
4604 addReply(c
,shared
.nullbulk
);
4608 if ((size_t)end
>= strlen
) end
= strlen
-1;
4609 rangelen
= (end
-start
)+1;
4611 /* Return the result */
4612 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4613 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4614 addReplySds(c
,range
);
4615 addReply(c
,shared
.crlf
);
4619 /* ========================= Type agnostic commands ========================= */
4621 static void delCommand(redisClient
*c
) {
4624 for (j
= 1; j
< c
->argc
; j
++) {
4625 if (dbDelete(c
->db
,c
->argv
[j
])) {
4626 touchWatchedKey(c
->db
,c
->argv
[j
]);
4631 addReplyLongLong(c
,deleted
);
4634 static void existsCommand(redisClient
*c
) {
4635 expireIfNeeded(c
->db
,c
->argv
[1]);
4636 if (dbExists(c
->db
,c
->argv
[1])) {
4637 addReply(c
, shared
.cone
);
4639 addReply(c
, shared
.czero
);
4643 static void selectCommand(redisClient
*c
) {
4644 int id
= atoi(c
->argv
[1]->ptr
);
4646 if (selectDb(c
,id
) == REDIS_ERR
) {
4647 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4649 addReply(c
,shared
.ok
);
4653 static void randomkeyCommand(redisClient
*c
) {
4656 if ((key
= dbRandomKey(c
->db
)) == NULL
) {
4657 addReply(c
,shared
.nullbulk
);
4661 addReplyBulk(c
,key
);
4665 static void keysCommand(redisClient
*c
) {
4668 sds pattern
= c
->argv
[1]->ptr
;
4669 int plen
= sdslen(pattern
);
4670 unsigned long numkeys
= 0;
4671 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4673 di
= dictGetIterator(c
->db
->dict
);
4675 decrRefCount(lenobj
);
4676 while((de
= dictNext(di
)) != NULL
) {
4677 sds key
= dictGetEntryKey(de
);
4680 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4681 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4682 keyobj
= createStringObject(key
,sdslen(key
));
4683 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4684 addReplyBulk(c
,keyobj
);
4687 decrRefCount(keyobj
);
4690 dictReleaseIterator(di
);
4691 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4694 static void dbsizeCommand(redisClient
*c
) {
4696 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
/* LASTSAVE: format server.lastsave as an integer reply (":<n>\r\n").
 * NOTE(review): server.lastsave is assigned from time() elsewhere in this
 * file but is passed to "%lu" without a cast; if the field is a time_t,
 * cast it to unsigned long so that the argument matches the specifier. */
4699 static void lastsaveCommand(redisClient
*c
) {
4701 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4704 static void typeCommand(redisClient
*c
) {
4708 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4713 case REDIS_STRING
: type
= "+string"; break;
4714 case REDIS_LIST
: type
= "+list"; break;
4715 case REDIS_SET
: type
= "+set"; break;
4716 case REDIS_ZSET
: type
= "+zset"; break;
4717 case REDIS_HASH
: type
= "+hash"; break;
4718 default: type
= "+unknown"; break;
4721 addReplySds(c
,sdsnew(type
));
4722 addReply(c
,shared
.crlf
);
4725 static void saveCommand(redisClient
*c
) {
4726 if (server
.bgsavechildpid
!= -1) {
4727 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4730 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4731 addReply(c
,shared
.ok
);
4733 addReply(c
,shared
.err
);
4737 static void bgsaveCommand(redisClient
*c
) {
4738 if (server
.bgsavechildpid
!= -1) {
4739 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4742 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4743 char *status
= "+Background saving started\r\n";
4744 addReplySds(c
,sdsnew(status
));
4746 addReply(c
,shared
.err
);
4750 static void shutdownCommand(redisClient
*c
) {
4751 if (prepareForShutdown() == REDIS_OK
)
4753 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4756 static void renameGenericCommand(redisClient
*c
, int nx
) {
4759 /* To use the same key as src and dst is probably an error */
4760 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4761 addReply(c
,shared
.sameobjecterr
);
4765 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4769 deleteIfVolatile(c
->db
,c
->argv
[2]);
4770 if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) {
4773 addReply(c
,shared
.czero
);
4776 dbReplace(c
->db
,c
->argv
[2],o
);
4778 dbDelete(c
->db
,c
->argv
[1]);
4779 touchWatchedKey(c
->db
,c
->argv
[2]);
4781 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4784 static void renameCommand(redisClient
*c
) {
4785 renameGenericCommand(c
,0);
4788 static void renamenxCommand(redisClient
*c
) {
4789 renameGenericCommand(c
,1);
4792 static void moveCommand(redisClient
*c
) {
4797 /* Obtain source and target DB pointers */
4800 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4801 addReply(c
,shared
.outofrangeerr
);
4805 selectDb(c
,srcid
); /* Back to the source DB */
4807 /* If the user is moving using as target the same
4808 * DB as the source DB it is probably an error. */
4810 addReply(c
,shared
.sameobjecterr
);
4814 /* Check if the element exists and get a reference */
4815 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4817 addReply(c
,shared
.czero
);
4821 /* Try to add the element to the target DB */
4822 deleteIfVolatile(dst
,c
->argv
[1]);
4823 if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) {
4824 addReply(c
,shared
.czero
);
4829 /* OK! key moved, free the entry in the source DB */
4830 dbDelete(src
,c
->argv
[1]);
4832 addReply(c
,shared
.cone
);
4835 /* =================================== Lists ================================ */
4836 static void pushGenericCommand(redisClient
*c
, int where
) {
4840 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4842 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4843 addReply(c
,shared
.cone
);
4846 lobj
= createListObject();
4848 if (where
== REDIS_HEAD
) {
4849 listAddNodeHead(list
,c
->argv
[2]);
4851 listAddNodeTail(list
,c
->argv
[2]);
4853 incrRefCount(c
->argv
[2]);
4854 dbAdd(c
->db
,c
->argv
[1],lobj
);
4856 if (lobj
->type
!= REDIS_LIST
) {
4857 addReply(c
,shared
.wrongtypeerr
);
4860 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4861 addReply(c
,shared
.cone
);
4865 if (where
== REDIS_HEAD
) {
4866 listAddNodeHead(list
,c
->argv
[2]);
4868 listAddNodeTail(list
,c
->argv
[2]);
4870 incrRefCount(c
->argv
[2]);
4873 addReplyLongLong(c
,listLength(list
));
4876 static void lpushCommand(redisClient
*c
) {
4877 pushGenericCommand(c
,REDIS_HEAD
);
4880 static void rpushCommand(redisClient
*c
) {
4881 pushGenericCommand(c
,REDIS_TAIL
);
4884 static void llenCommand(redisClient
*c
) {
4888 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4889 checkType(c
,o
,REDIS_LIST
)) return;
4892 addReplyUlong(c
,listLength(l
));
4895 static void lindexCommand(redisClient
*c
) {
4897 int index
= atoi(c
->argv
[2]->ptr
);
4901 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4902 checkType(c
,o
,REDIS_LIST
)) return;
4905 ln
= listIndex(list
, index
);
4907 addReply(c
,shared
.nullbulk
);
4909 robj
*ele
= listNodeValue(ln
);
4910 addReplyBulk(c
,ele
);
4914 static void lsetCommand(redisClient
*c
) {
4916 int index
= atoi(c
->argv
[2]->ptr
);
4920 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4921 checkType(c
,o
,REDIS_LIST
)) return;
4924 ln
= listIndex(list
, index
);
4926 addReply(c
,shared
.outofrangeerr
);
4928 robj
*ele
= listNodeValue(ln
);
4931 listNodeValue(ln
) = c
->argv
[3];
4932 incrRefCount(c
->argv
[3]);
4933 addReply(c
,shared
.ok
);
4938 static void popGenericCommand(redisClient
*c
, int where
) {
4943 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4944 checkType(c
,o
,REDIS_LIST
)) return;
4947 if (where
== REDIS_HEAD
)
4948 ln
= listFirst(list
);
4950 ln
= listLast(list
);
4953 addReply(c
,shared
.nullbulk
);
4955 robj
*ele
= listNodeValue(ln
);
4956 addReplyBulk(c
,ele
);
4957 listDelNode(list
,ln
);
4958 if (listLength(list
) == 0) dbDelete(c
->db
,c
->argv
[1]);
4963 static void lpopCommand(redisClient
*c
) {
4964 popGenericCommand(c
,REDIS_HEAD
);
4967 static void rpopCommand(redisClient
*c
) {
4968 popGenericCommand(c
,REDIS_TAIL
);
4971 static void lrangeCommand(redisClient
*c
) {
4973 int start
= atoi(c
->argv
[2]->ptr
);
4974 int end
= atoi(c
->argv
[3]->ptr
);
4981 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4982 || checkType(c
,o
,REDIS_LIST
)) return;
4984 llen
= listLength(list
);
4986 /* convert negative indexes */
4987 if (start
< 0) start
= llen
+start
;
4988 if (end
< 0) end
= llen
+end
;
4989 if (start
< 0) start
= 0;
4990 if (end
< 0) end
= 0;
4992 /* indexes sanity checks */
4993 if (start
> end
|| start
>= llen
) {
4994 /* Out of range start or start > end result in empty list */
4995 addReply(c
,shared
.emptymultibulk
);
4998 if (end
>= llen
) end
= llen
-1;
4999 rangelen
= (end
-start
)+1;
5001 /* Return the result in form of a multi-bulk reply */
5002 ln
= listIndex(list
, start
);
5003 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
5004 for (j
= 0; j
< rangelen
; j
++) {
5005 ele
= listNodeValue(ln
);
5006 addReplyBulk(c
,ele
);
5011 static void ltrimCommand(redisClient
*c
) {
5013 int start
= atoi(c
->argv
[2]->ptr
);
5014 int end
= atoi(c
->argv
[3]->ptr
);
5016 int j
, ltrim
, rtrim
;
5020 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
5021 checkType(c
,o
,REDIS_LIST
)) return;
5023 llen
= listLength(list
);
5025 /* convert negative indexes */
5026 if (start
< 0) start
= llen
+start
;
5027 if (end
< 0) end
= llen
+end
;
5028 if (start
< 0) start
= 0;
5029 if (end
< 0) end
= 0;
5031 /* indexes sanity checks */
5032 if (start
> end
|| start
>= llen
) {
5033 /* Out of range start or start > end result in empty list */
5037 if (end
>= llen
) end
= llen
-1;
5042 /* Remove list elements to perform the trim */
5043 for (j
= 0; j
< ltrim
; j
++) {
5044 ln
= listFirst(list
);
5045 listDelNode(list
,ln
);
5047 for (j
= 0; j
< rtrim
; j
++) {
5048 ln
= listLast(list
);
5049 listDelNode(list
,ln
);
5051 if (listLength(list
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5053 addReply(c
,shared
.ok
);
5056 static void lremCommand(redisClient
*c
) {
5059 listNode
*ln
, *next
;
5060 int toremove
= atoi(c
->argv
[2]->ptr
);
5064 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5065 checkType(c
,o
,REDIS_LIST
)) return;
5069 toremove
= -toremove
;
5072 ln
= fromtail
? list
->tail
: list
->head
;
5074 robj
*ele
= listNodeValue(ln
);
5076 next
= fromtail
? ln
->prev
: ln
->next
;
5077 if (equalStringObjects(ele
,c
->argv
[3])) {
5078 listDelNode(list
,ln
);
5081 if (toremove
&& removed
== toremove
) break;
5085 if (listLength(list
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5086 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5089 /* This is the semantic of this command:
5090 * RPOPLPUSH srclist dstlist:
5091 * IF LLEN(srclist) > 0
5092 * element = RPOP srclist
5093 * LPUSH dstlist element
5100 * The idea is to be able to get an element from a list in a reliable way
5101 * since the element is not just returned but pushed against another list
5102 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5104 static void rpoplpushcommand(redisClient
*c
) {
5109 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5110 checkType(c
,sobj
,REDIS_LIST
)) return;
5111 srclist
= sobj
->ptr
;
5112 ln
= listLast(srclist
);
5115 addReply(c
,shared
.nullbulk
);
5117 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5118 robj
*ele
= listNodeValue(ln
);
5121 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
5122 addReply(c
,shared
.wrongtypeerr
);
5126 /* Add the element to the target list (unless it's directly
5127 * passed to some BLPOP-ing client */
5128 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
5130 /* Create the list if the key does not exist */
5131 dobj
= createListObject();
5132 dbAdd(c
->db
,c
->argv
[2],dobj
);
5134 dstlist
= dobj
->ptr
;
5135 listAddNodeHead(dstlist
,ele
);
5139 /* Send the element to the client as reply as well */
5140 addReplyBulk(c
,ele
);
5142 /* Finally remove the element from the source list */
5143 listDelNode(srclist
,ln
);
5144 if (listLength(srclist
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5149 /* ==================================== Sets ================================ */
5151 static void saddCommand(redisClient
*c
) {
5154 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5156 set
= createSetObject();
5157 dbAdd(c
->db
,c
->argv
[1],set
);
5159 if (set
->type
!= REDIS_SET
) {
5160 addReply(c
,shared
.wrongtypeerr
);
5164 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5165 incrRefCount(c
->argv
[2]);
5167 addReply(c
,shared
.cone
);
5169 addReply(c
,shared
.czero
);
5173 static void sremCommand(redisClient
*c
) {
5176 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5177 checkType(c
,set
,REDIS_SET
)) return;
5179 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5181 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5182 if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5183 addReply(c
,shared
.cone
);
5185 addReply(c
,shared
.czero
);
5189 static void smoveCommand(redisClient
*c
) {
5190 robj
*srcset
, *dstset
;
5192 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5193 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5195 /* If the source key does not exist return 0, if it's of the wrong type
5197 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5198 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5201 /* Error if the destination key is not a set as well */
5202 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5203 addReply(c
,shared
.wrongtypeerr
);
5206 /* Remove the element from the source set */
5207 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5208 /* Key not found in the src set! return zero */
5209 addReply(c
,shared
.czero
);
5212 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5213 dbDelete(c
->db
,c
->argv
[1]);
5215 /* Add the element to the destination set */
5217 dstset
= createSetObject();
5218 dbAdd(c
->db
,c
->argv
[2],dstset
);
5220 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5221 incrRefCount(c
->argv
[3]);
5222 addReply(c
,shared
.cone
);
5225 static void sismemberCommand(redisClient
*c
) {
5228 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5229 checkType(c
,set
,REDIS_SET
)) return;
5231 if (dictFind(set
->ptr
,c
->argv
[2]))
5232 addReply(c
,shared
.cone
);
5234 addReply(c
,shared
.czero
);
5237 static void scardCommand(redisClient
*c
) {
5241 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5242 checkType(c
,o
,REDIS_SET
)) return;
5245 addReplyUlong(c
,dictSize(s
));
5248 static void spopCommand(redisClient
*c
) {
5252 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5253 checkType(c
,set
,REDIS_SET
)) return;
5255 de
= dictGetRandomKey(set
->ptr
);
5257 addReply(c
,shared
.nullbulk
);
5259 robj
*ele
= dictGetEntryKey(de
);
5261 addReplyBulk(c
,ele
);
5262 dictDelete(set
->ptr
,ele
);
5263 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5264 if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5269 static void srandmemberCommand(redisClient
*c
) {
5273 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5274 checkType(c
,set
,REDIS_SET
)) return;
5276 de
= dictGetRandomKey(set
->ptr
);
5278 addReply(c
,shared
.nullbulk
);
5280 robj
*ele
= dictGetEntryKey(de
);
5282 addReplyBulk(c
,ele
);
5286 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5287 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5289 return dictSize(*d1
)-dictSize(*d2
);
/* Shared implementation of SINTER and SINTERSTORE.
 *
 * 'setskeys' holds 'setsnum' key names and 'dstkey' is the destination key
 * (sinterCommand() passes NULL, sinterstoreCommand() passes argv[1]).
 * The dicts of the source sets are collected in 'dv' and sorted by ascending
 * cardinality; every element of the smallest one is then tested against all
 * the others with dictFind(). Elements found everywhere are either sent as
 * bulk replies or added to 'dstset', which is stored at 'dstkey' when it is
 * not empty. A source object that is not a REDIS_SET produces
 * shared.wrongtypeerr. */
5292 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5293 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5296 robj
*lenobj
= NULL
, *dstset
= NULL
;
5297 unsigned long j
, cardinality
= 0;
5299 for (j
= 0; j
< setsnum
; j
++) {
5303 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5304 lookupKeyRead(c
->db
,setskeys
[j
]);
5308 if (dbDelete(c
->db
,dstkey
))
5310 addReply(c
,shared
.czero
);
5312 addReply(c
,shared
.emptymultibulk
);
5316 if (setobj
->type
!= REDIS_SET
) {
5318 addReply(c
,shared
.wrongtypeerr
);
5321 dv
[j
] = setobj
->ptr
;
5323 /* Sort sets from the smallest to largest, this will improve our
5324 * algorithm's performance */
5325 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5327 /* The first thing we should output is the total number of elements...
5328 * since this is a multi-bulk write, but at this stage we don't know
5329 * the intersection set size, so we use a trick, append an empty object
5330 * to the output list and save the pointer to later modify it with the
5333 lenobj
= createObject(REDIS_STRING
,NULL
);
5335 decrRefCount(lenobj
);
5337 /* If we have a target key where to store the resulting set
5338 * create this key with an empty set inside */
5339 dstset
= createSetObject();
5342 /* Iterate all the elements of the first (smallest) set, and test
5343 * the element against all the other sets, if at least one set does
5344 * not include the element it is discarded */
5345 di
= dictGetIterator(dv
[0]);
5347 while((de
= dictNext(di
)) != NULL
) {
5350 for (j
= 1; j
< setsnum
; j
++)
5351 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5353 continue; /* at least one set does not contain the member */
5354 ele
= dictGetEntryKey(de
);
5356 addReplyBulk(c
,ele
);
5359 dictAdd(dstset
->ptr
,ele
,NULL
);
5363 dictReleaseIterator(di
);
5366 /* Store the resulting set into the target, if the intersection
5367 * is not an empty set. */
5368 dbDelete(c
->db
,dstkey
);
5369 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5370 dbAdd(c
->db
,dstkey
,dstset
);
5371 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5373 decrRefCount(dstset
);
5374 addReply(c
,shared
.czero
);
5378 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5383 static void sinterCommand(redisClient
*c
) {
5384 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5387 static void sinterstoreCommand(redisClient
*c
) {
5388 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Operation selectors passed as the 'op' argument of
 * sunionDiffGenericCommand() (UNION, DIFF) and of
 * zunionInterGenericCommand() (UNION, INTER). */
5391 #define REDIS_OP_UNION 0
5392 #define REDIS_OP_DIFF 1
5393 #define REDIS_OP_INTER 2
/* Shared implementation of SUNION, SUNIONSTORE, SDIFF and SDIFFSTORE.
 *
 * 'setskeys' holds 'setsnum' key names, 'dstkey' is the destination key
 * (NULL for the non-STORE commands) and 'op' is REDIS_OP_UNION or
 * REDIS_OP_DIFF. Missing keys leave a NULL slot in 'dv' and are treated as
 * empty sets; a source object that is not a REDIS_SET produces
 * shared.wrongtypeerr.
 * The result is accumulated in the temporary 'dstset': for a union every
 * element of every set is added, for a difference the elements of the first
 * set are added and the elements of the following sets are deleted again.
 * Without a destination the content of 'dstset' is sent as a multi bulk
 * reply; otherwise it replaces 'dstkey' when it is not empty and the reply is
 * its size (shared.czero for an empty result). */
5395 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5396 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5399 robj
*dstset
= NULL
;
5400 int j
, cardinality
= 0;
5402 for (j
= 0; j
< setsnum
; j
++) {
5406 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5407 lookupKeyRead(c
->db
,setskeys
[j
]);
5412 if (setobj
->type
!= REDIS_SET
) {
5414 addReply(c
,shared
.wrongtypeerr
);
5417 dv
[j
] = setobj
->ptr
;
5420 /* We need a temp set object to store our union. If the dstkey
5421 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5422 * this set object will be the resulting object to set into the target key*/
5423 dstset
= createSetObject();
5425 /* Iterate all the elements of all the sets, add every element a single
5426 * time to the result set */
5427 for (j
= 0; j
< setsnum
; j
++) {
5428 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5429 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5431 di
= dictGetIterator(dv
[j
]);
5433 while((de
= dictNext(di
)) != NULL
) {
5436 /* dictAdd will not add the same element multiple times */
5437 ele
= dictGetEntryKey(de
);
5438 if (op
== REDIS_OP_UNION
|| j
== 0) {
5439 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5443 } else if (op
== REDIS_OP_DIFF
) {
5444 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5449 dictReleaseIterator(di
);
5451 /* result set is empty? Exit asap. */
5452 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5455 /* Output the content of the resulting set, if not in STORE mode */
5457 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5458 di
= dictGetIterator(dstset
->ptr
);
5459 while((de
= dictNext(di
)) != NULL
) {
5462 ele
= dictGetEntryKey(de
);
5463 addReplyBulk(c
,ele
);
5465 dictReleaseIterator(di
);
5466 decrRefCount(dstset
);
5468 /* If we have a target key where to store the resulting set
5469 * create this key with the result set inside */
5470 dbDelete(c
->db
,dstkey
);
5471 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5472 dbAdd(c
->db
,dstkey
,dstset
);
5473 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5475 decrRefCount(dstset
);
5476 addReply(c
,shared
.czero
);
5483 static void sunionCommand(redisClient
*c
) {
5484 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5487 static void sunionstoreCommand(redisClient
*c
) {
5488 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5491 static void sdiffCommand(redisClient
*c
) {
5492 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5495 static void sdiffstoreCommand(redisClient
*c
) {
5496 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5499 /* ==================================== ZSets =============================== */
5501 /* ZSETs are ordered sets using two data structures to hold the same elements
5502 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5505 * The elements are added to a hash table mapping Redis objects to scores.
5506 * At the same time the elements are added to a skip list mapping scores
5507 * to Redis objects (so objects are sorted by scores in this "view"). */
5509 /* This skiplist implementation is almost a C translation of the original
5510 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5511 * Alternative to Balanced Trees", modified in three ways:
5512 * a) this implementation allows for repeated values.
5513 * b) the comparison is not just by key (our 'score') but by satellite data.
5514 * c) there is a back pointer, so it's a doubly linked list with the back
5515 * pointers being only at "level 1". This allows to traverse the list
5516 * from tail to head, useful for ZREVRANGE. */
/* Allocate a skiplist node for 'obj' with the given 'score'.
 * The 'forward' array gets room for 'level' pointers and the 'span' array is
 * sized for level-1 unsigned ints. */
5518 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5519 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5521 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5523 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
/* Allocate an empty skiplist.
 * The header is a node with ZSKIPLIST_MAXLEVEL levels, score 0 and no
 * object; all its forward pointers are set to NULL, its spans are zeroed and
 * its backward pointer is NULL. */
5531 static zskiplist
*zslCreate(void) {
5535 zsl
= zmalloc(sizeof(*zsl
));
5538 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5539 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5540 zsl
->header
->forward
[j
] = NULL
;
5542 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5543 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5544 zsl
->header
->span
[j
] = 0;
5546 zsl
->header
->backward
= NULL
;
/* Release a skiplist node: the reference held on node->obj is dropped with
 * decrRefCount() and the 'forward' array is freed. */
5551 static void zslFreeNode(zskiplistNode
*node
) {
5552 decrRefCount(node
->obj
);
5553 zfree(node
->forward
);
/* Free a whole skiplist. The 'forward' and 'span' arrays of the header are
 * released here, and the remaining nodes are walked through their level-0
 * forward pointers starting from the first node after the header. */
5558 static void zslFree(zskiplist
*zsl
) {
5559 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5561 zfree(zsl
->header
->forward
);
5562 zfree(zsl
->header
->span
);
5565 next
= node
->forward
[0];
/* Pick the level of a new skiplist node.
 * The loop keeps running while the low 16 bits of random() are below
 * ZSKIPLIST_P * 0xFFFF; the returned level is capped at
 * ZSKIPLIST_MAXLEVEL. */
5572 static int zslRandomLevel(void) {
5574 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5576 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
/* Insert 'obj' with the given 'score' into the skiplist.
 *
 * Nodes are ordered by score and, for equal scores, by
 * compareStringObjects(). While searching, update[i] is used as the node at
 * level i after which the new node is linked, and rank[i] accumulates the
 * rank crossed to reach it; both are needed to splice the node in and to
 * recompute the spans. When the random level of the new node exceeds the
 * current list level, the extra levels start from the header with a span
 * equal to the list length. Finally the backward pointer of the new node and
 * of its level-0 successor are fixed. */
5579 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5580 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5581 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5585 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5586 /* store rank that is crossed to reach the insert position */
5587 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5589 while (x
->forward
[i
] &&
5590 (x
->forward
[i
]->score
< score
||
5591 (x
->forward
[i
]->score
== score
&&
5592 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5593 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5598 /* we assume the key is not already inside, since we allow duplicated
5599 * scores, and the re-insertion of score and redis object should never
5600 * happen since the caller of zslInsert() should test in the hash table
5601 * if the element is already inside or not. */
5602 level
= zslRandomLevel();
5603 if (level
> zsl
->level
) {
5604 for (i
= zsl
->level
; i
< level
; i
++) {
5606 update
[i
] = zsl
->header
;
5607 update
[i
]->span
[i
-1] = zsl
->length
;
5611 x
= zslCreateNode(level
,score
,obj
);
5612 for (i
= 0; i
< level
; i
++) {
5613 x
->forward
[i
] = update
[i
]->forward
[i
];
5614 update
[i
]->forward
[i
] = x
;
5616 /* update span covered by update[i] as x is inserted here */
5618 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5619 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5623 /* increment span for untouched levels */
5624 for (i
= level
; i
< zsl
->level
; i
++) {
5625 update
[i
]->span
[i
-1]++;
5628 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5630 x
->forward
[0]->backward
= x
;
/* Unlink node 'x' from the skiplist without freeing it.
 * 'update' is the per-level vector of predecessor nodes computed by the
 * caller. On the levels where update[i] points to x, the forward pointer is
 * redirected past x and the spans are merged; on the other levels the span
 * of update[i] is decremented. The backward pointer of the successor (or
 * zsl->tail) is then fixed, and the final loop runs while the topmost level
 * of the header has no successor. */
5636 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5637 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5639 for (i
= 0; i
< zsl
->level
; i
++) {
5640 if (update
[i
]->forward
[i
] == x
) {
5642 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5644 update
[i
]->forward
[i
] = x
->forward
[i
];
5646 /* invariant: i > 0, because update[0]->forward[0]
5647 * is always equal to x */
5648 update
[i
]->span
[i
-1] -= 1;
5651 if (x
->forward
[0]) {
5652 x
->forward
[0]->backward
= x
->backward
;
5654 zsl
->tail
= x
->backward
;
5656 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
/* Search uses the same ordering as zslInsert(): by score first, then by
 * compareStringObjects() on equal scores. The candidate node is removed with
 * zslDeleteNode() only when both its score and its object (checked with
 * equalStringObjects()) match. Returns 0 when no matching node exists. */
5661 /* Delete an element with matching score/object from the skiplist. */
5662 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5663 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5667 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5668 while (x
->forward
[i
] &&
5669 (x
->forward
[i
]->score
< score
||
5670 (x
->forward
[i
]->score
== score
&&
5671 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5675 /* We may have multiple elements with the same score, what we need
5676 * is to find the element with both the right score and object. */
5678 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5679 zslDeleteNode(zsl
, x
, update
);
5683 return 0; /* not found */
5685 return 0; /* not found */
5688 /* Delete all the elements with score between min and max from the skiplist.
5689 * Min and max are inclusive, so a node with min <= score <= max is deleted.
5690 * Note that this function takes the reference to the hash table view of the
5691 * sorted set, in order to remove the elements from the hash table too.
 * Returns the value of the 'removed' counter. */
5692 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5693 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5694 unsigned long removed
= 0;
5698 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5699 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5703 /* We may have multiple elements with the same score, what we need
5704 * is to find the element with both the right score and object. */
5706 while (x
&& x
->score
<= max
) {
5707 zskiplistNode
*next
= x
->forward
[0];
5708 zslDeleteNode(zsl
, x
, update
);
5709 dictDelete(dict
,x
->obj
);
5714 return removed
; /* number of removed nodes */
5717 /* Delete all the elements with rank between start and end from the skiplist.
5718 * Start and end are inclusive. Note that start and end need to be 1-based.
 * The search phase sums the spans crossed in 'traversed' until the next step
 * would reach 'start'; the deletion loop then unlinks nodes with
 * zslDeleteNode() and removes their objects from 'dict' while 'traversed'
 * does not exceed 'end'. */
5719 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5720 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5721 unsigned long traversed
= 0, removed
= 0;
5725 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5726 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5727 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5735 while (x
&& traversed
<= end
) {
5736 zskiplistNode
*next
= x
->forward
[0];
5737 zslDeleteNode(zsl
, x
, update
);
5738 dictDelete(dict
,x
->obj
);
5747 /* Find the first node having a score equal or greater than the specified one.
5748 * Returns NULL if there is no match. */
5749 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5754 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5755 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5758 /* The search stops on the last node whose score is below 'score', so
5759 * its level-0 successor is the first node with score >= 'score' (or NULL). */
5760 return x
->forward
[0];
/* zslGetRank(): the search walks the levels from the top, adding the span of
 * every link it crosses to 'rank'. It moves forward while the next node has a
 * lower score, or the same score and an object that compares <= 'o' with
 * compareStringObjects(). The node reached is accepted only when it has an
 * object equal to 'o' according to equalStringObjects(). */
5763 /* Find the rank for an element by both score and key.
5764 * Returns 0 when the element cannot be found, rank otherwise.
5765 * Note that the rank is 1-based due to the span of zsl->header to the
5767 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5769 unsigned long rank
= 0;
5773 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5774 while (x
->forward
[i
] &&
5775 (x
->forward
[i
]->score
< score
||
5776 (x
->forward
[i
]->score
== score
&&
5777 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5778 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5782 /* x might be equal to zsl->header, so test if obj is non-NULL */
5783 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
5790 /* Finds an element by its rank. The rank argument needs to be 1-based.
 * The walk goes from the top level down, following a link only when the
 * spans summed in 'traversed' would not exceed 'rank'; the search succeeds
 * when 'traversed' equals 'rank'. */
5791 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5793 unsigned long traversed
= 0;
5797 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5798 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5800 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5803 if (traversed
== rank
) {
5810 /* The actual Z-commands implementations */
5812 /* This generic command implements both ZADD and ZINCRBY.
5813 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5814 * the increment if the operation is a ZINCRBY (doincrement == 1).
 *
 * 'key' names the sorted set (created and added to the db when the lookup
 * returns NULL; an object of another type produces shared.wrongtypeerr) and
 * 'ele' is the member. A NaN 'scoreval', or a NaN resulting score, is
 * answered with an -ERR reply. The score is heap allocated because the dict
 * stores a pointer to it. New members are inserted in both the dict and the
 * skiplist, taking one reference on 'ele' for each; existing members are
 * re-inserted in the skiplist only when the score actually changed. The
 * replies visible below are the new score (addReplyDouble) or
 * shared.cone / shared.czero. */
5815 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5820 if (isnan(scoreval
)) {
5821 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
5825 zsetobj
= lookupKeyWrite(c
->db
,key
);
5826 if (zsetobj
== NULL
) {
5827 zsetobj
= createZsetObject();
5828 dbAdd(c
->db
,key
,zsetobj
);
5830 if (zsetobj
->type
!= REDIS_ZSET
) {
5831 addReply(c
,shared
.wrongtypeerr
);
5837 /* Ok now since we implement both ZADD and ZINCRBY here the code
5838 * needs to handle the two different conditions. It's all about setting
5839 * '*score', that is, the new score to set, to the right value. */
5840 score
= zmalloc(sizeof(double));
5844 /* Read the old score. If the element was not present starts from 0 */
5845 de
= dictFind(zs
->dict
,ele
);
5847 double *oldscore
= dictGetEntryVal(de
);
5848 *score
= *oldscore
+ scoreval
;
5852 if (isnan(*score
)) {
5854 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
5856 /* Note that we don't need to check if the zset may be empty and
5857 * should be removed here, as we can only obtain Nan as score if
5858 * there was already an element in the sorted set. */
5865 /* What follows is a simple remove and re-insert operation that is common
5866 * to both ZADD and ZINCRBY... */
5867 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5868 /* case 1: New element */
5869 incrRefCount(ele
); /* added to hash */
5870 zslInsert(zs
->zsl
,*score
,ele
);
5871 incrRefCount(ele
); /* added to skiplist */
5874 addReplyDouble(c
,*score
);
5876 addReply(c
,shared
.cone
);
5881 /* case 2: Score update operation */
5882 de
= dictFind(zs
->dict
,ele
);
5883 redisAssert(de
!= NULL
);
5884 oldscore
= dictGetEntryVal(de
);
5885 if (*score
!= *oldscore
) {
5888 /* Remove and insert the element in the skip list with new score */
5889 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5890 redisAssert(deleted
!= 0);
5891 zslInsert(zs
->zsl
,*score
,ele
);
5893 /* Update the score in the hash table */
5894 dictReplace(zs
->dict
,ele
,score
);
5900 addReplyDouble(c
,*score
);
5902 addReply(c
,shared
.czero
);
5906 static void zaddCommand(redisClient
*c
) {
5909 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5910 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5913 static void zincrbyCommand(redisClient
*c
) {
5916 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5917 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* ZREM: remove the member in argv[2] from the sorted set stored at argv[1].
 * The key is looked up for writing with shared.czero as the reply object and
 * must pass the REDIS_ZSET type check. The member is located in the dict,
 * deleted from the skiplist using its stored score (the deletion is asserted
 * to succeed) and then from the dict. The dict is resized when
 * htNeedsResize() asks for it and the key is removed from the db once the
 * sorted set is empty. Replies used: shared.czero and shared.cone. */
5920 static void zremCommand(redisClient
*c
) {
5927 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5928 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5931 de
= dictFind(zs
->dict
,c
->argv
[2]);
5933 addReply(c
,shared
.czero
);
5936 /* Delete from the skiplist */
5937 oldscore
= dictGetEntryVal(de
);
5938 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5939 redisAssert(deleted
!= 0);
5941 /* Delete from the hash table */
5942 dictDelete(zs
->dict
,c
->argv
[2]);
5943 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5944 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5946 addReply(c
,shared
.cone
);
/* ZREMRANGEBYSCORE: remove the members of the sorted set at argv[1] whose
 * score lies between the doubles parsed from argv[2] (min) and argv[3]
 * (max), using zslDeleteRangeByScore(). The dict is resized when
 * htNeedsResize() asks for it, the key is removed from the db once the
 * sorted set is empty, server.dirty grows by the number of deleted members
 * and that number is the reply. */
5949 static void zremrangebyscoreCommand(redisClient
*c
) {
5956 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5957 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5959 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5960 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5963 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5964 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5965 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5966 server
.dirty
+= deleted
;
5967 addReplyLongLong(c
,deleted
);
/* ZREMRANGEBYRANK: remove the members of the sorted set at argv[1] whose
 * 0-based rank lies between argv[2] (start) and argv[3] (end), inclusive.
 * Negative indexes are offsets from the end of the sorted set and are clamped
 * to 0; 'end' is clamped to the last element. An empty range (start > end or
 * start past the end) replies shared.czero. Otherwise the range is shifted to
 * the 1-based ranks expected by zslDeleteRangeByRank(), the dict is resized
 * when needed, the key is removed from the db once the sorted set is empty,
 * server.dirty grows by the number of deleted members and that number is the
 * reply. */
5970 static void zremrangebyrankCommand(redisClient
*c
) {
5978 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5979 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5981 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5982 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5984 llen
= zs
->zsl
->length
;
5986 /* convert negative indexes */
5987 if (start
< 0) start
= llen
+start
;
5988 if (end
< 0) end
= llen
+end
;
5989 if (start
< 0) start
= 0;
5990 if (end
< 0) end
= 0;
5992 /* indexes sanity checks */
5993 if (start
> end
|| start
>= llen
) {
5994 addReply(c
,shared
.czero
);
5997 if (end
>= llen
) end
= llen
-1;
5999 /* increment start and end because zsl*Rank functions
6000 * use 1-based rank */
6001 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
6002 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6003 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6004 server
.dirty
+= deleted
;
6005 addReplyLongLong(c
, deleted
);
6013 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
6014 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
6015 unsigned long size1
, size2
;
6016 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
6017 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
6018 return size1
- size2
;
/* Aggregation modes understood by zunionInterAggregate().
 * zunionInterDictValue() reads the double stored as the value of a dict
 * entry, using 1.0 when the entry has a NULL value. */
6021 #define REDIS_AGGR_SUM 1
6022 #define REDIS_AGGR_MIN 2
6023 #define REDIS_AGGR_MAX 3
6024 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
6026 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
6027 if (aggregate
== REDIS_AGGR_SUM
) {
6028 *target
= *target
+ val
;
6029 } else if (aggregate
== REDIS_AGGR_MIN
) {
6030 *target
= val
< *target
? val
: *target
;
6031 } else if (aggregate
== REDIS_AGGR_MAX
) {
6032 *target
= val
> *target
? val
: *target
;
6035 redisPanic("Unknown ZUNION/INTER aggregate type");
/* Shared implementation of ZUNIONSTORE and ZINTERSTORE.
 *
 * 'dstkey' is the key receiving the result and 'op' is REDIS_OP_UNION or
 * REDIS_OP_INTER (any other value trips the redisAssert() below).
 * argv[2] is the number of input keys, which start at argv[3]; asking for
 * more keys than there are arguments is a syntax error. Both sorted sets and
 * plain sets are accepted as input, every other type produces
 * shared.wrongtypeerr. Each source starts with a weight of 1.0; the optional
 * WEIGHTS and AGGREGATE (sum, min, max) arguments follow the keys.
 * The sources are sorted by ascending cardinality. For an intersection the
 * smallest source is iterated and each member is looked up in all the others;
 * for a union every source is iterated, skipping members already present in
 * the destination. Scores are the weighted values combined with
 * zunionInterAggregate().
 * The result replaces 'dstkey' when it is not empty and its length is the
 * reply; otherwise the reply is shared.czero. */
6039 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
6041 int aggregate
= REDIS_AGGR_SUM
;
6048 /* expect setnum input keys to be given */
6049 setnum
= atoi(c
->argv
[2]->ptr
);
6051 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
6055 /* test if the expected number of keys would overflow */
6056 if (3+setnum
> c
->argc
) {
6057 addReply(c
,shared
.syntaxerr
);
6061 /* read keys to be used for input */
6062 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6063 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6064 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6068 if (obj
->type
== REDIS_ZSET
) {
6069 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6070 } else if (obj
->type
== REDIS_SET
) {
6071 src
[i
].dict
= (obj
->ptr
);
6074 addReply(c
,shared
.wrongtypeerr
);
6079 /* default all weights to 1 */
6080 src
[i
].weight
= 1.0;
6083 /* parse optional extra arguments */
6085 int remaining
= c
->argc
- j
;
6088 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6090 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6091 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6094 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6096 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6097 aggregate
= REDIS_AGGR_SUM
;
6098 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6099 aggregate
= REDIS_AGGR_MIN
;
6100 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6101 aggregate
= REDIS_AGGR_MAX
;
6104 addReply(c
,shared
.syntaxerr
);
6110 addReply(c
,shared
.syntaxerr
);
6116 /* sort sets from the smallest to largest, this will improve our
6117 * algorithm's performance */
6118 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6120 dstobj
= createZsetObject();
6121 dstzset
= dstobj
->ptr
;
6123 if (op
== REDIS_OP_INTER
) {
6124 /* skip going over all entries if the smallest zset is NULL or empty */
6125 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6126 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6127 * from small to large, all src[i > 0].dict are non-empty too */
6128 di
= dictGetIterator(src
[0].dict
);
6129 while((de
= dictNext(di
)) != NULL
) {
6130 double *score
= zmalloc(sizeof(double)), value
;
6131 *score
= src
[0].weight
* zunionInterDictValue(de
);
6133 for (j
= 1; j
< setnum
; j
++) {
6134 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6136 value
= src
[j
].weight
* zunionInterDictValue(other
);
6137 zunionInterAggregate(score
, value
, aggregate
);
6143 /* skip entry when not present in every source dict */
6147 robj
*o
= dictGetEntryKey(de
);
6148 dictAdd(dstzset
->dict
,o
,score
);
6149 incrRefCount(o
); /* added to dictionary */
6150 zslInsert(dstzset
->zsl
,*score
,o
);
6151 incrRefCount(o
); /* added to skiplist */
6154 dictReleaseIterator(di
);
6156 } else if (op
== REDIS_OP_UNION
) {
6157 for (i
= 0; i
< setnum
; i
++) {
6158 if (!src
[i
].dict
) continue;
6160 di
= dictGetIterator(src
[i
].dict
);
6161 while((de
= dictNext(di
)) != NULL
) {
6162 /* skip key when already processed */
6163 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6165 double *score
= zmalloc(sizeof(double)), value
;
6166 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6168 /* because the zsets are sorted by size, its only possible
6169 * for sets at larger indices to hold this entry */
6170 for (j
= (i
+1); j
< setnum
; j
++) {
6171 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6173 value
= src
[j
].weight
* zunionInterDictValue(other
);
6174 zunionInterAggregate(score
, value
, aggregate
);
6178 robj
*o
= dictGetEntryKey(de
);
6179 dictAdd(dstzset
->dict
,o
,score
);
6180 incrRefCount(o
); /* added to dictionary */
6181 zslInsert(dstzset
->zsl
,*score
,o
);
6182 incrRefCount(o
); /* added to skiplist */
6184 dictReleaseIterator(di
);
6187 /* unknown operator */
6188 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6191 dbDelete(c
->db
,dstkey
);
6192 if (dstzset
->zsl
->length
) {
6193 dbAdd(c
->db
,dstkey
,dstobj
);
6194 addReplyLongLong(c
, dstzset
->zsl
->length
);
6197 decrRefCount(dstobj
);
6198 addReply(c
, shared
.czero
);
6203 static void zunionstoreCommand(redisClient
*c
) {
6204 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6207 static void zinterstoreCommand(redisClient
*c
) {
6208 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
/* Shared implementation of ZRANGE and ZREVRANGE.
 *
 * 'reverse' selects the direction: when it is non-zero the walk follows the
 * backward pointers, otherwise the level-0 forward pointers.
 * argv[2] and argv[3] are the start and end indexes; negative values are
 * offsets from the end and are clamped to 0, 'end' is clamped to the last
 * element, and an empty range replies shared.emptymultibulk. A fifth
 * argument must be WITHSCORES, anything else is a syntax error.
 * The starting node is taken directly from the tail or the head when
 * 'start' is 0 and located with zslGetElementByRank() otherwise. The reply
 * is a multi bulk of 'rangelen' members, doubled when scores are
 * requested. */
6211 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6223 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6224 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6226 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6228 } else if (c
->argc
>= 5) {
6229 addReply(c
,shared
.syntaxerr
);
6233 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6234 || checkType(c
,o
,REDIS_ZSET
)) return;
6239 /* convert negative indexes */
6240 if (start
< 0) start
= llen
+start
;
6241 if (end
< 0) end
= llen
+end
;
6242 if (start
< 0) start
= 0;
6243 if (end
< 0) end
= 0;
6245 /* indexes sanity checks */
6246 if (start
> end
|| start
>= llen
) {
6247 /* Out of range start or start > end result in empty list */
6248 addReply(c
,shared
.emptymultibulk
);
6251 if (end
>= llen
) end
= llen
-1;
6252 rangelen
= (end
-start
)+1;
6254 /* check if starting point is trivial, before searching
6255 * the element in log(N) time */
6257 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6260 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6263 /* Return the result in form of a multi-bulk reply */
6264 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6265 withscores
? (rangelen
*2) : rangelen
));
6266 for (j
= 0; j
< rangelen
; j
++) {
6268 addReplyBulk(c
,ele
);
6270 addReplyDouble(c
,ln
->score
);
6271 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6275 static void zrangeCommand(redisClient
*c
) {
6276 zrangeGenericCommand(c
,0);
6279 static void zrevrangeCommand(redisClient
*c
) {
6280 zrangeGenericCommand(c
,1);
6283 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6284 * If justcount is non-zero, just the count is returned.
 *
 * argv[2] and argv[3] are the min and max scores, parsed with strtod(); a
 * leading "(" makes the corresponding bound exclusive. ZRANGEBYSCORE also
 * accepts a trailing WITHSCORES and a "LIMIT offset count" triple starting at
 * argv[4]; a negative offset is treated as 0. A missing key replies
 * shared.czero (count mode) or shared.emptymultibulk, and an object that is
 * not a sorted set replies shared.wrongtypeerr. The walk starts at the node
 * returned by zslFirstWithScore() and follows the level-0 forward pointers
 * while the score stays within the max bound. */
6285 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6288 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6289 int offset
= 0, limit
= -1;
6293 /* Parse the min-max interval. If one of the values is prefixed
6294 * by the "(" character, it's considered "open". For instance
6295 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6296 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6297 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6298 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6301 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6303 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6304 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6307 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6310 /* Parse "WITHSCORES": note that if the command was called with
6311 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6312 * enter the following paths to parse WITHSCORES and LIMIT. */
6313 if (c
->argc
== 5 || c
->argc
== 8) {
6314 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6319 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6323 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6328 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6329 addReply(c
,shared
.syntaxerr
);
6331 } else if (c
->argc
== (7 + withscores
)) {
6332 offset
= atoi(c
->argv
[5]->ptr
);
6333 limit
= atoi(c
->argv
[6]->ptr
);
6334 if (offset
< 0) offset
= 0;
6337 /* Ok, lookup the key and get the range */
6338 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6340 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6342 if (o
->type
!= REDIS_ZSET
) {
6343 addReply(c
,shared
.wrongtypeerr
);
6345 zset
*zsetobj
= o
->ptr
;
6346 zskiplist
*zsl
= zsetobj
->zsl
;
6348 robj
*ele
, *lenobj
= NULL
;
6349 unsigned long rangelen
= 0;
6351 /* Get the first node with the score >= min, or with
6352 * score > min if 'minex' is true. */
6353 ln
= zslFirstWithScore(zsl
,min
);
6354 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6357 /* No element matching the specified interval */
6358 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6362 /* We don't know in advance how many matching elements there
6363 * are in the list, so we push this object that will represent
6364 * the multi-bulk length in the output buffer, and will "fix"
6367 lenobj
= createObject(REDIS_STRING
,NULL
);
6369 decrRefCount(lenobj
);
6372 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6375 ln
= ln
->forward
[0];
6378 if (limit
== 0) break;
6381 addReplyBulk(c
,ele
);
6383 addReplyDouble(c
,ln
->score
);
6385 ln
= ln
->forward
[0];
6387 if (limit
> 0) limit
--;
6390 addReplyLongLong(c
,(long)rangelen
);
6392 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6393 withscores
? (rangelen
*2) : rangelen
);
6399 static void zrangebyscoreCommand(redisClient
*c
) {
6400 genericZrangebyscoreCommand(c
,0);
6403 static void zcountCommand(redisClient
*c
) {
6404 genericZrangebyscoreCommand(c
,1);
/* ZCARD: reply with the length of the skiplist of the sorted set stored at
 * argv[1]. shared.czero is handed to lookupKeyReadOrReply(); a NULL lookup
 * result or a failed REDIS_ZSET type check ends the command early. */
6407 static void zcardCommand(redisClient
*c
) {
6411 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6412 checkType(c
,o
,REDIS_ZSET
)) return;
6415 addReplyUlong(c
,zs
->zsl
->length
);
/* ZSCORE: look the member in argv[2] up in the dict of the sorted set stored
 * at argv[1] and reply with its score as a double. shared.nullbulk is handed
 * to the key lookup and is also the reply sent after the dict lookup
 * (presumably when the member is missing -- TODO confirm). The object must
 * pass the REDIS_ZSET type check. */
6418 static void zscoreCommand(redisClient
*c
) {
6423 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6424 checkType(c
,o
,REDIS_ZSET
)) return;
6427 de
= dictFind(zs
->dict
,c
->argv
[2]);
6429 addReply(c
,shared
.nullbulk
);
6431 double *score
= dictGetEntryVal(de
);
6433 addReplyDouble(c
,*score
);
/* Shared implementation of ZRANK and ZREVRANK ('reverse' is 0 for ZRANK and
 * 1 for ZREVRANK).
 * The member in argv[2] is looked up in the dict of the sorted set stored at
 * argv[1] to get its score, then zslGetRank() gives its 1-based rank. The
 * reply is either rank-1 or zsl->length - rank (presumably the latter for the
 * reverse case -- TODO confirm against the selecting condition).
 * shared.nullbulk is handed to the key lookup and is the reply for the
 * not-found paths. */
6437 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6445 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6446 checkType(c
,o
,REDIS_ZSET
)) return;
6450 de
= dictFind(zs
->dict
,c
->argv
[2]);
6452 addReply(c
,shared
.nullbulk
);
6456 score
= dictGetEntryVal(de
);
6457 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6460 addReplyLongLong(c
, zsl
->length
- rank
);
6462 addReplyLongLong(c
, rank
-1);
6465 addReply(c
,shared
.nullbulk
);
6469 static void zrankCommand(redisClient
*c
) {
6470 zrankGenericCommand(c
, 0);
6473 static void zrevrankCommand(redisClient
*c
) {
6474 zrankGenericCommand(c
, 1);
6477 /* ========================= Hashes utility functions ======================= */
6478 #define REDIS_HASH_KEY 1
6479 #define REDIS_HASH_VALUE 2
6481 /* Check the length of a number of objects to see if we need to convert a
6482 * zipmap to a real hash. Note that we only check string encoded objects
6483 * as their string length can be queried in constant time.
 *
 * 'subject' is the hash object; nothing is done unless it is zipmap encoded.
 * 'start' and 'end' are inclusive indexes into 'argv'. The conversion is
 * triggered by a raw encoded argument longer than
 * server.hash_max_zipmap_value. */
6484 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6486 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6488 for (i
= start
; i
<= end
; i
++) {
6489 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6490 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6492 convertToRealHash(subject
);
6498 /* Encode given objects in-place when the hash uses a dict. */
6499 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6500 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6501 if (o1
) *o1
= tryObjectEncoding(*o1
);
6502 if (o2
) *o2
= tryObjectEncoding(*o2
);
6506 /* Get the value from a hash identified by key. Returns either a string
6507 * object or NULL if the value cannot be found. The refcount of the object
6508 * is always increased by 1 when the value was found.
 *
 * For a zipmap encoded hash the key is decoded with getDecodedObject() and
 * the value found by zipmapGet() is returned as a new string object. For the
 * other encoding the value is taken from the dict entry and its refcount is
 * incremented. */
6509 static robj
*hashGet(robj
*o
, robj
*key
) {
6511 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6514 key
= getDecodedObject(key
);
6515 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6516 value
= createStringObject((char*)v
,vlen
);
6520 dictEntry
*de
= dictFind(o
->ptr
,key
);
6522 value
= dictGetEntryVal(de
);
6523 incrRefCount(value
);
6529 /* Test if the key exists in the given hash. Returns 1 if the key
6530 * exists and 0 when it doesn't. */
6531 static int hashExists(robj
*o
, robj
*key
) {
6532 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6533 key
= getDecodedObject(key
);
6534 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6540 if (dictFind(o
->ptr
,key
) != NULL
) {
6547 /* Add an element, discard the old if the key already exists.
6548 * Return 0 on insert and 1 on update. */
6549 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6551 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6552 key
= getDecodedObject(key
);
6553 value
= getDecodedObject(value
);
6554 o
->ptr
= zipmapSet(o
->ptr
,
6555 key
->ptr
,sdslen(key
->ptr
),
6556 value
->ptr
,sdslen(value
->ptr
), &update
);
6558 decrRefCount(value
);
6560 /* Check if the zipmap needs to be upgraded to a real hash table */
6561 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6562 convertToRealHash(o
);
6564 if (dictReplace(o
->ptr
,key
,value
)) {
6571 incrRefCount(value
);
6576 /* Delete an element from a hash.
6577 * Return 1 on deleted and 0 on not found. */
6578 static int hashDelete(robj
*o
, robj
*key
) {
6580 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6581 key
= getDecodedObject(key
);
6582 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6585 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6586 /* Always check if the dictionary needs a resize after a delete. */
6587 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6592 /* Return the number of elements in a hash. */
6593 static unsigned long hashLength(robj
*o
) {
6594 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6595 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6598 /* Structure to hold hash iteration abstration. Note that iteration over
6599 * hashes involves both fields and values. Because it is possible that
6600 * not both are required, store pointers in the iterator to avoid
6601 * unnecessary memory allocation for fields/values. */
6605 unsigned char *zk
, *zv
;
6606 unsigned int zklen
, zvlen
;
6612 static hashIterator
*hashInitIterator(robj
*subject
) {
6613 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6614 hi
->encoding
= subject
->encoding
;
6615 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6616 hi
->zi
= zipmapRewind(subject
->ptr
);
6617 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6618 hi
->di
= dictGetIterator(subject
->ptr
);
6625 static void hashReleaseIterator(hashIterator
*hi
) {
6626 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6627 dictReleaseIterator(hi
->di
);
6632 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6633 * could be found and REDIS_ERR when the iterator reaches the end. */
6634 static int hashNext(hashIterator
*hi
) {
6635 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6636 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6637 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6639 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6644 /* Get key or value object at current iteration position.
6645 * This increases the refcount of the field object by 1. */
6646 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6648 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6649 if (what
& REDIS_HASH_KEY
) {
6650 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6652 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6655 if (what
& REDIS_HASH_KEY
) {
6656 o
= dictGetEntryKey(hi
->de
);
6658 o
= dictGetEntryVal(hi
->de
);
6665 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6666 robj
*o
= lookupKeyWrite(c
->db
,key
);
6668 o
= createHashObject();
6671 if (o
->type
!= REDIS_HASH
) {
6672 addReply(c
,shared
.wrongtypeerr
);
6679 /* ============================= Hash commands ============================== */
6680 static void hsetCommand(redisClient
*c
) {
6684 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6685 hashTryConversion(o
,c
->argv
,2,3);
6686 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6687 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6688 addReply(c
, update
? shared
.czero
: shared
.cone
);
6692 static void hsetnxCommand(redisClient
*c
) {
6694 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6695 hashTryConversion(o
,c
->argv
,2,3);
6697 if (hashExists(o
, c
->argv
[2])) {
6698 addReply(c
, shared
.czero
);
6700 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6701 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6702 addReply(c
, shared
.cone
);
6707 static void hmsetCommand(redisClient
*c
) {
6711 if ((c
->argc
% 2) == 1) {
6712 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6716 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6717 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6718 for (i
= 2; i
< c
->argc
; i
+= 2) {
6719 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6720 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6722 addReply(c
, shared
.ok
);
6726 static void hincrbyCommand(redisClient
*c
) {
6727 long long value
, incr
;
6728 robj
*o
, *current
, *new;
6730 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6731 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6732 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6733 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6734 "hash value is not an integer") != REDIS_OK
) {
6735 decrRefCount(current
);
6738 decrRefCount(current
);
6744 new = createStringObjectFromLongLong(value
);
6745 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6746 hashSet(o
,c
->argv
[2],new);
6748 addReplyLongLong(c
,value
);
6752 static void hgetCommand(redisClient
*c
) {
6754 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6755 checkType(c
,o
,REDIS_HASH
)) return;
6757 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6758 addReplyBulk(c
,value
);
6759 decrRefCount(value
);
6761 addReply(c
,shared
.nullbulk
);
6765 static void hmgetCommand(redisClient
*c
) {
6768 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6769 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6770 addReply(c
,shared
.wrongtypeerr
);
6773 /* Note the check for o != NULL happens inside the loop. This is
6774 * done because objects that cannot be found are considered to be
6775 * an empty hash. The reply should then be a series of NULLs. */
6776 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6777 for (i
= 2; i
< c
->argc
; i
++) {
6778 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6779 addReplyBulk(c
,value
);
6780 decrRefCount(value
);
6782 addReply(c
,shared
.nullbulk
);
6787 static void hdelCommand(redisClient
*c
) {
6789 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6790 checkType(c
,o
,REDIS_HASH
)) return;
6792 if (hashDelete(o
,c
->argv
[2])) {
6793 if (hashLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6794 addReply(c
,shared
.cone
);
6797 addReply(c
,shared
.czero
);
6801 static void hlenCommand(redisClient
*c
) {
6803 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6804 checkType(c
,o
,REDIS_HASH
)) return;
6806 addReplyUlong(c
,hashLength(o
));
6809 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6810 robj
*o
, *lenobj
, *obj
;
6811 unsigned long count
= 0;
6814 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6815 || checkType(c
,o
,REDIS_HASH
)) return;
6817 lenobj
= createObject(REDIS_STRING
,NULL
);
6819 decrRefCount(lenobj
);
6821 hi
= hashInitIterator(o
);
6822 while (hashNext(hi
) != REDIS_ERR
) {
6823 if (flags
& REDIS_HASH_KEY
) {
6824 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6825 addReplyBulk(c
,obj
);
6829 if (flags
& REDIS_HASH_VALUE
) {
6830 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6831 addReplyBulk(c
,obj
);
6836 hashReleaseIterator(hi
);
6838 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6841 static void hkeysCommand(redisClient
*c
) {
6842 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6845 static void hvalsCommand(redisClient
*c
) {
6846 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6849 static void hgetallCommand(redisClient
*c
) {
6850 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6853 static void hexistsCommand(redisClient
*c
) {
6855 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6856 checkType(c
,o
,REDIS_HASH
)) return;
6858 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6861 static void convertToRealHash(robj
*o
) {
6862 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6863 unsigned int klen
, vlen
;
6864 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6866 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6867 p
= zipmapRewind(zm
);
6868 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6869 robj
*keyobj
, *valobj
;
6871 keyobj
= createStringObject((char*)key
,klen
);
6872 valobj
= createStringObject((char*)val
,vlen
);
6873 keyobj
= tryObjectEncoding(keyobj
);
6874 valobj
= tryObjectEncoding(valobj
);
6875 dictAdd(dict
,keyobj
,valobj
);
6877 o
->encoding
= REDIS_ENCODING_HT
;
6882 /* ========================= Non type-specific commands ==================== */
6884 static void flushdbCommand(redisClient
*c
) {
6885 server
.dirty
+= dictSize(c
->db
->dict
);
6886 touchWatchedKeysOnFlush(c
->db
->id
);
6887 dictEmpty(c
->db
->dict
);
6888 dictEmpty(c
->db
->expires
);
6889 addReply(c
,shared
.ok
);
6892 static void flushallCommand(redisClient
*c
) {
6893 touchWatchedKeysOnFlush(-1);
6894 server
.dirty
+= emptyDb();
6895 addReply(c
,shared
.ok
);
6896 if (server
.bgsavechildpid
!= -1) {
6897 kill(server
.bgsavechildpid
,SIGKILL
);
6898 rdbRemoveTempFile(server
.bgsavechildpid
);
6900 rdbSave(server
.dbfilename
);
6904 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6905 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6907 so
->pattern
= pattern
;
6911 /* Return the value associated to the key with a name obtained
6912 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6913 * The returned object will always have its refcount increased by 1
6914 * when it is non-NULL. */
6915 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6918 robj keyobj
, fieldobj
, *o
;
6919 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6920 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6924 char buf
[REDIS_SORTKEY_MAX
+1];
6925 } keyname
, fieldname
;
6927 /* If the pattern is "#" return the substitution object itself in order
6928 * to implement the "SORT ... GET #" feature. */
6929 spat
= pattern
->ptr
;
6930 if (spat
[0] == '#' && spat
[1] == '\0') {
6931 incrRefCount(subst
);
6935 /* The substitution object may be specially encoded. If so we create
6936 * a decoded object on the fly. Otherwise getDecodedObject will just
6937 * increment the ref count, that we'll decrement later. */
6938 subst
= getDecodedObject(subst
);
6941 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6942 p
= strchr(spat
,'*');
6944 decrRefCount(subst
);
6948 /* Find out if we're dealing with a hash dereference. */
6949 if ((f
= strstr(p
+1, "->")) != NULL
) {
6950 fieldlen
= sdslen(spat
)-(f
-spat
);
6951 /* this also copies \0 character */
6952 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6953 fieldname
.len
= fieldlen
-2;
6959 sublen
= sdslen(ssub
);
6960 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6961 memcpy(keyname
.buf
,spat
,prefixlen
);
6962 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6963 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6964 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6965 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6966 decrRefCount(subst
);
6968 /* Lookup substituted key */
6969 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6970 o
= lookupKeyRead(db
,&keyobj
);
6971 if (o
== NULL
) return NULL
;
6974 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6976 /* Retrieve value from hash by the field name. This operation
6977 * already increases the refcount of the returned object. */
6978 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6979 o
= hashGet(o
, &fieldobj
);
6981 if (o
->type
!= REDIS_STRING
) return NULL
;
6983 /* Every object that this function returns needs to have its refcount
6984 * increased. sortCommand decreases it again. */
6991 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6992 * the additional parameter is not standard but a BSD-specific we have to
6993 * pass sorting parameters via the global 'server' structure */
6994 static int sortCompare(const void *s1
, const void *s2
) {
6995 const redisSortObject
*so1
= s1
, *so2
= s2
;
6998 if (!server
.sort_alpha
) {
6999 /* Numeric sorting. Here it's trivial as we precomputed scores */
7000 if (so1
->u
.score
> so2
->u
.score
) {
7002 } else if (so1
->u
.score
< so2
->u
.score
) {
7008 /* Alphanumeric sorting */
7009 if (server
.sort_bypattern
) {
7010 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
7011 /* At least one compare object is NULL */
7012 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
7014 else if (so1
->u
.cmpobj
== NULL
)
7019 /* We have both the objects, use strcoll */
7020 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
7023 /* Compare elements directly. */
7024 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
7027 return server
.sort_desc
? -cmp
: cmp
;
7030 /* The SORT command is the most complex command in Redis. Warning: this code
7031 * is optimized for speed and a bit less for readability */
7032 static void sortCommand(redisClient
*c
) {
7035 int desc
= 0, alpha
= 0;
7036 int limit_start
= 0, limit_count
= -1, start
, end
;
7037 int j
, dontsort
= 0, vectorlen
;
7038 int getop
= 0; /* GET operation counter */
7039 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
7040 redisSortObject
*vector
; /* Resulting vector to sort */
7042 /* Lookup the key to sort. It must be of the right types */
7043 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
7044 if (sortval
== NULL
) {
7045 addReply(c
,shared
.emptymultibulk
);
7048 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
7049 sortval
->type
!= REDIS_ZSET
)
7051 addReply(c
,shared
.wrongtypeerr
);
7055 /* Create a list of operations to perform for every sorted element.
7056 * Operations can be GET/DEL/INCR/DECR */
7057 operations
= listCreate();
7058 listSetFreeMethod(operations
,zfree
);
7061 /* Now we need to protect sortval incrementing its count, in the future
7062 * SORT may have options able to overwrite/delete keys during the sorting
7063 * and the sorted key itself may get destroyed */
7064 incrRefCount(sortval
);
7066 /* The SORT command has an SQL-alike syntax, parse it */
7067 while(j
< c
->argc
) {
7068 int leftargs
= c
->argc
-j
-1;
7069 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7071 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7073 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7075 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7076 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7077 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7079 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7080 storekey
= c
->argv
[j
+1];
7082 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7083 sortby
= c
->argv
[j
+1];
7084 /* If the BY pattern does not contain '*', i.e. it is constant,
7085 * we don't need to sort nor to lookup the weight keys. */
7086 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7088 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7089 listAddNodeTail(operations
,createSortOperation(
7090 REDIS_SORT_GET
,c
->argv
[j
+1]));
7094 decrRefCount(sortval
);
7095 listRelease(operations
);
7096 addReply(c
,shared
.syntaxerr
);
7102 /* Load the sorting vector with all the objects to sort */
7103 switch(sortval
->type
) {
7104 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
7105 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7106 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7107 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7109 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7112 if (sortval
->type
== REDIS_LIST
) {
7113 list
*list
= sortval
->ptr
;
7117 listRewind(list
,&li
);
7118 while((ln
= listNext(&li
))) {
7119 robj
*ele
= ln
->value
;
7120 vector
[j
].obj
= ele
;
7121 vector
[j
].u
.score
= 0;
7122 vector
[j
].u
.cmpobj
= NULL
;
7130 if (sortval
->type
== REDIS_SET
) {
7133 zset
*zs
= sortval
->ptr
;
7137 di
= dictGetIterator(set
);
7138 while((setele
= dictNext(di
)) != NULL
) {
7139 vector
[j
].obj
= dictGetEntryKey(setele
);
7140 vector
[j
].u
.score
= 0;
7141 vector
[j
].u
.cmpobj
= NULL
;
7144 dictReleaseIterator(di
);
7146 redisAssert(j
== vectorlen
);
7148 /* Now it's time to load the right scores in the sorting vector */
7149 if (dontsort
== 0) {
7150 for (j
= 0; j
< vectorlen
; j
++) {
7153 /* lookup value to sort by */
7154 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7155 if (!byval
) continue;
7157 /* use object itself to sort by */
7158 byval
= vector
[j
].obj
;
7162 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7164 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7165 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7166 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7167 /* Don't need to decode the object if it's
7168 * integer-encoded (the only encoding supported) so
7169 * far. We can just cast it */
7170 vector
[j
].u
.score
= (long)byval
->ptr
;
7172 redisAssert(1 != 1);
7176 /* when the object was retrieved using lookupKeyByPattern,
7177 * its refcount needs to be decreased. */
7179 decrRefCount(byval
);
7184 /* We are ready to sort the vector... perform a bit of sanity check
7185 * on the LIMIT option too. We'll use a partial version of quicksort. */
7186 start
= (limit_start
< 0) ? 0 : limit_start
;
7187 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7188 if (start
>= vectorlen
) {
7189 start
= vectorlen
-1;
7192 if (end
>= vectorlen
) end
= vectorlen
-1;
7194 if (dontsort
== 0) {
7195 server
.sort_desc
= desc
;
7196 server
.sort_alpha
= alpha
;
7197 server
.sort_bypattern
= sortby
? 1 : 0;
7198 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7199 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7201 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7204 /* Send command output to the output buffer, performing the specified
7205 * GET/DEL/INCR/DECR operations if any. */
7206 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7207 if (storekey
== NULL
) {
7208 /* STORE option not specified, send the sorting result to the client */
7209 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7210 for (j
= start
; j
<= end
; j
++) {
7214 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7215 listRewind(operations
,&li
);
7216 while((ln
= listNext(&li
))) {
7217 redisSortOperation
*sop
= ln
->value
;
7218 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7221 if (sop
->type
== REDIS_SORT_GET
) {
7223 addReply(c
,shared
.nullbulk
);
7225 addReplyBulk(c
,val
);
7229 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7234 robj
*listObject
= createListObject();
7235 list
*listPtr
= (list
*) listObject
->ptr
;
7237 /* STORE option specified, set the sorting result as a List object */
7238 for (j
= start
; j
<= end
; j
++) {
7243 listAddNodeTail(listPtr
,vector
[j
].obj
);
7244 incrRefCount(vector
[j
].obj
);
7246 listRewind(operations
,&li
);
7247 while((ln
= listNext(&li
))) {
7248 redisSortOperation
*sop
= ln
->value
;
7249 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7252 if (sop
->type
== REDIS_SORT_GET
) {
7254 listAddNodeTail(listPtr
,createStringObject("",0));
7256 /* We should do a incrRefCount on val because it is
7257 * added to the list, but also a decrRefCount because
7258 * it is returned by lookupKeyByPattern. This results
7259 * in doing nothing at all. */
7260 listAddNodeTail(listPtr
,val
);
7263 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7267 dbReplace(c
->db
,storekey
,listObject
);
7268 /* Note: we add 1 because the DB is dirty anyway since even if the
7269 * SORT result is empty a new key is set and maybe the old content
7271 server
.dirty
+= 1+outputlen
;
7272 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7276 decrRefCount(sortval
);
7277 listRelease(operations
);
7278 for (j
= 0; j
< vectorlen
; j
++) {
7279 if (alpha
&& vector
[j
].u
.cmpobj
)
7280 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must be large enough for the result (at most 21 characters plus the
 * terminating NUL). Values below 1024 are printed exactly with a 'B'
 * suffix; larger values are scaled by powers of 1024 and printed with two
 * decimals and a K/M/G/T/P suffix. The buffer is always written, whatever
 * the magnitude of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Beyond the known suffixes: fall back to the exact byte count
         * rather than leaving the buffer untouched. */
        sprintf(s,"%lluB",n);
    }
}
7306 /* Create the string returned by the INFO command. This is decoupled
7307 * from the INFO command itself as we need to report the same information
7308 * on memory corruption problems. */
7309 static sds
genRedisInfoString(void) {
7311 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7315 bytesToHuman(hmem
,zmalloc_used_memory());
7316 info
= sdscatprintf(sdsempty(),
7317 "redis_version:%s\r\n"
7318 "redis_git_sha1:%s\r\n"
7319 "redis_git_dirty:%d\r\n"
7321 "multiplexing_api:%s\r\n"
7322 "process_id:%ld\r\n"
7323 "uptime_in_seconds:%ld\r\n"
7324 "uptime_in_days:%ld\r\n"
7325 "connected_clients:%d\r\n"
7326 "connected_slaves:%d\r\n"
7327 "blocked_clients:%d\r\n"
7328 "used_memory:%zu\r\n"
7329 "used_memory_human:%s\r\n"
7330 "changes_since_last_save:%lld\r\n"
7331 "bgsave_in_progress:%d\r\n"
7332 "last_save_time:%ld\r\n"
7333 "bgrewriteaof_in_progress:%d\r\n"
7334 "total_connections_received:%lld\r\n"
7335 "total_commands_processed:%lld\r\n"
7336 "expired_keys:%lld\r\n"
7337 "hash_max_zipmap_entries:%zu\r\n"
7338 "hash_max_zipmap_value:%zu\r\n"
7339 "pubsub_channels:%ld\r\n"
7340 "pubsub_patterns:%u\r\n"
7345 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7346 (sizeof(long) == 8) ? "64" : "32",
7351 listLength(server
.clients
)-listLength(server
.slaves
),
7352 listLength(server
.slaves
),
7353 server
.blpop_blocked_clients
,
7354 zmalloc_used_memory(),
7357 server
.bgsavechildpid
!= -1,
7359 server
.bgrewritechildpid
!= -1,
7360 server
.stat_numconnections
,
7361 server
.stat_numcommands
,
7362 server
.stat_expiredkeys
,
7363 server
.hash_max_zipmap_entries
,
7364 server
.hash_max_zipmap_value
,
7365 dictSize(server
.pubsub_channels
),
7366 listLength(server
.pubsub_patterns
),
7367 server
.vm_enabled
!= 0,
7368 server
.masterhost
== NULL
? "master" : "slave"
7370 if (server
.masterhost
) {
7371 info
= sdscatprintf(info
,
7372 "master_host:%s\r\n"
7373 "master_port:%d\r\n"
7374 "master_link_status:%s\r\n"
7375 "master_last_io_seconds_ago:%d\r\n"
7378 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7380 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7383 if (server
.vm_enabled
) {
7385 info
= sdscatprintf(info
,
7386 "vm_conf_max_memory:%llu\r\n"
7387 "vm_conf_page_size:%llu\r\n"
7388 "vm_conf_pages:%llu\r\n"
7389 "vm_stats_used_pages:%llu\r\n"
7390 "vm_stats_swapped_objects:%llu\r\n"
7391 "vm_stats_swappin_count:%llu\r\n"
7392 "vm_stats_swappout_count:%llu\r\n"
7393 "vm_stats_io_newjobs_len:%lu\r\n"
7394 "vm_stats_io_processing_len:%lu\r\n"
7395 "vm_stats_io_processed_len:%lu\r\n"
7396 "vm_stats_io_active_threads:%lu\r\n"
7397 "vm_stats_blocked_clients:%lu\r\n"
7398 ,(unsigned long long) server
.vm_max_memory
,
7399 (unsigned long long) server
.vm_page_size
,
7400 (unsigned long long) server
.vm_pages
,
7401 (unsigned long long) server
.vm_stats_used_pages
,
7402 (unsigned long long) server
.vm_stats_swapped_objects
,
7403 (unsigned long long) server
.vm_stats_swapins
,
7404 (unsigned long long) server
.vm_stats_swapouts
,
7405 (unsigned long) listLength(server
.io_newjobs
),
7406 (unsigned long) listLength(server
.io_processing
),
7407 (unsigned long) listLength(server
.io_processed
),
7408 (unsigned long) server
.io_active_threads
,
7409 (unsigned long) server
.vm_blocked_clients
7413 for (j
= 0; j
< server
.dbnum
; j
++) {
7414 long long keys
, vkeys
;
7416 keys
= dictSize(server
.db
[j
].dict
);
7417 vkeys
= dictSize(server
.db
[j
].expires
);
7418 if (keys
|| vkeys
) {
7419 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7426 static void infoCommand(redisClient
*c
) {
7427 sds info
= genRedisInfoString();
7428 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7429 (unsigned long)sdslen(info
)));
7430 addReplySds(c
,info
);
7431 addReply(c
,shared
.crlf
);
7434 static void monitorCommand(redisClient
*c
) {
7435 /* ignore MONITOR if aleady slave or in monitor mode */
7436 if (c
->flags
& REDIS_SLAVE
) return;
7438 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7440 listAddNodeTail(server
.monitors
,c
);
7441 addReply(c
,shared
.ok
);
7444 /* ================================= Expire ================================= */
7445 static int removeExpire(redisDb
*db
, robj
*key
) {
7446 if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) {
7453 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7454 sds copy
= sdsdup(key
->ptr
);
7455 if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) {
7463 /* Return the expire time of the specified key, or -1 if no expire
7464 * is associated with this key (i.e. the key is non volatile) */
7465 static time_t getExpire(redisDb
*db
, robj
*key
) {
7468 /* No expire? return ASAP */
7469 if (dictSize(db
->expires
) == 0 ||
7470 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1;
7472 return (time_t) dictGetEntryVal(de
);
7475 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7479 /* No expire? return ASAP */
7480 if (dictSize(db
->expires
) == 0 ||
7481 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
7483 /* Lookup the expire */
7484 when
= (time_t) dictGetEntryVal(de
);
7485 if (time(NULL
) <= when
) return 0;
7487 /* Delete the key */
7489 server
.stat_expiredkeys
++;
7493 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7496 /* No expire? return ASAP */
7497 if (dictSize(db
->expires
) == 0 ||
7498 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
7500 /* Delete the key */
7502 server
.stat_expiredkeys
++;
7503 dictDelete(db
->expires
,key
->ptr
);
7504 return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
;
7507 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7511 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7515 de
= dictFind(c
->db
->dict
,key
->ptr
);
7517 addReply(c
,shared
.czero
);
7521 if (dbDelete(c
->db
,key
)) server
.dirty
++;
7522 addReply(c
, shared
.cone
);
7525 time_t when
= time(NULL
)+seconds
;
7526 if (setExpire(c
->db
,key
,when
)) {
7527 addReply(c
,shared
.cone
);
7530 addReply(c
,shared
.czero
);
7536 static void expireCommand(redisClient
*c
) {
7537 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7540 static void expireatCommand(redisClient
*c
) {
7541 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7544 static void ttlCommand(redisClient
*c
) {
7548 expire
= getExpire(c
->db
,c
->argv
[1]);
7550 ttl
= (int) (expire
-time(NULL
));
7551 if (ttl
< 0) ttl
= -1;
7553 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7556 /* ================================ MULTI/EXEC ============================== */
7558 /* Client state initialization for MULTI/EXEC */
7559 static void initClientMultiState(redisClient
*c
) {
7560 c
->mstate
.commands
= NULL
;
7561 c
->mstate
.count
= 0;
7564 /* Release all the resources associated with MULTI/EXEC state */
7565 static void freeClientMultiState(redisClient
*c
) {
7568 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7570 multiCmd
*mc
= c
->mstate
.commands
+j
;
7572 for (i
= 0; i
< mc
->argc
; i
++)
7573 decrRefCount(mc
->argv
[i
]);
7576 zfree(c
->mstate
.commands
);
7579 /* Add a new command into the MULTI commands queue */
7580 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7584 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7585 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7586 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7589 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7590 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7591 for (j
= 0; j
< c
->argc
; j
++)
7592 incrRefCount(mc
->argv
[j
]);
7596 static void multiCommand(redisClient
*c
) {
7597 if (c
->flags
& REDIS_MULTI
) {
7598 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7601 c
->flags
|= REDIS_MULTI
;
7602 addReply(c
,shared
.ok
);
7605 static void discardCommand(redisClient
*c
) {
7606 if (!(c
->flags
& REDIS_MULTI
)) {
7607 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7611 freeClientMultiState(c
);
7612 initClientMultiState(c
);
7613 c
->flags
&= (~REDIS_MULTI
);
7614 addReply(c
,shared
.ok
);
7617 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7618 * implememntation for more information. */
7619 static void execCommandReplicateMulti(redisClient
*c
) {
7620 struct redisCommand
*cmd
;
7621 robj
*multistring
= createStringObject("MULTI",5);
7623 cmd
= lookupCommand("multi");
7624 if (server
.appendonly
)
7625 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7626 if (listLength(server
.slaves
))
7627 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7628 decrRefCount(multistring
);
7631 static void execCommand(redisClient
*c
) {
7636 if (!(c
->flags
& REDIS_MULTI
)) {
7637 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7641 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7642 * A failed EXEC will return a multi bulk nil object. */
7643 if (c
->flags
& REDIS_DIRTY_CAS
) {
7644 freeClientMultiState(c
);
7645 initClientMultiState(c
);
7646 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7648 addReply(c
,shared
.nullmultibulk
);
7652 /* Replicate a MULTI request now that we are sure the block is executed.
7653 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7654 * both the AOF and the replication link will have the same consistency
7655 * and atomicity guarantees. */
7656 execCommandReplicateMulti(c
);
7658 /* Exec all the queued commands */
7659 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7660 orig_argv
= c
->argv
;
7661 orig_argc
= c
->argc
;
7662 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7663 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7664 c
->argc
= c
->mstate
.commands
[j
].argc
;
7665 c
->argv
= c
->mstate
.commands
[j
].argv
;
7666 call(c
,c
->mstate
.commands
[j
].cmd
);
7668 c
->argv
= orig_argv
;
7669 c
->argc
= orig_argc
;
7670 freeClientMultiState(c
);
7671 initClientMultiState(c
);
7672 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7673 /* Make sure the EXEC command is always replicated / AOF, since we
7674 * always send the MULTI command (we can't know beforehand if the
7675 * next operations will contain at least a modification to the DB). */
7679 /* =========================== Blocking Operations ========================= */
7681 /* Currently Redis blocking operations support is limited to list POP ops,
7682 * so the current implementation is not fully generic, but it is also not
7683 * completely specific so it will not require a rewrite to support new
7684 * kind of blocking operations in the future.
7686 * Still it's important to note that list blocking operations can be already
7687 * used as a notification mechanism in order to implement other blocking
7688 * operations at application level, so there must be a very strong evidence
7689 * of usefulness and generality before new blocking operations are implemented.
7691 * This is how the current blocking POP works, we use BLPOP as example:
7692 * - If the user calls BLPOP and the key exists and contains a non empty list
7693 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7694 * if there is no need to block.
7695 * - If instead BLPOP is called and the key does not exist or the list is
7696 * empty we need to block. In order to do so we remove the notification for
7697 * new data to read in the client socket (so that we'll not serve new
7698 * requests if the blocking request is not served). Also we put the client
7699 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7700 * blocked on these keys.
7701 * - If a PUSH operation against a key with blocked clients waiting is
7702 * performed, we serve the first in the list: basically instead of pushing
7703 * the new element inside the list we return it to the (first / oldest)
7704 * blocking client, unblock the client, and remove it from the list.
7706 * The above comment and the source code should be enough in order to understand
7707 * the implementation and modify / fix it later.
7710 /* Set a client in blocking mode for the specified key, with the specified
/* Put client 'c' in blocked state on the 'numkeys' keys in 'keys'.
 *
 * The keys are copied into c->blocking_keys (each one gets an extra
 * reference), 'timeout' is stored in c->blockingto, and the client is
 * appended to the per-key list of waiting clients stored in
 * c->db->blocking_keys. A list added with dictAdd() takes one more reference
 * on the key and the insertion is asserted to succeed. Finally the client is
 * flagged REDIS_BLOCKED and server.blpop_blocked_clients is incremented. */
7712 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7717 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7718 c
->blocking_keys_num
= numkeys
;
7719 c
->blockingto
= timeout
;
7720 for (j
= 0; j
< numkeys
; j
++) {
7721 /* Add the key in the client structure, to map clients -> keys */
7722 c
->blocking_keys
[j
] = keys
[j
];
7723 incrRefCount(keys
[j
]);
7725 /* And in the other "side", to map keys -> clients */
7726 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7730 /* For every key we take a list of clients blocked for it */
7732 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7733 incrRefCount(keys
[j
]);
7734 assert(retval
== DICT_OK
);
7736 l
= dictGetEntryVal(de
);
7738 listAddNodeTail(l
,c
);
7740 /* Mark the client as a blocked client */
7741 c
->flags
|= REDIS_BLOCKED
;
7742 server
.blpop_blocked_clients
++;
/* Reverse of blockForKeys(): for every key in c->blocking_keys the client is
 * removed from the list of waiters stored in c->db->blocking_keys (the dict
 * entry is deleted once its list is empty) and the key reference held by the
 * client is released. The key array is then freed, REDIS_BLOCKED is cleared,
 * server.blpop_blocked_clients is decremented and any data already sitting
 * in c->querybuf is processed. c->blocking_keys must not be NULL (asserted). */
7745 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7746 static void unblockClientWaitingData(redisClient
*c
) {
7751 assert(c
->blocking_keys
!= NULL
);
7752 /* The client may wait for multiple keys, so unblock it for every key. */
7753 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7754 /* Remove this client from the list of clients waiting for this key. */
7755 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7757 l
= dictGetEntryVal(de
);
7758 listDelNode(l
,listSearchKey(l
,c
));
7759 /* If the list is empty we need to remove it to avoid wasting memory */
7760 if (listLength(l
) == 0)
7761 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7762 decrRefCount(c
->blocking_keys
[j
]);
7764 /* Cleanup the client structure */
7765 zfree(c
->blocking_keys
);
7766 c
->blocking_keys
= NULL
;
7767 c
->flags
&= (~REDIS_BLOCKED
);
7768 server
.blpop_blocked_clients
--;
7769 /* We want to process data if there is some command waiting
7770 * in the input buffer. Note that this is safe even if
7771 * unblockClientWaitingData() gets called from freeClient() because
7772 * freeClient() will be smart enough to call this function
7773 * *after* c->querybuf was set to NULL. */
7774 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
/* Implementation notes: 'key' is looked up in c->db->blocking_keys and 0 is
 * returned right away when there is no entry. Otherwise the receiving client
 * is taken from a node of the waiters list, gets a two-element multi bulk
 * reply ("*2", the key, the element) and is unblocked with
 * unblockClientWaitingData(). */
7777 /* This should be called from any function PUSHing into lists.
7778 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7779 * 'ele' is the element pushed.
7781 * If the function returns 0 there was no client waiting for a list push
7784 * If the function returns 1 there was a client waiting for a list push
7785 * against this key, the element was passed to this client thus it's not
7786 * needed to actually add it to the list and the caller should return asap. */
7787 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7788 struct dictEntry
*de
;
7789 redisClient
*receiver
;
7793 de
= dictFind(c
->db
->blocking_keys
,key
);
7794 if (de
== NULL
) return 0;
7795 l
= dictGetEntryVal(de
);
7798 receiver
= ln
->value
;
7800 addReplySds(receiver
,sdsnew("*2\r\n"));
7801 addReplyBulk(receiver
,key
);
7802 addReplyBulk(receiver
,ele
);
7803 unblockClientWaitingData(receiver
);
/* Shared implementation of BLPOP / BRPOP; 'where' is forwarded unchanged to
 * popGenericCommand().
 *
 * Arguments 1 .. argc-2 are keys, the last argument is the timeout. Each key
 * is looked up with lookupKeyWrite(): a value that is not a REDIS_LIST gets
 * the shared wrong-type error, a non-empty list is served immediately by
 * emitting "*2" plus the key name and delegating to popGenericCommand() with
 * a temporary argv (the client's own argv/argc are restored afterwards).
 * When no key can be served the timeout is parsed with strtol(); a value
 * greater than zero is turned into an absolute time by adding time(NULL),
 * and the client is blocked on all the keys through blockForKeys(). */
7807 /* Blocking RPOP/LPOP */
7808 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7813 for (j
= 1; j
< c
->argc
-1; j
++) {
7814 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7816 if (o
->type
!= REDIS_LIST
) {
7817 addReply(c
,shared
.wrongtypeerr
);
7820 list
*list
= o
->ptr
;
7821 if (listLength(list
) != 0) {
7822 /* If the list contains elements fall back to the usual
7823 * non-blocking POP operation */
7824 robj
*argv
[2], **orig_argv
;
7827 /* We need to alter the command arguments before calling
7828 * popGenericCommand() as the command takes a single key. */
7829 orig_argv
= c
->argv
;
7830 orig_argc
= c
->argc
;
7831 argv
[1] = c
->argv
[j
];
7835 /* Also the return value is different, we need to output
7836 * the multi bulk reply header and the key name. The
7837 * "real" command will add the last element (the value)
7838 * for us. If this sounds like a hack to you it's just
7839 * because it is... */
7840 addReplySds(c
,sdsnew("*2\r\n"));
7841 addReplyBulk(c
,argv
[1]);
7842 popGenericCommand(c
,where
);
7844 /* Fix the client structure with the original stuff */
7845 c
->argv
= orig_argv
;
7846 c
->argc
= orig_argc
;
7852 /* If the list is empty or the key does not exist we must block */
7853 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7854 if (timeout
> 0) timeout
+= time(NULL
);
7855 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
/* BLPOP: blocking pop, delegating to blockingPopGenericCommand() with
 * REDIS_HEAD as the side to pop from. */
7858 static void blpopCommand(redisClient
*c
) {
7859 blockingPopGenericCommand(c
,REDIS_HEAD
);
/* BRPOP: blocking pop, delegating to blockingPopGenericCommand() with
 * REDIS_TAIL as the side to pop from. */
7862 static void brpopCommand(redisClient
*c
) {
7863 blockingPopGenericCommand(c
,REDIS_TAIL
);
7866 /* =============================== Replication ============================= */
/* Synchronous write helper used by the replication code.
 *
 * Waits for 'fd' to become writable in slices of 1000 ms (aeWait with
 * AE_WRITABLE) and then calls write(); -1 is returned as soon as write()
 * reports an error. The time elapsed since the function was entered is
 * compared against 'timeout', which is therefore expressed in seconds
 * (it is compared with a difference of time() values). */
7868 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7869 ssize_t nwritten
, ret
= size
;
7870 time_t start
= time(NULL
);
7874 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7875 nwritten
= write(fd
,ptr
,size
);
7876 if (nwritten
== -1) return -1;
7880 if ((time(NULL
)-start
) > timeout
) {
/* Synchronous read helper, the read-side twin of syncWrite().
 *
 * Waits for 'fd' to become readable in slices of 1000 ms (aeWait with
 * AE_READABLE) and then calls read(); -1 is returned as soon as read()
 * reports an error. 'totread' starts at zero and the time elapsed since the
 * function was entered is compared against 'timeout' (seconds). */
7888 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7889 ssize_t nread
, totread
= 0;
7890 time_t start
= time(NULL
);
7894 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7895 nread
= read(fd
,ptr
,size
);
7896 if (nread
== -1) return -1;
7901 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time using syncRead() (each byte is
 * read with the given 'timeout'); -1 is returned when a read fails. When at
 * least one byte was stored and the byte just before the current position is
 * '\r', it is replaced by a string terminator so a CRLF ending is stripped. */
7909 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7916 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7919 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* SYNC command: turn the calling client into a slave.
 *
 * The command is ignored for clients already flagged REDIS_SLAVE and refused
 * with an error when the client still has pending replies. Then:
 * - if a BGSAVE is running (server.bgsavechildpid != -1) and another slave is
 *   already in REDIS_REPL_WAIT_BGSAVE_END, that slave's reply list is
 *   duplicated into this client, which joins the same state; otherwise the
 *   client waits for the next BGSAVE (REDIS_REPL_WAIT_BGSAVE_START);
 * - if no BGSAVE is running, one is started with rdbSaveBackground(); on
 *   failure an error is sent to the client, on success the client enters
 *   REDIS_REPL_WAIT_BGSAVE_END.
 * Finally the client is flagged REDIS_SLAVE and appended to server.slaves. */
7930 static void syncCommand(redisClient
*c
) {
7931 /* ignore SYNC if already slave or in monitor mode */
7932 if (c
->flags
& REDIS_SLAVE
) return;
7934 /* SYNC can't be issued when the server has pending data to send to
7935 * the client about already issued commands. We need a fresh reply
7936 * buffer registering the differences between the BGSAVE and the current
7937 * dataset, so that we can copy to other slaves if needed. */
7938 if (listLength(c
->reply
) != 0) {
7939 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7943 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7944 /* Here we need to check if there is a background saving operation
7945 * in progress, or if it is required to start one */
7946 if (server
.bgsavechildpid
!= -1) {
7947 /* Ok a background save is in progress. Let's check if it is a good
7948 * one for replication, i.e. if there is another slave that is
7949 * registering differences since the server forked to save */
7954 listRewind(server
.slaves
,&li
);
7955 while((ln
= listNext(&li
))) {
7957 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7960 /* Perfect, the server is already registering differences for
7961 * another slave. Set the right state, and copy the buffer. */
7962 listRelease(c
->reply
);
7963 c
->reply
= listDup(slave
->reply
);
7964 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7965 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7967 /* No way, we need to wait for the next BGSAVE in order to
7968 * register differences */
7969 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7970 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7973 /* Ok we don't have a BGSAVE in progress, let's start one */
7974 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7975 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7976 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7977 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7980 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7983 c
->flags
|= REDIS_SLAVE
;
7985 listAddNodeTail(server
.slaves
,c
);
/* File event handler (writable event on the slave socket) that streams the
 * RDB file to a slave; 'privdata' is the slave client.
 *
 * On the first call (slave->repldboff == 0) the "$<size>" bulk header is
 * written. Every call then seeks slave->repldbfd to the current offset,
 * reads up to REDIS_IOBUF_LEN bytes and writes them to 'fd', advancing
 * slave->repldboff by the number of bytes actually written. When the offset
 * reaches slave->repldbsize the file is closed, the slave goes to
 * REDIS_REPL_ONLINE and the writable handler is replaced by
 * sendReplyToClient.
 *
 * NOTE(review): the header is formatted with "%lld" while the argument is
 * cast to unsigned long long; "%llu" would match the cast. */
7989 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7990 redisClient
*slave
= privdata
;
7992 REDIS_NOTUSED(mask
);
7993 char buf
[REDIS_IOBUF_LEN
];
7994 ssize_t nwritten
, buflen
;
7996 if (slave
->repldboff
== 0) {
7997 /* Write the bulk write count before transferring the DB. In theory here
7998 * we don't know how much room there is in the output buffer of the
7999 * socket, but in practice SO_SNDLOWAT (the minimum count for output
8000 * operations) will never be smaller than the few bytes we need. */
8003 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
8005 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
8013 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
8014 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
8016 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
8017 (buflen
== 0) ? "premature EOF" : strerror(errno
));
8021 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
8022 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
8027 slave
->repldboff
+= nwritten
;
8028 if (slave
->repldboff
== slave
->repldbsize
) {
8029 close(slave
->repldbfd
);
8030 slave
->repldbfd
= -1;
8031 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8032 slave
->replstate
= REDIS_REPL_ONLINE
;
8033 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
8034 sendReplyToClient
, slave
) == AE_ERR
) {
8038 addReplySds(slave
,sdsempty());
8039 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
8043 /* This function is called at the end of every background saving.
8044 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
8045 * otherwise REDIS_ERR is passed to the function.
8047 * The goal of this function is to handle slaves waiting for a successful
8048 * background saving in order to perform non-blocking synchronization.
 *
 * Per slave in server.slaves:
 * - REDIS_REPL_WAIT_BGSAVE_START slaves are moved to
 *   REDIS_REPL_WAIT_BGSAVE_END (a new BGSAVE is started for them below via
 *   rdbSaveBackground(); a failure of that call is logged as
 *   "SYNC failed. BGSAVE failed").
 * - REDIS_REPL_WAIT_BGSAVE_END slaves are failed when bgsaveerr is not
 *   REDIS_OK or when the dump cannot be opened / stat-ed; otherwise the file
 *   descriptor, offset (0) and size are recorded, the slave enters
 *   REDIS_REPL_SEND_BULK and sendBulkToSlave is installed as the writable
 *   handler on the slave socket. */
8049 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
8051 int startbgsave
= 0;
8054 listRewind(server
.slaves
,&li
);
8055 while((ln
= listNext(&li
))) {
8056 redisClient
*slave
= ln
->value
;
8058 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8060 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8061 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8062 struct redis_stat buf
;
8064 if (bgsaveerr
!= REDIS_OK
) {
8066 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8069 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8070 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8072 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8075 slave
->repldboff
= 0;
8076 slave
->repldbsize
= buf
.st_size
;
8077 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8078 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8079 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8086 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8089 listRewind(server
.slaves
,&li
);
8090 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8091 while((ln
= listNext(&li
))) {
8092 redisClient
*slave
= ln
->value
;
8094 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
/* Slave side of the initial synchronization with the master.
 *
 * Steps, in order:
 * 1. connect to server.masterhost:server.masterport;
 * 2. when server.masterauth is set, send "AUTH <password>" and require a
 *    reply starting with '+';
 * 3. send SYNC and read the bulk header, which must start with '$'; the
 *    payload size is parsed with strtol();
 * 4. create a temp file "temp-<time>.<pid>.rdb" with O_CREAT|O_EXCL and copy
 *    the payload into it in chunks of at most 1024 bytes;
 * 5. rename the temp file to server.dbfilename and load it with rdbLoad();
 * 6. wrap the socket in a client flagged REDIS_MASTER (pre-authenticated)
 *    and set server.replstate to REDIS_REPL_CONNECTED.
 * Every failure is logged with REDIS_WARNING.
 *
 * NOTE(review): the AUTH command is built with snprintf() bounded to 1024
 * bytes but the length handed to syncWrite() is strlen(masterauth)+7, so a
 * very long password would make the write length exceed what was formatted
 * into authcmd. */
8101 static int syncWithMaster(void) {
8102 char buf
[1024], tmpfile
[256], authcmd
[1024];
8104 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8105 int dfd
, maxtries
= 5;
8108 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8113 /* AUTH with the master if required. */
8114 if(server
.masterauth
) {
8115 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8116 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8118 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8122 /* Read the AUTH result. */
8123 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8125 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8129 if (buf
[0] != '+') {
8131 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8136 /* Issue the SYNC command */
8137 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8139 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8143 /* Read the bulk write count */
8144 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8146 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8150 if (buf
[0] != '$') {
8152 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8155 dumpsize
= strtol(buf
+1,NULL
,10);
8156 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8157 /* Read the bulk write data on a temp file */
8159 snprintf(tmpfile
,256,
8160 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8161 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8162 if (dfd
!= -1) break;
8167 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8171 int nread
, nwritten
;
8173 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8175 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8181 nwritten
= write(dfd
,buf
,nread
);
8182 if (nwritten
== -1) {
8183 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8191 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8192 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8198 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8199 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8203 server
.master
= createClient(fd
);
8204 server
.master
->flags
|= REDIS_MASTER
;
8205 server
.master
->authenticated
= 1;
8206 server
.replstate
= REDIS_REPL_CONNECTED
;
/* SLAVEOF command.
 *
 * "SLAVEOF no one" (case insensitive): when a master host is configured it
 * is released, the master client (if any) is freed and replication state
 * becomes REDIS_REPL_NONE.
 * "SLAVEOF <host> <port>": the previous host string is released, the new
 * host/port are stored, the current master client (if any) is freed and the
 * state becomes REDIS_REPL_CONNECT.
 * In both cases the client receives the shared OK reply.
 *
 * NOTE(review): the port is parsed with atoi(), so a non-numeric argument is
 * not rejected here. */
8210 static void slaveofCommand(redisClient
*c
) {
8211 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8212 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8213 if (server
.masterhost
) {
8214 sdsfree(server
.masterhost
);
8215 server
.masterhost
= NULL
;
8216 if (server
.master
) freeClient(server
.master
);
8217 server
.replstate
= REDIS_REPL_NONE
;
8218 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8221 sdsfree(server
.masterhost
);
8222 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8223 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8224 if (server
.master
) freeClient(server
.master
);
8225 server
.replstate
= REDIS_REPL_CONNECT
;
8226 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8227 server
.masterhost
, server
.masterport
);
8229 addReply(c
,shared
.ok
);
8232 /* ============================ Maxmemory directive ======================== */
8234 /* Try to free one object from the pre-allocated objects free list.
8235 * This is useful under low mem conditions as by default we take 1 million
8236 * free objects allocated. On success REDIS_OK is returned, otherwise
/* Pops the head node of server.objfreelist when the list is not empty.
 * server.obj_freelist_mutex is taken around the list access only when
 * server.vm_enabled is set, and is released on both the "found" and the
 * "empty list" paths. */
8238 static int tryFreeOneObjectFromFreelist(void) {
8241 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8242 if (listLength(server
.objfreelist
)) {
8243 listNode
*head
= listFirst(server
.objfreelist
);
8244 o
= listNodeValue(head
);
8245 listDelNode(server
.objfreelist
,head
);
8246 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8250 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8255 /* This function gets called when 'maxmemory' is set on the config file to limit
8256 * the max memory used by the server, and we are out of memory.
8257 * This function will try to, in order:
8259 * - Free objects from the free list
8260 * - Try to remove keys with an EXPIRE set
8262 * If it is not possible to free enough memory to reach used-memory < maxmemory
8263 * the server will start refusing commands that will enlarge even more the
/* Loops while server.maxmemory is set and zmalloc_used_memory() exceeds it.
 * Each iteration first tries tryFreeOneObjectFromFreelist(); when that does
 * not succeed, every database with a non-empty expires dict is visited:
 * three random entries are sampled, the key with the smallest expire time
 * is selected and removed with dbDelete(). The function returns as soon as
 * an iteration frees nothing. */
8266 static void freeMemoryIfNeeded(void) {
8267 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8268 int j
, k
, freed
= 0;
8270 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8271 for (j
= 0; j
< server
.dbnum
; j
++) {
8273 robj
*minkey
= NULL
;
8274 struct dictEntry
*de
;
8276 if (dictSize(server
.db
[j
].expires
)) {
8278 /* From a sample of three keys drop the one nearest to
8279 * the natural expire */
8280 for (k
= 0; k
< 3; k
++) {
8283 de
= dictGetRandomKey(server
.db
[j
].expires
);
8284 t
= (time_t) dictGetEntryVal(de
);
8285 if (minttl
== -1 || t
< minttl
) {
8286 minkey
= dictGetEntryKey(de
);
8290 dbDelete(server
.db
+j
,minkey
);
8293 if (!freed
) return; /* nothing to free... */
8297 /* ============================== Append Only file ========================== */
/* Flushes the pending AOF buffer, fsyncs and closes server.appendfd, then
 * resets appendfd / appendseldb to -1 and appendonly to 0. When a child is
 * recorded it is killed with SIGKILL and reaped with wait3(), the rewrite
 * buffer is replaced by an empty one and the child pid is cleared.
 *
 * NOTE(review): the comment below talks about a rewrite in progress, but the
 * pid that is tested, killed and reset is server.bgsavechildpid, while
 * flushAppendOnlyFile() treats server.bgrewritechildpid as a separate child.
 * Confirm which pid is intended here. */
8299 /* Called when the user switches from "appendonly yes" to "appendonly no"
8300 * at runtime using the CONFIG command. */
8301 static void stopAppendOnly(void) {
8302 flushAppendOnlyFile();
8303 aof_fsync(server
.appendfd
);
8304 close(server
.appendfd
);
8306 server
.appendfd
= -1;
8307 server
.appendseldb
= -1;
8308 server
.appendonly
= 0;
8309 /* rewrite operation in progress? kill it, wait child exit */
8310 if (server
.bgsavechildpid
!= -1) {
8313 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8314 wait3(&statloc
,0,NULL
);
8315 /* reset the buffer accumulating changes while the child saves */
8316 sdsfree(server
.bgrewritebuf
);
8317 server
.bgrewritebuf
= sdsempty();
8318 server
.bgsavechildpid
= -1;
/* Sets server.appendonly to 1, records the current time as the last fsync
 * time and opens server.appendfilename for appending (created with mode 0644
 * if missing). A failure to open the file is logged. A background rewrite is
 * then requested; if that fails appendonly is reset to 0, the descriptor is
 * closed and the failure is logged.
 *
 * NOTE(review): server.appendonly is set to 1 before open() and the visible
 * open-failure path does not reset it, unlike the rewrite-failure path.
 * NOTE(review): the second redisLog() passes strerror(errno) but its format
 * string contains no conversion for it. */
8322 /* Called when the user switches from "appendonly no" to "appendonly yes"
8323 * at runtime using the CONFIG command. */
8324 static int startAppendOnly(void) {
8325 server
.appendonly
= 1;
8326 server
.lastfsync
= time(NULL
);
8327 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8328 if (server
.appendfd
== -1) {
8329 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8332 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8333 server
.appendonly
= 0;
8334 close(server
.appendfd
);
8335 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
/* Summary of the steps: nothing is done when server.aofbuf is empty;
 * otherwise the whole buffer is written to server.appendfd with a single
 * write() and any error or short write is logged as fatal. The buffer is
 * then replaced by an empty one. fsync is skipped while a child is running
 * and no_appendfsync_on_rewrite is set; otherwise aof_fsync() runs when the
 * policy is APPENDFSYNC_ALWAYS, or APPENDFSYNC_EVERYSEC with more than one
 * second elapsed since server.lastfsync. */
8341 /* Write the append only file buffer on disk.
8343 * Since we are required to write the AOF before replying to the client,
8344 * and the only way the client socket can get a write is when entering
8345 * the event loop, we accumulate all the AOF writes in a memory
8346 * buffer and write it on disk using this function just before entering
8347 * the event loop again. */
8348 static void flushAppendOnlyFile(void) {
8352 if (sdslen(server
.aofbuf
) == 0) return;
8354 /* We want to perform a single write. This should be guaranteed atomic
8355 * at least if the filesystem we are writing is a real physical one.
8356 * While this will save us against the server being killed I don't think
8357 * there is much to do about the whole server stopping for power problems
8359 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8360 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8361 /* Ooops, we are in troubles. The best thing to do for now is
8362 * aborting instead of giving the illusion that everything is
8363 * working as expected. */
8364 if (nwritten
== -1) {
8365 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8367 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8371 sdsfree(server
.aofbuf
);
8372 server
.aofbuf
= sdsempty();
8374 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8375 * children performing heavy I/O on disk. */
8376 if (server
.no_appendfsync_on_rewrite
&&
8377 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8379 /* Fsync if needed */
8381 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8382 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8383 now
-server
.lastfsync
> 1))
8385 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8386 * flushing metadata. */
8387 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8388 server
.lastfsync
= now
;
/* Appends to 'buf' the multi bulk encoding of a command: a "*<argc>" header
 * followed, for each argument, by "$<len>", the raw bytes and CRLF. Every
 * argument is first passed through getDecodedObject() so that its ptr can be
 * treated as an sds string. The (possibly reallocated) sds is kept in 'buf'. */
8392 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8394 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8395 for (j
= 0; j
< argc
; j
++) {
8396 robj
*o
= getDecodedObject(argv
[j
]);
8397 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8398 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8399 buf
= sdscatlen(buf
,"\r\n",2);
/* Appends to 'buf' an EXPIREAT command equivalent to a relative expire of
 * 'seconds' on 'key': the relative value is parsed with strtol() and added
 * to time(NULL) to obtain the absolute time, which becomes the last argument
 * of the generated command. The temporary objects created for argv[0] and
 * argv[2] are released after the command has been serialized with
 * catAppendOnlyGenericCommand(). */
8405 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8410 /* Make sure we can use strtol */
8411 seconds
= getDecodedObject(seconds
);
8412 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8413 decrRefCount(seconds
);
8415 argv
[0] = createStringObject("EXPIREAT",8);
8417 argv
[2] = createObject(REDIS_STRING
,
8418 sdscatprintf(sdsempty(),"%ld",when
));
8419 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8420 decrRefCount(argv
[0]);
8421 decrRefCount(argv
[2]);
/* Serializes a command for the append only file.
 *
 * 'cmd' is the command table entry, 'dictid' the database the command ran
 * against, 'argv'/'argc' its arguments. A SELECT is emitted first whenever
 * 'dictid' differs from server.appendseldb. EXPIRE is rewritten as EXPIREAT,
 * SETEX as SET followed by EXPIREAT, everything else is written verbatim.
 * The result is appended to server.aofbuf and, while a background rewrite
 * child is running, also to server.bgrewritebuf. */
8425 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8426 sds buf
= sdsempty();
8429 /* The DB this command was targeting is not the same as the last command
8430 * we appended. We need to issue a SELECT command. */
8431 if (dictid
!= server
.appendseldb
) {
8434 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8435 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8436 (unsigned long)strlen(seldb
),seldb
);
8437 server
.appendseldb
= dictid
;
8440 if (cmd
->proc
== expireCommand
) {
8441 /* Translate EXPIRE into EXPIREAT */
8442 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8443 } else if (cmd
->proc
== setexCommand
) {
8444 /* Translate SETEX to SET and EXPIREAT */
8445 tmpargv
[0] = createStringObject("SET",3);
8446 tmpargv
[1] = argv
[1];
8447 tmpargv
[2] = argv
[3];
8448 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8449 decrRefCount(tmpargv
[0]);
8450 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8452 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8455 /* Append to the AOF buffer. This will be flushed on disk just before
8456 * re-entering the event loop, so before the client will get a
8457 * positive reply about the operation performed. */
8458 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8460 /* If a background append only file rewriting is in progress we want to
8461 * accumulate the differences between the child DB and the current one
8462 * in a buffer, so that when the child process will do its work we
8463 * can append the differences to the new append only file. */
8464 if (server
.bgrewritechildpid
!= -1)
8465 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
/* The client is allocated with zmalloc() and gets an empty query buffer, a
 * fresh reply list (free method decrRefCount, dup method
 * dupClientReplyValue) and an initialized MULTI state. */
8470 /* In Redis commands are always executed in the context of a client, so in
8471 * order to load the append only file we need to create a fake client. */
8472 static struct redisClient
*createFakeClient(void) {
8473 struct redisClient
*c
= zmalloc(sizeof(*c
));
8477 c
->querybuf
= sdsempty();
8481 /* We set the fake client as a slave waiting for the synchronization
8482 * so that Redis will not try to send replies to this client. */
8483 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8484 c
->reply
= listCreate();
8485 listSetFreeMethod(c
->reply
,decrRefCount
);
8486 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8487 initClientMultiState(c
);
/* Releases the resources set up by createFakeClient(): the query buffer,
 * the reply list and the MULTI state. */
8491 static void freeFakeClient(struct redisClient
*c
) {
8492 sdsfree(c
->querybuf
);
8493 listRelease(c
->reply
);
8494 freeClientMultiState(c
);
8498 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8499 * error (the append only file is zero-length) REDIS_ERR is returned. On
8500 * fatal error an error message is logged and the program exits.
 *
 * NOTE(review): "On error REDIS_OK is returned" reads like a slip for
 * "On success"; confirm against the return statements.
 *
 * How the replay works: server.appendonly is saved and forced to 0 while
 * loading, then restored at the end. Commands are read in multi bulk form
 * ('*' header, then one '$'-prefixed bulk per argument, each followed by a
 * discarded CRLF), looked up with lookupCommand() and executed through a
 * fake client whose reply list is emptied after every command. With VM
 * enabled, objects are swapped out while used memory is above
 * server.vm_max_memory. Reaching EOF while the fake client is still inside
 * MULTI is treated as a read error.
 *
 * NOTE(review): fileno(fp) is evaluated in the zero-length check before the
 * "can't open" failure is logged, so a NULL 'fp' from fopen() appears to be
 * dereferenced first; confirm and guard the check with 'fp' if so. */
8501 int loadAppendOnlyFile(char *filename
) {
8502 struct redisClient
*fakeClient
;
8503 FILE *fp
= fopen(filename
,"r");
8504 struct redis_stat sb
;
8505 int appendonly
= server
.appendonly
;
8507 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8511 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8515 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8516 * to the same file we're about to read. */
8517 server
.appendonly
= 0;
8519 fakeClient
= createFakeClient();
8526 struct redisCommand
*cmd
;
8529 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8535 if (buf
[0] != '*') goto fmterr
;
8537 argv
= zmalloc(sizeof(robj
*)*argc
);
8538 for (j
= 0; j
< argc
; j
++) {
8539 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8540 if (buf
[0] != '$') goto fmterr
;
8541 len
= strtol(buf
+1,NULL
,10);
8542 argsds
= sdsnewlen(NULL
,len
);
8543 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8544 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8545 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8548 /* Command lookup */
8549 cmd
= lookupCommand(argv
[0]->ptr
);
8551 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8554 /* Try object encoding */
8555 if (cmd
->flags
& REDIS_CMD_BULK
)
8556 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8557 /* Run the command in the context of a fake client */
8558 fakeClient
->argc
= argc
;
8559 fakeClient
->argv
= argv
;
8560 cmd
->proc(fakeClient
);
8561 /* Discard the reply objects list from the fake client */
8562 while(listLength(fakeClient
->reply
))
8563 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8564 /* Clean up, ready for the next command */
8565 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8567 /* Handle swapping while loading big datasets when VM is on */
8569 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
8572 if (server
.vm_enabled
&& force_swapout
) {
8573 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8574 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8579 /* This point can only be reached when EOF is reached without errors.
8580 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8581 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8584 freeFakeClient(fakeClient
);
8585 server
.appendonly
= appendonly
;
8590 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8592 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8596 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
/* Notes: objects whose encoding is not REDIS_ENCODING_RAW are first
 * converted with getDecodedObject(); the "$<len>" header, the payload
 * (skipped when empty) and the trailing CRLF are written with separate
 * fwrite() calls, and a failed write jumps to the 'err' label. When the
 * 'decrrc' flag is set the object is released with decrRefCount() on both
 * the success and the error path. */
8600 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8601 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8605 /* Avoid the incr/decr ref count business if possible to help
8606 * copy-on-write (we are often in a child process when this function
8608 * Also makes sure that key objects don't get incrRefCount-ed when VM
8610 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8611 obj
= getDecodedObject(obj
);
8614 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8615 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8616 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8618 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8619 if (decrrc
) decrRefCount(obj
);
8622 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 'fp' is the destination stream, 's' points to 'len' bytes of payload
 * (embedded NUL bytes are fine because the length is explicit).
 *
 * Returns 1 on success, 0 as soon as one of the fwrite() calls fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is an unsigned long, so it has to be printed with %lu: using
     * %ld is a format/argument mismatch and would print a negative count
     * for lengths above LONG_MAX. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() with a zero-sized item reports 0 items written, which would
     * be mistaken for an error: skip the payload write for empty strings. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* The value is formatted with "%.17g" followed by CRLF into 'dbuf'; the
 * "$<count>" header advertises strlen(dbuf)-2, i.e. the payload length
 * without the CRLF. Header and payload (CRLF included) are written with two
 * fwrite() calls, and 0 is returned as soon as one of them fails. */
8638 /* Write a double value in bulk format $<count>\r\n<payload>\r\n */
8639 static int fwriteBulkDouble(FILE *fp
, double d
) {
8640 char buf
[128], dbuf
[128];
8642 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
8643 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
8644 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8645 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
/* The value is formatted with "%ld" followed by CRLF into 'lbuf'; the
 * "$<count>" header advertises strlen(lbuf)-2, i.e. the payload length
 * without the CRLF. Header and payload (CRLF included) are written with two
 * fwrite() calls, and 0 is returned as soon as one of them fails. */
8649 /* Write a long value in bulk format $<count>\r\n<payload>\r\n */
8650 static int fwriteBulkLong(FILE *fp
, long l
) {
8651 char buf
[128], lbuf
[128];
8653 snprintf(lbuf
,sizeof(lbuf
),"%ld\r\n",l
);
8654 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(lbuf
)-2);
8655 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8656 if (fwrite(lbuf
,strlen(lbuf
),1,fp
) == 0) return 0;
8660 /* Write a sequence of commands able to fully rebuild the dataset into
8661 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8662 static int rewriteAppendOnlyFile(char *filename
) {
8663 dictIterator
*di
= NULL
;
8668 time_t now
= time(NULL
);
8670 /* Note that we have to use a different temp name here compared to the
8671 * one used by rewriteAppendOnlyFileBackground() function. */
8672 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8673 fp
= fopen(tmpfile
,"w");
8675 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8678 for (j
= 0; j
< server
.dbnum
; j
++) {
8679 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8680 redisDb
*db
= server
.db
+j
;
8682 if (dictSize(d
) == 0) continue;
8683 di
= dictGetIterator(d
);
8689 /* SELECT the new DB */
8690 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8691 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8693 /* Iterate this DB writing every entry */
8694 while((de
= dictNext(di
)) != NULL
) {
8695 sds keystr
= dictGetEntryKey(de
);
8700 keystr
= dictGetEntryKey(de
);
8701 o
= dictGetEntryVal(de
);
8702 initStaticStringObject(key
,keystr
);
8703 /* If the value for this key is swapped, load a preview in memory.
8704 * We use a "swapped" flag to remember if we need to free the
8705 * value object instead to just increment the ref count anyway
8706 * in order to avoid copy-on-write of pages if we are forked() */
8707 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
8708 o
->storage
== REDIS_VM_SWAPPING
) {
8711 o
= vmPreviewObject(o
);
8714 expiretime
= getExpire(db
,&key
);
8716 /* Save the key and associated value */
8717 if (o
->type
== REDIS_STRING
) {
8718 /* Emit a SET command */
8719 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8720 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8722 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8723 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8724 } else if (o
->type
== REDIS_LIST
) {
8725 /* Emit the RPUSHes needed to rebuild the list */
8726 list
*list
= o
->ptr
;
8730 listRewind(list
,&li
);
8731 while((ln
= listNext(&li
))) {
8732 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8733 robj
*eleobj
= listNodeValue(ln
);
8735 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8736 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8737 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8739 } else if (o
->type
== REDIS_SET
) {
8740 /* Emit the SADDs needed to rebuild the set */
8742 dictIterator
*di
= dictGetIterator(set
);
8745 while((de
= dictNext(di
)) != NULL
) {
8746 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8747 robj
*eleobj
= dictGetEntryKey(de
);
8749 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8750 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8751 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8753 dictReleaseIterator(di
);
8754 } else if (o
->type
== REDIS_ZSET
) {
8755 /* Emit the ZADDs needed to rebuild the sorted set */
8757 dictIterator
*di
= dictGetIterator(zs
->dict
);
8760 while((de
= dictNext(di
)) != NULL
) {
8761 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8762 robj
*eleobj
= dictGetEntryKey(de
);
8763 double *score
= dictGetEntryVal(de
);
8765 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8766 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8767 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8768 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8770 dictReleaseIterator(di
);
8771 } else if (o
->type
== REDIS_HASH
) {
8772 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8774 /* Emit the HSETs needed to rebuild the hash */
8775 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8776 unsigned char *p
= zipmapRewind(o
->ptr
);
8777 unsigned char *field
, *val
;
8778 unsigned int flen
, vlen
;
8780 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8781 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8782 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8783 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8785 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8789 dictIterator
*di
= dictGetIterator(o
->ptr
);
8792 while((de
= dictNext(di
)) != NULL
) {
8793 robj
*field
= dictGetEntryKey(de
);
8794 robj
*val
= dictGetEntryVal(de
);
8796 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8797 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8798 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8799 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8801 dictReleaseIterator(di
);
8804 redisPanic("Unknown object type");
8806 /* Save the expire time */
8807 if (expiretime
!= -1) {
8808 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8809 /* If this key is already expired skip it */
8810 if (expiretime
< now
) continue;
8811 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8812 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8813 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8815 if (swapped
) decrRefCount(o
);
8817 dictReleaseIterator(di
);
8820 /* Make sure data will not remain on the OS's output buffers */
8822 aof_fsync(fileno(fp
));
8825 /* Use RENAME to make sure the DB file is changed atomically only
8826 * if the generate DB file is ok. */
8827 if (rename(tmpfile
,filename
) == -1) {
8828 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8832 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8838 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8839 if (di
) dictReleaseIterator(di
);
8843 /* This is how rewriting of the append only file in background works:
8845 * 1) The user calls BGREWRITEAOF
8846 * 2) Redis calls this function, that forks():
8847 * 2a) the child rewrite the append only file in a temp file.
8848 * 2b) the parent accumulates differences in server.bgrewritebuf.
8849 * 3) When the child finished '2a' exists.
8850 * 4) The parent will trap the exit code, if it's OK, will append the
8851 * data accumulated into server.bgrewritebuf into the temp file, and
8852 * finally will rename(2) the temp file in the actual file name.
8853 * The the new file is reopened as the new append only file. Profit!
8855 static int rewriteAppendOnlyFileBackground(void) {
8858 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8859 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8860 if ((childpid
= fork()) == 0) {
8864 if (server
.vm_enabled
) vmReopenSwapFile();
8866 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8867 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8874 if (childpid
== -1) {
8875 redisLog(REDIS_WARNING
,
8876 "Can't rewrite append only file in background: fork: %s",
8880 redisLog(REDIS_NOTICE
,
8881 "Background append only file rewriting started by pid %d",childpid
);
8882 server
.bgrewritechildpid
= childpid
;
8883 updateDictResizePolicy();
8884 /* We set appendseldb to -1 in order to force the next call to the
8885 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8886 * accumulated by the parent into server.bgrewritebuf will start
8887 * with a SELECT statement and it will be safe to merge. */
8888 server
.appendseldb
= -1;
8891 return REDIS_OK
; /* unreached */
8894 static void bgrewriteaofCommand(redisClient
*c
) {
8895 if (server
.bgrewritechildpid
!= -1) {
8896 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8899 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8900 char *status
= "+Background append only file rewriting started\r\n";
8901 addReplySds(c
,sdsnew(status
));
8903 addReply(c
,shared
.err
);
/* Remove the temporary file named after 'childpid', using the same
 * "temp-rewriteaof-bg-<pid>.aof" pattern the background rewrite child uses
 * for its output. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
/* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
 * - Threaded Virtual Memory I/O
 * The two parts are not fully decoupled, but functions are split among two
 * different sections of the source code (delimited by comments) in order to
 * make more clear what functionality is about the blocking VM and what about
 * the threaded (not blocking) VM.
 *
 * Redis VM is a blocking VM (one that blocks reading swapped values from
 * disk into memory when a value swapped out is needed in memory) that is made
 * unblocking by trying to examine the command argument vector in order to
 * load in background values that will likely be needed in order to exec
 * the command. The command is executed only once all the relevant keys
 * are loaded into memory.
 *
 * This basically is almost as simple as a blocking VM, but almost as parallel
 * as a fully non-blocking VM.
 */
8935 /* =================== Virtual Memory - Blocking Side ====================== */
8937 /* Create a VM pointer object. This kind of objects are used in place of
8938 * values in the key -> value hash table, for swapped out objects. */
8939 static vmpointer
*createVmPointer(int vtype
) {
8940 vmpointer
*vp
= zmalloc(sizeof(vmpointer
));
8942 vp
->type
= REDIS_VMPOINTER
;
8943 vp
->storage
= REDIS_VM_SWAPPED
;
8948 static void vmInit(void) {
8954 if (server
.vm_max_threads
!= 0)
8955 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8957 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8958 /* Try to open the old swap file, otherwise create it */
8959 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8960 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8962 if (server
.vm_fp
== NULL
) {
8963 redisLog(REDIS_WARNING
,
8964 "Can't open the swap file: %s. Exiting.",
8968 server
.vm_fd
= fileno(server
.vm_fp
);
8969 /* Lock the swap file for writing, this is useful in order to avoid
8970 * another instance to use the same swap file for a config error. */
8971 fl
.l_type
= F_WRLCK
;
8972 fl
.l_whence
= SEEK_SET
;
8973 fl
.l_start
= fl
.l_len
= 0;
8974 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
8975 redisLog(REDIS_WARNING
,
8976 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
8980 server
.vm_next_page
= 0;
8981 server
.vm_near_pages
= 0;
8982 server
.vm_stats_used_pages
= 0;
8983 server
.vm_stats_swapped_objects
= 0;
8984 server
.vm_stats_swapouts
= 0;
8985 server
.vm_stats_swapins
= 0;
8986 totsize
= server
.vm_pages
*server
.vm_page_size
;
8987 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8988 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8989 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8993 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8995 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8996 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8997 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8998 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
9000 /* Initialize threaded I/O (used by Virtual Memory) */
9001 server
.io_newjobs
= listCreate();
9002 server
.io_processing
= listCreate();
9003 server
.io_processed
= listCreate();
9004 server
.io_ready_clients
= listCreate();
9005 pthread_mutex_init(&server
.io_mutex
,NULL
);
9006 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
9007 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
9008 server
.io_active_threads
= 0;
9009 if (pipe(pipefds
) == -1) {
9010 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
9014 server
.io_ready_pipe_read
= pipefds
[0];
9015 server
.io_ready_pipe_write
= pipefds
[1];
9016 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
9017 /* LZF requires a lot of stack */
9018 pthread_attr_init(&server
.io_threads_attr
);
9019 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
9020 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
9021 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
9022 /* Listen for events in the threaded I/O pipe */
9023 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
9024 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
9025 oom("creating file event");
9028 /* Mark the page as used */
9029 static void vmMarkPageUsed(off_t page
) {
9030 off_t byte
= page
/8;
9032 redisAssert(vmFreePage(page
) == 1);
9033 server
.vm_bitmap
[byte
] |= 1<<bit
;
9036 /* Mark N contiguous pages as used, with 'page' being the first. */
9037 static void vmMarkPagesUsed(off_t page
, off_t count
) {
9040 for (j
= 0; j
< count
; j
++)
9041 vmMarkPageUsed(page
+j
);
9042 server
.vm_stats_used_pages
+= count
;
9043 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
9044 (long long)count
, (long long)page
);
9047 /* Mark the page as free */
9048 static void vmMarkPageFree(off_t page
) {
9049 off_t byte
= page
/8;
9051 redisAssert(vmFreePage(page
) == 0);
9052 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
9055 /* Mark N contiguous pages as free, with 'page' being the first. */
9056 static void vmMarkPagesFree(off_t page
, off_t count
) {
9059 for (j
= 0; j
< count
; j
++)
9060 vmMarkPageFree(page
+j
);
9061 server
.vm_stats_used_pages
-= count
;
9062 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
9063 (long long)count
, (long long)page
);
9066 /* Test if the page is free */
9067 static int vmFreePage(off_t page
) {
9068 off_t byte
= page
/8;
9070 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9073 /* Find N contiguous free pages storing the first page of the cluster in *first.
9074 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9075 * REDIS_ERR is returned.
9077 * This function uses a simple algorithm: we try to allocate
9078 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9079 * again from the start of the swap file searching for free spaces.
9081 * If it looks pretty clear that there are no free pages near our offset
9082 * we try to find less populated places doing a forward jump of
9083 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9084 * without hurry, and then we jump again and so forth...
9086 * This function can be improved using a free list to avoid to guess
9087 * too much, since we could collect data about freed pages.
9089 * note: I implemented this function just after watching an episode of
9090 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9092 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9093 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9095 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9096 server
.vm_near_pages
= 0;
9097 server
.vm_next_page
= 0;
9099 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9100 base
= server
.vm_next_page
;
9102 while(offset
< server
.vm_pages
) {
9103 off_t
this = base
+offset
;
9105 /* If we overflow, restart from page zero */
9106 if (this >= server
.vm_pages
) {
9107 this -= server
.vm_pages
;
9109 /* Just overflowed, what we found on tail is no longer
9110 * interesting, as it's no longer contiguous. */
9114 if (vmFreePage(this)) {
9115 /* This is a free page */
9117 /* Already got N free pages? Return to the caller, with success */
9119 *first
= this-(n
-1);
9120 server
.vm_next_page
= this+1;
9121 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9125 /* The current one is not a free page */
9129 /* Fast-forward if the current page is not free and we already
9130 * searched enough near this place. */
9132 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9133 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9135 /* Note that even if we rewind after the jump, we are don't need
9136 * to make sure numfree is set to zero as we only jump *if* it
9137 * is set to zero. */
9139 /* Otherwise just check the next page */
9146 /* Write the specified object at the specified page of the swap file */
9147 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9148 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9149 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9150 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9151 redisLog(REDIS_WARNING
,
9152 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9156 rdbSaveObject(server
.vm_fp
,o
);
9157 fflush(server
.vm_fp
);
9158 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9162 /* Transfers the 'val' object to disk. Store all the information
9163 * a 'vmpointer' object containing all the information needed to load the
9164 * object back later is returned.
9166 * If we can't find enough contiguous empty pages to swap the object on disk
9167 * NULL is returned. */
9168 static vmpointer
*vmSwapObjectBlocking(robj
*val
) {
9169 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9173 assert(val
->storage
== REDIS_VM_MEMORY
);
9174 assert(val
->refcount
== 1);
9175 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
;
9176 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
;
9178 vp
= createVmPointer(val
->type
);
9180 vp
->usedpages
= pages
;
9181 decrRefCount(val
); /* Deallocate the object from memory. */
9182 vmMarkPagesUsed(page
,pages
);
9183 redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)",
9185 (unsigned long long) page
, (unsigned long long) pages
);
9186 server
.vm_stats_swapped_objects
++;
9187 server
.vm_stats_swapouts
++;
9191 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9194 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9195 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9196 redisLog(REDIS_WARNING
,
9197 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9201 o
= rdbLoadObject(type
,server
.vm_fp
);
9203 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9206 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9210 /* Load the specified object from swap to memory.
9211 * The newly allocated object is returned.
9213 * If preview is true the unserialized object is returned to the caller but
9214 * the pages are not marked as freed, nor the vp object is freed. */
9215 static robj
*vmGenericLoadObject(vmpointer
*vp
, int preview
) {
9218 redisAssert(vp
->type
== REDIS_VMPOINTER
&&
9219 (vp
->storage
== REDIS_VM_SWAPPED
|| vp
->storage
== REDIS_VM_LOADING
));
9220 val
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
);
9222 redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
);
9223 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9225 server
.vm_stats_swapped_objects
--;
9227 redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
);
9229 server
.vm_stats_swapins
++;
9233 /* Plain object loading, from swap to memory.
9235 * 'o' is actually a redisVmPointer structure that will be freed by the call.
9236 * The return value is the loaded object. */
9237 static robj
*vmLoadObject(robj
*o
) {
9238 /* If we are loading the object in background, stop it, we
9239 * need to load this object synchronously ASAP. */
9240 if (o
->storage
== REDIS_VM_LOADING
)
9241 vmCancelThreadedIOJob(o
);
9242 return vmGenericLoadObject((vmpointer
*)o
,0);
9245 /* Just load the value on disk, without to modify the key.
9246 * This is useful when we want to perform some operation on the value
9247 * without to really bring it from swap to memory, like while saving the
9248 * dataset or rewriting the append only log. */
9249 static robj
*vmPreviewObject(robj
*o
) {
9250 return vmGenericLoadObject((vmpointer
*)o
,1);
9253 /* How a good candidate is this object for swapping?
9254 * The better candidate it is, the greater the returned value.
9256 * Currently we try to perform a fast estimation of the object size in
9257 * memory, and combine it with aging informations.
9259 * Basically swappability = idle-time * log(estimated size)
9261 * Bigger objects are preferred over smaller objects, but not
9262 * proportionally, this is why we use the logarithm. This algorithm is
9263 * just a first try and will probably be tuned later. */
9264 static double computeObjectSwappability(robj
*o
) {
9265 /* actual age can be >= minage, but not < minage. As we use wrapping
9266 * 21 bit clocks with minutes resolution for the LRU. */
9267 time_t minage
= abs(server
.lruclock
- o
->lru
);
9271 struct dictEntry
*de
;
9274 if (minage
<= 0) return 0;
9277 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9280 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9285 listNode
*ln
= listFirst(l
);
9287 asize
= sizeof(list
);
9289 robj
*ele
= ln
->value
;
9292 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9293 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9294 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9299 z
= (o
->type
== REDIS_ZSET
);
9300 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9302 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9303 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9308 de
= dictGetRandomKey(d
);
9309 ele
= dictGetEntryKey(de
);
9310 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9311 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9312 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9313 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9317 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9318 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9319 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9320 unsigned int klen
, vlen
;
9321 unsigned char *key
, *val
;
9323 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9327 asize
= len
*(klen
+vlen
+3);
9328 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9330 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9335 de
= dictGetRandomKey(d
);
9336 ele
= dictGetEntryKey(de
);
9337 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9338 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9339 ele
= dictGetEntryVal(de
);
9340 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9341 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9342 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9347 return (double)minage
*log(1+asize
);
9350 /* Try to swap an object that's a good candidate for swapping.
9351 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9352 * to swap any object at all.
9354 * If 'usethreaded' is true, Redis will try to swap the object in background
9355 * using I/O threads. */
9356 static int vmSwapOneObject(int usethreads
) {
9358 struct dictEntry
*best
= NULL
;
9359 double best_swappability
= 0;
9360 redisDb
*best_db
= NULL
;
9364 for (j
= 0; j
< server
.dbnum
; j
++) {
9365 redisDb
*db
= server
.db
+j
;
9366 /* Why maxtries is set to 100?
9367 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9368 * are swappable objects */
9371 if (dictSize(db
->dict
) == 0) continue;
9372 for (i
= 0; i
< 5; i
++) {
9374 double swappability
;
9376 if (maxtries
) maxtries
--;
9377 de
= dictGetRandomKey(db
->dict
);
9378 val
= dictGetEntryVal(de
);
9379 /* Only swap objects that are currently in memory.
9381 * Also don't swap shared objects: not a good idea in general and
9382 * we need to ensure that the main thread does not touch the
9383 * object while the I/O thread is using it, but we can't
9384 * control other keys without adding additional mutex. */
9385 if (val
->storage
!= REDIS_VM_MEMORY
|| val
->refcount
!= 1) {
9386 if (maxtries
) i
--; /* don't count this try */
9389 swappability
= computeObjectSwappability(val
);
9390 if (!best
|| swappability
> best_swappability
) {
9392 best_swappability
= swappability
;
9397 if (best
== NULL
) return REDIS_ERR
;
9398 key
= dictGetEntryKey(best
);
9399 val
= dictGetEntryVal(best
);
9401 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9402 key
, best_swappability
);
9406 vmSwapObjectThreaded(createStringObject(key
,sdslen(key
)),val
,best_db
);
9411 if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
9412 dictGetEntryVal(best
) = vp
;
/* Swap one object out without using the I/O threads.
 * Returns the value of vmSwapOneObject(). */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out asking vmSwapOneObject() to use the I/O threads.
 * Returns the value of vmSwapOneObject(). */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9428 /* Return true if it's safe to swap out objects in a given moment.
9429 * Basically we don't want to swap objects out while there is a BGSAVE
9430 * or a BGAEOREWRITE running in backgroud. */
9431 static int vmCanSwapOut(void) {
9432 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9435 /* =================== Virtual Memory - Threaded I/O ======================= */
9437 static void freeIOJob(iojob
*j
) {
9438 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9439 j
->type
== REDIS_IOJOB_DO_SWAP
||
9440 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9442 /* we fix the storage type, otherwise decrRefCount() will try to
9443 * kill the I/O thread Job (that does no longer exists). */
9444 if (j
->val
->storage
== REDIS_VM_SWAPPING
)
9445 j
->val
->storage
= REDIS_VM_MEMORY
;
9446 decrRefCount(j
->val
);
9448 decrRefCount(j
->key
);
9452 /* Every time a thread finished a Job, it writes a byte into the write side
9453 * of an unix pipe in order to "awake" the main thread, and this function
9455 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9459 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9461 REDIS_NOTUSED(mask
);
9462 REDIS_NOTUSED(privdata
);
9464 /* For every byte we read in the read side of the pipe, there is one
9465 * I/O job completed to process. */
9466 while((retval
= read(fd
,buf
,1)) == 1) {
9469 struct dictEntry
*de
;
9471 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9473 /* Get the processed element (the oldest one) */
9475 assert(listLength(server
.io_processed
) != 0);
9476 if (toprocess
== -1) {
9477 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9478 if (toprocess
<= 0) toprocess
= 1;
9480 ln
= listFirst(server
.io_processed
);
9482 listDelNode(server
.io_processed
,ln
);
9484 /* If this job is marked as canceled, just ignore it */
9489 /* Post process it in the main thread, as there are things we
9490 * can do just here to avoid race conditions and/or invasive locks */
9491 redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
);
9492 de
= dictFind(j
->db
->dict
,j
->key
->ptr
);
9493 redisAssert(de
!= NULL
);
9494 if (j
->type
== REDIS_IOJOB_LOAD
) {
9496 vmpointer
*vp
= dictGetEntryVal(de
);
9498 /* Key loaded, bring it at home */
9499 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9500 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9501 (unsigned char*) j
->key
->ptr
);
9502 server
.vm_stats_swapped_objects
--;
9503 server
.vm_stats_swapins
++;
9504 dictGetEntryVal(de
) = j
->val
;
9505 incrRefCount(j
->val
);
9507 /* Handle clients waiting for this key to be loaded. */
9508 handleClientsBlockedOnSwappedKey(db
,j
->key
);
9511 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9512 /* Now we know the amount of pages required to swap this object.
9513 * Let's find some space for it, and queue this task again
9514 * rebranded as REDIS_IOJOB_DO_SWAP. */
9515 if (!vmCanSwapOut() ||
9516 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9518 /* Ooops... no space or we can't swap as there is
9519 * a fork()ed Redis trying to save stuff on disk. */
9520 j
->val
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9523 /* Note that we need to mark this pages as used now,
9524 * if the job will be canceled, we'll mark them as freed
9526 vmMarkPagesUsed(j
->page
,j
->pages
);
9527 j
->type
= REDIS_IOJOB_DO_SWAP
;
9532 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9535 /* Key swapped. We can finally free some memory. */
9536 if (j
->val
->storage
!= REDIS_VM_SWAPPING
) {
9537 vmpointer
*vp
= (vmpointer
*) j
->id
;
9538 printf("storage: %d\n",vp
->storage
);
9539 printf("key->name: %s\n",(char*)j
->key
->ptr
);
9540 printf("val: %p\n",(void*)j
->val
);
9541 printf("val->type: %d\n",j
->val
->type
);
9542 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9544 redisAssert(j
->val
->storage
== REDIS_VM_SWAPPING
);
9545 vp
= createVmPointer(j
->val
->type
);
9547 vp
->usedpages
= j
->pages
;
9548 dictGetEntryVal(de
) = vp
;
9549 /* Fix the storage otherwise decrRefCount will attempt to
9550 * remove the associated I/O job */
9551 j
->val
->storage
= REDIS_VM_MEMORY
;
9552 decrRefCount(j
->val
);
9553 redisLog(REDIS_DEBUG
,
9554 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9555 (unsigned char*) j
->key
->ptr
,
9556 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9557 server
.vm_stats_swapped_objects
++;
9558 server
.vm_stats_swapouts
++;
9560 /* Put a few more swap requests in queue if we are still
9562 if (trytoswap
&& vmCanSwapOut() &&
9563 zmalloc_used_memory() > server
.vm_max_memory
)
9568 more
= listLength(server
.io_newjobs
) <
9569 (unsigned) server
.vm_max_threads
;
9571 /* Don't waste CPU time if swappable objects are rare. */
9572 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9580 if (processed
== toprocess
) return;
9582 if (retval
< 0 && errno
!= EAGAIN
) {
9583 redisLog(REDIS_WARNING
,
9584 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9589 static void lockThreadedIO(void) {
9590 pthread_mutex_lock(&server
.io_mutex
);
9593 static void unlockThreadedIO(void) {
9594 pthread_mutex_unlock(&server
.io_mutex
);
9597 /* Remove the specified object from the threaded I/O queue if still not
9598 * processed, otherwise make sure to flag it as canceled. */
9599 static void vmCancelThreadedIOJob(robj
*o
) {
9601 server
.io_newjobs
, /* 0 */
9602 server
.io_processing
, /* 1 */
9603 server
.io_processed
/* 2 */
9607 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9610 /* Search for a matching object in one of the queues */
9611 for (i
= 0; i
< 3; i
++) {
9615 listRewind(lists
[i
],&li
);
9616 while ((ln
= listNext(&li
)) != NULL
) {
9617 iojob
*job
= ln
->value
;
9619 if (job
->canceled
) continue; /* Skip this, already canceled. */
9621 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
9622 (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
);
9623 /* Mark the pages as free since the swap didn't happened
9624 * or happened but is now discarded. */
9625 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9626 vmMarkPagesFree(job
->page
,job
->pages
);
9627 /* Cancel the job. It depends on the list the job is
9630 case 0: /* io_newjobs */
9631 /* If the job was yet not processed the best thing to do
9632 * is to remove it from the queue at all */
9634 listDelNode(lists
[i
],ln
);
9636 case 1: /* io_processing */
9637 /* Oh Shi- the thread is messing with the Job:
9639 * Probably it's accessing the object if this is a
9640 * PREPARE_SWAP or DO_SWAP job.
9641 * If it's a LOAD job it may be reading from disk and
9642 * if we don't wait for the job to terminate before to
9643 * cancel it, maybe in a few microseconds data can be
9644 * corrupted in this pages. So the short story is:
9646 * Better to wait for the job to move into the
9647 * next queue (processed)... */
9649 /* We try again and again until the job is completed. */
9651 /* But let's wait some time for the I/O thread
9652 * to finish with this job. After all this condition
9653 * should be very rare. */
9656 case 2: /* io_processed */
9657 /* The job was already processed, that's easy...
9658 * just mark it as canceled so that we'll ignore it
9659 * when processing completed jobs. */
9663 /* Finally we have to adjust the storage type of the object
9664 * in order to "UNDO" the operaiton. */
9665 if (o
->storage
== REDIS_VM_LOADING
)
9666 o
->storage
= REDIS_VM_SWAPPED
;
9667 else if (o
->storage
== REDIS_VM_SWAPPING
)
9668 o
->storage
= REDIS_VM_MEMORY
;
9670 redisLog(REDIS_DEBUG
,"*** DONE");
9676 printf("Not found: %p\n", (void*)o
);
9677 redisAssert(1 != 1); /* We should never reach this */
9680 static void *IOThreadEntryPoint(void *arg
) {
9685 pthread_detach(pthread_self());
9687 /* Get a new job to process */
9689 if (listLength(server
.io_newjobs
) == 0) {
9690 /* No new jobs in queue, exit. */
9691 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9692 (long) pthread_self());
9693 server
.io_active_threads
--;
9697 ln
= listFirst(server
.io_newjobs
);
9699 listDelNode(server
.io_newjobs
,ln
);
9700 /* Add the job in the processing queue */
9701 j
->thread
= pthread_self();
9702 listAddNodeTail(server
.io_processing
,j
);
9703 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9705 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9706 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9708 /* Process the Job */
9709 if (j
->type
== REDIS_IOJOB_LOAD
) {
9710 vmpointer
*vp
= (vmpointer
*)j
->id
;
9711 j
->val
= vmReadObjectFromSwap(j
->page
,vp
->vtype
);
9712 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9713 FILE *fp
= fopen("/dev/null","w+");
9714 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9716 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9717 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9721 /* Done: insert the job into the processed queue */
9722 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9723 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9725 listDelNode(server
.io_processing
,ln
);
9726 listAddNodeTail(server
.io_processed
,j
);
9729 /* Signal the main thread there is new stuff to process */
9730 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9732 return NULL
; /* never reached */
9735 static void spawnIOThread(void) {
9737 sigset_t mask
, omask
;
9741 sigaddset(&mask
,SIGCHLD
);
9742 sigaddset(&mask
,SIGHUP
);
9743 sigaddset(&mask
,SIGPIPE
);
9744 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9745 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9746 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9750 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9751 server
.io_active_threads
++;
9754 /* We need to wait for the last thread to exit before we are able to
9755 * fork() in order to BGSAVE or BGREWRITEAOF. */
9756 static void waitEmptyIOJobsQueue(void) {
9758 int io_processed_len
;
9761 if (listLength(server
.io_newjobs
) == 0 &&
9762 listLength(server
.io_processing
) == 0 &&
9763 server
.io_active_threads
== 0)
9768 /* While waiting for empty jobs queue condition we post-process some
9769 * finshed job, as I/O threads may be hanging trying to write against
9770 * the io_ready_pipe_write FD but there are so much pending jobs that
9772 io_processed_len
= listLength(server
.io_processed
);
9774 if (io_processed_len
) {
9775 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9776 usleep(1000); /* 1 millisecond */
9778 usleep(10000); /* 10 milliseconds */
9783 static void vmReopenSwapFile(void) {
9784 /* Note: we don't close the old one as we are in the child process
9785 * and don't want to mess at all with the original file object. */
9786 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9787 if (server
.vm_fp
== NULL
) {
9788 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9789 server
.vm_swap_file
);
9792 server
.vm_fd
= fileno(server
.vm_fp
);
9795 /* This function must be called while with threaded IO locked */
9796 static void queueIOJob(iojob
*j
) {
9797 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9798 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9799 listAddNodeTail(server
.io_newjobs
,j
);
9800 if (server
.io_active_threads
< server
.vm_max_threads
)
9804 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9807 j
= zmalloc(sizeof(*j
));
9808 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9812 j
->id
= j
->val
= val
;
9815 j
->thread
= (pthread_t
) -1;
9816 val
->storage
= REDIS_VM_SWAPPING
;
9824 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9826 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9827 * If there is not already a job loading the key, it is craeted.
9828 * The key is added to the io_keys list in the client structure, and also
9829 * in the hash table mapping swapped keys to waiting clients, that is,
9830 * server.io_waited_keys. */
9831 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9832 struct dictEntry
*de
;
9836 /* If the key does not exist or is already in RAM we don't need to
9837 * block the client at all. */
9838 de
= dictFind(c
->db
->dict
,key
->ptr
);
9839 if (de
== NULL
) return 0;
9840 o
= dictGetEntryVal(de
);
9841 if (o
->storage
== REDIS_VM_MEMORY
) {
9843 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9844 /* We were swapping the key, undo it! */
9845 vmCancelThreadedIOJob(o
);
9849 /* OK: the key is either swapped, or being loaded just now. */
9851 /* Add the key to the list of keys this client is waiting for.
9852 * This maps clients to keys they are waiting for. */
9853 listAddNodeTail(c
->io_keys
,key
);
9856 /* Add the client to the swapped keys => clients waiting map. */
9857 de
= dictFind(c
->db
->io_keys
,key
);
9861 /* For every key we take a list of clients blocked for it */
9863 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9865 assert(retval
== DICT_OK
);
9867 l
= dictGetEntryVal(de
);
9869 listAddNodeTail(l
,c
);
9871 /* Are we already loading the key from disk? If not create a job */
9872 if (o
->storage
== REDIS_VM_SWAPPED
) {
9874 vmpointer
*vp
= (vmpointer
*)o
;
9876 o
->storage
= REDIS_VM_LOADING
;
9877 j
= zmalloc(sizeof(*j
));
9878 j
->type
= REDIS_IOJOB_LOAD
;
9886 j
->thread
= (pthread_t
) -1;
9894 /* Preload keys for any command with first, last and step values for
9895 * the command keys prototype, as defined in the command table. */
9896 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9898 if (cmd
->vm_firstkey
== 0) return;
9899 last
= cmd
->vm_lastkey
;
9900 if (last
< 0) last
= argc
+last
;
9901 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
9902 redisAssert(j
< argc
);
9903 waitForSwappedKey(c
,argv
[j
]);
9907 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
9908 * Note that the number of keys to preload is user-defined, so we need to
9909 * apply a sanity check against argc. */
9910 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9914 num
= atoi(argv
[2]->ptr
);
9915 if (num
> (argc
-3)) return;
9916 for (i
= 0; i
< num
; i
++) {
9917 waitForSwappedKey(c
,argv
[3+i
]);
9921 /* Preload keys needed to execute the entire MULTI/EXEC block.
9923 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9924 * and will block the client when any command requires a swapped out value. */
9925 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
9927 struct redisCommand
*mcmd
;
9930 REDIS_NOTUSED(argc
);
9931 REDIS_NOTUSED(argv
);
9933 if (!(c
->flags
& REDIS_MULTI
)) return;
9934 for (i
= 0; i
< c
->mstate
.count
; i
++) {
9935 mcmd
= c
->mstate
.commands
[i
].cmd
;
9936 margc
= c
->mstate
.commands
[i
].argc
;
9937 margv
= c
->mstate
.commands
[i
].argv
;
9939 if (mcmd
->vm_preload_proc
!= NULL
) {
9940 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
9942 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
9947 /* Is this client attempting to run a command against swapped keys?
9948 * If so, block it ASAP, load the keys in background, then resume it.
9950 * The important idea about this function is that it can fail! If keys will
9951 * still be swapped when the client is resumed, this key lookups will
9952 * just block loading keys from disk. In practical terms this should only
9953 * happen with SORT BY command or if there is a bug in this function.
9955 * Return 1 if the client is marked as blocked, 0 if the client can
9956 * continue as the keys it is going to access appear to be in memory. */
9957 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
9958 if (cmd
->vm_preload_proc
!= NULL
) {
9959 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
9961 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
9964 /* If the client was blocked for at least one key, mark it as blocked. */
9965 if (listLength(c
->io_keys
)) {
9966 c
->flags
|= REDIS_IO_WAIT
;
9967 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9968 server
.vm_blocked_clients
++;
9975 /* Remove the 'key' from the list of blocked keys for a given client.
9977 * The function returns 1 when there are no longer blocking keys after
9978 * the current one was removed (and the client can be unblocked). */
9979 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9983 struct dictEntry
*de
;
9985 /* Remove the key from the list of keys this client is waiting for. */
9986 listRewind(c
->io_keys
,&li
);
9987 while ((ln
= listNext(&li
)) != NULL
) {
9988 if (equalStringObjects(ln
->value
,key
)) {
9989 listDelNode(c
->io_keys
,ln
);
9995 /* Remove the client form the key => waiting clients map. */
9996 de
= dictFind(c
->db
->io_keys
,key
);
9998 l
= dictGetEntryVal(de
);
9999 ln
= listSearchKey(l
,c
);
10000 assert(ln
!= NULL
);
10002 if (listLength(l
) == 0)
10003 dictDelete(c
->db
->io_keys
,key
);
10005 return listLength(c
->io_keys
) == 0;
10008 /* Every time we now a key was loaded back in memory, we handle clients
10009 * waiting for this key if any. */
10010 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
10011 struct dictEntry
*de
;
10016 de
= dictFind(db
->io_keys
,key
);
10019 l
= dictGetEntryVal(de
);
10020 len
= listLength(l
);
10021 /* Note: we can't use something like while(listLength(l)) as the list
10022 * can be freed by the calling function when we remove the last element. */
10025 redisClient
*c
= ln
->value
;
10027 if (dontWaitForSwappedKey(c
,key
)) {
10028 /* Put the client in the list of clients ready to go as we
10029 * loaded all the keys about it. */
10030 listAddNodeTail(server
.io_ready_clients
,c
);
10035 /* =========================== Remote Configuration ========================= */
10037 static void configSetCommand(redisClient
*c
) {
10038 robj
*o
= getDecodedObject(c
->argv
[3]);
10041 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
10042 zfree(server
.dbfilename
);
10043 server
.dbfilename
= zstrdup(o
->ptr
);
10044 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
10045 zfree(server
.requirepass
);
10046 server
.requirepass
= zstrdup(o
->ptr
);
10047 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
10048 zfree(server
.masterauth
);
10049 server
.masterauth
= zstrdup(o
->ptr
);
10050 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
10051 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10052 ll
< 0) goto badfmt
;
10053 server
.maxmemory
= ll
;
10054 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
10055 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10056 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
10057 server
.maxidletime
= ll
;
10058 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
10059 if (!strcasecmp(o
->ptr
,"no")) {
10060 server
.appendfsync
= APPENDFSYNC_NO
;
10061 } else if (!strcasecmp(o
->ptr
,"everysec")) {
10062 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10063 } else if (!strcasecmp(o
->ptr
,"always")) {
10064 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10068 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10069 int yn
= yesnotoi(o
->ptr
);
10071 if (yn
== -1) goto badfmt
;
10072 server
.no_appendfsync_on_rewrite
= yn
;
10073 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10074 int old
= server
.appendonly
;
10075 int new = yesnotoi(o
->ptr
);
10077 if (new == -1) goto badfmt
;
10082 if (startAppendOnly() == REDIS_ERR
) {
10083 addReplySds(c
,sdscatprintf(sdsempty(),
10084 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10090 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10092 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10094 /* Perform sanity check before setting the new config:
10095 * - Even number of args
10096 * - Seconds >= 1, changes >= 0 */
10098 sdsfreesplitres(v
,vlen
);
10101 for (j
= 0; j
< vlen
; j
++) {
10105 val
= strtoll(v
[j
], &eptr
, 10);
10106 if (eptr
[0] != '\0' ||
10107 ((j
& 1) == 0 && val
< 1) ||
10108 ((j
& 1) == 1 && val
< 0)) {
10109 sdsfreesplitres(v
,vlen
);
10113 /* Finally set the new config */
10114 resetServerSaveParams();
10115 for (j
= 0; j
< vlen
; j
+= 2) {
10119 seconds
= strtoll(v
[j
],NULL
,10);
10120 changes
= strtoll(v
[j
+1],NULL
,10);
10121 appendServerSaveParams(seconds
, changes
);
10123 sdsfreesplitres(v
,vlen
);
10125 addReplySds(c
,sdscatprintf(sdsempty(),
10126 "-ERR not supported CONFIG parameter %s\r\n",
10127 (char*)c
->argv
[2]->ptr
));
10132 addReply(c
,shared
.ok
);
10135 badfmt
: /* Bad format errors */
10136 addReplySds(c
,sdscatprintf(sdsempty(),
10137 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10139 (char*)c
->argv
[2]->ptr
));
10143 static void configGetCommand(redisClient
*c
) {
10144 robj
*o
= getDecodedObject(c
->argv
[2]);
10145 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10146 char *pattern
= o
->ptr
;
10149 addReply(c
,lenobj
);
10150 decrRefCount(lenobj
);
10152 if (stringmatch(pattern
,"dbfilename",0)) {
10153 addReplyBulkCString(c
,"dbfilename");
10154 addReplyBulkCString(c
,server
.dbfilename
);
10157 if (stringmatch(pattern
,"requirepass",0)) {
10158 addReplyBulkCString(c
,"requirepass");
10159 addReplyBulkCString(c
,server
.requirepass
);
10162 if (stringmatch(pattern
,"masterauth",0)) {
10163 addReplyBulkCString(c
,"masterauth");
10164 addReplyBulkCString(c
,server
.masterauth
);
10167 if (stringmatch(pattern
,"maxmemory",0)) {
10170 ll2string(buf
,128,server
.maxmemory
);
10171 addReplyBulkCString(c
,"maxmemory");
10172 addReplyBulkCString(c
,buf
);
10175 if (stringmatch(pattern
,"timeout",0)) {
10178 ll2string(buf
,128,server
.maxidletime
);
10179 addReplyBulkCString(c
,"timeout");
10180 addReplyBulkCString(c
,buf
);
10183 if (stringmatch(pattern
,"appendonly",0)) {
10184 addReplyBulkCString(c
,"appendonly");
10185 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10188 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10189 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10190 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10193 if (stringmatch(pattern
,"appendfsync",0)) {
10196 switch(server
.appendfsync
) {
10197 case APPENDFSYNC_NO
: policy
= "no"; break;
10198 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10199 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10200 default: policy
= "unknown"; break; /* too harmless to panic */
10202 addReplyBulkCString(c
,"appendfsync");
10203 addReplyBulkCString(c
,policy
);
10206 if (stringmatch(pattern
,"save",0)) {
10207 sds buf
= sdsempty();
10210 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10211 buf
= sdscatprintf(buf
,"%ld %d",
10212 server
.saveparams
[j
].seconds
,
10213 server
.saveparams
[j
].changes
);
10214 if (j
!= server
.saveparamslen
-1)
10215 buf
= sdscatlen(buf
," ",1);
10217 addReplyBulkCString(c
,"save");
10218 addReplyBulkCString(c
,buf
);
10223 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10226 static void configCommand(redisClient
*c
) {
10227 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10228 if (c
->argc
!= 4) goto badarity
;
10229 configSetCommand(c
);
10230 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10231 if (c
->argc
!= 3) goto badarity
;
10232 configGetCommand(c
);
10233 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10234 if (c
->argc
!= 2) goto badarity
;
10235 server
.stat_numcommands
= 0;
10236 server
.stat_numconnections
= 0;
10237 server
.stat_expiredkeys
= 0;
10238 server
.stat_starttime
= time(NULL
);
10239 addReply(c
,shared
.ok
);
10241 addReplySds(c
,sdscatprintf(sdsempty(),
10242 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10247 addReplySds(c
,sdscatprintf(sdsempty(),
10248 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10249 (char*) c
->argv
[1]->ptr
));
10252 /* =========================== Pubsub implementation ======================== */
10254 static void freePubsubPattern(void *p
) {
10255 pubsubPattern
*pat
= p
;
10257 decrRefCount(pat
->pattern
);
10261 static int listMatchPubsubPattern(void *a
, void *b
) {
10262 pubsubPattern
*pa
= a
, *pb
= b
;
10264 return (pa
->client
== pb
->client
) &&
10265 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10268 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10269 * 0 if the client was already subscribed to that channel. */
10270 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10271 struct dictEntry
*de
;
10272 list
*clients
= NULL
;
10275 /* Add the channel to the client -> channels hash table */
10276 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10278 incrRefCount(channel
);
10279 /* Add the client to the channel -> list of clients hash table */
10280 de
= dictFind(server
.pubsub_channels
,channel
);
10282 clients
= listCreate();
10283 dictAdd(server
.pubsub_channels
,channel
,clients
);
10284 incrRefCount(channel
);
10286 clients
= dictGetEntryVal(de
);
10288 listAddNodeTail(clients
,c
);
10290 /* Notify the client */
10291 addReply(c
,shared
.mbulk3
);
10292 addReply(c
,shared
.subscribebulk
);
10293 addReplyBulk(c
,channel
);
10294 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10298 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10299 * 0 if the client was not subscribed to the specified channel. */
10300 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10301 struct dictEntry
*de
;
10306 /* Remove the channel from the client -> channels hash table */
10307 incrRefCount(channel
); /* channel may be just a pointer to the same object
10308 we have in the hash tables. Protect it... */
10309 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10311 /* Remove the client from the channel -> clients list hash table */
10312 de
= dictFind(server
.pubsub_channels
,channel
);
10313 assert(de
!= NULL
);
10314 clients
= dictGetEntryVal(de
);
10315 ln
= listSearchKey(clients
,c
);
10316 assert(ln
!= NULL
);
10317 listDelNode(clients
,ln
);
10318 if (listLength(clients
) == 0) {
10319 /* Free the list and associated hash entry at all if this was
10320 * the latest client, so that it will be possible to abuse
10321 * Redis PUBSUB creating millions of channels. */
10322 dictDelete(server
.pubsub_channels
,channel
);
10325 /* Notify the client */
10327 addReply(c
,shared
.mbulk3
);
10328 addReply(c
,shared
.unsubscribebulk
);
10329 addReplyBulk(c
,channel
);
10330 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10331 listLength(c
->pubsub_patterns
));
10334 decrRefCount(channel
); /* it is finally safe to release it */
10338 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10339 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10342 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10344 pubsubPattern
*pat
;
10345 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10346 incrRefCount(pattern
);
10347 pat
= zmalloc(sizeof(*pat
));
10348 pat
->pattern
= getDecodedObject(pattern
);
10350 listAddNodeTail(server
.pubsub_patterns
,pat
);
10352 /* Notify the client */
10353 addReply(c
,shared
.mbulk3
);
10354 addReply(c
,shared
.psubscribebulk
);
10355 addReplyBulk(c
,pattern
);
10356 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10360 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10361 * 0 if the client was not subscribed to the specified channel. */
10362 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10367 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10368 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10370 listDelNode(c
->pubsub_patterns
,ln
);
10372 pat
.pattern
= pattern
;
10373 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10374 listDelNode(server
.pubsub_patterns
,ln
);
10376 /* Notify the client */
10378 addReply(c
,shared
.mbulk3
);
10379 addReply(c
,shared
.punsubscribebulk
);
10380 addReplyBulk(c
,pattern
);
10381 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10382 listLength(c
->pubsub_patterns
));
10384 decrRefCount(pattern
);
10388 /* Unsubscribe from all the channels. Return the number of channels the
10389 * client was subscribed from. */
10390 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10391 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10395 while((de
= dictNext(di
)) != NULL
) {
10396 robj
*channel
= dictGetEntryKey(de
);
10398 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10400 dictReleaseIterator(di
);
10404 /* Unsubscribe from all the patterns. Return the number of patterns the
10405 * client was subscribed from. */
10406 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10411 listRewind(c
->pubsub_patterns
,&li
);
10412 while ((ln
= listNext(&li
)) != NULL
) {
10413 robj
*pattern
= ln
->value
;
10415 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10420 /* Publish a message */
10421 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10423 struct dictEntry
*de
;
10427 /* Send to clients listening for that channel */
10428 de
= dictFind(server
.pubsub_channels
,channel
);
10430 list
*list
= dictGetEntryVal(de
);
10434 listRewind(list
,&li
);
10435 while ((ln
= listNext(&li
)) != NULL
) {
10436 redisClient
*c
= ln
->value
;
10438 addReply(c
,shared
.mbulk3
);
10439 addReply(c
,shared
.messagebulk
);
10440 addReplyBulk(c
,channel
);
10441 addReplyBulk(c
,message
);
10445 /* Send to clients listening to matching channels */
10446 if (listLength(server
.pubsub_patterns
)) {
10447 listRewind(server
.pubsub_patterns
,&li
);
10448 channel
= getDecodedObject(channel
);
10449 while ((ln
= listNext(&li
)) != NULL
) {
10450 pubsubPattern
*pat
= ln
->value
;
10452 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10453 sdslen(pat
->pattern
->ptr
),
10454 (char*)channel
->ptr
,
10455 sdslen(channel
->ptr
),0)) {
10456 addReply(pat
->client
,shared
.mbulk4
);
10457 addReply(pat
->client
,shared
.pmessagebulk
);
10458 addReplyBulk(pat
->client
,pat
->pattern
);
10459 addReplyBulk(pat
->client
,channel
);
10460 addReplyBulk(pat
->client
,message
);
10464 decrRefCount(channel
);
10469 static void subscribeCommand(redisClient
*c
) {
10472 for (j
= 1; j
< c
->argc
; j
++)
10473 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10476 static void unsubscribeCommand(redisClient
*c
) {
10477 if (c
->argc
== 1) {
10478 pubsubUnsubscribeAllChannels(c
,1);
10483 for (j
= 1; j
< c
->argc
; j
++)
10484 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10488 static void psubscribeCommand(redisClient
*c
) {
10491 for (j
= 1; j
< c
->argc
; j
++)
10492 pubsubSubscribePattern(c
,c
->argv
[j
]);
10495 static void punsubscribeCommand(redisClient
*c
) {
10496 if (c
->argc
== 1) {
10497 pubsubUnsubscribeAllPatterns(c
,1);
10502 for (j
= 1; j
< c
->argc
; j
++)
10503 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10507 static void publishCommand(redisClient
*c
) {
10508 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10509 addReplyLongLong(c
,receivers
);
10512 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10514 * The implementation uses a per-DB hash table mapping keys to list of clients
10515 * WATCHing those keys, so that given a key that is going to be modified
10516 * we can mark all the associated clients as dirty.
10518 * Also every client contains a list of WATCHed keys so that's possible to
10519 * un-watch such keys when the client is freed or when UNWATCH is called. */
10521 /* In the client->watched_keys list we need to use watchedKey structures
10522 * as in order to identify a key in Redis we need both the key name and the
10524 typedef struct watchedKey
{
10529 /* Watch for the specified key */
10530 static void watchForKey(redisClient
*c
, robj
*key
) {
10531 list
*clients
= NULL
;
10536 /* Check if we are already watching for this key */
10537 listRewind(c
->watched_keys
,&li
);
10538 while((ln
= listNext(&li
))) {
10539 wk
= listNodeValue(ln
);
10540 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10541 return; /* Key already watched */
10543 /* This key is not already watched in this DB. Let's add it */
10544 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10546 clients
= listCreate();
10547 dictAdd(c
->db
->watched_keys
,key
,clients
);
10550 listAddNodeTail(clients
,c
);
10551 /* Add the new key to the lits of keys watched by this client */
10552 wk
= zmalloc(sizeof(*wk
));
10556 listAddNodeTail(c
->watched_keys
,wk
);
10559 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10560 * flag is up to the caller. */
10561 static void unwatchAllKeys(redisClient
*c
) {
10565 if (listLength(c
->watched_keys
) == 0) return;
10566 listRewind(c
->watched_keys
,&li
);
10567 while((ln
= listNext(&li
))) {
10571 /* Lookup the watched key -> clients list and remove the client
10573 wk
= listNodeValue(ln
);
10574 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10575 assert(clients
!= NULL
);
10576 listDelNode(clients
,listSearchKey(clients
,c
));
10577 /* Kill the entry at all if this was the only client */
10578 if (listLength(clients
) == 0)
10579 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10580 /* Remove this watched key from the client->watched list */
10581 listDelNode(c
->watched_keys
,ln
);
10582 decrRefCount(wk
->key
);
10587 /* "Touch" a key, so that if this key is being WATCHed by some client the
10588 * next EXEC will fail. */
10589 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10594 if (dictSize(db
->watched_keys
) == 0) return;
10595 clients
= dictFetchValue(db
->watched_keys
, key
);
10596 if (!clients
) return;
10598 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10599 /* Check if we are already watching for this key */
10600 listRewind(clients
,&li
);
10601 while((ln
= listNext(&li
))) {
10602 redisClient
*c
= listNodeValue(ln
);
10604 c
->flags
|= REDIS_DIRTY_CAS
;
10608 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10609 * flush but will be deleted as effect of the flushing operation should
10610 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10611 * a FLUSHALL operation (all the DBs flushed). */
10612 static void touchWatchedKeysOnFlush(int dbid
) {
10616 /* For every client, check all the waited keys */
10617 listRewind(server
.clients
,&li1
);
10618 while((ln
= listNext(&li1
))) {
10619 redisClient
*c
= listNodeValue(ln
);
10620 listRewind(c
->watched_keys
,&li2
);
10621 while((ln
= listNext(&li2
))) {
10622 watchedKey
*wk
= listNodeValue(ln
);
10624 /* For every watched key matching the specified DB, if the
10625 * key exists, mark the client as dirty, as the key will be
10627 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10628 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
)
10629 c
->flags
|= REDIS_DIRTY_CAS
;
10635 static void watchCommand(redisClient
*c
) {
10638 if (c
->flags
& REDIS_MULTI
) {
10639 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10642 for (j
= 1; j
< c
->argc
; j
++)
10643 watchForKey(c
,c
->argv
[j
]);
10644 addReply(c
,shared
.ok
);
10647 static void unwatchCommand(redisClient
*c
) {
10649 c
->flags
&= (~REDIS_DIRTY_CAS
);
10650 addReply(c
,shared
.ok
);
10653 /* ================================= Debugging ============================== */
10655 /* Compute the sha1 of string at 's' with 'len' bytes long.
10656 * The SHA1 is then xored againt the string pointed by digest.
10657 * Since xor is commutative, this operation is used in order to
10658 * "add" digests relative to unordered elements.
10660 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10661 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10663 unsigned char hash
[20], *s
= ptr
;
10667 SHA1Update(&ctx
,s
,len
);
10668 SHA1Final(hash
,&ctx
);
10670 for (j
= 0; j
< 20; j
++)
10671 digest
[j
] ^= hash
[j
];
10674 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10675 o
= getDecodedObject(o
);
10676 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10680 /* This function instead of just computing the SHA1 and xoring it
10681 * against diget, also perform the digest of "digest" itself and
10682 * replace the old value with the new one.
10684 * So the final digest will be:
10686 * digest = SHA1(digest xor SHA1(data))
10688 * This function is used every time we want to preserve the order so
10689 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10691 * Also note that mixdigest("foo") followed by mixdigest("bar")
10692 * will lead to a different digest compared to "fo", "obar".
10694 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10698 xorDigest(digest
,s
,len
);
10700 SHA1Update(&ctx
,digest
,20);
10701 SHA1Final(digest
,&ctx
);
10704 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10705 o
= getDecodedObject(o
);
10706 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10710 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10711 * are not ordered, we use a trick: every aggregate digest is the xor
10712 * of the digests of their elements. This way the order will not change
10713 * the result. For list instead we use a feedback entering the output digest
10714 * as input in order to ensure that a different ordered list will result in
10715 * a different digest. */
10716 static void computeDatasetDigest(unsigned char *final
) {
10717 unsigned char digest
[20];
10719 dictIterator
*di
= NULL
;
10724 memset(final
,0,20); /* Start with a clean result */
10726 for (j
= 0; j
< server
.dbnum
; j
++) {
10727 redisDb
*db
= server
.db
+j
;
10729 if (dictSize(db
->dict
) == 0) continue;
10730 di
= dictGetIterator(db
->dict
);
10732 /* hash the DB id, so the same dataset moved in a different
10733 * DB will lead to a different digest */
10735 mixDigest(final
,&aux
,sizeof(aux
));
10737 /* Iterate this DB writing every entry */
10738 while((de
= dictNext(di
)) != NULL
) {
10743 memset(digest
,0,20); /* This key-val digest */
10744 key
= dictGetEntryKey(de
);
10745 keyobj
= createStringObject(key
,sdslen(key
));
10747 mixDigest(digest
,key
,sdslen(key
));
10749 /* Make sure the key is loaded if VM is active */
10750 o
= lookupKeyRead(db
,keyobj
);
10752 aux
= htonl(o
->type
);
10753 mixDigest(digest
,&aux
,sizeof(aux
));
10754 expiretime
= getExpire(db
,keyobj
);
10756 /* Save the key and associated value */
10757 if (o
->type
== REDIS_STRING
) {
10758 mixObjectDigest(digest
,o
);
10759 } else if (o
->type
== REDIS_LIST
) {
10760 list
*list
= o
->ptr
;
10764 listRewind(list
,&li
);
10765 while((ln
= listNext(&li
))) {
10766 robj
*eleobj
= listNodeValue(ln
);
10768 mixObjectDigest(digest
,eleobj
);
10770 } else if (o
->type
== REDIS_SET
) {
10771 dict
*set
= o
->ptr
;
10772 dictIterator
*di
= dictGetIterator(set
);
10775 while((de
= dictNext(di
)) != NULL
) {
10776 robj
*eleobj
= dictGetEntryKey(de
);
10778 xorObjectDigest(digest
,eleobj
);
10780 dictReleaseIterator(di
);
10781 } else if (o
->type
== REDIS_ZSET
) {
10783 dictIterator
*di
= dictGetIterator(zs
->dict
);
10786 while((de
= dictNext(di
)) != NULL
) {
10787 robj
*eleobj
= dictGetEntryKey(de
);
10788 double *score
= dictGetEntryVal(de
);
10789 unsigned char eledigest
[20];
10791 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
10792 memset(eledigest
,0,20);
10793 mixObjectDigest(eledigest
,eleobj
);
10794 mixDigest(eledigest
,buf
,strlen(buf
));
10795 xorDigest(digest
,eledigest
,20);
10797 dictReleaseIterator(di
);
10798 } else if (o
->type
== REDIS_HASH
) {
10802 hi
= hashInitIterator(o
);
10803 while (hashNext(hi
) != REDIS_ERR
) {
10804 unsigned char eledigest
[20];
10806 memset(eledigest
,0,20);
10807 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
10808 mixObjectDigest(eledigest
,obj
);
10810 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
10811 mixObjectDigest(eledigest
,obj
);
10813 xorDigest(digest
,eledigest
,20);
10815 hashReleaseIterator(hi
);
10817 redisPanic("Unknown object type");
10819 /* If the key has an expire, add it to the mix */
10820 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
10821 /* We can finally xor the key-val digest to the final digest */
10822 xorDigest(final
,digest
,20);
10823 decrRefCount(keyobj
);
10825 dictReleaseIterator(di
);
10829 static void debugCommand(redisClient
*c
) {
10830 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
10831 *((char*)-1) = 'x';
10832 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
10833 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
10834 addReply(c
,shared
.err
);
10838 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
10839 addReply(c
,shared
.err
);
10842 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
10843 addReply(c
,shared
.ok
);
10844 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
10846 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
10847 addReply(c
,shared
.err
);
10850 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
10851 addReply(c
,shared
.ok
);
10852 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
10853 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
10857 addReply(c
,shared
.nokeyerr
);
10860 val
= dictGetEntryVal(de
);
10861 if (!server
.vm_enabled
|| (val
->storage
== REDIS_VM_MEMORY
||
10862 val
->storage
== REDIS_VM_SWAPPING
)) {
10866 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
10867 strenc
= strencoding
[val
->encoding
];
10869 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10872 addReplySds(c
,sdscatprintf(sdsempty(),
10873 "+Value at:%p refcount:%d "
10874 "encoding:%s serializedlength:%lld\r\n",
10875 (void*)val
, val
->refcount
,
10876 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
10878 vmpointer
*vp
= (vmpointer
*) val
;
10879 addReplySds(c
,sdscatprintf(sdsempty(),
10880 "+Value swapped at: page %llu "
10881 "using %llu pages\r\n",
10882 (unsigned long long) vp
->page
,
10883 (unsigned long long) vp
->usedpages
));
10885 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10886 lookupKeyRead(c
->db
,c
->argv
[2]);
10887 addReply(c
,shared
.ok
);
10888 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10889 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
10893 if (!server
.vm_enabled
) {
10894 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10898 addReply(c
,shared
.nokeyerr
);
10901 val
= dictGetEntryVal(de
);
10903 if (val
->storage
!= REDIS_VM_MEMORY
) {
10904 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10905 } else if (val
->refcount
!= 1) {
10906 addReplySds(c
,sdsnew("-ERR Object is shared\r\n"));
10907 } else if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
10908 dictGetEntryVal(de
) = vp
;
10909 addReply(c
,shared
.ok
);
10911 addReply(c
,shared
.err
);
10913 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
10918 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
10920 for (j
= 0; j
< keys
; j
++) {
10921 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
10922 key
= createStringObject(buf
,strlen(buf
));
10923 if (lookupKeyRead(c
->db
,key
) != NULL
) {
10927 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
10928 val
= createStringObject(buf
,strlen(buf
));
10929 dbAdd(c
->db
,key
,val
);
10932 addReply(c
,shared
.ok
);
10933 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
10934 unsigned char digest
[20];
10935 sds d
= sdsnew("+");
10938 computeDatasetDigest(digest
);
10939 for (j
= 0; j
< 20; j
++)
10940 d
= sdscatprintf(d
, "%02x",digest
[j
]);
10942 d
= sdscatlen(d
,"\r\n",2);
10945 addReplySds(c
,sdsnew(
10946 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
10950 static void _redisAssert(char *estr
, char *file
, int line
) {
10951 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10952 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
10953 #ifdef HAVE_BACKTRACE
10954 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10955 *((char*)-1) = 'x';
10959 static void _redisPanic(char *msg
, char *file
, int line
) {
10960 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10961 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10962 #ifdef HAVE_BACKTRACE
10963 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10964 *((char*)-1) = 'x';
10968 /* =================================== Main! ================================ */
/* Read /proc/sys/vm/overcommit_memory and return its content converted with
 * atoi(). Returns -1 when the file cannot be opened or no line can be read
 * from it. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,64,procfile) != NULL) value = atoi(line);
    fclose(procfile);

    return value;
}
10985 void linuxOvercommitMemoryWarning(void) {
10986 if (linuxOvercommitMemoryValue() == 0) {
10987 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10990 #endif /* __linux__ */
10992 static void daemonize(void) {
10996 if (fork() != 0) exit(0); /* parent exits */
10997 setsid(); /* create a new session */
10999 /* Every output goes to /dev/null. If Redis is daemonized but
11000 * the 'logfile' is set to 'stdout' in the configuration file
11001 * it will not log at all. */
11002 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
11003 dup2(fd
, STDIN_FILENO
);
11004 dup2(fd
, STDOUT_FILENO
);
11005 dup2(fd
, STDERR_FILENO
);
11006 if (fd
> STDERR_FILENO
) close(fd
);
11008 /* Try to write the pid file */
11009 fp
= fopen(server
.pidfile
,"w");
11011 fprintf(fp
,"%d\n",getpid());
11016 static void version() {
11017 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
11018 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on stderr and exit with status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n", stderr);
    fputs(" ./redis-server - (read config from stdin)\n", stderr);
    exit(1);
}
11028 int main(int argc
, char **argv
) {
11031 initServerConfig();
11032 sortCommandTable();
11034 if (strcmp(argv
[1], "-v") == 0 ||
11035 strcmp(argv
[1], "--version") == 0) version();
11036 if (strcmp(argv
[1], "--help") == 0) usage();
11037 resetServerSaveParams();
11038 loadServerConfig(argv
[1]);
11039 } else if ((argc
> 2)) {
11042 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
11044 if (server
.daemonize
) daemonize();
11046 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
11048 linuxOvercommitMemoryWarning();
11050 start
= time(NULL
);
11051 if (server
.appendonly
) {
11052 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
11053 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
11055 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11056 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11058 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11059 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11061 aeDeleteEventLoop(server
.el
);
11065 /* ============================= Backtrace support ========================= */
11067 #ifdef HAVE_BACKTRACE
11068 static char *findFuncName(void *pointer
, unsigned long *offset
);
11070 static void *getMcontextEip(ucontext_t
*uc
) {
11071 #if defined(__FreeBSD__)
11072 return (void*) uc
->uc_mcontext
.mc_eip
;
11073 #elif defined(__dietlibc__)
11074 return (void*) uc
->uc_mcontext
.eip
;
11075 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11077 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11079 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11081 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11082 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11083 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11085 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11087 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11088 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11089 #elif defined(__ia64__) /* Linux IA64 */
11090 return (void*) uc
->uc_mcontext
.sc_ip
;
11096 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11098 char **messages
= NULL
;
11099 int i
, trace_size
= 0;
11100 unsigned long offset
=0;
11101 ucontext_t
*uc
= (ucontext_t
*) secret
;
11103 REDIS_NOTUSED(info
);
11105 redisLog(REDIS_WARNING
,
11106 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11107 infostring
= genRedisInfoString();
11108 redisLog(REDIS_WARNING
, "%s",infostring
);
11109 /* It's not safe to sdsfree() the returned string under memory
11110 * corruption conditions. Let it leak as we are going to abort */
11112 trace_size
= backtrace(trace
, 100);
11113 /* overwrite sigaction with caller's address */
11114 if (getMcontextEip(uc
) != NULL
) {
11115 trace
[1] = getMcontextEip(uc
);
11117 messages
= backtrace_symbols(trace
, trace_size
);
11119 for (i
=1; i
<trace_size
; ++i
) {
11120 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11122 p
= strchr(messages
[i
],'+');
11123 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11124 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11126 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11129 /* free(messages); Don't call free() with possibly corrupted memory. */
11133 static void sigtermHandler(int sig
) {
11134 REDIS_NOTUSED(sig
);
11136 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11137 server
.shutdown_asap
= 1;
11140 static void setupSigSegvAction(void) {
11141 struct sigaction act
;
11143 sigemptyset (&act
.sa_mask
);
11144 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11145 * is used. Otherwise, sa_handler is used */
11146 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11147 act
.sa_sigaction
= segvHandler
;
11148 sigaction (SIGSEGV
, &act
, NULL
);
11149 sigaction (SIGBUS
, &act
, NULL
);
11150 sigaction (SIGFPE
, &act
, NULL
);
11151 sigaction (SIGILL
, &act
, NULL
);
11152 sigaction (SIGBUS
, &act
, NULL
);
11154 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11155 act
.sa_handler
= sigtermHandler
;
11156 sigaction (SIGTERM
, &act
, NULL
);
11160 #include "staticsymbols.h"
11161 /* This function try to convert a pointer into a function name. It's used in
11162 * oreder to provide a backtrace under segmentation fault that's able to
11163 * display functions declared as static (otherwise the backtrace is useless). */
11164 static char *findFuncName(void *pointer
, unsigned long *offset
){
11166 unsigned long off
, minoff
= 0;
11168 /* Try to match against the Symbol with the smallest offset */
11169 for (i
=0; symsTable
[i
].pointer
; i
++) {
11170 unsigned long lp
= (unsigned long) pointer
;
11172 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11173 off
=lp
-symsTable
[i
].pointer
;
11174 if (ret
< 0 || off
< minoff
) {
11180 if (ret
== -1) return NULL
;
11182 return symsTable
[ret
].name
;
11184 #else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined: installs no handler. */
static void setupSigSegvAction(void) {
    /* Intentionally empty. */
}
11187 #endif /* HAVE_BACKTRACE */