2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "ziplist.h" /* Compact list data structure */
79 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
80 #include "release.h" /* Release and/or git repository information */
86 /* Static server configuration */
87 #define REDIS_SERVERPORT 6379 /* TCP port */
88 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
89 #define REDIS_IOBUF_LEN 1024
90 #define REDIS_LOADBUF_LEN 1024
91 #define REDIS_STATIC_ARGS 8
92 #define REDIS_DEFAULT_DBNUM 16
93 #define REDIS_CONFIGLINE_MAX 1024
94 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
95 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
96 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
97 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
98 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
100 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
101 #define REDIS_WRITEV_THRESHOLD 3
102 /* Max number of iovecs used for each writev call */
103 #define REDIS_WRITEV_IOVEC_COUNT 256
105 /* Hash table parameters */
106 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
109 #define REDIS_CMD_BULK 1 /* Bulk write command */
110 #define REDIS_CMD_INLINE 2 /* Inline command */
111 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
112 this flag will return an error when the 'maxmemory' option is set in the
113 config file and the server is using more than maxmemory bytes of memory.
114 In short these commands are denied on low memory conditions. */
115 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
119 #define REDIS_STRING 0
124 #define REDIS_VMPOINTER 8
126 /* Objects encoding. Some kind of objects like Strings and Hashes can be
127 * internally represented in multiple ways. The 'encoding' field of the object
128 * is set to one of these values for this object. */
129 #define REDIS_ENCODING_RAW 0 /* Raw representation */
130 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
131 #define REDIS_ENCODING_HT 2 /* Encoded as hash table */
132 #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
133 #define REDIS_ENCODING_LIST 4 /* Encoded as regular (non-zip) list */
134 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
136 static char* strencoding
[] = {
137 "raw", "int", "hashtable", "zipmap", "list", "ziplist"
140 /* Object types only used for dumping to disk */
141 #define REDIS_EXPIRETIME 253
142 #define REDIS_SELECTDB 254
143 #define REDIS_EOF 255
145 /* Defines related to the dump file format. To store 32 bits lengths for short
146 * keys requires a lot of space, so we check the most significant 2 bits of
147 * the first byte to interpret the length:
149 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
150 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
151 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
152 * 11|000000 this means: specially encoded object will follow. The six bits
153 * number specify the kind of object that follows.
154 * See the REDIS_RDB_ENC_* defines.
156 * Lengths up to 63 are stored using a single byte, most DB keys, and many
157 * values, will fit inside. */
158 #define REDIS_RDB_6BITLEN 0
159 #define REDIS_RDB_14BITLEN 1
160 #define REDIS_RDB_32BITLEN 2
161 #define REDIS_RDB_ENCVAL 3
162 #define REDIS_RDB_LENERR UINT_MAX
164 /* When a length of a string object stored on disk has the first two bits
165 * set, the remaining six bits specify a special encoding for the object
166 * according to the following defines: */
167 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
168 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
169 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
170 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
172 /* Virtual memory object->storage field. */
173 #define REDIS_VM_MEMORY 0 /* The object is in memory */
174 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
175 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object to disk */
176 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
178 /* Virtual memory static configuration stuff.
179 * Check vmFindContiguousPages() to know more about these magic numbers. */
180 #define REDIS_VM_MAX_NEAR_PAGES 65536
181 #define REDIS_VM_MAX_RANDOM_JUMP 4096
182 #define REDIS_VM_MAX_THREADS 32
183 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
184 /* The following is the *percentage* of completed I/O jobs to process when the
185 * handler is called. While Virtual Memory I/O operations are performed by
186 * threads, these operations must be processed by the main thread when completed
187 * in order to take effect. */
188 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
191 #define REDIS_SLAVE 1 /* This client is a slave server */
192 #define REDIS_MASTER 2 /* This client is a master server */
193 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
194 #define REDIS_MULTI 8 /* This client is in a MULTI context */
195 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
196 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
197 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
199 /* Slave replication state - slave side */
200 #define REDIS_REPL_NONE 0 /* No active replication */
201 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
202 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
204 /* Slave replication state - from the point of view of master
205 * Note that in SEND_BULK and ONLINE state the slave receives new updates
206 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
207 * to start the next background saving in order to send updates to it. */
208 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
209 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
210 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
211 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
213 /* List related stuff */
217 /* Sort operations */
218 #define REDIS_SORT_GET 0
219 #define REDIS_SORT_ASC 1
220 #define REDIS_SORT_DESC 2
221 #define REDIS_SORTKEY_MAX 1024
224 #define REDIS_DEBUG 0
225 #define REDIS_VERBOSE 1
226 #define REDIS_NOTICE 2
227 #define REDIS_WARNING 3
229 /* Anti-warning macro... */
230 #define REDIS_NOTUSED(V) ((void) V)
232 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
233 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
235 /* Append only defines */
236 #define APPENDFSYNC_NO 0
237 #define APPENDFSYNC_ALWAYS 1
238 #define APPENDFSYNC_EVERYSEC 2
240 /* Zip structure related defaults */
241 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
242 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
243 #define REDIS_LIST_MAX_ZIPLIST_ENTRIES 1024
244 #define REDIS_LIST_MAX_ZIPLIST_VALUE 32
246 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e): when _e evaluates to false, calls _redisAssert() with the
 * stringified expression, the source file name and the line number, and then
 * calls _exit(1). When _e is true the whole macro evaluates to (void)0. */
247 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* redisPanic(_e): calls _redisPanic() with the stringified argument, the
 * source file name and the line number, and then calls _exit(1).
 * Note that the argument is stringified with #_e, and that the expansion is
 * a bare comma expression that is not wrapped in parentheses. */
248 #define redisPanic(_e) _redisPanic(#_e,__FILE__,__LINE__),_exit(1)
/* Helpers invoked by the two macros above; they receive the stringified
 * expression/message plus the __FILE__ and __LINE__ of the call site. */
249 static void _redisAssert(char *estr
, char *file
, int line
);
250 static void _redisPanic(char *msg
, char *file
, int line
);
252 /*================================= Data types ============================== */
254 /* A redis object, that is a type able to hold a string / list / set */
256 /* The actual Redis Object */
257 typedef struct redisObject
{
259 unsigned storage
:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
261 unsigned lru
:22; /* lru time (relative to server.lruclock) */
264 /* VM fields, these are only allocated if VM is active, otherwise the
265 * object allocation function will just allocate
266 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
267 * Redis without VM active will not have any overhead. */
270 /* The VM pointer structure - identifies an object in the swap file.
272 * This object is stored in place of the value
273 * object in the main key->value hash table representing a database.
274 * Note that the first fields (type, storage) are the same as the redisObject
275 * structure so that vmPointer structures can be accessed even when cast
276 * as redisObject structures.
278 * This is useful as we don't know if a value object is or not on disk, but we
279 * are always able to read obj->storage to check this. For vmPointer
280 * structures "type" is set to REDIS_VMPOINTER (even if without this field
281 * it is still possible to check the kind of object from the value of 'storage').*/
282 typedef struct vmPointer
{
284 unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
286 unsigned int vtype
; /* type of the object stored in the swap file */
287 off_t page
; /* the page at which the object is stored on disk */
288 off_t usedpages
; /* number of pages used on disk */
291 /* Macro used to initialize a Redis object allocated on the stack.
292 * Note that this macro is kept near the structure definition to make sure
293 * we'll update it when the structure is changed, to avoid bugs like
294 * bug #85 introduced exactly in this way. */
295 #define initStaticStringObject(_var,_ptr) do { \
297 _var.type = REDIS_STRING; \
298 _var.encoding = REDIS_ENCODING_RAW; \
300 _var.storage = REDIS_VM_MEMORY; \
303 typedef struct redisDb
{
304 dict
*dict
; /* The keyspace for this DB */
305 dict
*expires
; /* Timeout of keys with a timeout set */
306 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
307 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
308 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
312 /* Client MULTI/EXEC state */
313 typedef struct multiCmd
{
316 struct redisCommand
*cmd
;
319 typedef struct multiState
{
320 multiCmd
*commands
; /* Array of MULTI commands */
321 int count
; /* Total number of MULTI commands */
324 /* With multiplexing we need to keep per-client state.
325 * Clients are kept in a linked list. */
326 typedef struct redisClient
{
331 robj
**argv
, **mbargv
;
333 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
334 int multibulk
; /* multi bulk command format active */
337 time_t lastinteraction
; /* time of the last interaction, used for timeout */
338 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
339 int slaveseldb
; /* slave selected db, if this client is a slave */
340 int authenticated
; /* when requirepass is non-NULL */
341 int replstate
; /* replication state if this is a slave */
342 int repldbfd
; /* replication DB file descriptor */
343 long repldboff
; /* replication DB file offset */
344 off_t repldbsize
; /* replication DB file size */
345 multiState mstate
; /* MULTI/EXEC state */
346 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
347 * operation such as BLPOP. Otherwise NULL. */
348 int blocking_keys_num
; /* Number of blocking keys */
349 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
350 * is >= blockingto then the operation timed out. */
351 list
*io_keys
; /* Keys this client is waiting to be loaded from the
352 * swap file in order to continue. */
353 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
354 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
355 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
363 /* Global server state structure */
368 long long dirty
; /* changes to DB from the last save */
370 list
*slaves
, *monitors
;
371 char neterr
[ANET_ERR_LEN
];
373 int cronloops
; /* number of times the cron function run */
374 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
375 time_t lastsave
; /* Unix time of last successful save */
376 /* Fields used only for stats */
377 time_t stat_starttime
; /* server start time */
378 long long stat_numcommands
; /* number of processed commands */
379 long long stat_numconnections
; /* number of connections received */
380 long long stat_expiredkeys
; /* number of expired keys */
389 int no_appendfsync_on_rewrite
;
395 pid_t bgsavechildpid
;
396 pid_t bgrewritechildpid
;
397 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
398 sds aofbuf
; /* AOF buffer, written before entering the event loop */
399 struct saveparam
*saveparams
;
404 char *appendfilename
;
408 /* Replication related */
413 redisClient
*master
; /* client that is master for this slave */
415 unsigned int maxclients
;
416 unsigned long long maxmemory
;
417 unsigned int blpop_blocked_clients
;
418 unsigned int vm_blocked_clients
;
419 /* Sort parameters - qsort_r() is only available under BSD so we
420 * have to take this state global, in order to pass it to sortCompare() */
424 /* Virtual memory configuration */
429 unsigned long long vm_max_memory
;
430 /* Zip structure config */
431 size_t hash_max_zipmap_entries
;
432 size_t hash_max_zipmap_value
;
433 size_t list_max_ziplist_entries
;
434 size_t list_max_ziplist_value
;
435 /* Virtual memory state */
438 off_t vm_next_page
; /* Next probably empty page */
439 off_t vm_near_pages
; /* Number of pages allocated sequentially */
440 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
441 time_t unixtime
; /* Unix time sampled every second. */
442 /* Virtual memory I/O threads stuff */
443 /* An I/O thread processes an element taken from the io_newjobs queue and
444 * puts the result of the operation in the io_processed list. While the
445 * job is being processed, it's put on the io_processing queue. */
446 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
447 list
*io_processing
; /* List of VM I/O jobs being processed */
448 list
*io_processed
; /* List of VM I/O jobs already processed */
449 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
450 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
451 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
452 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
453 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
454 int io_active_threads
; /* Number of running I/O threads */
455 int vm_max_threads
; /* Max number of I/O threads running at the same time */
456 /* Our main thread is blocked on the event loop, looking for sockets ready
457 * to be read or written, so when a threaded I/O operation is ready to be
458 * processed by the main thread, the I/O thread will use a unix pipe to
459 * awake the main thread. The following are the two pipe FDs. */
460 int io_ready_pipe_read
;
461 int io_ready_pipe_write
;
462 /* Virtual memory stats */
463 unsigned long long vm_stats_used_pages
;
464 unsigned long long vm_stats_swapped_objects
;
465 unsigned long long vm_stats_swapouts
;
466 unsigned long long vm_stats_swapins
;
468 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
469 list
*pubsub_patterns
; /* A list of pubsub_patterns */
472 unsigned lruclock
:22; /* clock incrementing every minute, for LRU */
473 unsigned lruclock_padding
:10;
476 typedef struct pubsubPattern
{
481 typedef void redisCommandProc(redisClient
*c
);
482 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
483 struct redisCommand
{
485 redisCommandProc
*proc
;
488 /* Use a function to determine which keys need to be loaded
489 * in the background prior to executing this command. Takes precedence
490 * over vm_firstkey and others, ignored when NULL */
491 redisVmPreloadProc
*vm_preload_proc
;
492 /* What keys should be loaded in background when calling this command? */
493 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
494 int vm_lastkey
; /* The last argument that's a key */
495 int vm_keystep
; /* The step between first and last key */
498 struct redisFunctionSym
{
500 unsigned long pointer
;
503 typedef struct _redisSortObject
{
511 typedef struct _redisSortOperation
{
514 } redisSortOperation
;
516 /* ZSETs use a specialized version of Skiplists */
518 typedef struct zskiplistNode
{
519 struct zskiplistNode
**forward
;
520 struct zskiplistNode
*backward
;
526 typedef struct zskiplist
{
527 struct zskiplistNode
*header
, *tail
;
528 unsigned long length
;
532 typedef struct zset
{
537 /* Our shared "common" objects */
539 #define REDIS_SHARED_INTEGERS 10000
540 struct sharedObjectsStruct
{
541 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
542 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
543 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
544 *outofrangeerr
, *plus
,
545 *select0
, *select1
, *select2
, *select3
, *select4
,
546 *select5
, *select6
, *select7
, *select8
, *select9
,
547 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
548 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
549 *integers
[REDIS_SHARED_INTEGERS
];
552 /* Global vars that are actually used as constants. The following double
553 * values are used for double on-disk serialization, and are initialized
554 * at runtime to avoid strange compiler optimizations. */
556 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
558 /* VM threaded I/O request message */
559 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
560 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
561 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
562 typedef struct iojob
{
563 int type
; /* Request type, REDIS_IOJOB_* */
564 redisDb
*db
;/* Redis database */
565 robj
*key
; /* This I/O request is about swapping this key */
566 robj
*id
; /* Unique identifier of this job:
567 this is the object to swap for REDIS_IOJOB_*_SWAP, or the
568 vmpointer object for REDIS_IOJOB_LOAD. */
569 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
570 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
571 off_t page
; /* Swap page where to read/write the object */
572 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
573 int canceled
; /* True if this command was canceled by blocking side of VM */
574 pthread_t thread
; /* ID of the thread processing this entry */
577 /*================================ Prototypes =============================== */
579 static void freeStringObject(robj
*o
);
580 static void freeListObject(robj
*o
);
581 static void freeSetObject(robj
*o
);
582 static void decrRefCount(void *o
);
583 static robj
*createObject(int type
, void *ptr
);
584 static void freeClient(redisClient
*c
);
585 static int rdbLoad(char *filename
);
586 static void addReply(redisClient
*c
, robj
*obj
);
587 static void addReplySds(redisClient
*c
, sds s
);
588 static void incrRefCount(robj
*o
);
589 static int rdbSaveBackground(char *filename
);
590 static robj
*createStringObject(char *ptr
, size_t len
);
591 static robj
*dupStringObject(robj
*o
);
592 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
593 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
594 static void flushAppendOnlyFile(void);
595 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
596 static int syncWithMaster(void);
597 static robj
*tryObjectEncoding(robj
*o
);
598 static robj
*getDecodedObject(robj
*o
);
599 static int removeExpire(redisDb
*db
, robj
*key
);
600 static int expireIfNeeded(redisDb
*db
, robj
*key
);
601 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
602 static int dbDelete(redisDb
*db
, robj
*key
);
603 static time_t getExpire(redisDb
*db
, robj
*key
);
604 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
605 static void updateSlavesWaitingBgsave(int bgsaveerr
);
606 static void freeMemoryIfNeeded(void);
607 static int processCommand(redisClient
*c
);
608 static void setupSigSegvAction(void);
609 static void rdbRemoveTempFile(pid_t childpid
);
610 static void aofRemoveTempFile(pid_t childpid
);
611 static size_t stringObjectLen(robj
*o
);
612 static void processInputBuffer(redisClient
*c
);
613 static zskiplist
*zslCreate(void);
614 static void zslFree(zskiplist
*zsl
);
615 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
616 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
617 static void initClientMultiState(redisClient
*c
);
618 static void freeClientMultiState(redisClient
*c
);
619 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
620 static void unblockClientWaitingData(redisClient
*c
);
621 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
622 static void vmInit(void);
623 static void vmMarkPagesFree(off_t page
, off_t count
);
624 static robj
*vmLoadObject(robj
*o
);
625 static robj
*vmPreviewObject(robj
*o
);
626 static int vmSwapOneObjectBlocking(void);
627 static int vmSwapOneObjectThreaded(void);
628 static int vmCanSwapOut(void);
629 static int tryFreeOneObjectFromFreelist(void);
630 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
631 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
632 static void vmCancelThreadedIOJob(robj
*o
);
633 static void lockThreadedIO(void);
634 static void unlockThreadedIO(void);
635 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
636 static void freeIOJob(iojob
*j
);
637 static void queueIOJob(iojob
*j
);
638 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
639 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
640 static void waitEmptyIOJobsQueue(void);
641 static void vmReopenSwapFile(void);
642 static int vmFreePage(off_t page
);
643 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
644 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
645 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
646 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
647 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
648 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
649 static struct redisCommand
*lookupCommand(char *name
);
650 static void call(redisClient
*c
, struct redisCommand
*cmd
);
651 static void resetClient(redisClient
*c
);
652 static void convertToRealHash(robj
*o
);
653 static void listTypeConvert(robj
*o
, int enc
);
654 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
655 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
656 static void freePubsubPattern(void *p
);
657 static int listMatchPubsubPattern(void *a
, void *b
);
658 static int compareStringObjects(robj
*a
, robj
*b
);
659 static int equalStringObjects(robj
*a
, robj
*b
);
661 static int rewriteAppendOnlyFileBackground(void);
662 static vmpointer
*vmSwapObjectBlocking(robj
*val
);
/* NOTE(review): declared with an empty parameter list "()", unlike the
 * "(void)" used by the other no-argument prototypes in this section, so the
 * compiler does not check arguments at call sites - confirm and align. */
663 static int prepareForShutdown();
664 static void touchWatchedKey(redisDb
*db
, robj
*key
);
665 static void touchWatchedKeysOnFlush(int dbid
);
666 static void unwatchAllKeys(redisClient
*c
);
668 static void authCommand(redisClient
*c
);
669 static void pingCommand(redisClient
*c
);
670 static void echoCommand(redisClient
*c
);
671 static void setCommand(redisClient
*c
);
672 static void setnxCommand(redisClient
*c
);
673 static void setexCommand(redisClient
*c
);
674 static void getCommand(redisClient
*c
);
675 static void delCommand(redisClient
*c
);
676 static void existsCommand(redisClient
*c
);
677 static void incrCommand(redisClient
*c
);
678 static void decrCommand(redisClient
*c
);
679 static void incrbyCommand(redisClient
*c
);
680 static void decrbyCommand(redisClient
*c
);
681 static void selectCommand(redisClient
*c
);
682 static void randomkeyCommand(redisClient
*c
);
683 static void keysCommand(redisClient
*c
);
684 static void dbsizeCommand(redisClient
*c
);
685 static void lastsaveCommand(redisClient
*c
);
686 static void saveCommand(redisClient
*c
);
687 static void bgsaveCommand(redisClient
*c
);
688 static void bgrewriteaofCommand(redisClient
*c
);
689 static void shutdownCommand(redisClient
*c
);
690 static void moveCommand(redisClient
*c
);
691 static void renameCommand(redisClient
*c
);
692 static void renamenxCommand(redisClient
*c
);
693 static void lpushCommand(redisClient
*c
);
694 static void rpushCommand(redisClient
*c
);
695 static void lpopCommand(redisClient
*c
);
696 static void rpopCommand(redisClient
*c
);
697 static void llenCommand(redisClient
*c
);
698 static void lindexCommand(redisClient
*c
);
699 static void lrangeCommand(redisClient
*c
);
700 static void ltrimCommand(redisClient
*c
);
701 static void typeCommand(redisClient
*c
);
702 static void lsetCommand(redisClient
*c
);
703 static void saddCommand(redisClient
*c
);
704 static void sremCommand(redisClient
*c
);
705 static void smoveCommand(redisClient
*c
);
706 static void sismemberCommand(redisClient
*c
);
707 static void scardCommand(redisClient
*c
);
708 static void spopCommand(redisClient
*c
);
709 static void srandmemberCommand(redisClient
*c
);
710 static void sinterCommand(redisClient
*c
);
711 static void sinterstoreCommand(redisClient
*c
);
712 static void sunionCommand(redisClient
*c
);
713 static void sunionstoreCommand(redisClient
*c
);
714 static void sdiffCommand(redisClient
*c
);
715 static void sdiffstoreCommand(redisClient
*c
);
716 static void syncCommand(redisClient
*c
);
717 static void flushdbCommand(redisClient
*c
);
718 static void flushallCommand(redisClient
*c
);
719 static void sortCommand(redisClient
*c
);
720 static void lremCommand(redisClient
*c
);
/* NOTE(review): the name ends in lowercase "command", unlike the "...Command"
 * spelling of the sibling prototypes; the "rpoplpush" entry of
 * readonlyCommandTable refers to it with this exact spelling. */
721 static void rpoplpushcommand(redisClient
*c
);
722 static void infoCommand(redisClient
*c
);
723 static void mgetCommand(redisClient
*c
);
724 static void monitorCommand(redisClient
*c
);
725 static void expireCommand(redisClient
*c
);
726 static void expireatCommand(redisClient
*c
);
727 static void getsetCommand(redisClient
*c
);
728 static void ttlCommand(redisClient
*c
);
729 static void slaveofCommand(redisClient
*c
);
730 static void debugCommand(redisClient
*c
);
731 static void msetCommand(redisClient
*c
);
732 static void msetnxCommand(redisClient
*c
);
733 static void zaddCommand(redisClient
*c
);
734 static void zincrbyCommand(redisClient
*c
);
735 static void zrangeCommand(redisClient
*c
);
736 static void zrangebyscoreCommand(redisClient
*c
);
737 static void zcountCommand(redisClient
*c
);
738 static void zrevrangeCommand(redisClient
*c
);
739 static void zcardCommand(redisClient
*c
);
740 static void zremCommand(redisClient
*c
);
741 static void zscoreCommand(redisClient
*c
);
742 static void zremrangebyscoreCommand(redisClient
*c
);
743 static void multiCommand(redisClient
*c
);
744 static void execCommand(redisClient
*c
);
745 static void discardCommand(redisClient
*c
);
746 static void blpopCommand(redisClient
*c
);
747 static void brpopCommand(redisClient
*c
);
748 static void appendCommand(redisClient
*c
);
749 static void substrCommand(redisClient
*c
);
750 static void zrankCommand(redisClient
*c
);
751 static void zrevrankCommand(redisClient
*c
);
752 static void hsetCommand(redisClient
*c
);
753 static void hsetnxCommand(redisClient
*c
);
754 static void hgetCommand(redisClient
*c
);
755 static void hmsetCommand(redisClient
*c
);
756 static void hmgetCommand(redisClient
*c
);
757 static void hdelCommand(redisClient
*c
);
758 static void hlenCommand(redisClient
*c
);
759 static void zremrangebyrankCommand(redisClient
*c
);
760 static void zunionstoreCommand(redisClient
*c
);
761 static void zinterstoreCommand(redisClient
*c
);
762 static void hkeysCommand(redisClient
*c
);
763 static void hvalsCommand(redisClient
*c
);
764 static void hgetallCommand(redisClient
*c
);
765 static void hexistsCommand(redisClient
*c
);
766 static void configCommand(redisClient
*c
);
767 static void hincrbyCommand(redisClient
*c
);
768 static void subscribeCommand(redisClient
*c
);
769 static void unsubscribeCommand(redisClient
*c
);
770 static void psubscribeCommand(redisClient
*c
);
771 static void punsubscribeCommand(redisClient
*c
);
772 static void publishCommand(redisClient
*c
);
773 static void watchCommand(redisClient
*c
);
774 static void unwatchCommand(redisClient
*c
);
776 /*================================= Globals ================================= */
779 static struct redisServer server
; /* server global state */
780 static struct redisCommand
*commandTable
;
781 static struct redisCommand readonlyCommandTable
[] = {
782 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
783 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
784 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
785 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
786 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
788 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
789 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
790 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
791 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
793 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
795 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
797 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
802 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
805 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
806 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
807 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
808 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
809 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
810 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
812 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
813 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
814 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
815 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
816 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
817 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
818 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
819 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
821 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
822 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
823 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
824 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
825 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
826 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
827 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
828 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
829 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
830 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
831 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
833 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
834 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
835 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
836 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
837 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
838 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
839 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
840 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
841 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
842 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
843 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
844 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
845 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
846 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
847 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
848 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
849 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
850 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
851 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
852 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
855 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
856 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
857 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
859 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
864 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
865 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
866 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
867 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
868 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
869 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
870 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
871 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
872 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
873 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
874 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
875 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
876 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
877 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
878 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
879 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
880 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
881 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
882 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
883 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
884 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
885 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
886 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
887 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
888 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
889 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
892 /*============================ Utility functions ============================ */
894 /* Glob-style pattern matching. */
895 static int stringmatchlen(const char *pattern
, int patternLen
,
896 const char *string
, int stringLen
, int nocase
)
901 while (pattern
[1] == '*') {
906 return 1; /* match */
908 if (stringmatchlen(pattern
+1, patternLen
-1,
909 string
, stringLen
, nocase
))
910 return 1; /* match */
914 return 0; /* no match */
918 return 0; /* no match */
928 not = pattern
[0] == '^';
935 if (pattern
[0] == '\\') {
938 if (pattern
[0] == string
[0])
940 } else if (pattern
[0] == ']') {
942 } else if (patternLen
== 0) {
946 } else if (pattern
[1] == '-' && patternLen
>= 3) {
947 int start
= pattern
[0];
948 int end
= pattern
[2];
956 start
= tolower(start
);
962 if (c
>= start
&& c
<= end
)
966 if (pattern
[0] == string
[0])
969 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
979 return 0; /* no match */
985 if (patternLen
>= 2) {
992 if (pattern
[0] != string
[0])
993 return 0; /* no match */
995 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
996 return 0; /* no match */
1004 if (stringLen
== 0) {
1005 while(*pattern
== '*') {
1012 if (patternLen
== 0 && stringLen
== 0)
1017 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
1018 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
1021 /* Convert a string representing an amount of memory into the number of
1022 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
1025 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1027 static long long memtoll(const char *p
, int *err
) {
1030 long mul
; /* unit multiplier */
1032 unsigned int digits
;
1035 /* Search the first non digit character. */
1038 while(*u
&& isdigit(*u
)) u
++;
1039 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1041 } else if (!strcasecmp(u
,"k")) {
1043 } else if (!strcasecmp(u
,"kb")) {
1045 } else if (!strcasecmp(u
,"m")) {
1047 } else if (!strcasecmp(u
,"mb")) {
1049 } else if (!strcasecmp(u
,"g")) {
1050 mul
= 1000L*1000*1000;
1051 } else if (!strcasecmp(u
,"gb")) {
1052 mul
= 1024L*1024*1024;
1058 if (digits
>= sizeof(buf
)) {
1062 memcpy(buf
,p
,digits
);
1064 val
= strtoll(buf
,NULL
,10);
1068 /* Convert a long long into a string. Returns the number of
1069 * characters needed to represent the number, which can be shorter if the
1070 * passed buffer length is not enough to store the whole number. */
1071 static int ll2string(char *s
, size_t len
, long long value
) {
1073 unsigned long long v
;
1076 if (len
== 0) return 0;
1077 v
= (value
< 0) ? -value
: value
;
1078 p
= buf
+31; /* point to the last character */
1083 if (value
< 0) *p
-- = '-';
1086 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1092 static void redisLog(int level
, const char *fmt
, ...) {
1096 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1100 if (level
>= server
.verbosity
) {
1106 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1107 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1108 vfprintf(fp
, fmt
, ap
);
1114 if (server
.logfile
) fclose(fp
);
1117 /*====================== Hash table type implementation ==================== */
1119 /* This is a hash table type that uses the SDS dynamic strings library as
1120 * keys and redis objects as values (objects can hold SDS strings,
1123 static void dictVanillaFree(void *privdata
, void *val
)
1125 DICT_NOTUSED(privdata
);
1129 static void dictListDestructor(void *privdata
, void *val
)
1131 DICT_NOTUSED(privdata
);
1132 listRelease((list
*)val
);
1135 static int dictSdsKeyCompare(void *privdata
, const void *key1
,
1139 DICT_NOTUSED(privdata
);
1141 l1
= sdslen((sds
)key1
);
1142 l2
= sdslen((sds
)key2
);
1143 if (l1
!= l2
) return 0;
1144 return memcmp(key1
, key2
, l1
) == 0;
1147 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1149 DICT_NOTUSED(privdata
);
1151 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1155 static void dictSdsDestructor(void *privdata
, void *val
)
1157 DICT_NOTUSED(privdata
);
1162 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1165 const robj
*o1
= key1
, *o2
= key2
;
1166 return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1169 static unsigned int dictObjHash(const void *key
) {
1170 const robj
*o
= key
;
1171 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1174 static unsigned int dictSdsHash(const void *key
) {
1175 return dictGenHashFunction((unsigned char*)key
, sdslen((char*)key
));
1178 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1181 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1184 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1185 o2
->encoding
== REDIS_ENCODING_INT
)
1186 return o1
->ptr
== o2
->ptr
;
1188 o1
= getDecodedObject(o1
);
1189 o2
= getDecodedObject(o2
);
1190 cmp
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1196 static unsigned int dictEncObjHash(const void *key
) {
1197 robj
*o
= (robj
*) key
;
1199 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1200 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1202 if (o
->encoding
== REDIS_ENCODING_INT
) {
1206 len
= ll2string(buf
,32,(long)o
->ptr
);
1207 return dictGenHashFunction((unsigned char*)buf
, len
);
1211 o
= getDecodedObject(o
);
1212 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1220 static dictType setDictType
= {
1221 dictEncObjHash
, /* hash function */
1224 dictEncObjKeyCompare
, /* key compare */
1225 dictRedisObjectDestructor
, /* key destructor */
1226 NULL
/* val destructor */
1229 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1230 static dictType zsetDictType
= {
1231 dictEncObjHash
, /* hash function */
1234 dictEncObjKeyCompare
, /* key compare */
1235 dictRedisObjectDestructor
, /* key destructor */
1236 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1239 /* Db->dict, keys are sds strings, vals are Redis objects. */
1240 static dictType dbDictType
= {
1241 dictSdsHash
, /* hash function */
1244 dictSdsKeyCompare
, /* key compare */
1245 dictSdsDestructor
, /* key destructor */
1246 dictRedisObjectDestructor
/* val destructor */
1250 static dictType keyptrDictType
= {
1251 dictSdsHash
, /* hash function */
1254 dictSdsKeyCompare
, /* key compare */
1255 dictSdsDestructor
, /* key destructor */
1256 NULL
/* val destructor */
1259 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1260 static dictType hashDictType
= {
1261 dictEncObjHash
, /* hash function */
1264 dictEncObjKeyCompare
, /* key compare */
1265 dictRedisObjectDestructor
, /* key destructor */
1266 dictRedisObjectDestructor
/* val destructor */
1269 /* Keylist hash table type has unencoded redis objects as keys and
1270 * lists as values. It's used for blocking operations (BLPOP) and to
1271 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1272 static dictType keylistDictType
= {
1273 dictObjHash
, /* hash function */
1276 dictObjKeyCompare
, /* key compare */
1277 dictRedisObjectDestructor
, /* key destructor */
1278 dictListDestructor
/* val destructor */
1281 static void version();
1283 /* ========================= Random utility functions ======================= */
1285 /* Redis generally does not try to recover from out of memory conditions
1286 * when allocating objects or strings, it is not clear if it will be possible
1287 * to report this condition to the client since the networking layer itself
1288 * is based on heap allocation for send buffers, so we simply abort.
1289 * At least the code will be simpler to read... */
1290 static void oom(const char *msg
) {
1291 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1296 /* ====================== Redis server networking stuff ===================== */
1297 static void closeTimedoutClients(void) {
1300 time_t now
= time(NULL
);
1303 listRewind(server
.clients
,&li
);
1304 while ((ln
= listNext(&li
)) != NULL
) {
1305 c
= listNodeValue(ln
);
1306 if (server
.maxidletime
&&
1307 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1308 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1309 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1310 listLength(c
->pubsub_patterns
) == 0 &&
1311 (now
- c
->lastinteraction
> server
.maxidletime
))
1313 redisLog(REDIS_VERBOSE
,"Closing idle client");
1315 } else if (c
->flags
& REDIS_BLOCKED
) {
1316 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1317 addReply(c
,shared
.nullmultibulk
);
1318 unblockClientWaitingData(c
);
1324 static int htNeedsResize(dict
*dict
) {
1325 long long size
, used
;
1327 size
= dictSlots(dict
);
1328 used
= dictSize(dict
);
1329 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1330 (used
*100/size
< REDIS_HT_MINFILL
));
1333 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1334 * we resize the hash table to save memory */
1335 static void tryResizeHashTables(void) {
1338 for (j
= 0; j
< server
.dbnum
; j
++) {
1339 if (htNeedsResize(server
.db
[j
].dict
))
1340 dictResize(server
.db
[j
].dict
);
1341 if (htNeedsResize(server
.db
[j
].expires
))
1342 dictResize(server
.db
[j
].expires
);
1346 /* Our hash table implementation performs rehashing incrementally while
1347 * we write/read from the hash table. Still if the server is idle, the hash
1348 * table will use two tables for a long time. So we try to use 1 millisecond
1349 * of CPU time at every serverCron() loop in order to rehash some keys. */
1350 static void incrementallyRehash(void) {
1353 for (j
= 0; j
< server
.dbnum
; j
++) {
1354 if (dictIsRehashing(server
.db
[j
].dict
)) {
1355 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1356 break; /* already used our millisecond for this loop... */
1361 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1362 void backgroundSaveDoneHandler(int statloc
) {
1363 int exitcode
= WEXITSTATUS(statloc
);
1364 int bysignal
= WIFSIGNALED(statloc
);
1366 if (!bysignal
&& exitcode
== 0) {
1367 redisLog(REDIS_NOTICE
,
1368 "Background saving terminated with success");
1370 server
.lastsave
= time(NULL
);
1371 } else if (!bysignal
&& exitcode
!= 0) {
1372 redisLog(REDIS_WARNING
, "Background saving error");
1374 redisLog(REDIS_WARNING
,
1375 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1376 rdbRemoveTempFile(server
.bgsavechildpid
);
1378 server
.bgsavechildpid
= -1;
1379 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1380 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1381 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1384 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1386 void backgroundRewriteDoneHandler(int statloc
) {
1387 int exitcode
= WEXITSTATUS(statloc
);
1388 int bysignal
= WIFSIGNALED(statloc
);
1390 if (!bysignal
&& exitcode
== 0) {
1394 redisLog(REDIS_NOTICE
,
1395 "Background append only file rewriting terminated with success");
1396 /* Now it's time to flush the differences accumulated by the parent */
1397 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1398 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1400 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1403 /* Flush our data... */
1404 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1405 (signed) sdslen(server
.bgrewritebuf
)) {
1406 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1410 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1411 /* Now our work is to rename the temp file into the stable file. And
1412 * switch the file descriptor used by the server for append only. */
1413 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1414 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1418 /* Mission completed... almost */
1419 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1420 if (server
.appendfd
!= -1) {
1421 /* If append only is actually enabled... */
1422 close(server
.appendfd
);
1423 server
.appendfd
= fd
;
1424 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1425 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1426 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1428 /* If append only is disabled we just generate a dump in this
1429 * format. Why not? */
1432 } else if (!bysignal
&& exitcode
!= 0) {
1433 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1435 redisLog(REDIS_WARNING
,
1436 "Background append only file rewriting terminated by signal %d",
1440 sdsfree(server
.bgrewritebuf
);
1441 server
.bgrewritebuf
= sdsempty();
1442 aofRemoveTempFile(server
.bgrewritechildpid
);
1443 server
.bgrewritechildpid
= -1;
1446 /* This function is called once a background process of some kind terminates,
1447 * as we want to avoid resizing the hash tables when there is a child in order
1448 * to play well with copy-on-write (otherwise when a resize happens lots of
1449 * memory pages are copied). The goal of this function is to update the ability
1450 * for dict.c to resize the hash tables according to whether or not we have
1451 * running children. */
1452 static void updateDictResizePolicy(void) {
1453 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1456 dictDisableResize();
1459 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1460 int j
, loops
= server
.cronloops
++;
1461 REDIS_NOTUSED(eventLoop
);
1463 REDIS_NOTUSED(clientData
);
1465 /* We take a cached value of the unix time in the global state because
1466 * with virtual memory and aging there is a need to store the current time
1467 * in objects at every object access, and accuracy is not needed.
1468 * To access a global var is faster than calling time(NULL) */
1469 server
.unixtime
= time(NULL
);
1470 /* We have just 21 bits per object for LRU information.
1471 * So we use an (eventually wrapping) LRU clock with minutes resolution.
1473 * When we need to select what object to swap, we compute the minimum
1474 * time distance between the current lruclock and the object last access
1475 * lruclock info. Even if clocks will wrap on overflow, there is
1476 * the interesting property that we are sure that at least
1477 * ABS(A-B) minutes passed between current time and timestamp B.
1479 * This is not precise, but we do not need precision at all, just
1480 * something statistically reasonable.
1482 server
.lruclock
= (time(NULL
)/60)&((1<<21)-1);
1484 /* We received a SIGTERM, shutting down here in a safe way, as it is
1485 * not ok doing so inside the signal handler. */
1486 if (server
.shutdown_asap
) {
1487 if (prepareForShutdown() == REDIS_OK
) exit(0);
1488 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1491 /* Show some info about non-empty databases */
1492 for (j
= 0; j
< server
.dbnum
; j
++) {
1493 long long size
, used
, vkeys
;
1495 size
= dictSlots(server
.db
[j
].dict
);
1496 used
= dictSize(server
.db
[j
].dict
);
1497 vkeys
= dictSize(server
.db
[j
].expires
);
1498 if (!(loops
% 50) && (used
|| vkeys
)) {
1499 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1500 /* dictPrintStats(server.dict); */
1504 /* We don't want to resize the hash tables while a background saving
1505 * is in progress: the saving child is created using fork() that is
1506 * implemented with a copy-on-write semantic in most modern systems, so
1507 * if we resize the HT while there is the saving child at work actually
1508 * a lot of memory movements in the parent will cause a lot of pages
1510 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1511 if (!(loops
% 10)) tryResizeHashTables();
1512 if (server
.activerehashing
) incrementallyRehash();
1515 /* Show information about connected clients */
1516 if (!(loops
% 50)) {
1517 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1518 listLength(server
.clients
)-listLength(server
.slaves
),
1519 listLength(server
.slaves
),
1520 zmalloc_used_memory());
1523 /* Close connections of timedout clients */
1524 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1525 closeTimedoutClients();
1527 /* Check if a background saving or AOF rewrite in progress terminated */
1528 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1532 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1533 if (pid
== server
.bgsavechildpid
) {
1534 backgroundSaveDoneHandler(statloc
);
1536 backgroundRewriteDoneHandler(statloc
);
1538 updateDictResizePolicy();
1541 /* If there is not a background saving in progress check if
1542 * we have to save now */
1543 time_t now
= time(NULL
);
1544 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1545 struct saveparam
*sp
= server
.saveparams
+j
;
1547 if (server
.dirty
>= sp
->changes
&&
1548 now
-server
.lastsave
> sp
->seconds
) {
1549 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1550 sp
->changes
, sp
->seconds
);
1551 rdbSaveBackground(server
.dbfilename
);
1557 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1558 * will use few CPU cycles if there are few expiring keys, otherwise
1559 * it will get more aggressive to avoid that too much memory is used by
1560 * keys that can be removed from the keyspace. */
1561 for (j
= 0; j
< server
.dbnum
; j
++) {
1563 redisDb
*db
= server
.db
+j
;
1565 /* Continue to expire if at the end of the cycle more than 25%
1566 * of the keys were expired. */
1568 long num
= dictSize(db
->expires
);
1569 time_t now
= time(NULL
);
1572 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1573 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1578 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1579 t
= (time_t) dictGetEntryVal(de
);
1581 sds key
= dictGetEntryKey(de
);
1582 robj
*keyobj
= createStringObject(key
,sdslen(key
));
1584 dbDelete(db
,keyobj
);
1585 decrRefCount(keyobj
);
1587 server
.stat_expiredkeys
++;
1590 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1593 /* Swap a few keys on disk if we are over the memory limit and VM
1594 * is enabled. Try to free objects from the free list first. */
1595 if (vmCanSwapOut()) {
1596 while (server
.vm_enabled
&& zmalloc_used_memory() >
1597 server
.vm_max_memory
)
1601 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1602 retval
= (server
.vm_max_threads
== 0) ?
1603 vmSwapOneObjectBlocking() :
1604 vmSwapOneObjectThreaded();
1605 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1606 zmalloc_used_memory() >
1607 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1609 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1611 /* Note that when using threaded I/O we free just one object,
1612 * because when the I/O thread in charge of swapping this
1613 * object out finishes, the handler of completed jobs
1614 * will try to swap more objects if we are still out of memory. */
1615 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1619 /* Check if we should connect to a MASTER */
1620 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1621 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1622 if (syncWithMaster() == REDIS_OK
) {
1623 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1624 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1630 /* This function gets called every time Redis is entering the
1631 * main loop of the event driven library, that is, before sleeping
1632 * while waiting for ready file descriptors. */
1633 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1634 REDIS_NOTUSED(eventLoop
);
1636 /* Awake clients that got all the swapped keys they requested */
1637 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1641 listRewind(server
.io_ready_clients
,&li
);
1642 while((ln
= listNext(&li
))) {
1643 redisClient
*c
= ln
->value
;
1644 struct redisCommand
*cmd
;
1646 /* Resume the client. */
1647 listDelNode(server
.io_ready_clients
,ln
);
1648 c
->flags
&= (~REDIS_IO_WAIT
);
1649 server
.vm_blocked_clients
--;
1650 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1651 readQueryFromClient
, c
);
1652 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1653 assert(cmd
!= NULL
);
1656 /* There may be more data to process in the input buffer. */
1657 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1658 processInputBuffer(c
);
1661 /* Write the AOF buffer on disk */
1662 flushAppendOnlyFile();
1665 static void createSharedObjects(void) {
1668 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1669 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1670 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1671 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1672 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1673 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1674 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1675 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1676 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1677 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1678 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1679 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1680 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1681 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1682 "-ERR no such key\r\n"));
1683 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1684 "-ERR syntax error\r\n"));
1685 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1686 "-ERR source and destination objects are the same\r\n"));
1687 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1688 "-ERR index out of range\r\n"));
1689 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1690 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1691 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1692 shared
.select0
= createStringObject("select 0\r\n",10);
1693 shared
.select1
= createStringObject("select 1\r\n",10);
1694 shared
.select2
= createStringObject("select 2\r\n",10);
1695 shared
.select3
= createStringObject("select 3\r\n",10);
1696 shared
.select4
= createStringObject("select 4\r\n",10);
1697 shared
.select5
= createStringObject("select 5\r\n",10);
1698 shared
.select6
= createStringObject("select 6\r\n",10);
1699 shared
.select7
= createStringObject("select 7\r\n",10);
1700 shared
.select8
= createStringObject("select 8\r\n",10);
1701 shared
.select9
= createStringObject("select 9\r\n",10);
1702 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1703 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1704 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1705 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1706 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1707 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1708 shared
.mbulk3
= createStringObject("*3\r\n",4);
1709 shared
.mbulk4
= createStringObject("*4\r\n",4);
1710 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1711 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1712 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1716 static void appendServerSaveParams(time_t seconds
, int changes
) {
1717 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1718 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1719 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1720 server
.saveparamslen
++;
1723 static void resetServerSaveParams() {
1724 zfree(server
.saveparams
);
1725 server
.saveparams
= NULL
;
1726 server
.saveparamslen
= 0;
1729 static void initServerConfig() {
1730 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1731 server
.port
= REDIS_SERVERPORT
;
1732 server
.verbosity
= REDIS_VERBOSE
;
1733 server
.maxidletime
= REDIS_MAXIDLETIME
;
1734 server
.saveparams
= NULL
;
1735 server
.logfile
= NULL
; /* NULL = log on standard output */
1736 server
.bindaddr
= NULL
;
1737 server
.glueoutputbuf
= 1;
1738 server
.daemonize
= 0;
1739 server
.appendonly
= 0;
1740 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1741 server
.no_appendfsync_on_rewrite
= 0;
1742 server
.lastfsync
= time(NULL
);
1743 server
.appendfd
= -1;
1744 server
.appendseldb
= -1; /* Make sure the first time will not match */
1745 server
.pidfile
= zstrdup("/var/run/redis.pid");
1746 server
.dbfilename
= zstrdup("dump.rdb");
1747 server
.appendfilename
= zstrdup("appendonly.aof");
1748 server
.requirepass
= NULL
;
1749 server
.rdbcompression
= 1;
1750 server
.activerehashing
= 1;
1751 server
.maxclients
= 0;
1752 server
.blpop_blocked_clients
= 0;
1753 server
.maxmemory
= 0;
1754 server
.vm_enabled
= 0;
1755 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1756 server
.vm_page_size
= 256; /* 256 bytes per page */
1757 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1758 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1759 server
.vm_max_threads
= 4;
1760 server
.vm_blocked_clients
= 0;
1761 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1762 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1763 server
.list_max_ziplist_entries
= REDIS_LIST_MAX_ZIPLIST_ENTRIES
;
1764 server
.list_max_ziplist_value
= REDIS_LIST_MAX_ZIPLIST_VALUE
;
1765 server
.shutdown_asap
= 0;
1767 resetServerSaveParams();
1769 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1770 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1771 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1772 /* Replication related */
1774 server
.masterauth
= NULL
;
1775 server
.masterhost
= NULL
;
1776 server
.masterport
= 6379;
1777 server
.master
= NULL
;
1778 server
.replstate
= REDIS_REPL_NONE
;
1780 /* Double constants initialization */
1782 R_PosInf
= 1.0/R_Zero
;
1783 R_NegInf
= -1.0/R_Zero
;
1784 R_Nan
= R_Zero
/R_Zero
;
1787 static void initServer() {
1790 signal(SIGHUP
, SIG_IGN
);
1791 signal(SIGPIPE
, SIG_IGN
);
1792 setupSigSegvAction();
1794 server
.devnull
= fopen("/dev/null","w");
1795 if (server
.devnull
== NULL
) {
1796 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1799 server
.clients
= listCreate();
1800 server
.slaves
= listCreate();
1801 server
.monitors
= listCreate();
1802 server
.objfreelist
= listCreate();
1803 createSharedObjects();
1804 server
.el
= aeCreateEventLoop();
1805 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1806 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1807 if (server
.fd
== -1) {
1808 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1811 for (j
= 0; j
< server
.dbnum
; j
++) {
1812 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1813 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1814 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1815 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
1816 if (server
.vm_enabled
)
1817 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1818 server
.db
[j
].id
= j
;
1820 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1821 server
.pubsub_patterns
= listCreate();
1822 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1823 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1824 server
.cronloops
= 0;
1825 server
.bgsavechildpid
= -1;
1826 server
.bgrewritechildpid
= -1;
1827 server
.bgrewritebuf
= sdsempty();
1828 server
.aofbuf
= sdsempty();
1829 server
.lastsave
= time(NULL
);
1831 server
.stat_numcommands
= 0;
1832 server
.stat_numconnections
= 0;
1833 server
.stat_expiredkeys
= 0;
1834 server
.stat_starttime
= time(NULL
);
1835 server
.unixtime
= time(NULL
);
1836 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1837 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1838 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1840 if (server
.appendonly
) {
1841 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1842 if (server
.appendfd
== -1) {
1843 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1849 if (server
.vm_enabled
) vmInit();
1852 /* Empty the whole database */
1853 static long long emptyDb() {
1855 long long removed
= 0;
1857 for (j
= 0; j
< server
.dbnum
; j
++) {
1858 removed
+= dictSize(server
.db
[j
].dict
);
1859 dictEmpty(server
.db
[j
].dict
);
1860 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes"/"no" configuration argument into an integer.
 *
 * Returns 1 for "yes" and 0 for "no" (both compared case-insensitively).
 * Any other string yields -1, which is the value the configuration loader
 * tests for in order to report "argument must be 'yes' or 'no'". */
static int yesnotoi(char *s) {
    if (!strcasecmp(s,"yes")) return 1;
    else if (!strcasecmp(s,"no")) return 0;
    else return -1;
}
1871 /* I agree, this is a very rudimentary way to load a configuration...
1872 It will be improved later if the config gets more complex. */
1873 static void loadServerConfig(char *filename
) {
1875 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1879 if (filename
[0] == '-' && filename
[1] == '\0')
1882 if ((fp
= fopen(filename
,"r")) == NULL
) {
1883 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1888 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1894 line
= sdstrim(line
," \t\r\n");
1896 /* Skip comments and blank lines*/
1897 if (line
[0] == '#' || line
[0] == '\0') {
1902 /* Split into arguments */
1903 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1904 sdstolower(argv
[0]);
1906 /* Execute config directives */
1907 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1908 server
.maxidletime
= atoi(argv
[1]);
1909 if (server
.maxidletime
< 0) {
1910 err
= "Invalid timeout value"; goto loaderr
;
1912 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1913 server
.port
= atoi(argv
[1]);
1914 if (server
.port
< 1 || server
.port
> 65535) {
1915 err
= "Invalid port"; goto loaderr
;
1917 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1918 server
.bindaddr
= zstrdup(argv
[1]);
1919 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1920 int seconds
= atoi(argv
[1]);
1921 int changes
= atoi(argv
[2]);
1922 if (seconds
< 1 || changes
< 0) {
1923 err
= "Invalid save parameters"; goto loaderr
;
1925 appendServerSaveParams(seconds
,changes
);
1926 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1927 if (chdir(argv
[1]) == -1) {
1928 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1929 argv
[1], strerror(errno
));
1932 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1933 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1934 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1935 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1936 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1938 err
= "Invalid log level. Must be one of debug, notice, warning";
1941 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1944 server
.logfile
= zstrdup(argv
[1]);
1945 if (!strcasecmp(server
.logfile
,"stdout")) {
1946 zfree(server
.logfile
);
1947 server
.logfile
= NULL
;
1949 if (server
.logfile
) {
1950 /* Test if we are able to open the file. The server will not
1951 * be able to abort just for this problem later... */
1952 logfp
= fopen(server
.logfile
,"a");
1953 if (logfp
== NULL
) {
1954 err
= sdscatprintf(sdsempty(),
1955 "Can't open the log file: %s", strerror(errno
));
1960 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1961 server
.dbnum
= atoi(argv
[1]);
1962 if (server
.dbnum
< 1) {
1963 err
= "Invalid number of databases"; goto loaderr
;
1965 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1966 loadServerConfig(argv
[1]);
1967 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1968 server
.maxclients
= atoi(argv
[1]);
1969 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1970 server
.maxmemory
= memtoll(argv
[1],NULL
);
1971 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1972 server
.masterhost
= sdsnew(argv
[1]);
1973 server
.masterport
= atoi(argv
[2]);
1974 server
.replstate
= REDIS_REPL_CONNECT
;
1975 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1976 server
.masterauth
= zstrdup(argv
[1]);
1977 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1978 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1979 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1981 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1982 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1983 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1985 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1986 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1987 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1989 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1990 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1991 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1993 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1994 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1995 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1997 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1998 zfree(server
.appendfilename
);
1999 server
.appendfilename
= zstrdup(argv
[1]);
2000 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
2002 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
2003 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2005 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
2006 if (!strcasecmp(argv
[1],"no")) {
2007 server
.appendfsync
= APPENDFSYNC_NO
;
2008 } else if (!strcasecmp(argv
[1],"always")) {
2009 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
2010 } else if (!strcasecmp(argv
[1],"everysec")) {
2011 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
2013 err
= "argument must be 'no', 'always' or 'everysec'";
2016 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
2017 server
.requirepass
= zstrdup(argv
[1]);
2018 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
2019 zfree(server
.pidfile
);
2020 server
.pidfile
= zstrdup(argv
[1]);
2021 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
2022 zfree(server
.dbfilename
);
2023 server
.dbfilename
= zstrdup(argv
[1]);
2024 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
2025 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
2026 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2028 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
2029 zfree(server
.vm_swap_file
);
2030 server
.vm_swap_file
= zstrdup(argv
[1]);
2031 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
2032 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
2033 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
2034 server
.vm_page_size
= memtoll(argv
[1], NULL
);
2035 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
2036 server
.vm_pages
= memtoll(argv
[1], NULL
);
2037 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
2038 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
2039 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
2040 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
2041 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
2042 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
2043 } else if (!strcasecmp(argv
[0],"list-max-ziplist-entries") && argc
== 2){
2044 server
.list_max_ziplist_entries
= memtoll(argv
[1], NULL
);
2045 } else if (!strcasecmp(argv
[0],"list-max-ziplist-value") && argc
== 2){
2046 server
.list_max_ziplist_value
= memtoll(argv
[1], NULL
);
2048 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
2050 for (j
= 0; j
< argc
; j
++)
2055 if (fp
!= stdin
) fclose(fp
);
2059 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2060 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2061 fprintf(stderr
, ">>> '%s'\n", line
);
2062 fprintf(stderr
, "%s\n", err
);
2066 static void freeClientArgv(redisClient
*c
) {
2069 for (j
= 0; j
< c
->argc
; j
++)
2070 decrRefCount(c
->argv
[j
]);
2071 for (j
= 0; j
< c
->mbargc
; j
++)
2072 decrRefCount(c
->mbargv
[j
]);
2077 static void freeClient(redisClient
*c
) {
2080 /* Note that if the client we are freeing is blocked in a blocking
2081 * call, we have to set querybuf to NULL *before* calling
2082 * unblockClientWaitingData(), so that processInputBuffer() does not get
2083 * called. It is also important to remove the file events after
2084 * this, because this call adds the READABLE event. */
2085 sdsfree(c
->querybuf
);
2087 if (c
->flags
& REDIS_BLOCKED
)
2088 unblockClientWaitingData(c
);
2090 /* UNWATCH all the keys */
2092 listRelease(c
->watched_keys
);
2093 /* Unsubscribe from all the pubsub channels */
2094 pubsubUnsubscribeAllChannels(c
,0);
2095 pubsubUnsubscribeAllPatterns(c
,0);
2096 dictRelease(c
->pubsub_channels
);
2097 listRelease(c
->pubsub_patterns
);
2098 /* Obvious cleanup */
2099 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2100 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2101 listRelease(c
->reply
);
2104 /* Remove from the list of clients */
2105 ln
= listSearchKey(server
.clients
,c
);
2106 redisAssert(ln
!= NULL
);
2107 listDelNode(server
.clients
,ln
);
2108 /* Remove from the list of clients that are now ready to be restarted
2109 * after waiting for swapped keys */
2110 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2111 ln
= listSearchKey(server
.io_ready_clients
,c
);
2113 listDelNode(server
.io_ready_clients
,ln
);
2114 server
.vm_blocked_clients
--;
2117 /* Remove from the list of clients waiting for swapped keys */
2118 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2119 ln
= listFirst(c
->io_keys
);
2120 dontWaitForSwappedKey(c
,ln
->value
);
2122 listRelease(c
->io_keys
);
2123 /* Master/slave cleanup */
2124 if (c
->flags
& REDIS_SLAVE
) {
2125 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
2127 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2128 ln
= listSearchKey(l
,c
);
2129 redisAssert(ln
!= NULL
);
2132 if (c
->flags
& REDIS_MASTER
) {
2133 server
.master
= NULL
;
2134 server
.replstate
= REDIS_REPL_CONNECT
;
2136 /* Release memory */
2139 freeClientMultiState(c
);
2143 #define GLUEREPLY_UP_TO (1024)
2144 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2146 char buf
[GLUEREPLY_UP_TO
];
2151 listRewind(c
->reply
,&li
);
2152 while((ln
= listNext(&li
))) {
2156 objlen
= sdslen(o
->ptr
);
2157 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2158 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2160 listDelNode(c
->reply
,ln
);
2162 if (copylen
== 0) return;
2166 /* Now the output buffer is empty, add the new single element */
2167 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2168 listAddNodeHead(c
->reply
,o
);
2171 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2172 redisClient
*c
= privdata
;
2173 int nwritten
= 0, totwritten
= 0, objlen
;
2176 REDIS_NOTUSED(mask
);
2178 /* Use writev() if we have enough buffers to send */
2179 if (!server
.glueoutputbuf
&&
2180 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2181 !(c
->flags
& REDIS_MASTER
))
2183 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2187 while(listLength(c
->reply
)) {
2188 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2189 glueReplyBuffersIfNeeded(c
);
2191 o
= listNodeValue(listFirst(c
->reply
));
2192 objlen
= sdslen(o
->ptr
);
2195 listDelNode(c
->reply
,listFirst(c
->reply
));
2199 if (c
->flags
& REDIS_MASTER
) {
2200 /* Don't reply to a master */
2201 nwritten
= objlen
- c
->sentlen
;
2203 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2204 if (nwritten
<= 0) break;
2206 c
->sentlen
+= nwritten
;
2207 totwritten
+= nwritten
;
2208 /* If we fully sent the object on head go to the next one */
2209 if (c
->sentlen
== objlen
) {
2210 listDelNode(c
->reply
,listFirst(c
->reply
));
2213 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2214 * bytes: in a single threaded server it's a good idea to serve
2215 * other clients as well, even if a very large request comes from a
2216 * super fast link that is always able to accept data (as a real world
2217 * scenario think about 'KEYS *' against the loopback interface) */
2218 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2220 if (nwritten
== -1) {
2221 if (errno
== EAGAIN
) {
2224 redisLog(REDIS_VERBOSE
,
2225 "Error writing to client: %s", strerror(errno
));
2230 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2231 if (listLength(c
->reply
) == 0) {
2233 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2237 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2239 redisClient
*c
= privdata
;
2240 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2242 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2243 int offset
, ion
= 0;
2245 REDIS_NOTUSED(mask
);
2248 while (listLength(c
->reply
)) {
2249 offset
= c
->sentlen
;
2253 /* fill-in the iov[] array */
2254 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2255 o
= listNodeValue(node
);
2256 objlen
= sdslen(o
->ptr
);
2258 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2261 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2262 break; /* no more iovecs */
2264 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2265 iov
[ion
].iov_len
= objlen
- offset
;
2266 willwrite
+= objlen
- offset
;
2267 offset
= 0; /* just for the first item */
2274 /* write all collected blocks at once */
2275 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2276 if (errno
!= EAGAIN
) {
2277 redisLog(REDIS_VERBOSE
,
2278 "Error writing to client: %s", strerror(errno
));
2285 totwritten
+= nwritten
;
2286 offset
= c
->sentlen
;
2288 /* remove written robjs from c->reply */
2289 while (nwritten
&& listLength(c
->reply
)) {
2290 o
= listNodeValue(listFirst(c
->reply
));
2291 objlen
= sdslen(o
->ptr
);
2293 if(nwritten
>= objlen
- offset
) {
2294 listDelNode(c
->reply
, listFirst(c
->reply
));
2295 nwritten
-= objlen
- offset
;
2299 c
->sentlen
+= nwritten
;
2307 c
->lastinteraction
= time(NULL
);
2309 if (listLength(c
->reply
) == 0) {
2311 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2315 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2317 ((struct redisCommand
*)r1
)->name
,
2318 ((struct redisCommand
*)r2
)->name
);
2321 static void sortCommandTable() {
2322 /* Copy and sort the read-only version of the command table */
2323 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2324 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2326 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2327 sizeof(struct redisCommand
),qsortRedisCommands
);
2330 static struct redisCommand
*lookupCommand(char *name
) {
2331 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2335 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2336 sizeof(struct redisCommand
),
2337 qsortRedisCommands
);
2340 /* resetClient prepare the client to process the next command */
2341 static void resetClient(redisClient
*c
) {
2347 /* Call() is the core of Redis execution of a command */
2348 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2351 dirty
= server
.dirty
;
2353 dirty
= server
.dirty
-dirty
;
2355 if (server
.appendonly
&& dirty
)
2356 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2357 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2358 listLength(server
.slaves
))
2359 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2360 if (listLength(server
.monitors
))
2361 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2362 server
.stat_numcommands
++;
2365 /* If this function gets called we already read a whole
2366 * command, arguments are in the client argv/argc fields.
2367 * processCommand() executes the command or prepares the
2368 * server for a bulk read from the client.
2370 * If 1 is returned the client is still alive and valid and
2371 * other operations can be performed by the caller. Otherwise
2372 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2373 static int processCommand(redisClient
*c
) {
2374 struct redisCommand
*cmd
;
2376 /* Free some memory if needed (maxmemory setting) */
2377 if (server
.maxmemory
) freeMemoryIfNeeded();
2379 /* Handle the multi bulk command type. This is an alternative protocol
2380 * supported by Redis in order to receive commands that are composed of
2381 * multiple binary-safe "bulk" arguments. The latency of processing is
2382 * a bit higher but this allows things like multi-sets, so if this
2383 * protocol is used only for MSET and similar commands this is a big win. */
2384 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2385 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2386 if (c
->multibulk
<= 0) {
2390 decrRefCount(c
->argv
[c
->argc
-1]);
2394 } else if (c
->multibulk
) {
2395 if (c
->bulklen
== -1) {
2396 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2397 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2401 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2402 decrRefCount(c
->argv
[0]);
2403 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2405 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2410 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2414 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2415 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2419 if (c
->multibulk
== 0) {
2423 /* Here we need to swap the multi-bulk argc/argv with the
2424 * normal argc/argv of the client structure. */
2426 c
->argv
= c
->mbargv
;
2427 c
->mbargv
= auxargv
;
2430 c
->argc
= c
->mbargc
;
2431 c
->mbargc
= auxargc
;
2433 /* We need to set bulklen to something different than -1
2434 * in order for the code below to process the command without
2435 * to try to read the last argument of a bulk command as
2436 * a special argument. */
2438 /* continue below and process the command */
2445 /* -- end of multi bulk commands processing -- */
2447 /* The QUIT command is handled as a special case. Normal command
2448 * procs are unable to close the client connection safely */
2449 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2454 /* Now lookup the command and check ASAP about trivial error conditions
2455 * such wrong arity, bad command name and so forth. */
2456 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2459 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2460 (char*)c
->argv
[0]->ptr
));
2463 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2464 (c
->argc
< -cmd
->arity
)) {
2466 sdscatprintf(sdsempty(),
2467 "-ERR wrong number of arguments for '%s' command\r\n",
2471 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2472 /* This is a bulk command, we have to read the last argument yet. */
2473 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2475 decrRefCount(c
->argv
[c
->argc
-1]);
2476 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2478 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2483 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2484 /* It is possible that the bulk read is already in the
2485 * buffer. Check this condition and handle it accordingly.
2486 * This is just a fast path, alternative to call processInputBuffer().
2487 * It's a good idea since the code is small and this condition
2488 * happens most of the times. */
2489 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2490 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2492 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2494 /* Otherwise return... there is to read the last argument
2495 * from the socket. */
2499 /* Let's try to encode the bulk object to save space. */
2500 if (cmd
->flags
& REDIS_CMD_BULK
)
2501 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2503 /* Check if the user is authenticated */
2504 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2505 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2510 /* Handle the maxmemory directive */
2511 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2512 zmalloc_used_memory() > server
.maxmemory
)
2514 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2519 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2520 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2522 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2523 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2524 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2529 /* Exec the command */
2530 if (c
->flags
& REDIS_MULTI
&&
2531 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2532 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2534 queueMultiCommand(c
,cmd
);
2535 addReply(c
,shared
.queued
);
2537 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2538 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2542 /* Prepare the client for the next command */
2547 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2552 /* We need 1+(ARGS*3) objects since commands are using the new protocol:
2553 * we need 1 object for the first "*<count>\r\n" multibulk count, then
2554 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2555 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2558 if (argc
<= REDIS_STATIC_ARGS
) {
2561 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2564 lenobj
= createObject(REDIS_STRING
,
2565 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2566 lenobj
->refcount
= 0;
2567 outv
[outc
++] = lenobj
;
2568 for (j
= 0; j
< argc
; j
++) {
2569 lenobj
= createObject(REDIS_STRING
,
2570 sdscatprintf(sdsempty(),"$%lu\r\n",
2571 (unsigned long) stringObjectLen(argv
[j
])));
2572 lenobj
->refcount
= 0;
2573 outv
[outc
++] = lenobj
;
2574 outv
[outc
++] = argv
[j
];
2575 outv
[outc
++] = shared
.crlf
;
2578 /* Increment all the refcounts at start and decrement at end in order to
2579 * be sure to free objects if there is no slave in a replication state
2580 * able to be feed with commands */
2581 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2582 listRewind(slaves
,&li
);
2583 while((ln
= listNext(&li
))) {
2584 redisClient
*slave
= ln
->value
;
2586 /* Don't feed slaves that are still waiting for BGSAVE to start */
2587 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2589 /* Feed all the other slaves, MONITORs and so on */
2590 if (slave
->slaveseldb
!= dictid
) {
2594 case 0: selectcmd
= shared
.select0
; break;
2595 case 1: selectcmd
= shared
.select1
; break;
2596 case 2: selectcmd
= shared
.select2
; break;
2597 case 3: selectcmd
= shared
.select3
; break;
2598 case 4: selectcmd
= shared
.select4
; break;
2599 case 5: selectcmd
= shared
.select5
; break;
2600 case 6: selectcmd
= shared
.select6
; break;
2601 case 7: selectcmd
= shared
.select7
; break;
2602 case 8: selectcmd
= shared
.select8
; break;
2603 case 9: selectcmd
= shared
.select9
; break;
2605 selectcmd
= createObject(REDIS_STRING
,
2606 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2607 selectcmd
->refcount
= 0;
2610 addReply(slave
,selectcmd
);
2611 slave
->slaveseldb
= dictid
;
2613 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2615 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2616 if (outv
!= static_outv
) zfree(outv
);
2619 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2620 s
= sdscatlen(s
,"\"",1);
2625 s
= sdscatprintf(s
,"\\%c",*p
);
2627 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2628 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2629 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2630 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2631 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2634 s
= sdscatprintf(s
,"%c",*p
);
2636 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2641 return sdscatlen(s
,"\"",1);
2644 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2648 sds cmdrepr
= sdsnew("+");
2652 gettimeofday(&tv
,NULL
);
2653 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2654 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2656 for (j
= 0; j
< argc
; j
++) {
2657 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2658 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2660 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2661 sdslen(argv
[j
]->ptr
));
2664 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2666 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2667 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2669 listRewind(monitors
,&li
);
2670 while((ln
= listNext(&li
))) {
2671 redisClient
*monitor
= ln
->value
;
2672 addReply(monitor
,cmdobj
);
2674 decrRefCount(cmdobj
);
2677 static void processInputBuffer(redisClient
*c
) {
2679 /* Before processing the input buffer, make sure the client is not
2680 * waiting for a blocking operation such as BLPOP. Note that on the first
2681 * iteration the client is never blocked, otherwise processInputBuffer()
2682 * would not be called at all, but after the execution of the first commands
2683 * in the input buffer the client may be blocked, and the "goto again"
2684 * will try to reiterate. The following line will make it return asap. */
2685 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2686 if (c
->bulklen
== -1) {
2687 /* Read the first line of the query */
2688 char *p
= strchr(c
->querybuf
,'\n');
2695 query
= c
->querybuf
;
2696 c
->querybuf
= sdsempty();
2697 querylen
= 1+(p
-(query
));
2698 if (sdslen(query
) > querylen
) {
2699 /* leave data after the first line of the query in the buffer */
2700 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2702 *p
= '\0'; /* remove "\n" */
2703 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2704 sdsupdatelen(query
);
2706 /* Now we can split the query in arguments */
2707 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2710 if (c
->argv
) zfree(c
->argv
);
2711 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2713 for (j
= 0; j
< argc
; j
++) {
2714 if (sdslen(argv
[j
])) {
2715 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2723 /* Execute the command. If the client is still valid
2724 * after processCommand() return and there is something
2725 * on the query buffer try to process the next command. */
2726 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2728 /* Nothing to process, argc == 0. Just process the query
2729 * buffer if it's not empty or return to the caller */
2730 if (sdslen(c
->querybuf
)) goto again
;
2733 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2734 redisLog(REDIS_VERBOSE
, "Client protocol error");
2739 /* Bulk read handling. Note that if we are at this point
2740 the client already sent a command terminated with a newline,
2741 we are reading the bulk data that is actually the last
2742 argument of the command. */
2743 int qbl
= sdslen(c
->querybuf
);
2745 if (c
->bulklen
<= qbl
) {
2746 /* Copy everything but the final CRLF as final argument */
2747 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2749 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2750 /* Process the command. If the client is still valid after
2751 * the processing and there is more data in the buffer
2752 * try to parse it. */
2753 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2759 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2760 redisClient
*c
= (redisClient
*) privdata
;
2761 char buf
[REDIS_IOBUF_LEN
];
2764 REDIS_NOTUSED(mask
);
2766 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2768 if (errno
== EAGAIN
) {
2771 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2775 } else if (nread
== 0) {
2776 redisLog(REDIS_VERBOSE
, "Client closed connection");
2781 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2782 c
->lastinteraction
= time(NULL
);
2786 processInputBuffer(c
);
2789 static int selectDb(redisClient
*c
, int id
) {
2790 if (id
< 0 || id
>= server
.dbnum
)
2792 c
->db
= &server
.db
[id
];
2796 static void *dupClientReplyValue(void *o
) {
2797 incrRefCount((robj
*)o
);
2801 static int listMatchObjects(void *a
, void *b
) {
2802 return equalStringObjects(a
,b
);
2805 static redisClient
*createClient(int fd
) {
2806 redisClient
*c
= zmalloc(sizeof(*c
));
2808 anetNonBlock(NULL
,fd
);
2809 anetTcpNoDelay(NULL
,fd
);
2810 if (!c
) return NULL
;
2813 c
->querybuf
= sdsempty();
2822 c
->lastinteraction
= time(NULL
);
2823 c
->authenticated
= 0;
2824 c
->replstate
= REDIS_REPL_NONE
;
2825 c
->reply
= listCreate();
2826 listSetFreeMethod(c
->reply
,decrRefCount
);
2827 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2828 c
->blocking_keys
= NULL
;
2829 c
->blocking_keys_num
= 0;
2830 c
->io_keys
= listCreate();
2831 c
->watched_keys
= listCreate();
2832 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2833 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2834 c
->pubsub_patterns
= listCreate();
2835 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2836 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2837 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2838 readQueryFromClient
, c
) == AE_ERR
) {
2842 listAddNodeTail(server
.clients
,c
);
2843 initClientMultiState(c
);
2847 static void addReply(redisClient
*c
, robj
*obj
) {
2848 if (listLength(c
->reply
) == 0 &&
2849 (c
->replstate
== REDIS_REPL_NONE
||
2850 c
->replstate
== REDIS_REPL_ONLINE
) &&
2851 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2852 sendReplyToClient
, c
) == AE_ERR
) return;
2854 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2855 obj
= dupStringObject(obj
);
2856 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2858 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2861 static void addReplySds(redisClient
*c
, sds s
) {
2862 robj
*o
= createObject(REDIS_STRING
,s
);
2867 static void addReplyDouble(redisClient
*c
, double d
) {
2870 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2871 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2872 (unsigned long) strlen(buf
),buf
));
2875 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2880 addReply(c
,shared
.czero
);
2882 } else if (ll
== 1) {
2883 addReply(c
,shared
.cone
);
2887 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2890 addReplySds(c
,sdsnewlen(buf
,len
+3));
2893 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2898 addReply(c
,shared
.czero
);
2900 } else if (ul
== 1) {
2901 addReply(c
,shared
.cone
);
2904 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2905 addReplySds(c
,sdsnewlen(buf
,len
));
2908 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2912 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2913 len
= sdslen(obj
->ptr
);
2915 long n
= (long)obj
->ptr
;
2917 /* Compute how many bytes this integer will take as a radix 10 string */
2923 while((n
= n
/10) != 0) {
2928 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2929 buf
[intlen
+1] = '\r';
2930 buf
[intlen
+2] = '\n';
2931 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2934 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2935 addReplyBulkLen(c
,obj
);
2937 addReply(c
,shared
.crlf
);
2940 static void addReplyBulkSds(redisClient
*c
, sds s
) {
2941 robj
*o
= createStringObject(s
, sdslen(s
));
2946 /* In the CONFIG command we need to add vanilla C strings as bulk replies */
2947 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2949 addReply(c
,shared
.nullbulk
);
2951 robj
*o
= createStringObject(s
,strlen(s
));
2957 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2962 REDIS_NOTUSED(mask
);
2963 REDIS_NOTUSED(privdata
);
2965 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2966 if (cfd
== AE_ERR
) {
2967 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2970 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2971 if ((c
= createClient(cfd
)) == NULL
) {
2972 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2973 close(cfd
); /* May be already closed, just ingore errors */
2976 /* If the maxclients directive is set and this is one client too many, close
2977 * the connection. Note that we create the client first instead of checking
2978 * for this condition beforehand, since now the socket is already set in
2979 * nonblocking mode and we can send an error for free using the Kernel I/O */
2980 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2981 char *err
= "-ERR max number of clients reached\r\n";
2983 /* That's a best effort error message, don't check write errors */
2984 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2985 /* Nothing to do, Just to avoid the warning... */
2990 server
.stat_numconnections
++;
2993 /* ======================= Redis objects implementation ===================== */
2995 static robj
*createObject(int type
, void *ptr
) {
2998 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2999 if (listLength(server
.objfreelist
)) {
3000 listNode
*head
= listFirst(server
.objfreelist
);
3001 o
= listNodeValue(head
);
3002 listDelNode(server
.objfreelist
,head
);
3003 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3005 if (server
.vm_enabled
)
3006 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3007 o
= zmalloc(sizeof(*o
));
3010 o
->encoding
= REDIS_ENCODING_RAW
;
3013 if (server
.vm_enabled
) {
3014 /* Note that this code may run in the context of an I/O thread
3015 * and accessing server.lruclock in theory is an error
3016 * (no locks). But in practice this is safe, and even if we read
3017 * garbage Redis will not fail. */
3018 o
->lru
= server
.lruclock
;
3019 o
->storage
= REDIS_VM_MEMORY
;
3024 static robj
*createStringObject(char *ptr
, size_t len
) {
3025 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
3028 static robj
*createStringObjectFromLongLong(long long value
) {
3030 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3031 incrRefCount(shared
.integers
[value
]);
3032 o
= shared
.integers
[value
];
3034 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
3035 o
= createObject(REDIS_STRING
, NULL
);
3036 o
->encoding
= REDIS_ENCODING_INT
;
3037 o
->ptr
= (void*)((long)value
);
3039 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
3045 static robj
*dupStringObject(robj
*o
) {
3046 assert(o
->encoding
== REDIS_ENCODING_RAW
);
3047 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
3050 static robj
*createListObject(void) {
3051 list
*l
= listCreate();
3052 robj
*o
= createObject(REDIS_LIST
,l
);
3053 listSetFreeMethod(l
,decrRefCount
);
3054 o
->encoding
= REDIS_ENCODING_LIST
;
3058 static robj
*createZiplistObject(void) {
3059 unsigned char *zl
= ziplistNew();
3060 robj
*o
= createObject(REDIS_LIST
,zl
);
3061 o
->encoding
= REDIS_ENCODING_ZIPLIST
;
3065 static robj
*createSetObject(void) {
3066 dict
*d
= dictCreate(&setDictType
,NULL
);
3067 robj
*o
= createObject(REDIS_SET
,d
);
3068 o
->encoding
= REDIS_ENCODING_HT
;
3072 static robj
*createHashObject(void) {
3073 /* All the Hashes start as zipmaps. Will be automatically converted
3074 * into hash tables if there are enough elements or big elements
3076 unsigned char *zm
= zipmapNew();
3077 robj
*o
= createObject(REDIS_HASH
,zm
);
3078 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3082 static robj
*createZsetObject(void) {
3083 zset
*zs
= zmalloc(sizeof(*zs
));
3085 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3086 zs
->zsl
= zslCreate();
3087 return createObject(REDIS_ZSET
,zs
);
3090 static void freeStringObject(robj
*o
) {
3091 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3096 static void freeListObject(robj
*o
) {
3097 switch (o
->encoding
) {
3098 case REDIS_ENCODING_LIST
:
3099 listRelease((list
*) o
->ptr
);
3101 case REDIS_ENCODING_ZIPLIST
:
3105 redisPanic("Unknown list encoding type");
3109 static void freeSetObject(robj
*o
) {
3110 dictRelease((dict
*) o
->ptr
);
3113 static void freeZsetObject(robj
*o
) {
3116 dictRelease(zs
->dict
);
3121 static void freeHashObject(robj
*o
) {
3122 switch (o
->encoding
) {
3123 case REDIS_ENCODING_HT
:
3124 dictRelease((dict
*) o
->ptr
);
3126 case REDIS_ENCODING_ZIPMAP
:
3130 redisPanic("Unknown hash encoding type");
3135 static void incrRefCount(robj
*o
) {
3139 static void decrRefCount(void *obj
) {
3142 /* Object is a swapped out value, or in the process of being loaded. */
3143 if (server
.vm_enabled
&&
3144 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3146 vmpointer
*vp
= obj
;
3147 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
);
3148 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
3149 server
.vm_stats_swapped_objects
--;
3154 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3155 /* Object is in memory, or in the process of being swapped out.
3157 * If the object is being swapped out, abort the operation on
3158 * decrRefCount even if the refcount does not drop to 0: the object
3159 * is referenced at least two times, as value of the key AND as
3160 * job->val in the iojob. So if we don't invalidate the iojob, when it is
3161 * done but the relevant key was removed in the meantime, the
3162 * complete jobs handler will not find the key about the job and the
3163 * assert will fail. */
3164 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3165 vmCancelThreadedIOJob(o
);
3166 if (--(o
->refcount
) == 0) {
3168 case REDIS_STRING
: freeStringObject(o
); break;
3169 case REDIS_LIST
: freeListObject(o
); break;
3170 case REDIS_SET
: freeSetObject(o
); break;
3171 case REDIS_ZSET
: freeZsetObject(o
); break;
3172 case REDIS_HASH
: freeHashObject(o
); break;
3173 default: redisPanic("Unknown object type"); break;
3175 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3176 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3177 !listAddNodeHead(server
.objfreelist
,o
))
3179 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3183 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3184 if (o
->type
!= type
) {
3185 addReply(c
,shared
.wrongtypeerr
);
3191 /* Check if the nul-terminated string 's' can be represented by a long
3192 * (that is, is a number that fits into long without any other space or
3193 * character before or after the digits).
3195 * If so, the function returns REDIS_OK and *longval is set to the value
3196 * of the number. Otherwise REDIS_ERR is returned */
3197 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3198 char buf
[32], *endptr
;
3202 value
= strtol(s
, &endptr
, 10);
3203 if (endptr
[0] != '\0') return REDIS_ERR
;
3204 slen
= ll2string(buf
,32,value
);
3206 /* If the number converted back into a string is not identical
3207 * then it's not possible to encode the string as integer */
3208 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3209 if (longval
) *longval
= value
;
3213 /* Try to encode a string object in order to save space */
3214 static robj
*tryObjectEncoding(robj
*o
) {
3218 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3219 return o
; /* Already encoded */
3221 /* It's not safe to encode shared objects: shared objects can be shared
3222 * everywhere in the "object space" of Redis. Encoded objects can only
3223 * appear as "values" (and not, for instance, as keys) */
3224 if (o
->refcount
> 1) return o
;
3226 /* Currently we try to encode only strings */
3227 redisAssert(o
->type
== REDIS_STRING
);
3229 /* Check if we can represent this string as a long integer */
3230 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3232 /* Ok, this object can be encoded */
3233 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3235 incrRefCount(shared
.integers
[value
]);
3236 return shared
.integers
[value
];
3238 o
->encoding
= REDIS_ENCODING_INT
;
3240 o
->ptr
= (void*) value
;
3245 /* Get a decoded version of an encoded object (returned as a new object).
3246 * If the object is already raw-encoded just increment the ref count. */
3247 static robj
*getDecodedObject(robj
*o
) {
3250 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3254 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3257 ll2string(buf
,32,(long)o
->ptr
);
3258 dec
= createStringObject(buf
,strlen(buf
));
3261 redisPanic("Unknown encoding type");
3265 /* Compare two string objects via strcmp() or alike.
3266 * Note that the objects may be integer-encoded. In such a case we
3267 * use ll2string() to get a string representation of the numbers on the stack
3268 * and compare the strings, it's much faster than calling getDecodedObject().
3270 * Important note: if objects are not integer encoded, but binary-safe strings,
3271 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3273 static int compareStringObjects(robj
*a
, robj
*b
) {
3274 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3275 char bufa
[128], bufb
[128], *astr
, *bstr
;
3278 if (a
== b
) return 0;
3279 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3280 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3286 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3287 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3293 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3296 /* Equal string objects return 1 if the two objects are the same from the
3297 * point of view of a string comparison, otherwise 0 is returned. Note that
3298 * this function is faster than checking for (compareStringObjects(a,b) == 0)
3299 * because it can perform some more optimization. */
3300 static int equalStringObjects(robj
*a
, robj
*b
) {
3301 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3302 return a
->ptr
== b
->ptr
;
3304 return compareStringObjects(a
,b
) == 0;
3308 static size_t stringObjectLen(robj
*o
) {
3309 redisAssert(o
->type
== REDIS_STRING
);
3310 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3311 return sdslen(o
->ptr
);
3315 return ll2string(buf
,32,(long)o
->ptr
);
3319 static int getDoubleFromObject(robj
*o
, double *target
) {
3326 redisAssert(o
->type
== REDIS_STRING
);
3327 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3328 value
= strtod(o
->ptr
, &eptr
);
3329 if (eptr
[0] != '\0') return REDIS_ERR
;
3330 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3331 value
= (long)o
->ptr
;
3333 redisPanic("Unknown string encoding");
3341 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3343 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3345 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3347 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3356 static int getLongLongFromObject(robj
*o
, long long *target
) {
3363 redisAssert(o
->type
== REDIS_STRING
);
3364 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3365 value
= strtoll(o
->ptr
, &eptr
, 10);
3366 if (eptr
[0] != '\0') return REDIS_ERR
;
3367 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3368 value
= (long)o
->ptr
;
3370 redisPanic("Unknown string encoding");
3378 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3380 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3382 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3384 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3393 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3396 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3397 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3399 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3401 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3410 /* =========================== Keyspace access API ========================== */
3412 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3413 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
3415 robj
*val
= dictGetEntryVal(de
);
3417 if (server
.vm_enabled
) {
3418 if (val
->storage
== REDIS_VM_MEMORY
||
3419 val
->storage
== REDIS_VM_SWAPPING
)
3421 /* If we were swapping the object out, cancel the operation */
3422 if (val
->storage
== REDIS_VM_SWAPPING
)
3423 vmCancelThreadedIOJob(val
);
3424 /* Update the access time for the aging algorithm. */
3425 val
->lru
= server
.lruclock
;
3427 int notify
= (val
->storage
== REDIS_VM_LOADING
);
3429 /* Our value was swapped on disk. Bring it at home. */
3430 redisAssert(val
->type
== REDIS_VMPOINTER
);
3431 val
= vmLoadObject(val
);
3432 dictGetEntryVal(de
) = val
;
3434 /* Clients blocked by the VM subsystem may be waiting for
3436 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3445 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3446 expireIfNeeded(db
,key
);
3447 return lookupKey(db
,key
);
3450 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3451 deleteIfVolatile(db
,key
);
3452 touchWatchedKey(db
,key
);
3453 return lookupKey(db
,key
);
3456 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3457 robj
*o
= lookupKeyRead(c
->db
, key
);
3458 if (!o
) addReply(c
,reply
);
3462 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3463 robj
*o
= lookupKeyWrite(c
->db
, key
);
3464 if (!o
) addReply(c
,reply
);
3468 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
3469 * otherwise REDIS_OK is returned, and the caller should increment the
3470 * refcount of 'val'. */
3471 static int dbAdd(redisDb
*db
, robj
*key
, robj
*val
) {
3472 /* Perform a lookup before adding the key, as we need to copy the
3474 if (dictFind(db
->dict
, key
->ptr
) != NULL
) {
3477 sds copy
= sdsdup(key
->ptr
);
3478 dictAdd(db
->dict
, copy
, val
);
3483 /* If the key does not exist, this is just like dbAdd(). Otherwise
3484 * the value associated to the key is replaced with the new one.
3486 * On update (key already existed) 0 is returned. Otherwise 1. */
3487 static int dbReplace(redisDb
*db
, robj
*key
, robj
*val
) {
3488 if (dictFind(db
->dict
,key
->ptr
) == NULL
) {
3489 sds copy
= sdsdup(key
->ptr
);
3490 dictAdd(db
->dict
, copy
, val
);
3493 dictReplace(db
->dict
, key
->ptr
, val
);
3498 static int dbExists(redisDb
*db
, robj
*key
) {
3499 return dictFind(db
->dict
,key
->ptr
) != NULL
;
3502 /* Return a random key, in form of a Redis object.
3503 * If there are no keys, NULL is returned.
3505 * The function makes sure to return keys not already expired. */
3506 static robj
*dbRandomKey(redisDb
*db
) {
3507 struct dictEntry
*de
;
3513 de
= dictGetRandomKey(db
->dict
);
3514 if (de
== NULL
) return NULL
;
3516 key
= dictGetEntryKey(de
);
3517 keyobj
= createStringObject(key
,sdslen(key
));
3518 if (dictFind(db
->expires
,key
)) {
3519 if (expireIfNeeded(db
,keyobj
)) {
3520 decrRefCount(keyobj
);
3521 continue; /* search for another key. This expired. */
3528 /* Delete a key, value, and associated expiration entry if any, from the DB */
3529 static int dbDelete(redisDb
*db
, robj
*key
) {
3532 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
);
3533 retval
= dictDelete(db
->dict
,key
->ptr
);
3535 return retval
== DICT_OK
;
3538 /*============================ RDB saving/loading =========================== */
3540 static int rdbSaveType(FILE *fp
, unsigned char type
) {
3541 if (fwrite(&type
,1,1,fp
) == 0) return -1;
3545 static int rdbSaveTime(FILE *fp
, time_t t
) {
3546 int32_t t32
= (int32_t) t
;
3547 if (fwrite(&t32
,4,1,fp
) == 0) return -1;
3551 /* check rdbLoadLen() comments for more info */
3552 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3553 unsigned char buf
[2];
3556 /* Save a 6 bit len */
3557 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3558 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3559 } else if (len
< (1<<14)) {
3560 /* Save a 14 bit len */
3561 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3563 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3565 /* Save a 32 bit len */
3566 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3567 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3569 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3574 /* Encode 'value' as an integer if possible (if integer will fit the
3575 * supported range). If the function successfully encoded the integer
3576 * then the (up to 5 bytes) encoded representation is written in the
3577 * string pointed by 'enc' and the length is returned. Otherwise
3579 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3580 /* Finally check if it fits in our ranges */
3581 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3582 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3583 enc
[1] = value
&0xFF;
3585 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3586 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3587 enc
[1] = value
&0xFF;
3588 enc
[2] = (value
>>8)&0xFF;
3590 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3591 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3592 enc
[1] = value
&0xFF;
3593 enc
[2] = (value
>>8)&0xFF;
3594 enc
[3] = (value
>>16)&0xFF;
3595 enc
[4] = (value
>>24)&0xFF;
3602 /* String objects in the form "2391" "-100" without any space and with a
3603 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3604 * encoded as integers to save space */
3605 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3607 char *endptr
, buf
[32];
3609 /* Check if it's possible to encode this value as a number */
3610 value
= strtoll(s
, &endptr
, 10);
3611 if (endptr
[0] != '\0') return 0;
3612 ll2string(buf
,32,value
);
3614 /* If the number converted back into a string is not identical
3615 * then it's not possible to encode the string as integer */
3616 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3618 return rdbEncodeInteger(value
,enc
);
3621 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3622 size_t comprlen
, outlen
;
3626 /* We require at least four bytes compression for this to be worth it */
3627 if (len
<= 4) return 0;
3629 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3630 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3631 if (comprlen
== 0) {
3635 /* Data compressed! Let's save it on disk */
3636 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3637 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3638 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3639 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3640 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3649 /* Save a string object as [len][data] on disk. If the object is a string
3650 * representation of an integer value we try to save it in a special form */
3651 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3654 /* Try integer encoding */
3656 unsigned char buf
[5];
3657 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3658 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3663 /* Try LZF compression - under 20 bytes it's unable to compress even
3664 * aaaaaaaaaaaaaaaaaa so skip it */
3665 if (server
.rdbcompression
&& len
> 20) {
3668 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3669 if (retval
== -1) return -1;
3670 if (retval
> 0) return 0;
3671 /* retval == 0 means data can't be compressed, save the old way */
3674 /* Store verbatim */
3675 if (rdbSaveLen(fp
,len
) == -1) return -1;
3676 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3680 /* Save a long long value as either an encoded string or a string. */
3681 static int rdbSaveLongLongAsStringObject(FILE *fp
, long long value
) {
3682 unsigned char buf
[32];
3683 int enclen
= rdbEncodeInteger(value
,buf
);
3685 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3687 /* Encode as string */
3688 enclen
= ll2string((char*)buf
,32,value
);
3689 redisAssert(enclen
< 32);
3690 if (rdbSaveLen(fp
,enclen
) == -1) return -1;
3691 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3696 /* Like rdbSaveRawString() but handles encoded objects */
3697 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3698 /* Avoid decoding the object and then encoding it again if the
3699 * object is already integer encoded. */
3700 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3701 return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
);
3703 redisAssert(obj
->encoding
== REDIS_ENCODING_RAW
);
3704 return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3708 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3709 * 8 bit integer specifying the length of the representation.
3710 * This 8 bit integer has special values in order to specify the following
3716 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3717 unsigned char buf
[128];
3723 } else if (!isfinite(val
)) {
3725 buf
[0] = (val
< 0) ? 255 : 254;
3727 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3728 /* Check if the float is in a safe range to be cast into a
3729 * long long. We are assuming that long long is 64 bit here.
3730 * Also we are assuming that there are no implementations around where
3731 * double has precision < 52 bit.
3733 * Under these assumptions we test if a double is inside an interval
3734 * where casting to long long is safe. Then using two casts we
3735 * make sure the decimal part is zero. If all this is true we use
3736 * the integer printing function, which is much faster. */
3737 double min
= -4503599627370495; /* (2^52)-1 */
3738 double max
= 4503599627370496; /* -(2^52) */
3739 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3740 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3743 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3744 buf
[0] = strlen((char*)buf
+1);
3747 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3751 /* Save a Redis object. */
3752 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3753 if (o
->type
== REDIS_STRING
) {
3754 /* Save a string value */
3755 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3756 } else if (o
->type
== REDIS_LIST
) {
3757 /* Save a list value */
3758 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
3760 unsigned char *vstr
;
3764 if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1;
3765 p
= ziplistIndex(o
->ptr
,0);
3766 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
3768 if (rdbSaveRawString(fp
,vstr
,vlen
) == -1)
3771 if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1)
3774 p
= ziplistNext(o
->ptr
,p
);
3776 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
3777 list
*list
= o
->ptr
;
3781 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3782 listRewind(list
,&li
);
3783 while((ln
= listNext(&li
))) {
3784 robj
*eleobj
= listNodeValue(ln
);
3785 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3788 redisPanic("Unknown list encoding");
3790 } else if (o
->type
== REDIS_SET
) {
3791 /* Save a set value */
3793 dictIterator
*di
= dictGetIterator(set
);
3796 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3797 while((de
= dictNext(di
)) != NULL
) {
3798 robj
*eleobj
= dictGetEntryKey(de
);
3800 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3802 dictReleaseIterator(di
);
3803 } else if (o
->type
== REDIS_ZSET
) {
3804 /* Save a sorted set value */
3806 dictIterator
*di
= dictGetIterator(zs
->dict
);
3809 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3810 while((de
= dictNext(di
)) != NULL
) {
3811 robj
*eleobj
= dictGetEntryKey(de
);
3812 double *score
= dictGetEntryVal(de
);
3814 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3815 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3817 dictReleaseIterator(di
);
3818 } else if (o
->type
== REDIS_HASH
) {
3819 /* Save a hash value */
3820 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3821 unsigned char *p
= zipmapRewind(o
->ptr
);
3822 unsigned int count
= zipmapLen(o
->ptr
);
3823 unsigned char *key
, *val
;
3824 unsigned int klen
, vlen
;
3826 if (rdbSaveLen(fp
,count
) == -1) return -1;
3827 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3828 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3829 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3832 dictIterator
*di
= dictGetIterator(o
->ptr
);
3835 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3836 while((de
= dictNext(di
)) != NULL
) {
3837 robj
*key
= dictGetEntryKey(de
);
3838 robj
*val
= dictGetEntryVal(de
);
3840 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3841 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3843 dictReleaseIterator(di
);
3846 redisPanic("Unknown object type");
3851 /* Return the length the object will have on disk if saved with
3852 * the rdbSaveObject() function. Currently we use a trick to get
3853 * this length with very little changes to the code. In the future
3854 * we could switch to a faster solution. */
3855 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3856 if (fp
== NULL
) fp
= server
.devnull
;
3858 assert(rdbSaveObject(fp
,o
) != 1);
3862 /* Return the number of pages required to save this object in the swap file */
3863 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3864 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3866 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3869 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3870 static int rdbSave(char *filename
) {
3871 dictIterator
*di
= NULL
;
3876 time_t now
= time(NULL
);
3878 /* Wait for I/O threads to terminate, just in case this is a
3879 * foreground-saving, to avoid seeking the swap file descriptor at the
3881 if (server
.vm_enabled
)
3882 waitEmptyIOJobsQueue();
3884 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3885 fp
= fopen(tmpfile
,"w");
3887 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3890 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3891 for (j
= 0; j
< server
.dbnum
; j
++) {
3892 redisDb
*db
= server
.db
+j
;
3894 if (dictSize(d
) == 0) continue;
3895 di
= dictGetIterator(d
);
3901 /* Write the SELECT DB opcode */
3902 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3903 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3905 /* Iterate this DB writing every entry */
3906 while((de
= dictNext(di
)) != NULL
) {
3907 sds keystr
= dictGetEntryKey(de
);
3908 robj key
, *o
= dictGetEntryVal(de
);
3911 initStaticStringObject(key
,keystr
);
3912 expiretime
= getExpire(db
,&key
);
3914 /* Save the expire time */
3915 if (expiretime
!= -1) {
3916 /* If this key is already expired skip it */
3917 if (expiretime
< now
) continue;
3918 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3919 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3921 /* Save the key and associated value. This requires special
3922 * handling if the value is swapped out. */
3923 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
3924 o
->storage
== REDIS_VM_SWAPPING
) {
3925 /* Save type, key, value */
3926 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3927 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3928 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3930 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3932 /* Get a preview of the object in memory */
3933 po
= vmPreviewObject(o
);
3934 /* Save type, key, value */
3935 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
;
3936 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3937 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3938 /* Remove the loaded object from memory */
3942 dictReleaseIterator(di
);
3945 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3947 /* Make sure data will not remain on the OS's output buffers */
3952 /* Use RENAME to make sure the DB file is changed atomically only
3953 * if the generated DB file is ok. */
3954 if (rename(tmpfile
,filename
) == -1) {
3955 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3959 redisLog(REDIS_NOTICE
,"DB saved on disk");
3961 server
.lastsave
= time(NULL
);
3967 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3968 if (di
) dictReleaseIterator(di
);
/* Fork a child process that saves the dataset to 'filename' with rdbSave().
 *
 * Returns REDIS_ERR immediately when server.bgsavechildpid != -1, i.e. a
 * background save child is already recorded. When VM is enabled the I/O
 * jobs queue is waited on before fork(), and the child (fork() == 0)
 * reopens the swap file before calling rdbSave(). A childpid of -1 is
 * logged as a fork failure; otherwise the pid is logged, stored in
 * server.bgsavechildpid, and updateDictResizePolicy() is called.
 *
 * NOTE(review): some lines of this function are missing from this excerpt
 * (e.g. how the child terminates after rdbSave()); confirm against the
 * full file before relying on this summary. */
3972 static int rdbSaveBackground(char *filename
) {
3975 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3976 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3977 if ((childpid
= fork()) == 0) {
3979 if (server
.vm_enabled
) vmReopenSwapFile();
3981 if (rdbSave(filename
) == REDIS_OK
) {
3988 if (childpid
== -1) {
3989 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3993 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3994 server
.bgsavechildpid
= childpid
;
3995 updateDictResizePolicy();
3998 return REDIS_OK
; /* unreached */
/* Build the temp RDB file name "temp-<childpid>.rdb" for the given child
 * pid into tmpfile (snprintf bounded to 256 bytes).
 * NOTE(review): the statement that actually removes the file is not
 * visible in this excerpt; presumably tmpfile is unlinked -- confirm. */
4001 static void rdbRemoveTempFile(pid_t childpid
) {
4004 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a single byte from 'fp' into 'type'.
 * Returns -1 when fread() reads nothing. The success return is not visible
 * in this excerpt; rdbLoad() compares the result against the REDIS_*
 * opcodes, so presumably the byte just read is returned -- confirm. */
4008 static int rdbLoadType(FILE *fp
) {
4010 if (fread(&type
,1,1,fp
) == 0) return -1;
/* Read a 4 byte time value from 'fp' into t32 and return it as a time_t.
 * Returns -1 when fread() comes up short. The bytes are used as stored:
 * the visible code does no byte-order conversion. */
4014 static time_t rdbLoadTime(FILE *fp
) {
4016 if (fread(&t32
,4,1,fp
) == 0) return -1;
4017 return (time_t) t32
;
4020 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
4021 * of this file for a description of how these are stored on disk.
4023 * isencoded is set to 1 if the value read is not actually a length but
4024 * an "encoding type", check the above comments for more info.
 *
 * 'isencoded' may be NULL, in which case it is simply not written.
 * The two top bits of the first byte select the format: 6 bit length,
 * encoding type, 14 bit length (low 6 bits of the first byte are the high
 * part, the second byte the low part) or a 4 byte length.
 * Returns REDIS_RDB_LENERR whenever an fread() from 'fp' comes up short.
 * NOTE(review): the return statements of the 6 bit, encoding-type and
 * 32 bit branches are not visible in this excerpt. */
4025 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
4026 unsigned char buf
[2];
4030 if (isencoded
) *isencoded
= 0;
4031 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4032 type
= (buf
[0]&0xC0)>>6;
4033 if (type
== REDIS_RDB_6BITLEN
) {
4034 /* Read a 6 bit len */
4036 } else if (type
== REDIS_RDB_ENCVAL
) {
4037 /* Read a 6 bit len encoding type */
4038 if (isencoded
) *isencoded
= 1;
4040 } else if (type
== REDIS_RDB_14BITLEN
) {
4041 /* Read a 14 bit len */
4042 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4043 return ((buf
[0]&0x3F)<<8)|buf
[1];
4045 /* Read a 32 bit len */
4046 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
4051 /* Load an integer-encoded object from file 'fp', with the specified
4052 * encoding type 'enctype'. If encode is true the function may return
4053 * an integer-encoded object as reply, otherwise the returned object
4054 * will always be encoded as a raw string.
 *
 * 1, 2 or 4 bytes are read depending on 'enctype'; multi-byte values are
 * assembled with enc[0] as the least significant byte. An unknown
 * 'enctype' reaches redisPanic().
 * Returns NULL when fread() comes up short.
 * NOTE(review): enc[3]<<24 shifts a value promoted to int; when enc[3] is
 * 0x80 or more this overflows a 32 bit signed int -- confirm it is
 * intended to rely on the usual wrap-around. */
4055 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
4056 unsigned char enc
[4];
4059 if (enctype
== REDIS_RDB_ENC_INT8
) {
4060 if (fread(enc
,1,1,fp
) == 0) return NULL
;
4061 val
= (signed char)enc
[0];
4062 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
4064 if (fread(enc
,2,1,fp
) == 0) return NULL
;
4065 v
= enc
[0]|(enc
[1]<<8);
4067 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
4069 if (fread(enc
,4,1,fp
) == 0) return NULL
;
4070 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
4073 val
= 0; /* anti-warning */
4074 redisPanic("Unknown RDB integer encoding type");
4077 return createStringObjectFromLongLong(val
);
4079 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
/* Load an LZF-compressed string from 'fp' and return it as a REDIS_STRING
 * object.
 *
 * Two lengths are read with rdbLoadLen(): first the compressed size (clen),
 * then the uncompressed size (len). clen bytes are read into a zmalloc'ed
 * buffer and decompressed with lzf_decompress() into an sds of len bytes.
 * Returns NULL when either length fails to load. Allocation, read and
 * decompression failures jump to the 'err' label, whose body is not
 * visible in this excerpt. */
4082 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
4083 unsigned int len
, clen
;
4084 unsigned char *c
= NULL
;
4087 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4088 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4089 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
4090 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
4091 if (fread(c
,clen
,1,fp
) == 0) goto err
;
4092 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
4094 return createObject(REDIS_STRING
,val
);
/* Load a string object from 'fp'.
 *
 * rdbLoadLen() reports through 'isencoded' whether the value it read is a
 * plain length or an encoding type. Integer encodings are delegated to
 * rdbLoadIntegerObject() (which receives 'encode'), LZF to
 * rdbLoadLzfStringObject(); an unknown encoding type reaches redisPanic().
 * For a plain length, REDIS_RDB_LENERR yields NULL; otherwise 'len' bytes
 * are read into a new sds that is wrapped in a REDIS_STRING object. A zero
 * length skips the fread() entirely.
 * NOTE(review): the handling of a short fread() of the payload is not
 * visible in this excerpt. */
4101 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
4106 len
= rdbLoadLen(fp
,&isencoded
);
4109 case REDIS_RDB_ENC_INT8
:
4110 case REDIS_RDB_ENC_INT16
:
4111 case REDIS_RDB_ENC_INT32
:
4112 return rdbLoadIntegerObject(fp
,len
,encode
);
4113 case REDIS_RDB_ENC_LZF
:
4114 return rdbLoadLzfStringObject(fp
);
4116 redisPanic("Unknown RDB encoding type");
4120 if (len
== REDIS_RDB_LENERR
) return NULL
;
4121 val
= sdsnewlen(NULL
,len
);
4122 if (len
&& fread(val
,len
,1,fp
) == 0) {
4126 return createObject(REDIS_STRING
,val
);
/* Load a string object from 'fp' with encode = 0, i.e. through
 * rdbGenericLoadStringObject() without asking for integer-encoded
 * results. */
4129 static robj
*rdbLoadStringObject(FILE *fp
) {
4130 return rdbGenericLoadStringObject(fp
,0);
/* Load a string object from 'fp' with encode = 1, i.e. through
 * rdbGenericLoadStringObject() allowing integer-encoded results. */
4133 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
4134 return rdbGenericLoadStringObject(fp
,1);
4137 /* For information about double serialization check rdbSaveDoubleValue()
 *
 * Reads one length byte from 'fp'. The values 255, 254 and 253 are special
 * and store negative infinity, positive infinity and NaN into *val. For
 * any other value that many bytes are read into buf and parsed with
 * sscanf("%lg").
 * Returns 0 for the three special values and -1 on a short read; the
 * return that follows sscanf() is not visible in this excerpt. */
4138 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
4142 if (fread(&len
,1,1,fp
) == 0) return -1;
4144 case 255: *val
= R_NegInf
; return 0;
4145 case 254: *val
= R_PosInf
; return 0;
4146 case 253: *val
= R_Nan
; return 0;
4148 if (fread(buf
,len
,1,fp
) == 0) return -1;
4150 sscanf(buf
, "%lg", val
);
4155 /* Load a Redis object of the specified type from the specified file.
4156 * On success a newly allocated object is returned, otherwise NULL.
 *
 * Per type, as far as the visible code shows:
 * - REDIS_STRING: one encoded string, passed through tryObjectEncoding().
 * - REDIS_LIST: element count, then the elements. A linked list is used
 *   when the count exceeds server.list_max_ziplist_entries, otherwise a
 *   ziplist that is converted with listTypeConvert() as soon as a raw
 *   element is longer than server.list_max_ziplist_value.
 * - REDIS_SET: element count, then the elements, added to a dict that is
 *   pre-expanded when the count exceeds DICT_HT_INITIAL_SIZE.
 * - REDIS_ZSET: element count, then (element, score) pairs. Each score is
 *   zmalloc'ed; its pointer goes into the dict and its value into the
 *   skiplist, and the element gets an extra reference for the skiplist.
 * - REDIS_HASH: pair count, then key/value pairs, kept as a zipmap unless
 *   the count exceeds server.hash_max_zipmap_entries or a key/value is
 *   longer than server.hash_max_zipmap_value.
 * An unknown type reaches redisPanic("Unknown object type").
 *
 * NOTE(review): ftell() returns long but is logged with "%d" below.
 * NOTE(review): the error paths return NULL directly, with no visible
 * release of the partially built object (or of 'score' for sorted sets);
 * confirm whether every caller treats a NULL return as fatal.
 * NOTE(review): the loop headers and several other lines are missing from
 * this excerpt. */
4157 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4158 robj
*o
, *ele
, *dec
;
4161 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4162 if (type
== REDIS_STRING
) {
4163 /* Read string value */
4164 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4165 o
= tryObjectEncoding(o
);
4166 } else if (type
== REDIS_LIST
) {
4167 /* Read list value */
4168 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4170 /* Use a real list when there are too many entries */
4171 if (len
> server
.list_max_ziplist_entries
) {
4172 o
= createListObject();
4174 o
= createZiplistObject();
4177 /* Load every single element of the list */
4179 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4181 /* If we are using a ziplist and the value is too big, convert
4182 * the object to a real list. */
4183 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
&&
4184 ele
->encoding
== REDIS_ENCODING_RAW
&&
4185 sdslen(ele
->ptr
) > server
.list_max_ziplist_value
)
4186 listTypeConvert(o
,REDIS_ENCODING_LIST
);
4188 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4189 dec
= getDecodedObject(ele
);
4190 o
->ptr
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
);
4194 ele
= tryObjectEncoding(ele
);
4195 listAddNodeTail(o
->ptr
,ele
);
4198 } else if (type
== REDIS_SET
) {
4199 /* Read list/set value */
4200 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4201 o
= createSetObject();
4202 /* It's faster to expand the dict to the right size asap in order
4203 * to avoid rehashing */
4204 if (len
> DICT_HT_INITIAL_SIZE
)
4205 dictExpand(o
->ptr
,len
);
4206 /* Load every single element of the list/set */
4208 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4209 ele
= tryObjectEncoding(ele
);
4210 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4212 } else if (type
== REDIS_ZSET
) {
4213 /* Read list/set value */
4217 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4218 o
= createZsetObject();
4220 /* Load every single element of the list/set */
4223 double *score
= zmalloc(sizeof(double));
4225 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4226 ele
= tryObjectEncoding(ele
);
4227 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4228 dictAdd(zs
->dict
,ele
,score
);
4229 zslInsert(zs
->zsl
,*score
,ele
);
4230 incrRefCount(ele
); /* added to skiplist */
4232 } else if (type
== REDIS_HASH
) {
4235 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4236 o
= createHashObject();
4237 /* Too many entries? Use an hash table. */
4238 if (hashlen
> server
.hash_max_zipmap_entries
)
4239 convertToRealHash(o
);
4240 /* Load every key/value, then set it into the zipmap or hash
4241 * table, as needed. */
4245 if ((key
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4246 if ((val
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4247 /* If we are using a zipmap and there are too big values
4248 * the object is converted to real hash table encoding. */
4249 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4250 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4251 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4253 convertToRealHash(o
);
4256 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4257 unsigned char *zm
= o
->ptr
;
4259 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4260 val
->ptr
,sdslen(val
->ptr
),NULL
);
4265 key
= tryObjectEncoding(key
);
4266 val
= tryObjectEncoding(val
);
4267 dictAdd((dict
*)o
->ptr
,key
,val
);
4271 redisPanic("Unknown object type");
/* Load the RDB file 'filename' into the server databases.
 *
 * Returns REDIS_ERR when the file cannot be opened. The first 9 bytes are
 * read as the header: they must start with "REDIS", and what follows is
 * parsed with atoi() as the format version. The rest of the file is
 * consumed one type byte at a time:
 * - REDIS_EXPIRETIME: an expire time follows, then the real type byte.
 * - REDIS_EOF: stops the loop.
 * - REDIS_SELECTDB: a db id follows and 'db' becomes server.db+dbid; an id
 *   that is >= server.dbnum is logged as FATAL.
 * - anything else: a key and a value are loaded and added with dbAdd(); a
 *   duplicated key is logged as unrecoverable, and the expire, when there
 *   is one, is set on the key.
 * With VM enabled, values are swapped out while loading once memory use
 * passes server.vm_max_memory (see the comments in the body).
 * Short reads jump to 'eoferr'.
 *
 * NOTE(review): many lines of this function are missing from this excerpt,
 * including the statements that follow the logged errors and the handling
 * of keys that are already expired. */
4276 static int rdbLoad(char *filename
) {
4279 int type
, retval
, rdbver
;
4280 int swap_all_values
= 0;
4281 redisDb
*db
= server
.db
+0;
4283 time_t expiretime
, now
= time(NULL
);
4285 fp
= fopen(filename
,"r");
4286 if (!fp
) return REDIS_ERR
;
4287 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4289 if (memcmp(buf
,"REDIS",5) != 0) {
4291 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4294 rdbver
= atoi(buf
+5);
4297 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4306 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4307 if (type
== REDIS_EXPIRETIME
) {
4308 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4309 /* We read the time so we need to read the object type again */
4310 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4312 if (type
== REDIS_EOF
) break;
4313 /* Handle SELECT DB opcode as a special case */
4314 if (type
== REDIS_SELECTDB
) {
4315 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4317 if (dbid
>= (unsigned)server
.dbnum
) {
4318 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4321 db
= server
.db
+dbid
;
4325 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4327 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4328 /* Check if the key already expired */
4329 if (expiretime
!= -1 && expiretime
< now
) {
4334 /* Add the new object in the hash table */
4335 retval
= dbAdd(db
,key
,val
);
4336 if (retval
== REDIS_ERR
) {
4337 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4340 /* Set the expire time if needed */
4341 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4343 /* Handle swapping while loading big datasets when VM is on */
4345 /* If we detecter we are hopeless about fitting something in memory
4346 * we just swap every new key on disk. Directly...
4347 * Note that's important to check for this condition before resorting
4348 * to random sampling, otherwise we may try to swap already
4350 if (swap_all_values
) {
4351 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
4353 /* de may be NULL since the key already expired */
4356 val
= dictGetEntryVal(de
);
4358 if (val
->refcount
== 1 &&
4359 (vp
= vmSwapObjectBlocking(val
)) != NULL
)
4360 dictGetEntryVal(de
) = vp
;
4367 /* Flush data on disk once 32 MB of additional RAM are used... */
4369 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
4372 /* If we have still some hope of having some value fitting memory
4373 * then we try random sampling. */
4374 if (!swap_all_values
&& server
.vm_enabled
&& force_swapout
) {
4375 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4376 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4378 if (zmalloc_used_memory() > server
.vm_max_memory
)
4379 swap_all_values
= 1; /* We are already using too much mem */
4385 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4386 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4388 return REDIS_ERR
; /* Just to avoid warning */
4391 /*================================== Shutdown =============================== */
/* Prepare the server for exit.
 *
 * A live background save child is killed with SIGKILL and its temp file is
 * removed. With the append only file enabled the AOF is fsync'ed, and
 * right after that the swap file is unlinked when VM is enabled. The
 * snapshotting path does a synchronous rdbSave() of server.dbfilename: on
 * success the pid file is unlinked when daemonized, on failure the error
 * is logged.
 * NOTE(review): the return statements are not visible in this excerpt;
 * shutdownCommand() compares the result against REDIS_OK. */
4392 static int prepareForShutdown() {
4393 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4394 /* Kill the saving child if there is a background saving in progress.
4395 We want to avoid race conditions, for instance our saving child may
4396 overwrite the synchronous saving did by SHUTDOWN. */
4397 if (server
.bgsavechildpid
!= -1) {
4398 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4399 kill(server
.bgsavechildpid
,SIGKILL
);
4400 rdbRemoveTempFile(server
.bgsavechildpid
);
4402 if (server
.appendonly
) {
4403 /* Append only file: fsync() the AOF and exit */
4404 aof_fsync(server
.appendfd
);
4405 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4407 /* Snapshotting. Perform a SYNC SAVE and exit */
4408 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4409 if (server
.daemonize
)
4410 unlink(server
.pidfile
);
4411 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4413 /* Ooops.. error saving! The best we can do is to continue
4414 * operating. Note that if there was a background saving process,
4415 * in the next cron() Redis will be notified that the background
4416 * saving aborted, handling special stuff like slaves pending for
4417 * synchronization... */
4418 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4422 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4426 /*================================== Commands =============================== */
/* Authenticate the client against server.requirepass.
 *
 * When no password is configured (server.requirepass is NULL) or argv[1]
 * equals it, c->authenticated is set to 1 and shared.ok is sent; the other
 * visible path clears c->authenticated and replies
 * "-ERR invalid password".
 * NOTE(review): the comparison is a plain strcmp(), which is not a
 * constant-time comparison. */
4428 static void authCommand(redisClient
*c
) {
4429 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4430 c
->authenticated
= 1;
4431 addReply(c
,shared
.ok
);
4433 c
->authenticated
= 0;
4434 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
/* Command handler: replies to the client with shared.pong. */
4438 static void pingCommand(redisClient
*c
) {
4439 addReply(c
,shared
.pong
);
/* Command handler: sends argv[1] back to the client as a bulk reply. */
4442 static void echoCommand(redisClient
*c
) {
4443 addReplyBulk(c
,c
->argv
[1]);
4446 /*=================================== Strings =============================== */
/* Shared implementation behind setCommand(), setnxCommand() and
 * setexCommand().
 *
 * nx     - non-zero: deleteIfVolatile() is called on the key first, and the
 *          final reply is shared.cone instead of shared.ok.
 * key    - key to set.
 * val    - value to store.
 * expire - NULL, or an object parsed with getLongFromObjectOrReply() into
 *          'seconds'; the expire is then set to time(NULL)+seconds.
 *
 * The value is first added with dbAdd(). When that reports REDIS_ERR the
 * visible code contains both an overwrite with dbReplace() and a
 * shared.czero reply; the condition choosing between them is not visible
 * in this excerpt (presumably 'nx' -- confirm). Any previous expire on the
 * key is removed before a new one is set. */
4448 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4450 long seconds
= 0; /* initialized to avoid an harmness warning */
4453 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4456 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4461 touchWatchedKey(c
->db
,key
);
4462 if (nx
) deleteIfVolatile(c
->db
,key
);
4463 retval
= dbAdd(c
->db
,key
,val
);
4464 if (retval
== REDIS_ERR
) {
4466 dbReplace(c
->db
,key
,val
);
4469 addReply(c
,shared
.czero
);
4476 removeExpire(c
->db
,key
);
4477 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4478 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* Set argv[1] to argv[2] through setGenericCommand(), with nx = 0 and no
 * expire. */
4481 static void setCommand(redisClient
*c
) {
4482 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
/* Set argv[1] to argv[2] through setGenericCommand(), with nx = 1 and no
 * expire. */
4485 static void setnxCommand(redisClient
*c
) {
4486 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
/* Set argv[1] to argv[3] through setGenericCommand(), with nx = 0 and
 * argv[2] as the expire argument (note the value comes after the expire
 * in the argument vector). */
4489 static void setexCommand(redisClient
*c
) {
4490 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
/* Look up argv[1] for reading; lookupKeyReadOrReply() is handed
 * shared.nullbulk as the reply to use when it returns NULL. A value whose
 * type is not REDIS_STRING gets shared.wrongtypeerr.
 * Returns an int status that getsetCommand() compares against REDIS_ERR.
 * NOTE(review): the return statements and the successful reply are not
 * visible in this excerpt. */
4493 static int getGenericCommand(redisClient
*c
) {
4496 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4499 if (o
->type
!= REDIS_STRING
) {
4500 addReply(c
,shared
.wrongtypeerr
);
/* Command handler: delegates to getGenericCommand() and ignores its
 * return value. */
4508 static void getCommand(redisClient
*c
) {
4509 getGenericCommand(c
);
/* Reply with the current value through getGenericCommand(), then store
 * argv[2] under argv[1].
 * Nothing is written when getGenericCommand() returns REDIS_ERR. The new
 * value gets an extra reference because the database now holds argv[2],
 * and any expire on the key is removed. */
4512 static void getsetCommand(redisClient
*c
) {
4513 if (getGenericCommand(c
) == REDIS_ERR
) return;
4514 dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]);
4515 incrRefCount(c
->argv
[2]);
4517 removeExpire(c
->db
,c
->argv
[1]);
/* Reply with a multi bulk of argc-1 entries, one per key in argv[1..].
 * Each key is looked up with lookupKeyRead(); a value that is not a
 * REDIS_STRING is reported as shared.nullbulk.
 * NOTE(review): the condition guarding the first nullbulk reply and the
 * reply for string values are not visible in this excerpt. */
4520 static void mgetCommand(redisClient
*c
) {
4523 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4524 for (j
= 1; j
< c
->argc
; j
++) {
4525 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4527 addReply(c
,shared
.nullbulk
);
4529 if (o
->type
!= REDIS_STRING
) {
4530 addReply(c
,shared
.nullbulk
);
/* Shared implementation behind msetCommand() and msetnxCommand().
 *
 * The arguments are key/value pairs starting at argv[1], so an even argc
 * is rejected with "-ERR wrong number of arguments for MSET".
 * nx - non-zero selects the MSETNX flavour described in the comment below;
 *      the final reply is then shared.cone instead of shared.ok.
 * Every value is passed through tryObjectEncoding(), stored with
 * dbReplace() (taking an extra reference), and the key's expire is
 * removed. server.dirty grows by the number of pairs.
 * NOTE(review): the 'busykeys' bookkeeping and the early returns are not
 * visible in this excerpt. */
4538 static void msetGenericCommand(redisClient
*c
, int nx
) {
4539 int j
, busykeys
= 0;
4541 if ((c
->argc
% 2) == 0) {
4542 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4545 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4546 * set nothing at all if at least one already key exists. */
4548 for (j
= 1; j
< c
->argc
; j
+= 2) {
4549 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4555 addReply(c
, shared
.czero
);
4559 for (j
= 1; j
< c
->argc
; j
+= 2) {
4560 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4561 dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]);
4562 incrRefCount(c
->argv
[j
+1]);
4563 removeExpire(c
->db
,c
->argv
[j
]);
4565 server
.dirty
+= (c
->argc
-1)/2;
4566 addReply(c
, nx
? shared
.cone
: shared
.ok
);
/* Command handler: msetGenericCommand() with nx = 0. */
4569 static void msetCommand(redisClient
*c
) {
4570 msetGenericCommand(c
,0);
/* Command handler: msetGenericCommand() with nx = 1. */
4573 static void msetnxCommand(redisClient
*c
) {
4574 msetGenericCommand(c
,1);
/* Shared implementation behind incrCommand(), decrCommand(),
 * incrbyCommand() and decrbyCommand(); 'incr' is the signed delta.
 *
 * The current value of argv[1] is looked up for writing. The function
 * returns without storing anything when the value exists but is not a
 * string (checkType()) or cannot be read as a long long. The result is
 * stored as a fresh string object with dbReplace(), and the reply is
 * framed by shared.colon and shared.crlf.
 * NOTE(review): the statement applying 'incr' to 'value' and the line
 * emitting the number are not visible in this excerpt. */
4577 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4581 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4582 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4583 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4586 o
= createStringObjectFromLongLong(value
);
4587 dbReplace(c
->db
,c
->argv
[1],o
);
4589 addReply(c
,shared
.colon
);
4591 addReply(c
,shared
.crlf
);
/* Command handler: incrDecrCommand() with a delta of 1. */
4594 static void incrCommand(redisClient
*c
) {
4595 incrDecrCommand(c
,1);
/* Command handler: incrDecrCommand() with a delta of -1. */
4598 static void decrCommand(redisClient
*c
) {
4599 incrDecrCommand(c
,-1);
/* Parse argv[2] as the delta and apply it with incrDecrCommand().
 * Returns without touching the key when the delta does not parse
 * (getLongLongFromObjectOrReply() != REDIS_OK). */
4602 static void incrbyCommand(redisClient
*c
) {
4605 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4606 incrDecrCommand(c
,incr
);
/* Parse argv[2] as the delta and apply its negation with
 * incrDecrCommand(). Returns without touching the key when the delta does
 * not parse (getLongLongFromObjectOrReply() != REDIS_OK).
 * NOTE(review): -incr is not representable when incr is the most negative
 * value of its type -- confirm whether that input needs rejecting. */
4609 static void decrbyCommand(redisClient
*c
) {
4612 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4613 incrDecrCommand(c
,-incr
);
/* Append argv[2] to the string stored at argv[1] and reply with the
 * resulting length as an integer reply (":<len>").
 *
 * The "create the key" path adds argv[2] itself to the database (taking a
 * reference) and uses its length. Otherwise a non-string value gets
 * shared.wrongtypeerr; a value that is shared (refcount != 1) or not raw
 * encoded is first replaced by a private raw copy, and then argv[2] is
 * concatenated: with sdscatlen() when it is raw encoded, with
 * sdscatprintf("%ld") on its ptr field otherwise.
 * NOTE(review): the "%ld" conversion is given an (unsigned long) cast; the
 * specifier and the argument type do not match exactly.
 * NOTE(review): the branch conditions around the create/append split are
 * not visible in this excerpt. */
4616 static void appendCommand(redisClient
*c
) {
4621 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4623 /* Create the key */
4624 retval
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]);
4625 incrRefCount(c
->argv
[2]);
4626 totlen
= stringObjectLen(c
->argv
[2]);
4628 if (o
->type
!= REDIS_STRING
) {
4629 addReply(c
,shared
.wrongtypeerr
);
4632 /* If the object is specially encoded or shared we have to make
4634 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4635 robj
*decoded
= getDecodedObject(o
);
4637 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4638 decrRefCount(decoded
);
4639 dbReplace(c
->db
,c
->argv
[1],o
);
4642 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4643 o
->ptr
= sdscatlen(o
->ptr
,
4644 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4646 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4647 (unsigned long) c
->argv
[2]->ptr
);
4649 totlen
= sdslen(o
->ptr
);
4652 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
/* Reply with the substring of the string at argv[1] between the inclusive
 * indexes argv[2] (start) and argv[3] (end).
 *
 * Negative indexes count from the end of the string and are clamped to 0
 * when still negative after conversion. start > end, or a start at or past
 * the end of the string, yields shared.nullbulk; an end past the last
 * character is clamped to the last character. A missing key gets
 * shared.nullbulk through lookupKeyReadOrReply(), and the function returns
 * when checkType() rejects a non-string value.
 * NOTE(review): the indexes are parsed with atoi(), so non-numeric input
 * is not reported as an error.
 * NOTE(review): the local variable named 'strlen' shadows the library
 * function inside this body. */
4655 static void substrCommand(redisClient
*c
) {
4657 long start
= atoi(c
->argv
[2]->ptr
);
4658 long end
= atoi(c
->argv
[3]->ptr
);
4659 size_t rangelen
, strlen
;
4662 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4663 checkType(c
,o
,REDIS_STRING
)) return;
4665 o
= getDecodedObject(o
);
4666 strlen
= sdslen(o
->ptr
);
4668 /* convert negative indexes */
4669 if (start
< 0) start
= strlen
+start
;
4670 if (end
< 0) end
= strlen
+end
;
4671 if (start
< 0) start
= 0;
4672 if (end
< 0) end
= 0;
4674 /* indexes sanity checks */
4675 if (start
> end
|| (size_t)start
>= strlen
) {
4676 /* Out of range start or start > end result in null reply */
4677 addReply(c
,shared
.nullbulk
);
4681 if ((size_t)end
>= strlen
) end
= strlen
-1;
4682 rangelen
= (end
-start
)+1;
4684 /* Return the result */
4685 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4686 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4687 addReplySds(c
,range
);
4688 addReply(c
,shared
.crlf
);
4692 /* ========================= Type agnostic commands ========================= */
/* Delete every key in argv[1..argc-1] and reply with 'deleted' as an
 * integer. touchWatchedKey() is called for each key that dbDelete()
 * reports as removed.
 * NOTE(review): the statements updating 'deleted' are not visible in this
 * excerpt. */
4694 static void delCommand(redisClient
*c
) {
4697 for (j
= 1; j
< c
->argc
; j
++) {
4698 if (dbDelete(c
->db
,c
->argv
[j
])) {
4699 touchWatchedKey(c
->db
,c
->argv
[j
]);
4704 addReplyLongLong(c
,deleted
);
/* Reply shared.cone when argv[1] exists in the client's database and
 * shared.czero otherwise. expireIfNeeded() is called on the key before
 * the existence check. */
4707 static void existsCommand(redisClient
*c
) {
4708 expireIfNeeded(c
->db
,c
->argv
[1]);
4709 if (dbExists(c
->db
,c
->argv
[1])) {
4710 addReply(c
, shared
.cone
);
4712 addReply(c
, shared
.czero
);
/* Switch the client to the database whose index is given in argv[1].
 * Replies "-ERR invalid DB index" when selectDb() returns REDIS_ERR and
 * shared.ok otherwise.
 * NOTE(review): the index is parsed with atoi(), so non-numeric input is
 * read as 0 rather than rejected. */
4716 static void selectCommand(redisClient
*c
) {
4717 int id
= atoi(c
->argv
[1]->ptr
);
4719 if (selectDb(c
,id
) == REDIS_ERR
) {
4720 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4722 addReply(c
,shared
.ok
);
/* Reply with a key obtained from dbRandomKey() as a bulk reply, or with
 * shared.nullbulk when dbRandomKey() returns NULL.
 * NOTE(review): whether 'key' is released after the reply is not visible
 * in this excerpt. */
4726 static void randomkeyCommand(redisClient
*c
) {
4729 if ((key
= dbRandomKey(c
->db
)) == NULL
) {
4730 addReply(c
,shared
.nullbulk
);
4734 addReplyBulk(c
,key
);
/* Reply with every key of the client's database matching the glob-style
 * pattern in argv[1].
 *
 * The multi bulk header is not known up front: 'lenobj' is created with a
 * NULL ptr and only filled in with "*<numkeys>" after the iteration.
 * The literal pattern "*" skips stringmatchlen(). Each matching key is
 * wrapped in a temporary string object and sent only when
 * expireIfNeeded() returns 0 for it.
 * NOTE(review): the lines queueing 'lenobj' in the reply and counting
 * 'numkeys' are not visible in this excerpt. */
4738 static void keysCommand(redisClient
*c
) {
4741 sds pattern
= c
->argv
[1]->ptr
;
4742 int plen
= sdslen(pattern
);
4743 unsigned long numkeys
= 0;
4744 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4746 di
= dictGetIterator(c
->db
->dict
);
4748 decrRefCount(lenobj
);
4749 while((de
= dictNext(di
)) != NULL
) {
4750 sds key
= dictGetEntryKey(de
);
4753 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4754 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4755 keyobj
= createStringObject(key
,sdslen(key
));
4756 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4757 addReplyBulk(c
,keyobj
);
4760 decrRefCount(keyobj
);
4763 dictReleaseIterator(di
);
4764 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
/* Format dictSize() of the client's database as an integer reply
 * (":<n>"). The call that sends the formatted string is not visible in
 * this excerpt. */
4767 static void dbsizeCommand(redisClient
*c
) {
4769 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
/* Format server.lastsave as an integer reply (":<n>"). The call that
 * sends the formatted string is not visible in this excerpt.
 * NOTE(review): server.lastsave is assigned from time() elsewhere in the
 * file and is printed here with "%lu" without a cast -- confirm the field
 * type matches the specifier. */
4772 static void lastsaveCommand(redisClient
*c
) {
4774 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
/* Reply with the type name of the value stored at argv[1] as a status
 * reply: "+string", "+list", "+set", "+zset", "+hash", or "+unknown" for
 * any other type value. The name is followed by shared.crlf.
 * NOTE(review): the handling of a missing key (lookupKeyRead() returning
 * NULL) is not visible in this excerpt. */
4777 static void typeCommand(redisClient
*c
) {
4781 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4786 case REDIS_STRING
: type
= "+string"; break;
4787 case REDIS_LIST
: type
= "+list"; break;
4788 case REDIS_SET
: type
= "+set"; break;
4789 case REDIS_ZSET
: type
= "+zset"; break;
4790 case REDIS_HASH
: type
= "+hash"; break;
4791 default: type
= "+unknown"; break;
4794 addReplySds(c
,sdsnew(type
));
4795 addReply(c
,shared
.crlf
);
/* Save the dataset synchronously to server.dbfilename with rdbSave().
 * Replies "-ERR background save in progress" when a background save child
 * is recorded, shared.ok when rdbSave() returns REDIS_OK and shared.err
 * otherwise. */
4798 static void saveCommand(redisClient
*c
) {
4799 if (server
.bgsavechildpid
!= -1) {
4800 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4803 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4804 addReply(c
,shared
.ok
);
4806 addReply(c
,shared
.err
);
/* Start a background save to server.dbfilename with rdbSaveBackground().
 * Replies "-ERR background save already in progress" when a background
 * save child is recorded, "+Background saving started" when
 * rdbSaveBackground() returns REDIS_OK and shared.err otherwise. */
4810 static void bgsaveCommand(redisClient
*c
) {
4811 if (server
.bgsavechildpid
!= -1) {
4812 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4815 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4816 char *status
= "+Background saving started\r\n";
4817 addReplySds(c
,sdsnew(status
));
4819 addReply(c
,shared
.err
);
/* Run prepareForShutdown(); the visible reply
 * "-ERR Errors trying to SHUTDOWN. Check logs." is the failure message.
 * NOTE(review): the statement executed when prepareForShutdown() returns
 * REDIS_OK is not visible in this excerpt. */
4823 static void shutdownCommand(redisClient
*c
) {
4824 if (prepareForShutdown() == REDIS_OK
)
4826 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
/* Shared implementation behind renameCommand() and renamenxCommand():
 * move the value stored at argv[1] to the key argv[2].
 *
 * Identical source and destination names get shared.sameobjecterr, and a
 * missing source gets shared.nokeyerr through lookupKeyWriteOrReply().
 * deleteIfVolatile() is called on the destination, then the value is added
 * with dbAdd(); when that reports REDIS_ERR the visible code contains both
 * a shared.czero reply and an overwrite with dbReplace() (the condition
 * choosing between them is not visible, presumably 'nx' -- confirm).
 * Finally the source key is deleted, the destination is touched for
 * WATCH, and the reply is shared.cone when nx is set, shared.ok otherwise. */
4829 static void renameGenericCommand(redisClient
*c
, int nx
) {
4832 /* To use the same key as src and dst is probably an error */
4833 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4834 addReply(c
,shared
.sameobjecterr
);
4838 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4842 deleteIfVolatile(c
->db
,c
->argv
[2]);
4843 if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) {
4846 addReply(c
,shared
.czero
);
4849 dbReplace(c
->db
,c
->argv
[2],o
);
4851 dbDelete(c
->db
,c
->argv
[1]);
4852 touchWatchedKey(c
->db
,c
->argv
[2]);
4854 addReply(c
,nx
? shared
.cone
: shared
.ok
);
/* Command handler: renameGenericCommand() with nx = 0. */
4857 static void renameCommand(redisClient
*c
) {
4858 renameGenericCommand(c
,0);
/* Command handler: renameGenericCommand() with nx = 1. */
4861 static void renamenxCommand(redisClient
*c
) {
4862 renameGenericCommand(c
,1);
/* Move the key argv[1] from the client's current database to the database
 * whose index is given in argv[2].
 *
 * The target is resolved by temporarily selecting it with selectDb() and
 * then selecting 'srcid' again; an index selectDb() rejects gets
 * shared.outofrangeerr. Moving to the same database gets
 * shared.sameobjecterr. shared.czero is the reply on the two visible
 * failure paths (after the source lookup, and when dbAdd() on the target
 * returns REDIS_ERR). On success the key is deleted from the source
 * database and shared.cone is sent.
 * NOTE(review): the target index is parsed with atoi(); the assignments of
 * 'src', 'dst' and 'srcid' are not visible in this excerpt. */
4865 static void moveCommand(redisClient
*c
) {
4870 /* Obtain source and target DB pointers */
4873 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4874 addReply(c
,shared
.outofrangeerr
);
4878 selectDb(c
,srcid
); /* Back to the source DB */
4880 /* If the user is moving using as target the same
4881 * DB as the source DB it is probably an error. */
4883 addReply(c
,shared
.sameobjecterr
);
4887 /* Check if the element exists and get a reference */
4888 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4890 addReply(c
,shared
.czero
);
4894 /* Try to add the element to the target DB */
4895 deleteIfVolatile(dst
,c
->argv
[1]);
4896 if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) {
4897 addReply(c
,shared
.czero
);
4902 /* OK! key moved, free the entry in the source DB */
4903 dbDelete(src
,c
->argv
[1]);
4905 addReply(c
,shared
.cone
);
4908 /* =================================== Lists ================================ */
4911 /* Check the argument length to see if it requires us to convert the ziplist
4912 * to a real list. Only check raw-encoded objects because integer encoded
4913 * objects are never too long.
 *
 * Does nothing unless 'subject' is ziplist encoded. The conversion happens
 * when 'value' is raw encoded and longer than
 * server.list_max_ziplist_value. */
4914 static void listTypeTryConversion(robj
*subject
, robj
*value
) {
4915 if (subject
->encoding
!= REDIS_ENCODING_ZIPLIST
) return;
4916 if (value
->encoding
== REDIS_ENCODING_RAW
&&
4917 sdslen(value
->ptr
) > server
.list_max_ziplist_value
)
4918 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
/* Push 'value' on the head or tail of the list 'subject', depending on
 * 'where' (REDIS_HEAD selects the head).
 *
 * The ziplist is converted to a linked list first when the value is too
 * long (listTypeTryConversion()) or when the ziplist already holds more
 * than server.list_max_ziplist_entries entries.
 * Ziplist: the value is decoded and its bytes are copied in, so the
 * decoded object is released right after. Linked list: the node stores the
 * object itself, so its reference count is incremented.
 * Any other encoding reaches redisPanic(). */
4921 static void listTypePush(robj
*subject
, robj
*value
, int where
) {
4922 /* Check if we need to convert the ziplist */
4923 listTypeTryConversion(subject
,value
);
4924 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
&&
4925 ziplistLen(subject
->ptr
) > server
.list_max_ziplist_entries
)
4926 listTypeConvert(subject
,REDIS_ENCODING_LIST
);
4928 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4929 int pos
= (where
== REDIS_HEAD
) ? ZIPLIST_HEAD
: ZIPLIST_TAIL
;
4930 value
= getDecodedObject(value
);
4931 subject
->ptr
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
);
4932 decrRefCount(value
);
4933 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4934 if (where
== REDIS_HEAD
) {
4935 listAddNodeHead(subject
->ptr
,value
);
4937 listAddNodeTail(subject
->ptr
,value
);
4939 incrRefCount(value
);
4941 redisPanic("Unknown list encoding");
/* Remove the element at the head (where == REDIS_HEAD) or tail of the
 * list 'subject' and hand it back as an object.
 *
 * Ziplist: index 0 or -1 is looked up; when ziplistGet() finds an entry a
 * new string object is built from either its bytes or its integer value,
 * and the entry is deleted from the ziplist.
 * Linked list: the first or last node is taken, its value gets an extra
 * reference before the node is deleted.
 * Any other encoding reaches redisPanic().
 * NOTE(review): the return statement and the handling of an empty list
 * are not visible in this excerpt. */
4945 static robj
*listTypePop(robj
*subject
, int where
) {
4947 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4949 unsigned char *vstr
;
4952 int pos
= (where
== REDIS_HEAD
) ? 0 : -1;
4953 p
= ziplistIndex(subject
->ptr
,pos
);
4954 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
4956 value
= createStringObject((char*)vstr
,vlen
);
4958 value
= createStringObjectFromLongLong(vlong
);
4960 /* We only need to delete an element when it exists */
4961 subject
->ptr
= ziplistDelete(subject
->ptr
,&p
);
4963 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4964 list
*list
= subject
->ptr
;
4966 if (where
== REDIS_HEAD
) {
4967 ln
= listFirst(list
);
4969 ln
= listLast(list
);
4972 value
= listNodeValue(ln
);
4973 incrRefCount(value
);
4974 listDelNode(list
,ln
);
4977 redisPanic("Unknown list encoding");
/* Return the number of elements in the list 'subject': ziplistLen() for a
 * ziplist, listLength() for a linked list. Any other encoding reaches
 * redisPanic(). */
4982 static unsigned long listTypeLength(robj
*subject
) {
4983 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4984 return ziplistLen(subject
->ptr
);
4985 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4986 return listLength((list
*)subject
->ptr
);
4988 redisPanic("Unknown list encoding");
4992 /* Structure to hold list iteration abstraction.
 * 'encoding' is a copy of the subject's encoding taken by
 * listTypeInitIterator(); listTypeNext() asserts it still matches.
 * NOTE(review): only two fields are visible in this excerpt; the code
 * below also uses li->subject, li->zi and li->ln. */
4995 unsigned char encoding
;
4996 unsigned char direction
; /* Iteration direction */
5001 /* Structure for an entry while iterating over a list.
 * 'li' points back to the iterator the entry belongs to; the list helpers
 * read the encoding through it to decide whether 'zi' or 'ln' is the
 * field in use. */
5003 listTypeIterator
*li
;
5004 unsigned char *zi
; /* Entry in ziplist */
5005 listNode
*ln
; /* Entry in linked list */
5008 /* Initialize an iterator at the specified index.
 *
 * The iterator is zmalloc'ed and records the subject, a copy of its
 * current encoding and the direction. The start position comes from
 * ziplistIndex() or listIndex() depending on the encoding; any other
 * encoding reaches redisPanic().
 * NOTE(review): the return statement is not visible in this excerpt;
 * listTypeConvert() uses the result as the iterator and releases it with
 * listTypeReleaseIterator(). */
5009 static listTypeIterator
*listTypeInitIterator(robj
*subject
, int index
, unsigned char direction
) {
5010 listTypeIterator
*li
= zmalloc(sizeof(listTypeIterator
));
5011 li
->subject
= subject
;
5012 li
->encoding
= subject
->encoding
;
5013 li
->direction
= direction
;
5014 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5015 li
->zi
= ziplistIndex(subject
->ptr
,index
);
5016 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5017 li
->ln
= listIndex(subject
->ptr
,index
);
5019 redisPanic("Unknown list encoding");
5024 /* Clean up the iterator.
 * NOTE(review): the body is not visible in this excerpt; since
 * listTypeInitIterator() zmalloc's the iterator, presumably it is freed
 * here -- confirm. */
5025 static void listTypeReleaseIterator(listTypeIterator
*li
) {
5029 /* Stores a pointer to the current entry in the provided entry structure
5030 * and advances the position of the iterator. Returns 1 when the current
5031 * entry is in fact an entry, 0 otherwise.
 *
 * The iterator moves with ziplistNext()/ziplistPrev() or through the
 * node's next/prev links; REDIS_TAIL as direction selects the "next"
 * variant. The position only advances when the current entry is not NULL.
 * NOTE(review): the assignments into 'entry' and the return statements
 * are not visible in this excerpt. */
5032 static int listTypeNext(listTypeIterator
*li
, listTypeEntry
*entry
) {
5033 /* Protect from converting when iterating */
5034 redisAssert(li
->subject
->encoding
== li
->encoding
);
5037 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5039 if (entry
->zi
!= NULL
) {
5040 if (li
->direction
== REDIS_TAIL
)
5041 li
->zi
= ziplistNext(li
->subject
->ptr
,li
->zi
);
5043 li
->zi
= ziplistPrev(li
->subject
->ptr
,li
->zi
);
5046 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5048 if (entry
->ln
!= NULL
) {
5049 if (li
->direction
== REDIS_TAIL
)
5050 li
->ln
= li
->ln
->next
;
5052 li
->ln
= li
->ln
->prev
;
5056 redisPanic("Unknown list encoding");
5061 /* Return entry or NULL at the current position of the iterator.
 *
 * Ziplist: a new string object is created from the entry's bytes or from
 * its integer value. Linked list: the stored object is returned with its
 * reference count incremented. Either way the caller ends up owning one
 * reference. The entry pointer for the active encoding is asserted to be
 * non-NULL, and any other encoding reaches redisPanic().
 * NOTE(review): the return statement is not visible in this excerpt. */
5062 static robj
*listTypeGet(listTypeEntry
*entry
) {
5063 listTypeIterator
*li
= entry
->li
;
5065 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5066 unsigned char *vstr
;
5069 redisAssert(entry
->zi
!= NULL
);
5070 if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) {
5072 value
= createStringObject((char*)vstr
,vlen
);
5074 value
= createStringObjectFromLongLong(vlong
);
5077 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5078 redisAssert(entry
->ln
!= NULL
);
5079 value
= listNodeValue(entry
->ln
);
5080 incrRefCount(value
);
5082 redisPanic("Unknown list encoding");
5087 /* Compare the given object with the entry at the current position.
 *
 * Ziplist: 'o' must be raw encoded (asserted) and its bytes are compared
 * with ziplistCompare(). Linked list: the comparison is delegated to
 * equalStringObjects(). Any other encoding reaches redisPanic(). */
5088 static int listTypeEqual(listTypeEntry
*entry
, robj
*o
) {
5089 listTypeIterator
*li
= entry
->li
;
5090 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5091 redisAssert(o
->encoding
== REDIS_ENCODING_RAW
);
5092 return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
));
5093 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5094 return equalStringObjects(o
,listNodeValue(entry
->ln
));
5096 redisPanic("Unknown list encoding");
5100 /* Delete the element pointed to.
 *
 * Ziplist: ziplistDelete() may move the buffer, so the subject's ptr is
 * reassigned from its return value and 'p' is passed by address; the
 * iterator position is then refreshed according to the direction.
 * Linked list: the neighbour in the iteration direction is read before
 * the node is deleted.
 * Any other encoding reaches redisPanic().
 * NOTE(review): the REDIS_TAIL ziplist assignment and the store of 'next'
 * into the iterator are not visible in this excerpt. */
5101 static void listTypeDelete(listTypeEntry
*entry
) {
5102 listTypeIterator
*li
= entry
->li
;
5103 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5104 unsigned char *p
= entry
->zi
;
5105 li
->subject
->ptr
= ziplistDelete(li
->subject
->ptr
,&p
);
5107 /* Update position of the iterator depending on the direction */
5108 if (li
->direction
== REDIS_TAIL
)
5111 li
->zi
= ziplistPrev(li
->subject
->ptr
,p
);
5112 } else if (entry
->li
->encoding
== REDIS_ENCODING_LIST
) {
5114 if (li
->direction
== REDIS_TAIL
)
5115 next
= entry
->ln
->next
;
5117 next
= entry
->ln
->prev
;
5118 listDelNode(li
->subject
->ptr
,entry
->ln
);
5121 redisPanic("Unknown list encoding");
/* Convert the list 'subject' to the encoding 'enc'.
 *
 * Only REDIS_ENCODING_LIST is handled as a target; the other visible path
 * is redisPanic("Unsupported list conversion"). A new linked list is
 * created with decrRefCount as its free method and filled by iterating
 * the subject from index 0 towards the tail: each listTypeGet() result
 * already carries a reference, which the new list takes over. The
 * subject's encoding is then switched and its old ptr released with
 * zfree().
 * NOTE(review): the assignment of the new list to subject->ptr is not
 * visible in this excerpt. */
5125 static void listTypeConvert(robj
*subject
, int enc
) {
5126 listTypeIterator
*li
;
5127 listTypeEntry entry
;
5128 redisAssert(subject
->type
== REDIS_LIST
);
5130 if (enc
== REDIS_ENCODING_LIST
) {
5131 list
*l
= listCreate();
5132 listSetFreeMethod(l
,decrRefCount
);
5134 /* listTypeGet returns a robj with incremented refcount */
5135 li
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5136 while (listTypeNext(li
,&entry
)) listAddNodeTail(l
,listTypeGet(&entry
));
5137 listTypeReleaseIterator(li
);
5139 subject
->encoding
= REDIS_ENCODING_LIST
;
5140 zfree(subject
->ptr
);
5143 redisPanic("Unsupported list conversion");
/* Shared implementation behind lpushCommand() and rpushCommand(): push
 * argv[2] on the list stored at argv[1], on the side given by 'where'.
 *
 * handleClientsWaitingListPush() is tried first on both visible paths;
 * when it returns non-zero the reply is shared.cone and, judging from the
 * visible code, the element is not pushed here. Otherwise a new ziplist
 * object is created and added to the database on the creation path, and a
 * value that is not a REDIS_LIST gets shared.wrongtypeerr. After the push
 * the reply is the new list length.
 * NOTE(review): the branch conditions and early returns are not visible
 * in this excerpt. */
5147 static void pushGenericCommand(redisClient
*c
, int where
) {
5148 robj
*lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5150 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5151 addReply(c
,shared
.cone
);
5154 lobj
= createZiplistObject();
5155 dbAdd(c
->db
,c
->argv
[1],lobj
);
5157 if (lobj
->type
!= REDIS_LIST
) {
5158 addReply(c
,shared
.wrongtypeerr
);
5161 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5162 addReply(c
,shared
.cone
);
5166 listTypePush(lobj
,c
->argv
[2],where
);
5167 addReplyLongLong(c
,listTypeLength(lobj
));
/* Command handler: pushGenericCommand() on the REDIS_HEAD side. */
5171 static void lpushCommand(redisClient
*c
) {
5172 pushGenericCommand(c
,REDIS_HEAD
);
/* Command handler: pushGenericCommand() on the REDIS_TAIL side. */
5175 static void rpushCommand(redisClient
*c
) {
5176 pushGenericCommand(c
,REDIS_TAIL
);
/* Reply with the length of the list stored at argv[1].
 * lookupKeyReadOrReply() is handed shared.czero as the reply for a NULL
 * lookup, and the function returns when checkType() rejects a value that
 * is not a REDIS_LIST. */
5179 static void llenCommand(redisClient
*c
) {
5180 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
);
5181 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5182 addReplyUlong(c
,listTypeLength(o
));
5185 static void lindexCommand(redisClient
*c
) {
5186 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5187 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5188 int index
= atoi(c
->argv
[2]->ptr
);
5191 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5193 unsigned char *vstr
;
5196 p
= ziplistIndex(o
->ptr
,index
);
5197 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
5199 value
= createStringObject((char*)vstr
,vlen
);
5201 value
= createStringObjectFromLongLong(vlong
);
5203 addReplyBulk(c
,value
);
5204 decrRefCount(value
);
5206 addReply(c
,shared
.nullbulk
);
5208 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5209 listNode
*ln
= listIndex(o
->ptr
,index
);
5211 value
= listNodeValue(ln
);
5212 addReplyBulk(c
,value
);
5214 addReply(c
,shared
.nullbulk
);
5217 redisPanic("Unknown list encoding");
5221 static void lsetCommand(redisClient
*c
) {
5222 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
);
5223 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5224 int index
= atoi(c
->argv
[2]->ptr
);
5225 robj
*value
= c
->argv
[3];
5227 listTypeTryConversion(o
,value
);
5228 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5229 unsigned char *p
, *zl
= o
->ptr
;
5230 p
= ziplistIndex(zl
,index
);
5232 addReply(c
,shared
.outofrangeerr
);
5234 o
->ptr
= ziplistDelete(o
->ptr
,&p
);
5235 value
= getDecodedObject(value
);
5236 o
->ptr
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
));
5237 decrRefCount(value
);
5238 addReply(c
,shared
.ok
);
5241 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5242 listNode
*ln
= listIndex(o
->ptr
,index
);
5244 addReply(c
,shared
.outofrangeerr
);
5246 decrRefCount((robj
*)listNodeValue(ln
));
5247 listNodeValue(ln
) = value
;
5248 incrRefCount(value
);
5249 addReply(c
,shared
.ok
);
5253 redisPanic("Unknown list encoding");
5257 static void popGenericCommand(redisClient
*c
, int where
) {
5258 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5259 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5261 robj
*value
= listTypePop(o
,where
);
5262 if (value
== NULL
) {
5263 addReply(c
,shared
.nullbulk
);
5265 addReplyBulk(c
,value
);
5266 decrRefCount(value
);
5267 if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5272 static void lpopCommand(redisClient
*c
) {
5273 popGenericCommand(c
,REDIS_HEAD
);
5276 static void rpopCommand(redisClient
*c
) {
5277 popGenericCommand(c
,REDIS_TAIL
);
5280 static void lrangeCommand(redisClient
*c
) {
5282 int start
= atoi(c
->argv
[2]->ptr
);
5283 int end
= atoi(c
->argv
[3]->ptr
);
5286 listTypeEntry entry
;
5288 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5289 || checkType(c
,o
,REDIS_LIST
)) return;
5290 llen
= listTypeLength(o
);
5292 /* convert negative indexes */
5293 if (start
< 0) start
= llen
+start
;
5294 if (end
< 0) end
= llen
+end
;
5295 if (start
< 0) start
= 0;
5296 if (end
< 0) end
= 0;
5298 /* indexes sanity checks */
5299 if (start
> end
|| start
>= llen
) {
5300 /* Out of range start or start > end result in empty list */
5301 addReply(c
,shared
.emptymultibulk
);
5304 if (end
>= llen
) end
= llen
-1;
5305 rangelen
= (end
-start
)+1;
5307 /* Return the result in form of a multi-bulk reply */
5308 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
5309 listTypeIterator
*li
= listTypeInitIterator(o
,start
,REDIS_TAIL
);
5310 for (j
= 0; j
< rangelen
; j
++) {
5311 redisAssert(listTypeNext(li
,&entry
));
5312 value
= listTypeGet(&entry
);
5313 addReplyBulk(c
,value
);
5314 decrRefCount(value
);
5316 listTypeReleaseIterator(li
);
5319 static void ltrimCommand(redisClient
*c
) {
5321 int start
= atoi(c
->argv
[2]->ptr
);
5322 int end
= atoi(c
->argv
[3]->ptr
);
5324 int j
, ltrim
, rtrim
;
5328 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
5329 checkType(c
,o
,REDIS_LIST
)) return;
5330 llen
= listTypeLength(o
);
5332 /* convert negative indexes */
5333 if (start
< 0) start
= llen
+start
;
5334 if (end
< 0) end
= llen
+end
;
5335 if (start
< 0) start
= 0;
5336 if (end
< 0) end
= 0;
5338 /* indexes sanity checks */
5339 if (start
> end
|| start
>= llen
) {
5340 /* Out of range start or start > end result in empty list */
5344 if (end
>= llen
) end
= llen
-1;
5349 /* Remove list elements to perform the trim */
5350 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5351 o
->ptr
= ziplistDeleteRange(o
->ptr
,0,ltrim
);
5352 o
->ptr
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
);
5353 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5355 for (j
= 0; j
< ltrim
; j
++) {
5356 ln
= listFirst(list
);
5357 listDelNode(list
,ln
);
5359 for (j
= 0; j
< rtrim
; j
++) {
5360 ln
= listLast(list
);
5361 listDelNode(list
,ln
);
5364 redisPanic("Unknown list encoding");
5366 if (listTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5368 addReply(c
,shared
.ok
);
5371 static void lremCommand(redisClient
*c
) {
5372 robj
*subject
, *obj
= c
->argv
[3];
5373 int toremove
= atoi(c
->argv
[2]->ptr
);
5375 listTypeEntry entry
;
5377 subject
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
);
5378 if (subject
== NULL
|| checkType(c
,subject
,REDIS_LIST
)) return;
5380 /* Make sure obj is raw when we're dealing with a ziplist */
5381 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5382 obj
= getDecodedObject(obj
);
5384 listTypeIterator
*li
;
5386 toremove
= -toremove
;
5387 li
= listTypeInitIterator(subject
,-1,REDIS_HEAD
);
5389 li
= listTypeInitIterator(subject
,0,REDIS_TAIL
);
5392 while (listTypeNext(li
,&entry
)) {
5393 if (listTypeEqual(&entry
,obj
)) {
5394 listTypeDelete(&entry
);
5397 if (toremove
&& removed
== toremove
) break;
5400 listTypeReleaseIterator(li
);
5402 /* Clean up raw encoded object */
5403 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5406 if (listTypeLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5407 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5410 /* This is the semantic of this command:
5411 * RPOPLPUSH srclist dstlist:
5412 * IF LLEN(srclist) > 0
5413 * element = RPOP srclist
5414 * LPUSH dstlist element
5421 * The idea is to be able to get an element from a list in a reliable way
5422 * since the element is not just returned but pushed against another list
5423 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5425 static void rpoplpushcommand(redisClient
*c
) {
5427 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5428 checkType(c
,sobj
,REDIS_LIST
)) return;
5430 if (listTypeLength(sobj
) == 0) {
5431 addReply(c
,shared
.nullbulk
);
5433 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5434 if (dobj
&& checkType(c
,dobj
,REDIS_LIST
)) return;
5435 value
= listTypePop(sobj
,REDIS_TAIL
);
5437 /* Add the element to the target list (unless it's directly
5438 * passed to some BLPOP-ing client */
5439 if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) {
5440 /* Create the list if the key does not exist */
5442 dobj
= createZiplistObject();
5443 dbAdd(c
->db
,c
->argv
[2],dobj
);
5445 listTypePush(dobj
,value
,REDIS_HEAD
);
5448 /* Send the element to the client as reply as well */
5449 addReplyBulk(c
,value
);
5451 /* listTypePop returns an object with its refcount incremented */
5452 decrRefCount(value
);
5454 /* Delete the source list when it is empty */
5455 if (listTypeLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5460 /* ==================================== Sets ================================ */
5462 static int setTypeAdd(robj
*subject
, robj
*value
) {
5463 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5464 if (dictAdd(subject
->ptr
,value
,NULL
) == DICT_OK
) {
5465 incrRefCount(value
);
5469 redisPanic("Unknown set encoding");
5474 static int setTypeRemove(robj
*subject
, robj
*value
) {
5475 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5476 if (dictDelete(subject
->ptr
,value
) == DICT_OK
) {
5477 if (htNeedsResize(subject
->ptr
)) dictResize(subject
->ptr
);
5481 redisPanic("Unknown set encoding");
5486 static int setTypeIsMember(robj
*subject
, robj
*value
) {
5487 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5488 return dictFind((dict
*)subject
->ptr
,value
) != NULL
;
5490 redisPanic("Unknown set encoding");
5494 /* Structure to hold set iteration abstraction. */
5500 static setIterator
*setTypeInitIterator(robj
*subject
) {
5501 setIterator
*si
= zmalloc(sizeof(setIterator
));
5502 si
->encoding
= subject
->encoding
;
5503 if (si
->encoding
== REDIS_ENCODING_HT
) {
5504 si
->di
= dictGetIterator(subject
->ptr
);
5506 redisPanic("Unknown set encoding");
5511 static void setTypeReleaseIterator(setIterator
*si
) {
5512 if (si
->encoding
== REDIS_ENCODING_HT
)
5513 dictReleaseIterator(si
->di
);
5517 /* Move to the next entry in the set. Returns the object at the current
5518 * position, or NULL when the end is reached. This object will have its
5519 * refcount incremented, so the caller needs to take care of this. */
5520 static robj
*setTypeNext(setIterator
*si
) {
5522 if (si
->encoding
== REDIS_ENCODING_HT
) {
5523 dictEntry
*de
= dictNext(si
->di
);
5525 ret
= dictGetEntryKey(de
);
5533 /* Return random element from set. The returned object will always have
5534 * an incremented refcount. */
5535 robj
*setTypeRandomElement(robj
*subject
) {
5537 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5538 dictEntry
*de
= dictGetRandomKey(subject
->ptr
);
5539 ret
= dictGetEntryKey(de
);
5542 redisPanic("Unknown set encoding");
5547 static unsigned long setTypeSize(robj
*subject
) {
5548 if (subject
->encoding
== REDIS_ENCODING_HT
) {
5549 return dictSize((dict
*)subject
->ptr
);
5551 redisPanic("Unknown set encoding");
5555 static void saddCommand(redisClient
*c
) {
5558 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5560 set
= createSetObject();
5561 dbAdd(c
->db
,c
->argv
[1],set
);
5563 if (set
->type
!= REDIS_SET
) {
5564 addReply(c
,shared
.wrongtypeerr
);
5568 if (setTypeAdd(set
,c
->argv
[2])) {
5570 addReply(c
,shared
.cone
);
5572 addReply(c
,shared
.czero
);
5576 static void sremCommand(redisClient
*c
) {
5579 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5580 checkType(c
,set
,REDIS_SET
)) return;
5582 if (setTypeRemove(set
,c
->argv
[2])) {
5583 if (setTypeSize(set
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5585 addReply(c
,shared
.cone
);
5587 addReply(c
,shared
.czero
);
5591 static void smoveCommand(redisClient
*c
) {
5592 robj
*srcset
, *dstset
;
5594 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5595 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5597 /* If the source key does not exist return 0, if it's of the wrong type
5599 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5600 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5603 /* Error if the destination key is not a set as well */
5604 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5605 addReply(c
,shared
.wrongtypeerr
);
5608 /* Remove the element from the source set */
5609 if (!setTypeRemove(srcset
,c
->argv
[3])) {
5610 /* Key not found in the src set! return zero */
5611 addReply(c
,shared
.czero
);
5614 if (setTypeSize(srcset
) == 0 && srcset
!= dstset
)
5615 dbDelete(c
->db
,c
->argv
[1]);
5617 /* Add the element to the destination set */
5619 dstset
= createSetObject();
5620 dbAdd(c
->db
,c
->argv
[2],dstset
);
5622 setTypeAdd(dstset
,c
->argv
[3]);
5623 addReply(c
,shared
.cone
);
5626 static void sismemberCommand(redisClient
*c
) {
5629 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5630 checkType(c
,set
,REDIS_SET
)) return;
5632 if (setTypeIsMember(set
,c
->argv
[2]))
5633 addReply(c
,shared
.cone
);
5635 addReply(c
,shared
.czero
);
5638 static void scardCommand(redisClient
*c
) {
5641 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5642 checkType(c
,o
,REDIS_SET
)) return;
5644 addReplyUlong(c
,setTypeSize(o
));
5647 static void spopCommand(redisClient
*c
) {
5650 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5651 checkType(c
,set
,REDIS_SET
)) return;
5653 ele
= setTypeRandomElement(set
);
5655 addReply(c
,shared
.nullbulk
);
5657 setTypeRemove(set
,ele
);
5658 addReplyBulk(c
,ele
);
5660 if (setTypeSize(set
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5665 static void srandmemberCommand(redisClient
*c
) {
5668 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5669 checkType(c
,set
,REDIS_SET
)) return;
5671 ele
= setTypeRandomElement(set
);
5673 addReply(c
,shared
.nullbulk
);
5675 addReplyBulk(c
,ele
);
5680 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5681 return setTypeSize(*(robj
**)s1
)-setTypeSize(*(robj
**)s2
);
5684 static void sinterGenericCommand(redisClient
*c
, robj
**setkeys
, unsigned long setnum
, robj
*dstkey
) {
5685 robj
**sets
= zmalloc(sizeof(robj
*)*setnum
);
5687 robj
*ele
, *lenobj
= NULL
, *dstset
= NULL
;
5688 unsigned long j
, cardinality
= 0;
5690 for (j
= 0; j
< setnum
; j
++) {
5691 robj
*setobj
= dstkey
?
5692 lookupKeyWrite(c
->db
,setkeys
[j
]) :
5693 lookupKeyRead(c
->db
,setkeys
[j
]);
5697 if (dbDelete(c
->db
,dstkey
))
5699 addReply(c
,shared
.czero
);
5701 addReply(c
,shared
.emptymultibulk
);
5705 if (checkType(c
,setobj
,REDIS_SET
)) {
5711 /* Sort sets from the smallest to largest, this will improve our
5712 * algorithm's performace */
5713 qsort(sets
,setnum
,sizeof(robj
*),qsortCompareSetsByCardinality
);
5715 /* The first thing we should output is the total number of elements...
5716 * since this is a multi-bulk write, but at this stage we don't know
5717 * the intersection set size, so we use a trick, append an empty object
5718 * to the output list and save the pointer to later modify it with the
5721 lenobj
= createObject(REDIS_STRING
,NULL
);
5723 decrRefCount(lenobj
);
5725 /* If we have a target key where to store the resulting set
5726 * create this key with an empty set inside */
5727 dstset
= createSetObject();
5730 /* Iterate all the elements of the first (smallest) set, and test
5731 * the element against all the other sets, if at least one set does
5732 * not include the element it is discarded */
5733 si
= setTypeInitIterator(sets
[0]);
5734 while((ele
= setTypeNext(si
)) != NULL
) {
5735 for (j
= 1; j
< setnum
; j
++)
5736 if (!setTypeIsMember(sets
[j
],ele
)) break;
5738 /* Only take action when all sets contain the member */
5741 addReplyBulk(c
,ele
);
5744 setTypeAdd(dstset
,ele
);
5749 setTypeReleaseIterator(si
);
5752 /* Store the resulting set into the target, if the intersection
5753 * is not an empty set. */
5754 dbDelete(c
->db
,dstkey
);
5755 if (setTypeSize(dstset
) > 0) {
5756 dbAdd(c
->db
,dstkey
,dstset
);
5757 addReplyLongLong(c
,setTypeSize(dstset
));
5759 decrRefCount(dstset
);
5760 addReply(c
,shared
.czero
);
5764 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5769 static void sinterCommand(redisClient
*c
) {
5770 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5773 static void sinterstoreCommand(redisClient
*c
) {
5774 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Set operation selectors, passed as the 'op' argument of
 * sunionDiffGenericCommand(). */
5777 #define REDIS_OP_UNION 0
5778 #define REDIS_OP_DIFF 1
5779 #define REDIS_OP_INTER 2
5781 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setkeys
, int setnum
, robj
*dstkey
, int op
) {
5782 robj
**sets
= zmalloc(sizeof(robj
*)*setnum
);
5784 robj
*ele
, *dstset
= NULL
;
5785 int j
, cardinality
= 0;
5787 for (j
= 0; j
< setnum
; j
++) {
5788 robj
*setobj
= dstkey
?
5789 lookupKeyWrite(c
->db
,setkeys
[j
]) :
5790 lookupKeyRead(c
->db
,setkeys
[j
]);
5795 if (checkType(c
,setobj
,REDIS_SET
)) {
5802 /* We need a temp set object to store our union. If the dstkey
5803 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5804 * this set object will be the resulting object to set into the target key*/
5805 dstset
= createSetObject();
5807 /* Iterate all the elements of all the sets, add every element a single
5808 * time to the result set */
5809 for (j
= 0; j
< setnum
; j
++) {
5810 if (op
== REDIS_OP_DIFF
&& j
== 0 && !sets
[j
]) break; /* result set is empty */
5811 if (!sets
[j
]) continue; /* non existing keys are like empty sets */
5813 si
= setTypeInitIterator(sets
[j
]);
5814 while((ele
= setTypeNext(si
)) != NULL
) {
5815 if (op
== REDIS_OP_UNION
|| j
== 0) {
5816 if (setTypeAdd(dstset
,ele
)) {
5819 } else if (op
== REDIS_OP_DIFF
) {
5820 if (setTypeRemove(dstset
,ele
)) {
5826 setTypeReleaseIterator(si
);
5828 /* Exit when result set is empty. */
5829 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5832 /* Output the content of the resulting set, if not in STORE mode */
5834 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5835 si
= setTypeInitIterator(dstset
);
5836 while((ele
= setTypeNext(si
)) != NULL
) {
5837 addReplyBulk(c
,ele
);
5840 setTypeReleaseIterator(si
);
5841 decrRefCount(dstset
);
5843 /* If we have a target key where to store the resulting set
5844 * create this key with the result set inside */
5845 dbDelete(c
->db
,dstkey
);
5846 if (setTypeSize(dstset
) > 0) {
5847 dbAdd(c
->db
,dstkey
,dstset
);
5848 addReplyLongLong(c
,setTypeSize(dstset
));
5850 decrRefCount(dstset
);
5851 addReply(c
,shared
.czero
);
5858 static void sunionCommand(redisClient
*c
) {
5859 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5862 static void sunionstoreCommand(redisClient
*c
) {
5863 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5866 static void sdiffCommand(redisClient
*c
) {
5867 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5870 static void sdiffstoreCommand(redisClient
*c
) {
5871 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5874 /* ==================================== ZSets =============================== */
/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */
5884 /* This skiplist implementation is almost a C translation of the original
5885 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5886 * Alternative to Balanced Trees", modified in three ways:
5887 * a) this implementation allows for repeated values.
5888 * b) the comparison is not just by key (our 'score') but by satellite data.
5889 * c) there is a back pointer, so it's a doubly linked list with the back
5890 * pointers being only at "level 1". This allows to traverse the list
5891 * from tail to head, useful for ZREVRANGE. */
5893 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5894 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5896 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5898 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5906 static zskiplist
*zslCreate(void) {
5910 zsl
= zmalloc(sizeof(*zsl
));
5913 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5914 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5915 zsl
->header
->forward
[j
] = NULL
;
5917 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5918 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5919 zsl
->header
->span
[j
] = 0;
5921 zsl
->header
->backward
= NULL
;
5926 static void zslFreeNode(zskiplistNode
*node
) {
5927 decrRefCount(node
->obj
);
5928 zfree(node
->forward
);
5933 static void zslFree(zskiplist
*zsl
) {
5934 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5936 zfree(zsl
->header
->forward
);
5937 zfree(zsl
->header
->span
);
5940 next
= node
->forward
[0];
5947 static int zslRandomLevel(void) {
5949 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5951 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5954 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5955 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5956 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5960 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5961 /* store rank that is crossed to reach the insert position */
5962 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5964 while (x
->forward
[i
] &&
5965 (x
->forward
[i
]->score
< score
||
5966 (x
->forward
[i
]->score
== score
&&
5967 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5968 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5973 /* we assume the key is not already inside, since we allow duplicated
5974 * scores, and the re-insertion of score and redis object should never
5975 * happpen since the caller of zslInsert() should test in the hash table
5976 * if the element is already inside or not. */
5977 level
= zslRandomLevel();
5978 if (level
> zsl
->level
) {
5979 for (i
= zsl
->level
; i
< level
; i
++) {
5981 update
[i
] = zsl
->header
;
5982 update
[i
]->span
[i
-1] = zsl
->length
;
5986 x
= zslCreateNode(level
,score
,obj
);
5987 for (i
= 0; i
< level
; i
++) {
5988 x
->forward
[i
] = update
[i
]->forward
[i
];
5989 update
[i
]->forward
[i
] = x
;
5991 /* update span covered by update[i] as x is inserted here */
5993 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5994 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5998 /* increment span for untouched levels */
5999 for (i
= level
; i
< zsl
->level
; i
++) {
6000 update
[i
]->span
[i
-1]++;
6003 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
6005 x
->forward
[0]->backward
= x
;
6011 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
6012 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
6014 for (i
= 0; i
< zsl
->level
; i
++) {
6015 if (update
[i
]->forward
[i
] == x
) {
6017 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
6019 update
[i
]->forward
[i
] = x
->forward
[i
];
6021 /* invariant: i > 0, because update[0]->forward[0]
6022 * is always equal to x */
6023 update
[i
]->span
[i
-1] -= 1;
6026 if (x
->forward
[0]) {
6027 x
->forward
[0]->backward
= x
->backward
;
6029 zsl
->tail
= x
->backward
;
6031 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
6036 /* Delete an element with matching score/object from the skiplist. */
6037 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
6038 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6042 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6043 while (x
->forward
[i
] &&
6044 (x
->forward
[i
]->score
< score
||
6045 (x
->forward
[i
]->score
== score
&&
6046 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
6050 /* We may have multiple elements with the same score, what we need
6051 * is to find the element with both the right score and object. */
6053 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
6054 zslDeleteNode(zsl
, x
, update
);
6058 return 0; /* not found */
6060 return 0; /* not found */
6063 /* Delete all the elements with score between min and max from the skiplist.
6064 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
6065 * Note that this function takes the reference to the hash table view of the
6066 * sorted set, in order to remove the elements from the hash table too. */
6067 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
6068 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6069 unsigned long removed
= 0;
6073 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6074 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
6078 /* We may have multiple elements with the same score, what we need
6079 * is to find the element with both the right score and object. */
6081 while (x
&& x
->score
<= max
) {
6082 zskiplistNode
*next
= x
->forward
[0];
6083 zslDeleteNode(zsl
, x
, update
);
6084 dictDelete(dict
,x
->obj
);
6089 return removed
; /* not found */
6092 /* Delete all the elements with rank between start and end from the skiplist.
6093 * Start and end are inclusive. Note that start and end need to be 1-based */
6094 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
6095 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
6096 unsigned long traversed
= 0, removed
= 0;
6100 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6101 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
6102 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6110 while (x
&& traversed
<= end
) {
6111 zskiplistNode
*next
= x
->forward
[0];
6112 zslDeleteNode(zsl
, x
, update
);
6113 dictDelete(dict
,x
->obj
);
6122 /* Find the first node having a score equal or greater than the specified one.
6123 * Returns NULL if there is no match. */
6124 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
6129 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6130 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
6133 /* We may have multiple elements with the same score, what we need
6134 * is to find the element with both the right score and object. */
6135 return x
->forward
[0];
6138 /* Find the rank for an element by both score and key.
6139 * Returns 0 when the element cannot be found, rank otherwise.
6140 * Note that the rank is 1-based due to the span of zsl->header to the
6142 static unsigned long zslistTypeGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
6144 unsigned long rank
= 0;
6148 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6149 while (x
->forward
[i
] &&
6150 (x
->forward
[i
]->score
< score
||
6151 (x
->forward
[i
]->score
== score
&&
6152 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
6153 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
6157 /* x might be equal to zsl->header, so test if obj is non-NULL */
6158 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
6165 /* Finds an element by its rank. The rank argument needs to be 1-based. */
6166 zskiplistNode
* zslistTypeGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
6168 unsigned long traversed
= 0;
6172 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6173 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
6175 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6178 if (traversed
== rank
) {
6185 /* The actual Z-commands implementations */
6187 /* This generic command implements both ZADD and ZINCRBY.
6188 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
6189 * the increment if the operation is a ZINCRBY (doincrement == 1). */
6190 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
6195 if (isnan(scoreval
)) {
6196 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
6200 zsetobj
= lookupKeyWrite(c
->db
,key
);
6201 if (zsetobj
== NULL
) {
6202 zsetobj
= createZsetObject();
6203 dbAdd(c
->db
,key
,zsetobj
);
6205 if (zsetobj
->type
!= REDIS_ZSET
) {
6206 addReply(c
,shared
.wrongtypeerr
);
6212 /* Ok now since we implement both ZADD and ZINCRBY here the code
6213 * needs to handle the two different conditions. It's all about setting
6214 * '*score', that is, the new score to set, to the right value. */
6215 score
= zmalloc(sizeof(double));
6219 /* Read the old score. If the element was not present starts from 0 */
6220 de
= dictFind(zs
->dict
,ele
);
6222 double *oldscore
= dictGetEntryVal(de
);
6223 *score
= *oldscore
+ scoreval
;
6227 if (isnan(*score
)) {
6229 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
6231 /* Note that we don't need to check if the zset may be empty and
6232 * should be removed here, as we can only obtain Nan as score if
6233 * there was already an element in the sorted set. */
6240 /* What follows is a simple remove and re-insert operation that is common
6241 * to both ZADD and ZINCRBY... */
6242 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
6243 /* case 1: New element */
6244 incrRefCount(ele
); /* added to hash */
6245 zslInsert(zs
->zsl
,*score
,ele
);
6246 incrRefCount(ele
); /* added to skiplist */
6249 addReplyDouble(c
,*score
);
6251 addReply(c
,shared
.cone
);
6256 /* case 2: Score update operation */
6257 de
= dictFind(zs
->dict
,ele
);
6258 redisAssert(de
!= NULL
);
6259 oldscore
= dictGetEntryVal(de
);
6260 if (*score
!= *oldscore
) {
6263 /* Remove and insert the element in the skip list with new score */
6264 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
6265 redisAssert(deleted
!= 0);
6266 zslInsert(zs
->zsl
,*score
,ele
);
6268 /* Update the score in the hash table */
6269 dictReplace(zs
->dict
,ele
,score
);
6275 addReplyDouble(c
,*score
);
6277 addReply(c
,shared
.czero
);
6281 static void zaddCommand(redisClient
*c
) {
6284 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6285 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
6288 static void zincrbyCommand(redisClient
*c
) {
6291 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6292 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
6295 static void zremCommand(redisClient
*c
) {
6302 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6303 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6306 de
= dictFind(zs
->dict
,c
->argv
[2]);
6308 addReply(c
,shared
.czero
);
6311 /* Delete from the skiplist */
6312 oldscore
= dictGetEntryVal(de
);
6313 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
6314 redisAssert(deleted
!= 0);
6316 /* Delete from the hash table */
6317 dictDelete(zs
->dict
,c
->argv
[2]);
6318 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6319 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6321 addReply(c
,shared
.cone
);
6324 static void zremrangebyscoreCommand(redisClient
*c
) {
6331 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
6332 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
6334 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6335 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6338 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
6339 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6340 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6341 server
.dirty
+= deleted
;
6342 addReplyLongLong(c
,deleted
);
6345 static void zremrangebyrankCommand(redisClient
*c
) {
6353 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6354 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6356 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6357 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6359 llen
= zs
->zsl
->length
;
6361 /* convert negative indexes */
6362 if (start
< 0) start
= llen
+start
;
6363 if (end
< 0) end
= llen
+end
;
6364 if (start
< 0) start
= 0;
6365 if (end
< 0) end
= 0;
6367 /* indexes sanity checks */
6368 if (start
> end
|| start
>= llen
) {
6369 addReply(c
,shared
.czero
);
6372 if (end
>= llen
) end
= llen
-1;
6374 /* increment start and end because zsl*Rank functions
6375 * use 1-based rank */
6376 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
6377 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6378 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6379 server
.dirty
+= deleted
;
6380 addReplyLongLong(c
, deleted
);
6388 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
6389 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
6390 unsigned long size1
, size2
;
6391 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
6392 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
6393 return size1
- size2
;
/* Aggregation modes understood by zunionInterAggregate(). */
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3
/* Score stored in dict entry _e; an entry whose value is NULL counts as 1.0.
 * Note that _e is expanded twice. */
#define zunionInterDictValue(_e) \
    (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
6401 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
6402 if (aggregate
== REDIS_AGGR_SUM
) {
6403 *target
= *target
+ val
;
6404 } else if (aggregate
== REDIS_AGGR_MIN
) {
6405 *target
= val
< *target
? val
: *target
;
6406 } else if (aggregate
== REDIS_AGGR_MAX
) {
6407 *target
= val
> *target
? val
: *target
;
6410 redisPanic("Unknown ZUNION/INTER aggregate type");
6414 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
6416 int aggregate
= REDIS_AGGR_SUM
;
6423 /* expect setnum input keys to be given */
6424 setnum
= atoi(c
->argv
[2]->ptr
);
6426 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
6430 /* test if the expected number of keys would overflow */
6431 if (3+setnum
> c
->argc
) {
6432 addReply(c
,shared
.syntaxerr
);
6436 /* read keys to be used for input */
6437 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6438 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6439 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
6443 if (obj
->type
== REDIS_ZSET
) {
6444 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6445 } else if (obj
->type
== REDIS_SET
) {
6446 src
[i
].dict
= (obj
->ptr
);
6449 addReply(c
,shared
.wrongtypeerr
);
6454 /* default all weights to 1 */
6455 src
[i
].weight
= 1.0;
6458 /* parse optional extra arguments */
6460 int remaining
= c
->argc
- j
;
6463 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6465 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6466 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6469 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6471 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6472 aggregate
= REDIS_AGGR_SUM
;
6473 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6474 aggregate
= REDIS_AGGR_MIN
;
6475 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6476 aggregate
= REDIS_AGGR_MAX
;
6479 addReply(c
,shared
.syntaxerr
);
6485 addReply(c
,shared
.syntaxerr
);
6491 /* sort sets from the smallest to largest, this will improve our
6492 * algorithm's performance */
6493 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6495 dstobj
= createZsetObject();
6496 dstzset
= dstobj
->ptr
;
6498 if (op
== REDIS_OP_INTER
) {
6499 /* skip going over all entries if the smallest zset is NULL or empty */
6500 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6501 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6502 * from small to large, all src[i > 0].dict are non-empty too */
6503 di
= dictGetIterator(src
[0].dict
);
6504 while((de
= dictNext(di
)) != NULL
) {
6505 double *score
= zmalloc(sizeof(double)), value
;
6506 *score
= src
[0].weight
* zunionInterDictValue(de
);
6508 for (j
= 1; j
< setnum
; j
++) {
6509 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6511 value
= src
[j
].weight
* zunionInterDictValue(other
);
6512 zunionInterAggregate(score
, value
, aggregate
);
6518 /* skip entry when not present in every source dict */
6522 robj
*o
= dictGetEntryKey(de
);
6523 dictAdd(dstzset
->dict
,o
,score
);
6524 incrRefCount(o
); /* added to dictionary */
6525 zslInsert(dstzset
->zsl
,*score
,o
);
6526 incrRefCount(o
); /* added to skiplist */
6529 dictReleaseIterator(di
);
6531 } else if (op
== REDIS_OP_UNION
) {
6532 for (i
= 0; i
< setnum
; i
++) {
6533 if (!src
[i
].dict
) continue;
6535 di
= dictGetIterator(src
[i
].dict
);
6536 while((de
= dictNext(di
)) != NULL
) {
6537 /* skip key when already processed */
6538 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6540 double *score
= zmalloc(sizeof(double)), value
;
6541 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6543 /* because the zsets are sorted by size, its only possible
6544 * for sets at larger indices to hold this entry */
6545 for (j
= (i
+1); j
< setnum
; j
++) {
6546 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6548 value
= src
[j
].weight
* zunionInterDictValue(other
);
6549 zunionInterAggregate(score
, value
, aggregate
);
6553 robj
*o
= dictGetEntryKey(de
);
6554 dictAdd(dstzset
->dict
,o
,score
);
6555 incrRefCount(o
); /* added to dictionary */
6556 zslInsert(dstzset
->zsl
,*score
,o
);
6557 incrRefCount(o
); /* added to skiplist */
6559 dictReleaseIterator(di
);
6562 /* unknown operator */
6563 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
6566 dbDelete(c
->db
,dstkey
);
6567 if (dstzset
->zsl
->length
) {
6568 dbAdd(c
->db
,dstkey
,dstobj
);
6569 addReplyLongLong(c
, dstzset
->zsl
->length
);
6572 decrRefCount(dstobj
);
6573 addReply(c
, shared
.czero
);
6578 static void zunionstoreCommand(redisClient
*c
) {
6579 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6582 static void zinterstoreCommand(redisClient
*c
) {
6583 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6586 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6598 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6599 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6601 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6603 } else if (c
->argc
>= 5) {
6604 addReply(c
,shared
.syntaxerr
);
6608 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6609 || checkType(c
,o
,REDIS_ZSET
)) return;
6614 /* convert negative indexes */
6615 if (start
< 0) start
= llen
+start
;
6616 if (end
< 0) end
= llen
+end
;
6617 if (start
< 0) start
= 0;
6618 if (end
< 0) end
= 0;
6620 /* indexes sanity checks */
6621 if (start
> end
|| start
>= llen
) {
6622 /* Out of range start or start > end result in empty list */
6623 addReply(c
,shared
.emptymultibulk
);
6626 if (end
>= llen
) end
= llen
-1;
6627 rangelen
= (end
-start
)+1;
6629 /* check if starting point is trivial, before searching
6630 * the element in log(N) time */
6632 ln
= start
== 0 ? zsl
->tail
: zslistTypeGetElementByRank(zsl
, llen
-start
);
6635 zsl
->header
->forward
[0] : zslistTypeGetElementByRank(zsl
, start
+1);
6638 /* Return the result in form of a multi-bulk reply */
6639 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6640 withscores
? (rangelen
*2) : rangelen
));
6641 for (j
= 0; j
< rangelen
; j
++) {
6643 addReplyBulk(c
,ele
);
6645 addReplyDouble(c
,ln
->score
);
6646 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6650 static void zrangeCommand(redisClient
*c
) {
6651 zrangeGenericCommand(c
,0);
6654 static void zrevrangeCommand(redisClient
*c
) {
6655 zrangeGenericCommand(c
,1);
6658 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6659 * If justcount is non-zero, just the count is returned. */
6660 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6663 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6664 int offset
= 0, limit
= -1;
6668 /* Parse the min-max interval. If one of the values is prefixed
6669 * by the "(" character, it's considered "open". For instance
6670 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6671 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6672 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6673 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6676 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6678 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6679 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6682 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6685 /* Parse "WITHSCORES": note that if the command was called with
6686 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6687 * enter the following paths to parse WITHSCORES and LIMIT. */
6688 if (c
->argc
== 5 || c
->argc
== 8) {
6689 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6694 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6698 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6703 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6704 addReply(c
,shared
.syntaxerr
);
6706 } else if (c
->argc
== (7 + withscores
)) {
6707 offset
= atoi(c
->argv
[5]->ptr
);
6708 limit
= atoi(c
->argv
[6]->ptr
);
6709 if (offset
< 0) offset
= 0;
6712 /* Ok, lookup the key and get the range */
6713 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6715 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6717 if (o
->type
!= REDIS_ZSET
) {
6718 addReply(c
,shared
.wrongtypeerr
);
6720 zset
*zsetobj
= o
->ptr
;
6721 zskiplist
*zsl
= zsetobj
->zsl
;
6723 robj
*ele
, *lenobj
= NULL
;
6724 unsigned long rangelen
= 0;
6726 /* Get the first node with the score >= min, or with
6727 * score > min if 'minex' is true. */
6728 ln
= zslFirstWithScore(zsl
,min
);
6729 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6732 /* No element matching the speciifed interval */
6733 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6737 /* We don't know in advance how many matching elements there
6738 * are in the list, so we push this object that will represent
6739 * the multi-bulk length in the output buffer, and will "fix"
6742 lenobj
= createObject(REDIS_STRING
,NULL
);
6744 decrRefCount(lenobj
);
6747 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6750 ln
= ln
->forward
[0];
6753 if (limit
== 0) break;
6756 addReplyBulk(c
,ele
);
6758 addReplyDouble(c
,ln
->score
);
6760 ln
= ln
->forward
[0];
6762 if (limit
> 0) limit
--;
6765 addReplyLongLong(c
,(long)rangelen
);
6767 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6768 withscores
? (rangelen
*2) : rangelen
);
6774 static void zrangebyscoreCommand(redisClient
*c
) {
6775 genericZrangebyscoreCommand(c
,0);
6778 static void zcountCommand(redisClient
*c
) {
6779 genericZrangebyscoreCommand(c
,1);
6782 static void zcardCommand(redisClient
*c
) {
6786 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6787 checkType(c
,o
,REDIS_ZSET
)) return;
6790 addReplyUlong(c
,zs
->zsl
->length
);
6793 static void zscoreCommand(redisClient
*c
) {
6798 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6799 checkType(c
,o
,REDIS_ZSET
)) return;
6802 de
= dictFind(zs
->dict
,c
->argv
[2]);
6804 addReply(c
,shared
.nullbulk
);
6806 double *score
= dictGetEntryVal(de
);
6808 addReplyDouble(c
,*score
);
6812 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6820 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6821 checkType(c
,o
,REDIS_ZSET
)) return;
6825 de
= dictFind(zs
->dict
,c
->argv
[2]);
6827 addReply(c
,shared
.nullbulk
);
6831 score
= dictGetEntryVal(de
);
6832 rank
= zslistTypeGetRank(zsl
, *score
, c
->argv
[2]);
6835 addReplyLongLong(c
, zsl
->length
- rank
);
6837 addReplyLongLong(c
, rank
-1);
6840 addReply(c
,shared
.nullbulk
);
6844 static void zrankCommand(redisClient
*c
) {
6845 zrankGenericCommand(c
, 0);
6848 static void zrevrankCommand(redisClient
*c
) {
6849 zrankGenericCommand(c
, 1);
/* ========================= Hashes utility functions ======================= */

/* Flags selecting which part of a hash entry is wanted; the two values can
 * be OR-ed together. */
#define REDIS_HASH_KEY 1
#define REDIS_HASH_VALUE 2
6856 /* Check the length of a number of objects to see if we need to convert a
6857 * zipmap to a real hash. Note that we only check string encoded objects
6858 * as their string length can be queried in constant time. */
6859 static void hashTypeTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6861 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6863 for (i
= start
; i
<= end
; i
++) {
6864 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6865 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6867 convertToRealHash(subject
);
6873 /* Encode given objects in-place when the hash uses a dict. */
6874 static void hashTypeTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6875 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6876 if (o1
) *o1
= tryObjectEncoding(*o1
);
6877 if (o2
) *o2
= tryObjectEncoding(*o2
);
6881 /* Get the value from a hash identified by key. Returns either a string
6882 * object or NULL if the value cannot be found. The refcount of the object
6883 * is always increased by 1 when the value was found. */
6884 static robj
*hashTypeGet(robj
*o
, robj
*key
) {
6886 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6889 key
= getDecodedObject(key
);
6890 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6891 value
= createStringObject((char*)v
,vlen
);
6895 dictEntry
*de
= dictFind(o
->ptr
,key
);
6897 value
= dictGetEntryVal(de
);
6898 incrRefCount(value
);
6904 /* Test if the key exists in the given hash. Returns 1 if the key
6905 * exists and 0 when it doesn't. */
6906 static int hashTypeExists(robj
*o
, robj
*key
) {
6907 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6908 key
= getDecodedObject(key
);
6909 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6915 if (dictFind(o
->ptr
,key
) != NULL
) {
6922 /* Add an element, discard the old if the key already exists.
6923 * Return 0 on insert and 1 on update. */
6924 static int hashTypeSet(robj
*o
, robj
*key
, robj
*value
) {
6926 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6927 key
= getDecodedObject(key
);
6928 value
= getDecodedObject(value
);
6929 o
->ptr
= zipmapSet(o
->ptr
,
6930 key
->ptr
,sdslen(key
->ptr
),
6931 value
->ptr
,sdslen(value
->ptr
), &update
);
6933 decrRefCount(value
);
6935 /* Check if the zipmap needs to be upgraded to a real hash table */
6936 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6937 convertToRealHash(o
);
6939 if (dictReplace(o
->ptr
,key
,value
)) {
6946 incrRefCount(value
);
6951 /* Delete an element from a hash.
6952 * Return 1 on deleted and 0 on not found. */
6953 static int hashTypeDelete(robj
*o
, robj
*key
) {
6955 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6956 key
= getDecodedObject(key
);
6957 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6960 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6961 /* Always check if the dictionary needs a resize after a delete. */
6962 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6967 /* Return the number of elements in a hash. */
6968 static unsigned long hashTypeLength(robj
*o
) {
6969 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6970 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6973 /* Structure to hold hash iteration abstration. Note that iteration over
6974 * hashes involves both fields and values. Because it is possible that
6975 * not both are required, store pointers in the iterator to avoid
6976 * unnecessary memory allocation for fields/values. */
6980 unsigned char *zk
, *zv
;
6981 unsigned int zklen
, zvlen
;
6987 static hashTypeIterator
*hashTypeInitIterator(robj
*subject
) {
6988 hashTypeIterator
*hi
= zmalloc(sizeof(hashTypeIterator
));
6989 hi
->encoding
= subject
->encoding
;
6990 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6991 hi
->zi
= zipmapRewind(subject
->ptr
);
6992 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6993 hi
->di
= dictGetIterator(subject
->ptr
);
7000 static void hashTypeReleaseIterator(hashTypeIterator
*hi
) {
7001 if (hi
->encoding
== REDIS_ENCODING_HT
) {
7002 dictReleaseIterator(hi
->di
);
7007 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
7008 * could be found and REDIS_ERR when the iterator reaches the end. */
7009 static int hashTypeNext(hashTypeIterator
*hi
) {
7010 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7011 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
7012 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
7014 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
7019 /* Get key or value object at current iteration position.
7020 * This increases the refcount of the field object by 1. */
7021 static robj
*hashTypeCurrent(hashTypeIterator
*hi
, int what
) {
7023 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
7024 if (what
& REDIS_HASH_KEY
) {
7025 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
7027 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
7030 if (what
& REDIS_HASH_KEY
) {
7031 o
= dictGetEntryKey(hi
->de
);
7033 o
= dictGetEntryVal(hi
->de
);
7040 static robj
*hashTypeLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
7041 robj
*o
= lookupKeyWrite(c
->db
,key
);
7043 o
= createHashObject();
7046 if (o
->type
!= REDIS_HASH
) {
7047 addReply(c
,shared
.wrongtypeerr
);
7054 /* ============================= Hash commands ============================== */
7055 static void hsetCommand(redisClient
*c
) {
7059 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7060 hashTypeTryConversion(o
,c
->argv
,2,3);
7061 hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
7062 update
= hashTypeSet(o
,c
->argv
[2],c
->argv
[3]);
7063 addReply(c
, update
? shared
.czero
: shared
.cone
);
7067 static void hsetnxCommand(redisClient
*c
) {
7069 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7070 hashTypeTryConversion(o
,c
->argv
,2,3);
7072 if (hashTypeExists(o
, c
->argv
[2])) {
7073 addReply(c
, shared
.czero
);
7075 hashTypeTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
7076 hashTypeSet(o
,c
->argv
[2],c
->argv
[3]);
7077 addReply(c
, shared
.cone
);
7082 static void hmsetCommand(redisClient
*c
) {
7086 if ((c
->argc
% 2) == 1) {
7087 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
7091 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7092 hashTypeTryConversion(o
,c
->argv
,2,c
->argc
-1);
7093 for (i
= 2; i
< c
->argc
; i
+= 2) {
7094 hashTypeTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
7095 hashTypeSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
7097 addReply(c
, shared
.ok
);
7101 static void hincrbyCommand(redisClient
*c
) {
7102 long long value
, incr
;
7103 robj
*o
, *current
, *new;
7105 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
7106 if ((o
= hashTypeLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
7107 if ((current
= hashTypeGet(o
,c
->argv
[2])) != NULL
) {
7108 if (getLongLongFromObjectOrReply(c
,current
,&value
,
7109 "hash value is not an integer") != REDIS_OK
) {
7110 decrRefCount(current
);
7113 decrRefCount(current
);
7119 new = createStringObjectFromLongLong(value
);
7120 hashTypeTryObjectEncoding(o
,&c
->argv
[2],NULL
);
7121 hashTypeSet(o
,c
->argv
[2],new);
7123 addReplyLongLong(c
,value
);
7127 static void hgetCommand(redisClient
*c
) {
7129 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
7130 checkType(c
,o
,REDIS_HASH
)) return;
7132 if ((value
= hashTypeGet(o
,c
->argv
[2])) != NULL
) {
7133 addReplyBulk(c
,value
);
7134 decrRefCount(value
);
7136 addReply(c
,shared
.nullbulk
);
7140 static void hmgetCommand(redisClient
*c
) {
7143 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
7144 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
7145 addReply(c
,shared
.wrongtypeerr
);
7148 /* Note the check for o != NULL happens inside the loop. This is
7149 * done because objects that cannot be found are considered to be
7150 * an empty hash. The reply should then be a series of NULLs. */
7151 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
7152 for (i
= 2; i
< c
->argc
; i
++) {
7153 if (o
!= NULL
&& (value
= hashTypeGet(o
,c
->argv
[i
])) != NULL
) {
7154 addReplyBulk(c
,value
);
7155 decrRefCount(value
);
7157 addReply(c
,shared
.nullbulk
);
7162 static void hdelCommand(redisClient
*c
) {
7164 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7165 checkType(c
,o
,REDIS_HASH
)) return;
7167 if (hashTypeDelete(o
,c
->argv
[2])) {
7168 if (hashTypeLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
7169 addReply(c
,shared
.cone
);
7172 addReply(c
,shared
.czero
);
7176 static void hlenCommand(redisClient
*c
) {
7178 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7179 checkType(c
,o
,REDIS_HASH
)) return;
7181 addReplyUlong(c
,hashTypeLength(o
));
7184 static void genericHgetallCommand(redisClient
*c
, int flags
) {
7185 robj
*o
, *lenobj
, *obj
;
7186 unsigned long count
= 0;
7187 hashTypeIterator
*hi
;
7189 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
7190 || checkType(c
,o
,REDIS_HASH
)) return;
7192 lenobj
= createObject(REDIS_STRING
,NULL
);
7194 decrRefCount(lenobj
);
7196 hi
= hashTypeInitIterator(o
);
7197 while (hashTypeNext(hi
) != REDIS_ERR
) {
7198 if (flags
& REDIS_HASH_KEY
) {
7199 obj
= hashTypeCurrent(hi
,REDIS_HASH_KEY
);
7200 addReplyBulk(c
,obj
);
7204 if (flags
& REDIS_HASH_VALUE
) {
7205 obj
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
);
7206 addReplyBulk(c
,obj
);
7211 hashTypeReleaseIterator(hi
);
7213 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
7216 static void hkeysCommand(redisClient
*c
) {
7217 genericHgetallCommand(c
,REDIS_HASH_KEY
);
7220 static void hvalsCommand(redisClient
*c
) {
7221 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
7224 static void hgetallCommand(redisClient
*c
) {
7225 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
7228 static void hexistsCommand(redisClient
*c
) {
7230 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7231 checkType(c
,o
,REDIS_HASH
)) return;
7233 addReply(c
, hashTypeExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
7236 static void convertToRealHash(robj
*o
) {
7237 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
7238 unsigned int klen
, vlen
;
7239 dict
*dict
= dictCreate(&hashDictType
,NULL
);
7241 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
7242 p
= zipmapRewind(zm
);
7243 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
7244 robj
*keyobj
, *valobj
;
7246 keyobj
= createStringObject((char*)key
,klen
);
7247 valobj
= createStringObject((char*)val
,vlen
);
7248 keyobj
= tryObjectEncoding(keyobj
);
7249 valobj
= tryObjectEncoding(valobj
);
7250 dictAdd(dict
,keyobj
,valobj
);
7252 o
->encoding
= REDIS_ENCODING_HT
;
7257 /* ========================= Non type-specific commands ==================== */
7259 static void flushdbCommand(redisClient
*c
) {
7260 server
.dirty
+= dictSize(c
->db
->dict
);
7261 touchWatchedKeysOnFlush(c
->db
->id
);
7262 dictEmpty(c
->db
->dict
);
7263 dictEmpty(c
->db
->expires
);
7264 addReply(c
,shared
.ok
);
7267 static void flushallCommand(redisClient
*c
) {
7268 touchWatchedKeysOnFlush(-1);
7269 server
.dirty
+= emptyDb();
7270 addReply(c
,shared
.ok
);
7271 if (server
.bgsavechildpid
!= -1) {
7272 kill(server
.bgsavechildpid
,SIGKILL
);
7273 rdbRemoveTempFile(server
.bgsavechildpid
);
7275 rdbSave(server
.dbfilename
);
7279 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
7280 redisSortOperation
*so
= zmalloc(sizeof(*so
));
7282 so
->pattern
= pattern
;
7286 /* Return the value associated to the key with a name obtained
7287 * substituting the first occurence of '*' in 'pattern' with 'subst'.
7288 * The returned object will always have its refcount increased by 1
7289 * when it is non-NULL. */
7290 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
7293 robj keyobj
, fieldobj
, *o
;
7294 int prefixlen
, sublen
, postfixlen
, fieldlen
;
7295 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
7299 char buf
[REDIS_SORTKEY_MAX
+1];
7300 } keyname
, fieldname
;
7302 /* If the pattern is "#" return the substitution object itself in order
7303 * to implement the "SORT ... GET #" feature. */
7304 spat
= pattern
->ptr
;
7305 if (spat
[0] == '#' && spat
[1] == '\0') {
7306 incrRefCount(subst
);
7310 /* The substitution object may be specially encoded. If so we create
7311 * a decoded object on the fly. Otherwise getDecodedObject will just
7312 * increment the ref count, that we'll decrement later. */
7313 subst
= getDecodedObject(subst
);
7316 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
7317 p
= strchr(spat
,'*');
7319 decrRefCount(subst
);
7323 /* Find out if we're dealing with a hash dereference. */
7324 if ((f
= strstr(p
+1, "->")) != NULL
) {
7325 fieldlen
= sdslen(spat
)-(f
-spat
);
7326 /* this also copies \0 character */
7327 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
7328 fieldname
.len
= fieldlen
-2;
7334 sublen
= sdslen(ssub
);
7335 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
7336 memcpy(keyname
.buf
,spat
,prefixlen
);
7337 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
7338 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
7339 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
7340 keyname
.len
= prefixlen
+sublen
+postfixlen
;
7341 decrRefCount(subst
);
7343 /* Lookup substituted key */
7344 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
7345 o
= lookupKeyRead(db
,&keyobj
);
7346 if (o
== NULL
) return NULL
;
7349 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
7351 /* Retrieve value from hash by the field name. This operation
7352 * already increases the refcount of the returned object. */
7353 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
7354 o
= hashTypeGet(o
, &fieldobj
);
7356 if (o
->type
!= REDIS_STRING
) return NULL
;
7358 /* Every object that this function returns needs to have its refcount
7359 * increased. sortCommand decreases it again. */
7366 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
7367 * the additional parameter is not standard but a BSD-specific we have to
7368 * pass sorting parameters via the global 'server' structure */
7369 static int sortCompare(const void *s1
, const void *s2
) {
7370 const redisSortObject
*so1
= s1
, *so2
= s2
;
7373 if (!server
.sort_alpha
) {
7374 /* Numeric sorting. Here it's trivial as we precomputed scores */
7375 if (so1
->u
.score
> so2
->u
.score
) {
7377 } else if (so1
->u
.score
< so2
->u
.score
) {
7383 /* Alphanumeric sorting */
7384 if (server
.sort_bypattern
) {
7385 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
7386 /* At least one compare object is NULL */
7387 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
7389 else if (so1
->u
.cmpobj
== NULL
)
7394 /* We have both the objects, use strcoll */
7395 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
7398 /* Compare elements directly. */
7399 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
7402 return server
.sort_desc
? -cmp
: cmp
;
7405 /* The SORT command is the most complex command in Redis. Warning: this code
7406 * is optimized for speed and a bit less for readability */
7407 static void sortCommand(redisClient
*c
) {
7409 unsigned int outputlen
= 0;
7410 int desc
= 0, alpha
= 0;
7411 int limit_start
= 0, limit_count
= -1, start
, end
;
7412 int j
, dontsort
= 0, vectorlen
;
7413 int getop
= 0; /* GET operation counter */
7414 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
7415 redisSortObject
*vector
; /* Resulting vector to sort */
7417 /* Lookup the key to sort. It must be of the right types */
7418 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
7419 if (sortval
== NULL
) {
7420 addReply(c
,shared
.emptymultibulk
);
7423 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
7424 sortval
->type
!= REDIS_ZSET
)
7426 addReply(c
,shared
.wrongtypeerr
);
7430 /* Create a list of operations to perform for every sorted element.
7431 * Operations can be GET/DEL/INCR/DECR */
7432 operations
= listCreate();
7433 listSetFreeMethod(operations
,zfree
);
7436 /* Now we need to protect sortval incrementing its count, in the future
7437 * SORT may have options able to overwrite/delete keys during the sorting
7438 * and the sorted key itself may get destroied */
7439 incrRefCount(sortval
);
7441 /* The SORT command has an SQL-alike syntax, parse it */
7442 while(j
< c
->argc
) {
7443 int leftargs
= c
->argc
-j
-1;
7444 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7446 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7448 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7450 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7451 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7452 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7454 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7455 storekey
= c
->argv
[j
+1];
7457 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7458 sortby
= c
->argv
[j
+1];
7459 /* If the BY pattern does not contain '*', i.e. it is constant,
7460 * we don't need to sort nor to lookup the weight keys. */
7461 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7463 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7464 listAddNodeTail(operations
,createSortOperation(
7465 REDIS_SORT_GET
,c
->argv
[j
+1]));
7469 decrRefCount(sortval
);
7470 listRelease(operations
);
7471 addReply(c
,shared
.syntaxerr
);
7477 /* Load the sorting vector with all the objects to sort */
7478 switch(sortval
->type
) {
7479 case REDIS_LIST
: vectorlen
= listTypeLength(sortval
); break;
7480 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7481 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7482 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7484 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7487 if (sortval
->type
== REDIS_LIST
) {
7488 listTypeIterator
*li
= listTypeInitIterator(sortval
,0,REDIS_TAIL
);
7489 listTypeEntry entry
;
7490 while(listTypeNext(li
,&entry
)) {
7491 vector
[j
].obj
= listTypeGet(&entry
);
7492 vector
[j
].u
.score
= 0;
7493 vector
[j
].u
.cmpobj
= NULL
;
7496 listTypeReleaseIterator(li
);
7502 if (sortval
->type
== REDIS_SET
) {
7505 zset
*zs
= sortval
->ptr
;
7509 di
= dictGetIterator(set
);
7510 while((setele
= dictNext(di
)) != NULL
) {
7511 vector
[j
].obj
= dictGetEntryKey(setele
);
7512 vector
[j
].u
.score
= 0;
7513 vector
[j
].u
.cmpobj
= NULL
;
7516 dictReleaseIterator(di
);
7518 redisAssert(j
== vectorlen
);
7520 /* Now it's time to load the right scores in the sorting vector */
7521 if (dontsort
== 0) {
7522 for (j
= 0; j
< vectorlen
; j
++) {
7525 /* lookup value to sort by */
7526 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7527 if (!byval
) continue;
7529 /* use object itself to sort by */
7530 byval
= vector
[j
].obj
;
7534 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7536 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7537 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7538 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7539 /* Don't need to decode the object if it's
7540 * integer-encoded (the only encoding supported) so
7541 * far. We can just cast it */
7542 vector
[j
].u
.score
= (long)byval
->ptr
;
7544 redisAssert(1 != 1);
7548 /* when the object was retrieved using lookupKeyByPattern,
7549 * its refcount needs to be decreased. */
7551 decrRefCount(byval
);
7556 /* We are ready to sort the vector... perform a bit of sanity check
7557 * on the LIMIT option too. We'll use a partial version of quicksort. */
7558 start
= (limit_start
< 0) ? 0 : limit_start
;
7559 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7560 if (start
>= vectorlen
) {
7561 start
= vectorlen
-1;
7564 if (end
>= vectorlen
) end
= vectorlen
-1;
7566 if (dontsort
== 0) {
7567 server
.sort_desc
= desc
;
7568 server
.sort_alpha
= alpha
;
7569 server
.sort_bypattern
= sortby
? 1 : 0;
7570 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7571 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7573 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7576 /* Send command output to the output buffer, performing the specified
7577 * GET/DEL/INCR/DECR operations if any. */
7578 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7579 if (storekey
== NULL
) {
7580 /* STORE option not specified, send the sorting result to client */
7581 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7582 for (j
= start
; j
<= end
; j
++) {
7586 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7587 listRewind(operations
,&li
);
7588 while((ln
= listNext(&li
))) {
7589 redisSortOperation
*sop
= ln
->value
;
7590 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7593 if (sop
->type
== REDIS_SORT_GET
) {
7595 addReply(c
,shared
.nullbulk
);
7597 addReplyBulk(c
,val
);
7601 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7606 robj
*sobj
= createZiplistObject();
7608 /* STORE option specified, set the sorting result as a List object */
7609 for (j
= start
; j
<= end
; j
++) {
7614 listTypePush(sobj
,vector
[j
].obj
,REDIS_TAIL
);
7616 listRewind(operations
,&li
);
7617 while((ln
= listNext(&li
))) {
7618 redisSortOperation
*sop
= ln
->value
;
7619 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7622 if (sop
->type
== REDIS_SORT_GET
) {
7623 if (!val
) val
= createStringObject("",0);
7625 /* listTypePush does an incrRefCount, so we should take
7626 * care of the incremented refcount caused by either
7627 * lookupKeyByPattern or createStringObject("",0) */
7628 listTypePush(sobj
,val
,REDIS_TAIL
);
7632 redisAssert(sop
->type
== REDIS_SORT_GET
);
7637 dbReplace(c
->db
,storekey
,sobj
);
7638 /* Note: we add 1 because the DB is dirty anyway since even if the
7639 * SORT result is empty a new key is set and maybe the old content
7641 server
.dirty
+= 1+outputlen
;
7642 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7646 if (sortval
->type
== REDIS_LIST
)
7647 for (j
= 0; j
< vectorlen
; j
++)
7648 decrRefCount(vector
[j
].obj
);
7649 decrRefCount(sortval
);
7650 listRelease(operations
);
7651 for (j
= 0; j
< vectorlen
; j
++) {
7652 if (alpha
&& vector
[j
].u
.cmpobj
)
7653 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer, written with sprintf(), so the caller must
 * provide enough room (the longest possible output, for the maximum
 * unsigned long long value, is "16777216.00T" plus the terminator).
 * 'n' is the number of bytes to describe.
 *
 * Values below 1024 are printed as an exact integer with a "B" suffix.
 * Larger values are divided by the matching power of 1024 and printed with
 * two decimals and a K, M, G or T suffix. Everything from one terabyte up
 * is reported in terabytes, so 's' is always written regardless of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024ULL*1024)) {
        /* Kilobytes */
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024ULL*1024*1024)) {
        /* Megabytes */
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024ULL*1024*1024*1024)) {
        /* Gigabytes */
        d = (double)n/(1024ULL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else {
        /* Terabytes and beyond. Without this branch nothing was written
         * into 's' for such values and the caller would print garbage. */
        d = (double)n/(1024ULL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    }
}
7679 /* Create the string returned by the INFO command. This is decoupled
7680 * by the INFO command itself as we need to report the same information
7681 * on memory corruption problems. */
7682 static sds
genRedisInfoString(void) {
7684 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7688 bytesToHuman(hmem
,zmalloc_used_memory());
7689 info
= sdscatprintf(sdsempty(),
7690 "redis_version:%s\r\n"
7691 "redis_git_sha1:%s\r\n"
7692 "redis_git_dirty:%d\r\n"
7694 "multiplexing_api:%s\r\n"
7695 "process_id:%ld\r\n"
7696 "uptime_in_seconds:%ld\r\n"
7697 "uptime_in_days:%ld\r\n"
7698 "connected_clients:%d\r\n"
7699 "connected_slaves:%d\r\n"
7700 "blocked_clients:%d\r\n"
7701 "used_memory:%zu\r\n"
7702 "used_memory_human:%s\r\n"
7703 "changes_since_last_save:%lld\r\n"
7704 "bgsave_in_progress:%d\r\n"
7705 "last_save_time:%ld\r\n"
7706 "bgrewriteaof_in_progress:%d\r\n"
7707 "total_connections_received:%lld\r\n"
7708 "total_commands_processed:%lld\r\n"
7709 "expired_keys:%lld\r\n"
7710 "hash_max_zipmap_entries:%zu\r\n"
7711 "hash_max_zipmap_value:%zu\r\n"
7712 "pubsub_channels:%ld\r\n"
7713 "pubsub_patterns:%u\r\n"
7718 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7719 (sizeof(long) == 8) ? "64" : "32",
7724 listLength(server
.clients
)-listLength(server
.slaves
),
7725 listLength(server
.slaves
),
7726 server
.blpop_blocked_clients
,
7727 zmalloc_used_memory(),
7730 server
.bgsavechildpid
!= -1,
7732 server
.bgrewritechildpid
!= -1,
7733 server
.stat_numconnections
,
7734 server
.stat_numcommands
,
7735 server
.stat_expiredkeys
,
7736 server
.hash_max_zipmap_entries
,
7737 server
.hash_max_zipmap_value
,
7738 dictSize(server
.pubsub_channels
),
7739 listLength(server
.pubsub_patterns
),
7740 server
.vm_enabled
!= 0,
7741 server
.masterhost
== NULL
? "master" : "slave"
7743 if (server
.masterhost
) {
7744 info
= sdscatprintf(info
,
7745 "master_host:%s\r\n"
7746 "master_port:%d\r\n"
7747 "master_link_status:%s\r\n"
7748 "master_last_io_seconds_ago:%d\r\n"
7751 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7753 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7756 if (server
.vm_enabled
) {
7758 info
= sdscatprintf(info
,
7759 "vm_conf_max_memory:%llu\r\n"
7760 "vm_conf_page_size:%llu\r\n"
7761 "vm_conf_pages:%llu\r\n"
7762 "vm_stats_used_pages:%llu\r\n"
7763 "vm_stats_swapped_objects:%llu\r\n"
7764 "vm_stats_swappin_count:%llu\r\n"
7765 "vm_stats_swappout_count:%llu\r\n"
7766 "vm_stats_io_newjobs_len:%lu\r\n"
7767 "vm_stats_io_processing_len:%lu\r\n"
7768 "vm_stats_io_processed_len:%lu\r\n"
7769 "vm_stats_io_active_threads:%lu\r\n"
7770 "vm_stats_blocked_clients:%lu\r\n"
7771 ,(unsigned long long) server
.vm_max_memory
,
7772 (unsigned long long) server
.vm_page_size
,
7773 (unsigned long long) server
.vm_pages
,
7774 (unsigned long long) server
.vm_stats_used_pages
,
7775 (unsigned long long) server
.vm_stats_swapped_objects
,
7776 (unsigned long long) server
.vm_stats_swapins
,
7777 (unsigned long long) server
.vm_stats_swapouts
,
7778 (unsigned long) listLength(server
.io_newjobs
),
7779 (unsigned long) listLength(server
.io_processing
),
7780 (unsigned long) listLength(server
.io_processed
),
7781 (unsigned long) server
.io_active_threads
,
7782 (unsigned long) server
.vm_blocked_clients
7786 for (j
= 0; j
< server
.dbnum
; j
++) {
7787 long long keys
, vkeys
;
7789 keys
= dictSize(server
.db
[j
].dict
);
7790 vkeys
= dictSize(server
.db
[j
].expires
);
7791 if (keys
|| vkeys
) {
7792 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7799 static void infoCommand(redisClient
*c
) {
7800 sds info
= genRedisInfoString();
7801 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7802 (unsigned long)sdslen(info
)));
7803 addReplySds(c
,info
);
7804 addReply(c
,shared
.crlf
);
7807 static void monitorCommand(redisClient
*c
) {
7808 /* ignore MONITOR if already slave or in monitor mode */
7809 if (c
->flags
& REDIS_SLAVE
) return;
7811 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7813 listAddNodeTail(server
.monitors
,c
);
7814 addReply(c
,shared
.ok
);
7817 /* ================================= Expire ================================= */
7818 static int removeExpire(redisDb
*db
, robj
*key
) {
7819 if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) {
7826 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7827 sds copy
= sdsdup(key
->ptr
);
7828 if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) {
7836 /* Return the expire time of the specified key, or -1 if no expire
7837 * is associated with this key (i.e. the key is non volatile) */
7838 static time_t getExpire(redisDb
*db
, robj
*key
) {
7841 /* No expire? return ASAP */
7842 if (dictSize(db
->expires
) == 0 ||
7843 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1;
7845 return (time_t) dictGetEntryVal(de
);
7848 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7852 /* No expire? return ASAP */
7853 if (dictSize(db
->expires
) == 0 ||
7854 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
7856 /* Lookup the expire */
7857 when
= (time_t) dictGetEntryVal(de
);
7858 if (time(NULL
) <= when
) return 0;
7860 /* Delete the key */
7862 server
.stat_expiredkeys
++;
7866 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7869 /* No expire? return ASAP */
7870 if (dictSize(db
->expires
) == 0 ||
7871 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
7873 /* Delete the key */
7875 server
.stat_expiredkeys
++;
7876 dictDelete(db
->expires
,key
->ptr
);
7877 return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
;
7880 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7884 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7888 de
= dictFind(c
->db
->dict
,key
->ptr
);
7890 addReply(c
,shared
.czero
);
7894 if (dbDelete(c
->db
,key
)) server
.dirty
++;
7895 addReply(c
, shared
.cone
);
7898 time_t when
= time(NULL
)+seconds
;
7899 if (setExpire(c
->db
,key
,when
)) {
7900 addReply(c
,shared
.cone
);
7903 addReply(c
,shared
.czero
);
7909 static void expireCommand(redisClient
*c
) {
7910 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7913 static void expireatCommand(redisClient
*c
) {
7914 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7917 static void ttlCommand(redisClient
*c
) {
7921 expire
= getExpire(c
->db
,c
->argv
[1]);
7923 ttl
= (int) (expire
-time(NULL
));
7924 if (ttl
< 0) ttl
= -1;
7926 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7929 /* ================================ MULTI/EXEC ============================== */
7931 /* Client state initialization for MULTI/EXEC */
7932 static void initClientMultiState(redisClient
*c
) {
7933 c
->mstate
.commands
= NULL
;
7934 c
->mstate
.count
= 0;
7937 /* Release all the resources associated with MULTI/EXEC state */
7938 static void freeClientMultiState(redisClient
*c
) {
7941 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7943 multiCmd
*mc
= c
->mstate
.commands
+j
;
7945 for (i
= 0; i
< mc
->argc
; i
++)
7946 decrRefCount(mc
->argv
[i
]);
7949 zfree(c
->mstate
.commands
);
7952 /* Add a new command into the MULTI commands queue */
7953 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7957 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7958 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7959 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7962 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7963 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7964 for (j
= 0; j
< c
->argc
; j
++)
7965 incrRefCount(mc
->argv
[j
]);
7969 static void multiCommand(redisClient
*c
) {
7970 if (c
->flags
& REDIS_MULTI
) {
7971 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7974 c
->flags
|= REDIS_MULTI
;
7975 addReply(c
,shared
.ok
);
7978 static void discardCommand(redisClient
*c
) {
7979 if (!(c
->flags
& REDIS_MULTI
)) {
7980 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7984 freeClientMultiState(c
);
7985 initClientMultiState(c
);
7986 c
->flags
&= (~REDIS_MULTI
);
7988 addReply(c
,shared
.ok
);
7991 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7992 * implememntation for more information. */
7993 static void execCommandReplicateMulti(redisClient
*c
) {
7994 struct redisCommand
*cmd
;
7995 robj
*multistring
= createStringObject("MULTI",5);
7997 cmd
= lookupCommand("multi");
7998 if (server
.appendonly
)
7999 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
8000 if (listLength(server
.slaves
))
8001 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
8002 decrRefCount(multistring
);
8005 static void execCommand(redisClient
*c
) {
8010 if (!(c
->flags
& REDIS_MULTI
)) {
8011 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
8015 /* Check if we need to abort the EXEC if some WATCHed key was touched.
8016 * A failed EXEC will return a multi bulk nil object. */
8017 if (c
->flags
& REDIS_DIRTY_CAS
) {
8018 freeClientMultiState(c
);
8019 initClientMultiState(c
);
8020 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
8022 addReply(c
,shared
.nullmultibulk
);
8026 /* Replicate a MULTI request now that we are sure the block is executed.
8027 * This way we'll deliver the MULTI/..../EXEC block as a whole and
8028 * both the AOF and the replication link will have the same consistency
8029 * and atomicity guarantees. */
8030 execCommandReplicateMulti(c
);
8032 /* Exec all the queued commands */
8033 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
8034 orig_argv
= c
->argv
;
8035 orig_argc
= c
->argc
;
8036 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
8037 for (j
= 0; j
< c
->mstate
.count
; j
++) {
8038 c
->argc
= c
->mstate
.commands
[j
].argc
;
8039 c
->argv
= c
->mstate
.commands
[j
].argv
;
8040 call(c
,c
->mstate
.commands
[j
].cmd
);
8042 c
->argv
= orig_argv
;
8043 c
->argc
= orig_argc
;
8044 freeClientMultiState(c
);
8045 initClientMultiState(c
);
8046 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
8047 /* Make sure the EXEC command is always replicated / AOF, since we
8048 * always send the MULTI command (we can't know beforehand if the
8049 * next operations will contain at least a modification to the DB). */
8053 /* =========================== Blocking Operations ========================= */
8055 /* Currently Redis blocking operations support is limited to list POP ops,
8056 * so the current implementation is not fully generic, but it is also not
8057 * completely specific so it will not require a rewrite to support new
8058 * kind of blocking operations in the future.
8060 * Still it's important to note that list blocking operations can be already
8061 * used as a notification mechanism in order to implement other blocking
8062 * operations at application level, so there must be a very strong evidence
8063 * of usefulness and generality before new blocking operations are implemented.
8065 * This is how the current blocking POP works, we use BLPOP as example:
8066 * - If the user calls BLPOP and the key exists and contains a non empty list
8067 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
8068 * if there is no need to block.
8069 * - If instead BLPOP is called and the key does not exist or the list is
8070 * empty we need to block. In order to do so we remove the notification for
8071 * new data to read in the client socket (so that we'll not serve new
8072 * requests if the blocking request is not served). Also we put the client
8073 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
8074 * blocking for this keys.
8075 * - If a PUSH operation against a key with blocked clients waiting is
8076 * performed, we serve the first in the list: basically instead of pushing
8077 * the new element inside the list we return it to the (first / oldest)
8078 * blocking client, unblock the client, and remove it from the list.
8080 * The above comment and the source code should be enough in order to understand
8081 * the implementation and modify / fix it later.
8084 /* Set a client in blocking mode for the specified key, with the specified
8086 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
8091 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
8092 c
->blocking_keys_num
= numkeys
;
8093 c
->blockingto
= timeout
;
8094 for (j
= 0; j
< numkeys
; j
++) {
8095 /* Add the key in the client structure, to map clients -> keys */
8096 c
->blocking_keys
[j
] = keys
[j
];
8097 incrRefCount(keys
[j
]);
8099 /* And in the other "side", to map keys -> clients */
8100 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
8104 /* For every key we take a list of clients blocked for it */
8106 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
8107 incrRefCount(keys
[j
]);
8108 assert(retval
== DICT_OK
);
8110 l
= dictGetEntryVal(de
);
8112 listAddNodeTail(l
,c
);
8114 /* Mark the client as a blocked client */
8115 c
->flags
|= REDIS_BLOCKED
;
8116 server
.blpop_blocked_clients
++;
8119 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
8120 static void unblockClientWaitingData(redisClient
*c
) {
8125 assert(c
->blocking_keys
!= NULL
);
8126 /* The client may wait for multiple keys, so unblock it for every key. */
8127 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
8128 /* Remove this client from the list of clients waiting for this key. */
8129 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8131 l
= dictGetEntryVal(de
);
8132 listDelNode(l
,listSearchKey(l
,c
));
8133 /* If the list is empty we need to remove it to avoid wasting memory */
8134 if (listLength(l
) == 0)
8135 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8136 decrRefCount(c
->blocking_keys
[j
]);
8138 /* Cleanup the client structure */
8139 zfree(c
->blocking_keys
);
8140 c
->blocking_keys
= NULL
;
8141 c
->flags
&= (~REDIS_BLOCKED
);
8142 server
.blpop_blocked_clients
--;
8143 /* We want to process data if there is some command waiting
8144 * in the input buffer. Note that this is safe even if
8145 * unblockClientWaitingData() gets called from freeClient() because
8146 * freeClient() will be smart enough to call this function
8147 * *after* c->querybuf was set to NULL. */
8148 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
8151 /* This should be called from any function PUSHing into lists.
8152 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
8153 * 'ele' is the element pushed.
8155 * If the function returns 0 there was no client waiting for a list push
8158 * If the function returns 1 there was a client waiting for a list push
8159 * against this key, the element was passed to this client thus it's not
8160 * needed to actually add it to the list and the caller should return asap. */
8161 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
8162 struct dictEntry
*de
;
8163 redisClient
*receiver
;
8167 de
= dictFind(c
->db
->blocking_keys
,key
);
8168 if (de
== NULL
) return 0;
8169 l
= dictGetEntryVal(de
);
8172 receiver
= ln
->value
;
8174 addReplySds(receiver
,sdsnew("*2\r\n"));
8175 addReplyBulk(receiver
,key
);
8176 addReplyBulk(receiver
,ele
);
8177 unblockClientWaitingData(receiver
);
8181 /* Blocking RPOP/LPOP */
8182 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
8187 for (j
= 1; j
< c
->argc
-1; j
++) {
8188 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
8190 if (o
->type
!= REDIS_LIST
) {
8191 addReply(c
,shared
.wrongtypeerr
);
8194 list
*list
= o
->ptr
;
8195 if (listLength(list
) != 0) {
8196 /* If the list contains elements fall back to the usual
8197 * non-blocking POP operation */
8198 robj
*argv
[2], **orig_argv
;
8201 /* We need to alter the command arguments before to call
8202 * popGenericCommand() as the command takes a single key. */
8203 orig_argv
= c
->argv
;
8204 orig_argc
= c
->argc
;
8205 argv
[1] = c
->argv
[j
];
8209 /* Also the return value is different, we need to output
8210 * the multi bulk reply header and the key name. The
8211 * "real" command will add the last element (the value)
8212 * for us. If this sounds like a hack to you it's just
8213 * because it is... */
8214 addReplySds(c
,sdsnew("*2\r\n"));
8215 addReplyBulk(c
,argv
[1]);
8216 popGenericCommand(c
,where
);
8218 /* Fix the client structure with the original stuff */
8219 c
->argv
= orig_argv
;
8220 c
->argc
= orig_argc
;
8226 /* If the list is empty or the key does not exists we must block */
8227 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
8228 if (timeout
> 0) timeout
+= time(NULL
);
8229 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
8232 static void blpopCommand(redisClient
*c
) {
8233 blockingPopGenericCommand(c
,REDIS_HEAD
);
8236 static void brpopCommand(redisClient
*c
) {
8237 blockingPopGenericCommand(c
,REDIS_TAIL
);
8240 /* =============================== Replication ============================= */
8242 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8243 ssize_t nwritten
, ret
= size
;
8244 time_t start
= time(NULL
);
8248 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
8249 nwritten
= write(fd
,ptr
,size
);
8250 if (nwritten
== -1) return -1;
8254 if ((time(NULL
)-start
) > timeout
) {
8262 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8263 ssize_t nread
, totread
= 0;
8264 time_t start
= time(NULL
);
8268 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
8269 nread
= read(fd
,ptr
,size
);
8270 if (nread
== -1) return -1;
8275 if ((time(NULL
)-start
) > timeout
) {
8283 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8290 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
8293 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
8304 static void syncCommand(redisClient
*c
) {
8305 /* ignore SYNC if already slave or in monitor mode */
8306 if (c
->flags
& REDIS_SLAVE
) return;
8308 /* SYNC can't be issued when the server has pending data to send to
8309 * the client about already issued commands. We need a fresh reply
8310 * buffer registering the differences between the BGSAVE and the current
8311 * dataset, so that we can copy to other slaves if needed. */
8312 if (listLength(c
->reply
) != 0) {
8313 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
8317 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
8318 /* Here we need to check if there is a background saving operation
8319 * in progress, or if it is required to start one */
8320 if (server
.bgsavechildpid
!= -1) {
8321 /* Ok a background save is in progress. Let's check if it is a good
8322 * one for replication, i.e. if there is another slave that is
8323 * registering differences since the server forked to save */
8328 listRewind(server
.slaves
,&li
);
8329 while((ln
= listNext(&li
))) {
8331 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
8334 /* Perfect, the server is already registering differences for
8335 * another slave. Set the right state, and copy the buffer. */
8336 listRelease(c
->reply
);
8337 c
->reply
= listDup(slave
->reply
);
8338 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8339 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
8341 /* No way, we need to wait for the next BGSAVE in order to
8342 * register differences */
8343 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8344 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
8347 /* Ok we don't have a BGSAVE in progress, let's start one */
8348 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
8349 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8350 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
8351 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
8354 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8357 c
->flags
|= REDIS_SLAVE
;
8359 listAddNodeTail(server
.slaves
,c
);
8363 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
8364 redisClient
*slave
= privdata
;
8366 REDIS_NOTUSED(mask
);
8367 char buf
[REDIS_IOBUF_LEN
];
8368 ssize_t nwritten
, buflen
;
8370 if (slave
->repldboff
== 0) {
8371 /* Write the bulk write count before to transfer the DB. In theory here
8372 * we don't know how much room there is in the output buffer of the
8373 * socket, but in practice SO_SNDLOWAT (the minimum count for output
8374 * operations) will never be smaller than the few bytes we need. */
8377 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
8379 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
8387 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
8388 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
8390 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
8391 (buflen
== 0) ? "premature EOF" : strerror(errno
));
8395 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
8396 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
8401 slave
->repldboff
+= nwritten
;
8402 if (slave
->repldboff
== slave
->repldbsize
) {
8403 close(slave
->repldbfd
);
8404 slave
->repldbfd
= -1;
8405 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8406 slave
->replstate
= REDIS_REPL_ONLINE
;
8407 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
8408 sendReplyToClient
, slave
) == AE_ERR
) {
8412 addReplySds(slave
,sdsempty());
8413 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
8417 /* This function is called at the end of every background saving.
8418 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
8419 * otherwise REDIS_ERR is passed to the function.
8421 * The goal of this function is to handle slaves waiting for a successful
8422 * background saving in order to perform non-blocking synchronization. */
8423 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
8425 int startbgsave
= 0;
8428 listRewind(server
.slaves
,&li
);
8429 while((ln
= listNext(&li
))) {
8430 redisClient
*slave
= ln
->value
;
8432 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8434 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8435 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8436 struct redis_stat buf
;
8438 if (bgsaveerr
!= REDIS_OK
) {
8440 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8443 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8444 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8446 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8449 slave
->repldboff
= 0;
8450 slave
->repldbsize
= buf
.st_size
;
8451 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8452 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8453 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8460 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8463 listRewind(server
.slaves
,&li
);
8464 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8465 while((ln
= listNext(&li
))) {
8466 redisClient
*slave
= ln
->value
;
8468 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8475 static int syncWithMaster(void) {
8476 char buf
[1024], tmpfile
[256], authcmd
[1024];
8478 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8479 int dfd
, maxtries
= 5;
8482 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8487 /* AUTH with the master if required. */
8488 if(server
.masterauth
) {
8489 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8490 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8492 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8496 /* Read the AUTH result. */
8497 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8499 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8503 if (buf
[0] != '+') {
8505 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8510 /* Issue the SYNC command */
8511 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8513 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8517 /* Read the bulk write count */
8518 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8520 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8524 if (buf
[0] != '$') {
8526 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8529 dumpsize
= strtol(buf
+1,NULL
,10);
8530 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8531 /* Read the bulk write data on a temp file */
8533 snprintf(tmpfile
,256,
8534 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8535 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8536 if (dfd
!= -1) break;
8541 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8545 int nread
, nwritten
;
8547 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8549 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8555 nwritten
= write(dfd
,buf
,nread
);
8556 if (nwritten
== -1) {
8557 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8565 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8566 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8572 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8573 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8577 server
.master
= createClient(fd
);
8578 server
.master
->flags
|= REDIS_MASTER
;
8579 server
.master
->authenticated
= 1;
8580 server
.replstate
= REDIS_REPL_CONNECTED
;
8584 static void slaveofCommand(redisClient
*c
) {
8585 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8586 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8587 if (server
.masterhost
) {
8588 sdsfree(server
.masterhost
);
8589 server
.masterhost
= NULL
;
8590 if (server
.master
) freeClient(server
.master
);
8591 server
.replstate
= REDIS_REPL_NONE
;
8592 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8595 sdsfree(server
.masterhost
);
8596 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8597 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8598 if (server
.master
) freeClient(server
.master
);
8599 server
.replstate
= REDIS_REPL_CONNECT
;
8600 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8601 server
.masterhost
, server
.masterport
);
8603 addReply(c
,shared
.ok
);
8606 /* ============================ Maxmemory directive ======================== */
8608 /* Try to free one object from the pre-allocated objects free list.
8609 * This is useful under low mem conditions as by default we take 1 million
8610 * free objects allocated. On success REDIS_OK is returned, otherwise
8612 static int tryFreeOneObjectFromFreelist(void) {
8615 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8616 if (listLength(server
.objfreelist
)) {
8617 listNode
*head
= listFirst(server
.objfreelist
);
8618 o
= listNodeValue(head
);
8619 listDelNode(server
.objfreelist
,head
);
8620 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8624 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8629 /* This function gets called when 'maxmemory' is set on the config file to limit
8630 * the max memory used by the server, and we are out of memory.
8631 * This function will try to, in order:
8633 * - Free objects from the free list
8634 * - Try to remove keys with an EXPIRE set
8636 * It is not possible to free enough memory to reach used-memory < maxmemory
8637 * the server will start refusing commands that will enlarge even more the
8640 static void freeMemoryIfNeeded(void) {
8641 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8642 int j
, k
, freed
= 0;
8644 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8645 for (j
= 0; j
< server
.dbnum
; j
++) {
8647 robj
*minkey
= NULL
;
8648 struct dictEntry
*de
;
8650 if (dictSize(server
.db
[j
].expires
)) {
8652 /* From a sample of three keys drop the one nearest to
8653 * the natural expire */
8654 for (k
= 0; k
< 3; k
++) {
8657 de
= dictGetRandomKey(server
.db
[j
].expires
);
8658 t
= (time_t) dictGetEntryVal(de
);
8659 if (minttl
== -1 || t
< minttl
) {
8660 minkey
= dictGetEntryKey(de
);
8664 dbDelete(server
.db
+j
,minkey
);
8667 if (!freed
) return; /* nothing to free... */
8671 /* ============================== Append Only file ========================== */
8673 /* Called when the user switches from "appendonly yes" to "appendonly no"
8674 * at runtime using the CONFIG command. */
8675 static void stopAppendOnly(void) {
8676 flushAppendOnlyFile();
8677 aof_fsync(server
.appendfd
);
8678 close(server
.appendfd
);
8680 server
.appendfd
= -1;
8681 server
.appendseldb
= -1;
8682 server
.appendonly
= 0;
8683 /* rewrite operation in progress? kill it, wait child exit */
8684 if (server
.bgsavechildpid
!= -1) {
8687 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8688 wait3(&statloc
,0,NULL
);
8689 /* reset the buffer accumulating changes while the child saves */
8690 sdsfree(server
.bgrewritebuf
);
8691 server
.bgrewritebuf
= sdsempty();
8692 server
.bgsavechildpid
= -1;
8696 /* Called when the user switches from "appendonly no" to "appendonly yes"
8697 * at runtime using the CONFIG command. */
8698 static int startAppendOnly(void) {
8699 server
.appendonly
= 1;
8700 server
.lastfsync
= time(NULL
);
8701 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8702 if (server
.appendfd
== -1) {
8703 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8706 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8707 server
.appendonly
= 0;
8708 close(server
.appendfd
);
8709 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8715 /* Write the append only file buffer on disk.
8717 * Since we are required to write the AOF before replying to the client,
8718 * and the only way the client socket can get a write is entering when the
8719 * the event loop, we accumulate all the AOF writes in a memory
8720 * buffer and write it on disk using this function just before entering
8721 * the event loop again. */
8722 static void flushAppendOnlyFile(void) {
8726 if (sdslen(server
.aofbuf
) == 0) return;
8728 /* We want to perform a single write. This should be guaranteed atomic
8729 * at least if the filesystem we are writing is a real physical one.
8730 * While this will save us against the server being killed I don't think
8731 * there is much to do about the whole server stopping for power problems
8733 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8734 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8735 /* Ooops, we are in troubles. The best thing to do for now is
8736 * aborting instead of giving the illusion that everything is
8737 * working as expected. */
8738 if (nwritten
== -1) {
8739 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8741 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8745 sdsfree(server
.aofbuf
);
8746 server
.aofbuf
= sdsempty();
8748 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8749 * childs performing heavy I/O on disk. */
8750 if (server
.no_appendfsync_on_rewrite
&&
8751 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8753 /* Fsync if needed */
8755 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8756 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8757 now
-server
.lastfsync
> 1))
8759 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8760 * flushing metadata. */
8761 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8762 server
.lastfsync
= now
;
8766 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8768 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8769 for (j
= 0; j
< argc
; j
++) {
8770 robj
*o
= getDecodedObject(argv
[j
]);
8771 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8772 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8773 buf
= sdscatlen(buf
,"\r\n",2);
8779 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8784 /* Make sure we can use strtol */
8785 seconds
= getDecodedObject(seconds
);
8786 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8787 decrRefCount(seconds
);
8789 argv
[0] = createStringObject("EXPIREAT",8);
8791 argv
[2] = createObject(REDIS_STRING
,
8792 sdscatprintf(sdsempty(),"%ld",when
));
8793 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8794 decrRefCount(argv
[0]);
8795 decrRefCount(argv
[2]);
8799 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8800 sds buf
= sdsempty();
8803 /* The DB this command was targetting is not the same as the last command
8804 * we appendend. To issue a SELECT command is needed. */
8805 if (dictid
!= server
.appendseldb
) {
8808 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8809 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8810 (unsigned long)strlen(seldb
),seldb
);
8811 server
.appendseldb
= dictid
;
8814 if (cmd
->proc
== expireCommand
) {
8815 /* Translate EXPIRE into EXPIREAT */
8816 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8817 } else if (cmd
->proc
== setexCommand
) {
8818 /* Translate SETEX to SET and EXPIREAT */
8819 tmpargv
[0] = createStringObject("SET",3);
8820 tmpargv
[1] = argv
[1];
8821 tmpargv
[2] = argv
[3];
8822 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8823 decrRefCount(tmpargv
[0]);
8824 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8826 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8829 /* Append to the AOF buffer. This will be flushed on disk just before
8830 * of re-entering the event loop, so before the client will get a
8831 * positive reply about the operation performed. */
8832 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8834 /* If a background append only file rewriting is in progress we want to
8835 * accumulate the differences between the child DB and the current one
8836 * in a buffer, so that when the child process will do its work we
8837 * can append the differences to the new append only file. */
8838 if (server
.bgrewritechildpid
!= -1)
8839 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8844 /* In Redis commands are always executed in the context of a client, so in
8845 * order to load the append only file we need to create a fake client. */
8846 static struct redisClient
*createFakeClient(void) {
8847 struct redisClient
*c
= zmalloc(sizeof(*c
));
8851 c
->querybuf
= sdsempty();
8855 /* We set the fake client as a slave waiting for the synchronization
8856 * so that Redis will not try to send replies to this client. */
8857 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8858 c
->reply
= listCreate();
8859 listSetFreeMethod(c
->reply
,decrRefCount
);
8860 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8861 initClientMultiState(c
);
8865 static void freeFakeClient(struct redisClient
*c
) {
8866 sdsfree(c
->querybuf
);
8867 listRelease(c
->reply
);
8868 freeClientMultiState(c
);
8872 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8873 * error (the append only file is zero-length) REDIS_ERR is returned. On
8874 * fatal error an error message is logged and the program exists. */
8875 int loadAppendOnlyFile(char *filename
) {
8876 struct redisClient
*fakeClient
;
8877 FILE *fp
= fopen(filename
,"r");
8878 struct redis_stat sb
;
8879 int appendonly
= server
.appendonly
;
8881 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8885 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8889 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8890 * to the same file we're about to read. */
8891 server
.appendonly
= 0;
8893 fakeClient
= createFakeClient();
8900 struct redisCommand
*cmd
;
8903 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8909 if (buf
[0] != '*') goto fmterr
;
8911 argv
= zmalloc(sizeof(robj
*)*argc
);
8912 for (j
= 0; j
< argc
; j
++) {
8913 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8914 if (buf
[0] != '$') goto fmterr
;
8915 len
= strtol(buf
+1,NULL
,10);
8916 argsds
= sdsnewlen(NULL
,len
);
8917 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8918 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8919 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8922 /* Command lookup */
8923 cmd
= lookupCommand(argv
[0]->ptr
);
8925 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8928 /* Try object encoding */
8929 if (cmd
->flags
& REDIS_CMD_BULK
)
8930 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8931 /* Run the command in the context of a fake client */
8932 fakeClient
->argc
= argc
;
8933 fakeClient
->argv
= argv
;
8934 cmd
->proc(fakeClient
);
8935 /* Discard the reply objects list from the fake client */
8936 while(listLength(fakeClient
->reply
))
8937 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8938 /* Clean up, ready for the next command */
8939 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8941 /* Handle swapping while loading big datasets when VM is on */
8943 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
8946 if (server
.vm_enabled
&& force_swapout
) {
8947 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8948 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8953 /* This point can only be reached when EOF is reached without errors.
8954 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8955 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8958 freeFakeClient(fakeClient
);
8959 server
.appendonly
= appendonly
;
8964 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8966 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8970 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
/* Write the binary-safe string 's' of 'len' bytes into 'fp' using the bulk
 * format: $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char header[128];
    int hlen;

    /* Build the "$<count>\r\n" prefix by hand. */
    header[0] = '$';
    hlen = 1+ll2string(header+1,sizeof(header)-1,len);
    header[hlen++] = '\r';
    header[hlen++] = '\n';

    if (fwrite(header,hlen,1,fp) == 0) return 0;
    /* An empty payload is skipped: only the trailing CRLF follows. */
    if (len > 0) {
        if (fwrite(s,len,1,fp) == 0) return 0;
    }
    return (fwrite("\r\n",2,1,fp) == 0) ? 0 : 1;
}
/* Write the double 'd', formatted with "%.17g", into 'fp' using the bulk
 * format: $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen, hdrlen;

    /* The payload already carries its trailing CRLF, which is not part of
     * the advertised count (hence the -2). */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    hdrlen = strlen(header);

    if (fwrite(header,hdrlen,1,fp) == 0) return 0;
    if (fwrite(payload,paylen,1,fp) == 0) return 0;
    return 1;
}
/* Write the integer 'l' into 'fp' using the bulk format:
 * $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLongLong(FILE *fp, long long l) {
    char digits[128], out[128];
    unsigned int ndigits, outlen;

    ndigits = ll2string(digits,32,l);
    outlen = snprintf(out,sizeof(out),"$%u\r\n%s\r\n",ndigits,digits);
    return (fwrite(out,outlen,1,fp) == 0) ? 0 : 1;
}
9010 /* Delegate writing an object to writing a bulk string or bulk long long. */
9011 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
9012 /* Avoid using getDecodedObject to help copy-on-write (we are often
9013 * in a child process when this function is called). */
9014 if (obj
->encoding
== REDIS_ENCODING_INT
) {
9015 return fwriteBulkLongLong(fp
,(long)obj
->ptr
);
9016 } else if (obj
->encoding
== REDIS_ENCODING_RAW
) {
9017 return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
9019 redisPanic("Unknown string encoding");
9023 /* Write a sequence of commands able to fully rebuild the dataset into
9024 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
9025 static int rewriteAppendOnlyFile(char *filename
) {
9026 dictIterator
*di
= NULL
;
9031 time_t now
= time(NULL
);
9033 /* Note that we have to use a different temp name here compared to the
9034 * one used by rewriteAppendOnlyFileBackground() function. */
9035 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
9036 fp
= fopen(tmpfile
,"w");
9038 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
9041 for (j
= 0; j
< server
.dbnum
; j
++) {
9042 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
9043 redisDb
*db
= server
.db
+j
;
9045 if (dictSize(d
) == 0) continue;
9046 di
= dictGetIterator(d
);
9052 /* SELECT the new DB */
9053 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
9054 if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
;
9056 /* Iterate this DB writing every entry */
9057 while((de
= dictNext(di
)) != NULL
) {
9058 sds keystr
= dictGetEntryKey(de
);
9063 keystr
= dictGetEntryKey(de
);
9064 o
= dictGetEntryVal(de
);
9065 initStaticStringObject(key
,keystr
);
9066 /* If the value for this key is swapped, load a preview in memory.
9067 * We use a "swapped" flag to remember if we need to free the
9068 * value object instead to just increment the ref count anyway
9069 * in order to avoid copy-on-write of pages if we are forked() */
9070 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
9071 o
->storage
== REDIS_VM_SWAPPING
) {
9074 o
= vmPreviewObject(o
);
9077 expiretime
= getExpire(db
,&key
);
9079 /* Save the key and associated value */
9080 if (o
->type
== REDIS_STRING
) {
9081 /* Emit a SET command */
9082 char cmd
[]="*3\r\n$3\r\nSET\r\n";
9083 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9085 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9086 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
9087 } else if (o
->type
== REDIS_LIST
) {
9088 /* Emit the RPUSHes needed to rebuild the list */
9089 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
9090 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
9091 unsigned char *zl
= o
->ptr
;
9092 unsigned char *p
= ziplistIndex(zl
,0);
9093 unsigned char *vstr
;
9097 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
9098 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9099 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9101 if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0)
9104 if (fwriteBulkLongLong(fp
,vlong
) == 0)
9107 p
= ziplistNext(zl
,p
);
9109 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
9110 list
*list
= o
->ptr
;
9114 listRewind(list
,&li
);
9115 while((ln
= listNext(&li
))) {
9116 robj
*eleobj
= listNodeValue(ln
);
9118 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9119 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9120 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9123 redisPanic("Unknown list encoding");
9125 } else if (o
->type
== REDIS_SET
) {
9126 /* Emit the SADDs needed to rebuild the set */
9128 dictIterator
*di
= dictGetIterator(set
);
9131 while((de
= dictNext(di
)) != NULL
) {
9132 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
9133 robj
*eleobj
= dictGetEntryKey(de
);
9135 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9136 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9137 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9139 dictReleaseIterator(di
);
9140 } else if (o
->type
== REDIS_ZSET
) {
9141 /* Emit the ZADDs needed to rebuild the sorted set */
9143 dictIterator
*di
= dictGetIterator(zs
->dict
);
9146 while((de
= dictNext(di
)) != NULL
) {
9147 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
9148 robj
*eleobj
= dictGetEntryKey(de
);
9149 double *score
= dictGetEntryVal(de
);
9151 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9152 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9153 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
9154 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9156 dictReleaseIterator(di
);
9157 } else if (o
->type
== REDIS_HASH
) {
9158 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
9160 /* Emit the HSETs needed to rebuild the hash */
9161 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9162 unsigned char *p
= zipmapRewind(o
->ptr
);
9163 unsigned char *field
, *val
;
9164 unsigned int flen
, vlen
;
9166 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
9167 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9168 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9169 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
9171 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
9175 dictIterator
*di
= dictGetIterator(o
->ptr
);
9178 while((de
= dictNext(di
)) != NULL
) {
9179 robj
*field
= dictGetEntryKey(de
);
9180 robj
*val
= dictGetEntryVal(de
);
9182 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9183 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9184 if (fwriteBulkObject(fp
,field
) == -1) return -1;
9185 if (fwriteBulkObject(fp
,val
) == -1) return -1;
9187 dictReleaseIterator(di
);
9190 redisPanic("Unknown object type");
9192 /* Save the expire time */
9193 if (expiretime
!= -1) {
9194 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
9195 /* If this key is already expired skip it */
9196 if (expiretime
< now
) continue;
9197 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9198 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9199 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
;
9201 if (swapped
) decrRefCount(o
);
9203 dictReleaseIterator(di
);
9206 /* Make sure data will not remain on the OS's output buffers */
9208 aof_fsync(fileno(fp
));
9211 /* Use RENAME to make sure the DB file is changed atomically only
9212 * if the generate DB file is ok. */
9213 if (rename(tmpfile
,filename
) == -1) {
9214 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
9218 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
9224 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
9225 if (di
) dictReleaseIterator(di
);
9229 /* This is how rewriting of the append only file in background works:
9231 * 1) The user calls BGREWRITEAOF
9232 * 2) Redis calls this function, that forks():
9233 * 2a) the child rewrites the append only file in a temp file.
9234 * 2b) the parent accumulates differences in server.bgrewritebuf.
9235 * 3) When the child has finished '2a' it exits.
9236 * 4) The parent will trap the exit code, if it's OK, will append the
9237 * data accumulated into server.bgrewritebuf into the temp file, and
9238 * finally will rename(2) the temp file in the actual file name.
9239 * Then the new file is reopened as the new append only file. Profit!
9241 static int rewriteAppendOnlyFileBackground(void) {
9244 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
9245 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
9246 if ((childpid
= fork()) == 0) {
9250 if (server
.vm_enabled
) vmReopenSwapFile();
9252 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
9253 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
9260 if (childpid
== -1) {
9261 redisLog(REDIS_WARNING
,
9262 "Can't rewrite append only file in background: fork: %s",
9266 redisLog(REDIS_NOTICE
,
9267 "Background append only file rewriting started by pid %d",childpid
);
9268 server
.bgrewritechildpid
= childpid
;
9269 updateDictResizePolicy();
9270 /* We set appendseldb to -1 in order to force the next call to the
9271 * feedAppendOnlyFile() to issue a SELECT command, so the differences
9272 * accumulated by the parent into server.bgrewritebuf will start
9273 * with a SELECT statement and it will be safe to merge. */
9274 server
.appendseldb
= -1;
9277 return REDIS_OK
; /* unreached */
9280 static void bgrewriteaofCommand(redisClient
*c
) {
9281 if (server
.bgrewritechildpid
!= -1) {
9282 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
9285 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
9286 char *status
= "+Background append only file rewriting started\r\n";
9287 addReplySds(c
,sdsnew(status
));
9289 addReply(c
,shared
.err
);
/* Remove the temp file "temp-rewriteaof-bg-<pid>.aof", i.e. the file name
 * used by the background rewriting child with pid 'childpid'. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
9300 /* Virtual Memory is composed mainly of two subsystems:
9301 * - Blocking Virtual Memory
9302 * - Threaded Virtual Memory I/O
9303 * The two parts are not fully decoupled, but functions are split among two
9304 * different sections of the source code (delimited by comments) in order to
9305 * make more clear what functionality is about the blocking VM and what about
9306 * the threaded (not blocking) VM.
9310 * Redis VM is a blocking VM (one that blocks reading swapped values from
9311 * disk into memory when a value swapped out is needed in memory) that is made
9312 * unblocking by trying to examine the command argument vector in order to
9313 * load in background values that will likely be needed in order to exec
9314 * the command. The command is executed only once all the relevant keys
9315 * are loaded into memory.
9317 * This is basically almost as simple as a blocking VM, but almost as parallel
9318 * as a fully non-blocking VM.
9321 /* =================== Virtual Memory - Blocking Side ====================== */
9323 /* Create a VM pointer object. This kind of objects are used in place of
9324 * values in the key -> value hash table, for swapped out objects. */
9325 static vmpointer
*createVmPointer(int vtype
) {
9326 vmpointer
*vp
= zmalloc(sizeof(vmpointer
));
9328 vp
->type
= REDIS_VMPOINTER
;
9329 vp
->storage
= REDIS_VM_SWAPPED
;
9334 static void vmInit(void) {
9340 if (server
.vm_max_threads
!= 0)
9341 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
9343 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
9344 /* Try to open the old swap file, otherwise create it */
9345 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
9346 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
9348 if (server
.vm_fp
== NULL
) {
9349 redisLog(REDIS_WARNING
,
9350 "Can't open the swap file: %s. Exiting.",
9354 server
.vm_fd
= fileno(server
.vm_fp
);
9355 /* Lock the swap file for writing, this is useful in order to avoid
9356 * another instance to use the same swap file for a config error. */
9357 fl
.l_type
= F_WRLCK
;
9358 fl
.l_whence
= SEEK_SET
;
9359 fl
.l_start
= fl
.l_len
= 0;
9360 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
9361 redisLog(REDIS_WARNING
,
9362 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
9366 server
.vm_next_page
= 0;
9367 server
.vm_near_pages
= 0;
9368 server
.vm_stats_used_pages
= 0;
9369 server
.vm_stats_swapped_objects
= 0;
9370 server
.vm_stats_swapouts
= 0;
9371 server
.vm_stats_swapins
= 0;
9372 totsize
= server
.vm_pages
*server
.vm_page_size
;
9373 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
9374 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
9375 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
9379 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
9381 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
9382 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
9383 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
9384 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
9386 /* Initialize threaded I/O (used by Virtual Memory) */
9387 server
.io_newjobs
= listCreate();
9388 server
.io_processing
= listCreate();
9389 server
.io_processed
= listCreate();
9390 server
.io_ready_clients
= listCreate();
9391 pthread_mutex_init(&server
.io_mutex
,NULL
);
9392 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
9393 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
9394 server
.io_active_threads
= 0;
9395 if (pipe(pipefds
) == -1) {
9396 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
9400 server
.io_ready_pipe_read
= pipefds
[0];
9401 server
.io_ready_pipe_write
= pipefds
[1];
9402 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
9403 /* LZF requires a lot of stack */
9404 pthread_attr_init(&server
.io_threads_attr
);
9405 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
9406 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
9407 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
9408 /* Listen for events in the threaded I/O pipe */
9409 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
9410 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
9411 oom("creating file event");
9414 /* Mark the page as used */
9415 static void vmMarkPageUsed(off_t page
) {
9416 off_t byte
= page
/8;
9418 redisAssert(vmFreePage(page
) == 1);
9419 server
.vm_bitmap
[byte
] |= 1<<bit
;
9422 /* Mark N contiguous pages as used, with 'page' being the first. */
9423 static void vmMarkPagesUsed(off_t page
, off_t count
) {
9426 for (j
= 0; j
< count
; j
++)
9427 vmMarkPageUsed(page
+j
);
9428 server
.vm_stats_used_pages
+= count
;
9429 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
9430 (long long)count
, (long long)page
);
9433 /* Mark the page as free */
9434 static void vmMarkPageFree(off_t page
) {
9435 off_t byte
= page
/8;
9437 redisAssert(vmFreePage(page
) == 0);
9438 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
9441 /* Mark N contiguous pages as free, with 'page' being the first. */
9442 static void vmMarkPagesFree(off_t page
, off_t count
) {
9445 for (j
= 0; j
< count
; j
++)
9446 vmMarkPageFree(page
+j
);
9447 server
.vm_stats_used_pages
-= count
;
9448 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
9449 (long long)count
, (long long)page
);
9452 /* Test if the page is free */
9453 static int vmFreePage(off_t page
) {
9454 off_t byte
= page
/8;
9456 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9459 /* Find N contiguous free pages storing the first page of the cluster in *first.
9460 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9461 * REDIS_ERR is returned.
9463 * This function uses a simple algorithm: we try to allocate
9464 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9465 * again from the start of the swap file searching for free spaces.
9467 * If it looks pretty clear that there are no free pages near our offset
9468 * we try to find less populated places doing a forward jump of
9469 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9470 * without hurry, and then we jump again and so forth...
9472 * This function can be improved using a free list to avoid to guess
9473 * too much, since we could collect data about freed pages.
9475 * note: I implemented this function just after watching an episode of
9476 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9478 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9479 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9481 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9482 server
.vm_near_pages
= 0;
9483 server
.vm_next_page
= 0;
9485 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9486 base
= server
.vm_next_page
;
9488 while(offset
< server
.vm_pages
) {
9489 off_t
this = base
+offset
;
9491 /* If we overflow, restart from page zero */
9492 if (this >= server
.vm_pages
) {
9493 this -= server
.vm_pages
;
9495 /* Just overflowed, what we found on tail is no longer
9496 * interesting, as it's no longer contiguous. */
9500 if (vmFreePage(this)) {
9501 /* This is a free page */
9503 /* Already got N free pages? Return to the caller, with success */
9505 *first
= this-(n
-1);
9506 server
.vm_next_page
= this+1;
9507 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9511 /* The current one is not a free page */
9515 /* Fast-forward if the current page is not free and we already
9516 * searched enough near this place. */
9518 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9519 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9521 /* Note that even if we rewind after the jump, we are don't need
9522 * to make sure numfree is set to zero as we only jump *if* it
9523 * is set to zero. */
9525 /* Otherwise just check the next page */
9532 /* Write the specified object at the specified page of the swap file */
9533 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9534 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9535 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9536 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9537 redisLog(REDIS_WARNING
,
9538 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9542 rdbSaveObject(server
.vm_fp
,o
);
9543 fflush(server
.vm_fp
);
9544 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9548 /* Transfers the 'val' object to disk. Store all the information
9549 * a 'vmpointer' object containing all the information needed to load the
9550 * object back later is returned.
9552 * If we can't find enough contiguous empty pages to swap the object on disk
9553 * NULL is returned. */
9554 static vmpointer
*vmSwapObjectBlocking(robj
*val
) {
9555 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9559 assert(val
->storage
== REDIS_VM_MEMORY
);
9560 assert(val
->refcount
== 1);
9561 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
;
9562 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
;
9564 vp
= createVmPointer(val
->type
);
9566 vp
->usedpages
= pages
;
9567 decrRefCount(val
); /* Deallocate the object from memory. */
9568 vmMarkPagesUsed(page
,pages
);
9569 redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)",
9571 (unsigned long long) page
, (unsigned long long) pages
);
9572 server
.vm_stats_swapped_objects
++;
9573 server
.vm_stats_swapouts
++;
9577 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9580 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9581 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9582 redisLog(REDIS_WARNING
,
9583 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9587 o
= rdbLoadObject(type
,server
.vm_fp
);
9589 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9592 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9596 /* Load the specified object from swap to memory.
9597 * The newly allocated object is returned.
9599 * If preview is true the unserialized object is returned to the caller but
9600 * the pages are not marked as freed, nor the vp object is freed. */
9601 static robj
*vmGenericLoadObject(vmpointer
*vp
, int preview
) {
9604 redisAssert(vp
->type
== REDIS_VMPOINTER
&&
9605 (vp
->storage
== REDIS_VM_SWAPPED
|| vp
->storage
== REDIS_VM_LOADING
));
9606 val
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
);
9608 redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
);
9609 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9611 server
.vm_stats_swapped_objects
--;
9613 redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
);
9615 server
.vm_stats_swapins
++;
9619 /* Plain object loading, from swap to memory.
9621 * 'o' is actually a redisVmPointer structure that will be freed by the call.
9622 * The return value is the loaded object. */
9623 static robj
*vmLoadObject(robj
*o
) {
9624 /* If we are loading the object in background, stop it, we
9625 * need to load this object synchronously ASAP. */
9626 if (o
->storage
== REDIS_VM_LOADING
)
9627 vmCancelThreadedIOJob(o
);
9628 return vmGenericLoadObject((vmpointer
*)o
,0);
9631 /* Just load the value on disk, without to modify the key.
9632 * This is useful when we want to perform some operation on the value
9633 * without to really bring it from swap to memory, like while saving the
9634 * dataset or rewriting the append only log. */
9635 static robj
*vmPreviewObject(robj
*o
) {
9636 return vmGenericLoadObject((vmpointer
*)o
,1);
9639 /* How a good candidate is this object for swapping?
9640 * The better candidate it is, the greater the returned value.
9642 * Currently we try to perform a fast estimation of the object size in
9643 * memory, and combine it with aging informations.
9645 * Basically swappability = idle-time * log(estimated size)
9647 * Bigger objects are preferred over smaller objects, but not
9648 * proportionally, this is why we use the logarithm. This algorithm is
9649 * just a first try and will probably be tuned later. */
9650 static double computeObjectSwappability(robj
*o
) {
9651 /* actual age can be >= minage, but not < minage. As we use wrapping
9652 * 21 bit clocks with minutes resolution for the LRU. */
9653 time_t minage
= abs(server
.lruclock
- o
->lru
);
9657 struct dictEntry
*de
;
9660 if (minage
<= 0) return 0;
9663 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9666 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9671 listNode
*ln
= listFirst(l
);
9673 asize
= sizeof(list
);
9675 robj
*ele
= ln
->value
;
9678 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9679 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9680 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9685 z
= (o
->type
== REDIS_ZSET
);
9686 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9688 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9689 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9694 de
= dictGetRandomKey(d
);
9695 ele
= dictGetEntryKey(de
);
9696 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9697 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9698 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9699 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9703 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9704 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9705 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9706 unsigned int klen
, vlen
;
9707 unsigned char *key
, *val
;
9709 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9713 asize
= len
*(klen
+vlen
+3);
9714 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9716 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9721 de
= dictGetRandomKey(d
);
9722 ele
= dictGetEntryKey(de
);
9723 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9724 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9725 ele
= dictGetEntryVal(de
);
9726 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9727 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9728 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9733 return (double)minage
*log(1+asize
);
9736 /* Try to swap an object that's a good candidate for swapping.
9737 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9738 * to swap any object at all.
9740 * If 'usethreads' is true, Redis will try to swap the object in background
9741 * using I/O threads. */
9742 static int vmSwapOneObject(int usethreads
) {
9744 struct dictEntry
*best
= NULL
;
9745 double best_swappability
= 0;
9746 redisDb
*best_db
= NULL
;
9750 for (j
= 0; j
< server
.dbnum
; j
++) {
9751 redisDb
*db
= server
.db
+j
;
9752 /* Why is maxtries set to 100?
9753 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9754 * are swappable objects */
9757 if (dictSize(db
->dict
) == 0) continue;
9758 for (i
= 0; i
< 5; i
++) {
9760 double swappability
;
9762 if (maxtries
) maxtries
--;
9763 de
= dictGetRandomKey(db
->dict
);
9764 val
= dictGetEntryVal(de
);
9765 /* Only swap objects that are currently in memory.
9767 * Also don't swap shared objects: not a good idea in general and
9768 * we need to ensure that the main thread does not touch the
9769 * object while the I/O thread is using it, but we can't
9770 * control other keys without adding additional mutex. */
9771 if (val
->storage
!= REDIS_VM_MEMORY
|| val
->refcount
!= 1) {
9772 if (maxtries
) i
--; /* don't count this try */
9775 swappability
= computeObjectSwappability(val
);
9776 if (!best
|| swappability
> best_swappability
) {
9778 best_swappability
= swappability
;
9783 if (best
== NULL
) return REDIS_ERR
;
9784 key
= dictGetEntryKey(best
);
9785 val
= dictGetEntryVal(best
);
9787 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9788 key
, best_swappability
);
9792 robj
*keyobj
= createStringObject(key
,sdslen(key
));
9793 vmSwapObjectThreaded(keyobj
,val
,best_db
);
9794 decrRefCount(keyobj
);
9799 if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
9800 dictGetEntryVal(best
) = vp
;
/* Swap out one object without using the I/O threads.
 *
 * Thin wrapper around vmSwapOneObject() with 'usethreads' set to 0; the
 * result of that call (REDIS_OK / REDIS_ERR) is returned as is.
 * The parameter list is spelled (void) so the definition is a real
 * prototype, like the other zero-argument helpers in this file. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object asking for the I/O threads to be used.
 *
 * Thin wrapper around vmSwapOneObject() with 'usethreads' set to 1; the
 * result of that call (REDIS_OK / REDIS_ERR) is returned as is.
 * The parameter list is spelled (void) so the definition is a real
 * prototype, like the other zero-argument helpers in this file. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9816 /* Return true if it's safe to swap out objects in a given moment.
9817 * Basically we don't want to swap objects out while there is a BGSAVE
9818 * or a BGAEOREWRITE running in backgroud. */
9819 static int vmCanSwapOut(void) {
9820 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9823 /* =================== Virtual Memory - Threaded I/O ======================= */
9825 static void freeIOJob(iojob
*j
) {
9826 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9827 j
->type
== REDIS_IOJOB_DO_SWAP
||
9828 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9830 /* we fix the storage type, otherwise decrRefCount() will try to
9831 * kill the I/O thread job (that no longer exists). */
9832 if (j
->val
->storage
== REDIS_VM_SWAPPING
)
9833 j
->val
->storage
= REDIS_VM_MEMORY
;
9834 decrRefCount(j
->val
);
9836 decrRefCount(j
->key
);
9840 /* Every time a thread finished a Job, it writes a byte into the write side
9841 * of an unix pipe in order to "awake" the main thread, and this function
9843 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9847 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9849 REDIS_NOTUSED(mask
);
9850 REDIS_NOTUSED(privdata
);
9852 /* For every byte we read in the read side of the pipe, there is one
9853 * I/O job completed to process. */
9854 while((retval
= read(fd
,buf
,1)) == 1) {
9857 struct dictEntry
*de
;
9859 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9861 /* Get the processed element (the oldest one) */
9863 assert(listLength(server
.io_processed
) != 0);
9864 if (toprocess
== -1) {
9865 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9866 if (toprocess
<= 0) toprocess
= 1;
9868 ln
= listFirst(server
.io_processed
);
9870 listDelNode(server
.io_processed
,ln
);
9872 /* If this job is marked as canceled, just ignore it */
9877 /* Post process it in the main thread, as there are things we
9878 * can do just here to avoid race conditions and/or invasive locks */
9879 redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
);
9880 de
= dictFind(j
->db
->dict
,j
->key
->ptr
);
9881 redisAssert(de
!= NULL
);
9882 if (j
->type
== REDIS_IOJOB_LOAD
) {
9884 vmpointer
*vp
= dictGetEntryVal(de
);
9886 /* Key loaded, bring it at home */
9887 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9888 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9889 (unsigned char*) j
->key
->ptr
);
9890 server
.vm_stats_swapped_objects
--;
9891 server
.vm_stats_swapins
++;
9892 dictGetEntryVal(de
) = j
->val
;
9893 incrRefCount(j
->val
);
9895 /* Handle clients waiting for this key to be loaded. */
9896 handleClientsBlockedOnSwappedKey(db
,j
->key
);
9899 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9900 /* Now we know the amount of pages required to swap this object.
9901 * Let's find some space for it, and queue this task again
9902 * rebranded as REDIS_IOJOB_DO_SWAP. */
9903 if (!vmCanSwapOut() ||
9904 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9906 /* Ooops... no space or we can't swap as there is
9907 * a fork()ed Redis trying to save stuff on disk. */
9908 j
->val
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9911 /* Note that we need to mark these pages as used now,
9912 * if the job will be canceled, we'll mark them as freed
9914 vmMarkPagesUsed(j
->page
,j
->pages
);
9915 j
->type
= REDIS_IOJOB_DO_SWAP
;
9920 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9923 /* Key swapped. We can finally free some memory. */
9924 if (j
->val
->storage
!= REDIS_VM_SWAPPING
) {
9925 vmpointer
*vp
= (vmpointer
*) j
->id
;
9926 printf("storage: %d\n",vp
->storage
);
9927 printf("key->name: %s\n",(char*)j
->key
->ptr
);
9928 printf("val: %p\n",(void*)j
->val
);
9929 printf("val->type: %d\n",j
->val
->type
);
9930 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9932 redisAssert(j
->val
->storage
== REDIS_VM_SWAPPING
);
9933 vp
= createVmPointer(j
->val
->type
);
9935 vp
->usedpages
= j
->pages
;
9936 dictGetEntryVal(de
) = vp
;
9937 /* Fix the storage otherwise decrRefCount will attempt to
9938 * remove the associated I/O job */
9939 j
->val
->storage
= REDIS_VM_MEMORY
;
9940 decrRefCount(j
->val
);
9941 redisLog(REDIS_DEBUG
,
9942 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9943 (unsigned char*) j
->key
->ptr
,
9944 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9945 server
.vm_stats_swapped_objects
++;
9946 server
.vm_stats_swapouts
++;
9948 /* Put a few more swap requests in queue if we are still
9950 if (trytoswap
&& vmCanSwapOut() &&
9951 zmalloc_used_memory() > server
.vm_max_memory
)
9956 more
= listLength(server
.io_newjobs
) <
9957 (unsigned) server
.vm_max_threads
;
9959 /* Don't waste CPU time if swappable objects are rare. */
9960 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9968 if (processed
== toprocess
) return;
9970 if (retval
< 0 && errno
!= EAGAIN
) {
9971 redisLog(REDIS_WARNING
,
9972 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9977 static void lockThreadedIO(void) {
9978 pthread_mutex_lock(&server
.io_mutex
);
9981 static void unlockThreadedIO(void) {
9982 pthread_mutex_unlock(&server
.io_mutex
);
9985 /* Remove the specified object from the threaded I/O queue if still not
9986 * processed, otherwise make sure to flag it as canceled. */
9987 static void vmCancelThreadedIOJob(robj
*o
) {
9989 server
.io_newjobs
, /* 0 */
9990 server
.io_processing
, /* 1 */
9991 server
.io_processed
/* 2 */
9995 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9998 /* Search for a matching object in one of the queues */
9999 for (i
= 0; i
< 3; i
++) {
10003 listRewind(lists
[i
],&li
);
10004 while ((ln
= listNext(&li
)) != NULL
) {
10005 iojob
*job
= ln
->value
;
10007 if (job
->canceled
) continue; /* Skip this, already canceled. */
10008 if (job
->id
== o
) {
10009 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
10010 (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
);
10011 /* Mark the pages as free since the swap didn't happen
10012 * or happened but is now discarded. */
10013 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
10014 vmMarkPagesFree(job
->page
,job
->pages
);
10015 /* Cancel the job. It depends on the list the job is
10018 case 0: /* io_newjobs */
10019 /* If the job was not yet processed, the best thing to do
10020 * is to remove it from the queue altogether. */
10022 listDelNode(lists
[i
],ln
);
10024 case 1: /* io_processing */
10025 /* Oh Shi- the thread is messing with the Job:
10027 * Probably it's accessing the object if this is a
10028 * PREPARE_SWAP or DO_SWAP job.
10029 * If it's a LOAD job it may be reading from disk and
10030 * if we don't wait for the job to terminate before
10031 * cancelling it, maybe in a few microseconds data can be
10032 * corrupted in these pages. So the short story is:
10034 * Better to wait for the job to move into the
10035 * next queue (processed)... */
10037 /* We try again and again until the job is completed. */
10038 unlockThreadedIO();
10039 /* But let's wait some time for the I/O thread
10040 * to finish with this job. After all this condition
10041 * should be very rare. */
10044 case 2: /* io_processed */
10045 /* The job was already processed, that's easy...
10046 * just mark it as canceled so that we'll ignore it
10047 * when processing completed jobs. */
10051 /* Finally we have to adjust the storage type of the object
10052 * in order to "UNDO" the operation. */
10053 if (o
->storage
== REDIS_VM_LOADING
)
10054 o
->storage
= REDIS_VM_SWAPPED
;
10055 else if (o
->storage
== REDIS_VM_SWAPPING
)
10056 o
->storage
= REDIS_VM_MEMORY
;
10057 unlockThreadedIO();
10058 redisLog(REDIS_DEBUG
,"*** DONE");
10063 unlockThreadedIO();
10064 printf("Not found: %p\n", (void*)o
);
10065 redisAssert(1 != 1); /* We should never reach this */
10068 static void *IOThreadEntryPoint(void *arg
) {
10071 REDIS_NOTUSED(arg
);
10073 pthread_detach(pthread_self());
10075 /* Get a new job to process */
10077 if (listLength(server
.io_newjobs
) == 0) {
10078 /* No new jobs in queue, exit. */
10079 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
10080 (long) pthread_self());
10081 server
.io_active_threads
--;
10082 unlockThreadedIO();
10085 ln
= listFirst(server
.io_newjobs
);
10087 listDelNode(server
.io_newjobs
,ln
);
10088 /* Add the job in the processing queue */
10089 j
->thread
= pthread_self();
10090 listAddNodeTail(server
.io_processing
,j
);
10091 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
10092 unlockThreadedIO();
10093 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
10094 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
10096 /* Process the Job */
10097 if (j
->type
== REDIS_IOJOB_LOAD
) {
10098 vmpointer
*vp
= (vmpointer
*)j
->id
;
10099 j
->val
= vmReadObjectFromSwap(j
->page
,vp
->vtype
);
10100 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
10101 FILE *fp
= fopen("/dev/null","w+");
10102 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
10104 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
10105 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
10109 /* Done: insert the job into the processed queue */
10110 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
10111 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
10113 listDelNode(server
.io_processing
,ln
);
10114 listAddNodeTail(server
.io_processed
,j
);
10115 unlockThreadedIO();
10117 /* Signal the main thread there is new stuff to process */
10118 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
10120 return NULL
; /* never reached */
10123 static void spawnIOThread(void) {
10125 sigset_t mask
, omask
;
10128 sigemptyset(&mask
);
10129 sigaddset(&mask
,SIGCHLD
);
10130 sigaddset(&mask
,SIGHUP
);
10131 sigaddset(&mask
,SIGPIPE
);
10132 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
10133 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
10134 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
10138 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
10139 server
.io_active_threads
++;
10142 /* We need to wait for the last thread to exit before we are able to
10143 * fork() in order to BGSAVE or BGREWRITEAOF. */
10144 static void waitEmptyIOJobsQueue(void) {
10146 int io_processed_len
;
10149 if (listLength(server
.io_newjobs
) == 0 &&
10150 listLength(server
.io_processing
) == 0 &&
10151 server
.io_active_threads
== 0)
10153 unlockThreadedIO();
10156 /* While waiting for the empty jobs queue condition we post-process some
10157 * finished jobs, as I/O threads may be hanging trying to write against
10158 * the io_ready_pipe_write FD but there are so many pending jobs that
10159 * it's blocking. */
10160 io_processed_len
= listLength(server
.io_processed
);
10161 unlockThreadedIO();
10162 if (io_processed_len
) {
10163 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
10164 usleep(1000); /* 1 millisecond */
10166 usleep(10000); /* 10 milliseconds */
10171 static void vmReopenSwapFile(void) {
10172 /* Note: we don't close the old one as we are in the child process
10173 * and don't want to mess at all with the original file object. */
10174 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
10175 if (server
.vm_fp
== NULL
) {
10176 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
10177 server
.vm_swap_file
);
10180 server
.vm_fd
= fileno(server
.vm_fp
);
10183 /* This function must be called with threaded I/O locked */
10184 static void queueIOJob(iojob
*j
) {
10185 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
10186 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
10187 listAddNodeTail(server
.io_newjobs
,j
);
10188 if (server
.io_active_threads
< server
.vm_max_threads
)
10192 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
10195 j
= zmalloc(sizeof(*j
));
10196 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
10200 j
->id
= j
->val
= val
;
10203 j
->thread
= (pthread_t
) -1;
10204 val
->storage
= REDIS_VM_SWAPPING
;
10208 unlockThreadedIO();
10212 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
10214 /* This function makes the client 'c' wait for the key 'key' to be loaded.
10215 * If there is not already a job loading the key, it is created.
10216 * The key is added to the io_keys list in the client structure, and also
10217 * in the hash table mapping swapped keys to waiting clients, that is,
10218 * server.io_waited_keys. */
10219 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
10220 struct dictEntry
*de
;
10224 /* If the key does not exist or is already in RAM we don't need to
10225 * block the client at all. */
10226 de
= dictFind(c
->db
->dict
,key
->ptr
);
10227 if (de
== NULL
) return 0;
10228 o
= dictGetEntryVal(de
);
10229 if (o
->storage
== REDIS_VM_MEMORY
) {
10231 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
10232 /* We were swapping the key, undo it! */
10233 vmCancelThreadedIOJob(o
);
10237 /* OK: the key is either swapped, or being loaded just now. */
10239 /* Add the key to the list of keys this client is waiting for.
10240 * This maps clients to keys they are waiting for. */
10241 listAddNodeTail(c
->io_keys
,key
);
10244 /* Add the client to the swapped keys => clients waiting map. */
10245 de
= dictFind(c
->db
->io_keys
,key
);
10249 /* For every key we take a list of clients blocked for it */
10251 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
10253 assert(retval
== DICT_OK
);
10255 l
= dictGetEntryVal(de
);
10257 listAddNodeTail(l
,c
);
10259 /* Are we already loading the key from disk? If not create a job */
10260 if (o
->storage
== REDIS_VM_SWAPPED
) {
10262 vmpointer
*vp
= (vmpointer
*)o
;
10264 o
->storage
= REDIS_VM_LOADING
;
10265 j
= zmalloc(sizeof(*j
));
10266 j
->type
= REDIS_IOJOB_LOAD
;
10271 j
->page
= vp
->page
;
10274 j
->thread
= (pthread_t
) -1;
10277 unlockThreadedIO();
10282 /* Preload keys for any command with first, last and step values for
10283 * the command keys prototype, as defined in the command table. */
10284 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10286 if (cmd
->vm_firstkey
== 0) return;
10287 last
= cmd
->vm_lastkey
;
10288 if (last
< 0) last
= argc
+last
;
10289 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
10290 redisAssert(j
< argc
);
10291 waitForSwappedKey(c
,argv
[j
]);
10295 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
10296 * Note that the number of keys to preload is user-defined, so we need to
10297 * apply a sanity check against argc. */
10298 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10300 REDIS_NOTUSED(cmd
);
10302 num
= atoi(argv
[2]->ptr
);
10303 if (num
> (argc
-3)) return;
10304 for (i
= 0; i
< num
; i
++) {
10305 waitForSwappedKey(c
,argv
[3+i
]);
10309 /* Preload keys needed to execute the entire MULTI/EXEC block.
10311 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
10312 * and will block the client when any command requires a swapped out value. */
10313 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10315 struct redisCommand
*mcmd
;
10317 REDIS_NOTUSED(cmd
);
10318 REDIS_NOTUSED(argc
);
10319 REDIS_NOTUSED(argv
);
10321 if (!(c
->flags
& REDIS_MULTI
)) return;
10322 for (i
= 0; i
< c
->mstate
.count
; i
++) {
10323 mcmd
= c
->mstate
.commands
[i
].cmd
;
10324 margc
= c
->mstate
.commands
[i
].argc
;
10325 margv
= c
->mstate
.commands
[i
].argv
;
10327 if (mcmd
->vm_preload_proc
!= NULL
) {
10328 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
10330 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
10335 /* Is this client attempting to run a command against swapped keys?
10336 * If so, block it ASAP, load the keys in background, then resume it.
10338 * The important idea about this function is that it can fail! If keys will
10339 * still be swapped when the client is resumed, the key lookups will
10340 * just block loading keys from disk. In practical terms this should only
10341 * happen with SORT BY command or if there is a bug in this function.
10343 * Return 1 if the client is marked as blocked, 0 if the client can
10344 * continue as the keys it is going to access appear to be in memory. */
10345 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
10346 if (cmd
->vm_preload_proc
!= NULL
) {
10347 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
10349 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
10352 /* If the client was blocked for at least one key, mark it as blocked. */
10353 if (listLength(c
->io_keys
)) {
10354 c
->flags
|= REDIS_IO_WAIT
;
10355 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
10356 server
.vm_blocked_clients
++;
10363 /* Remove the 'key' from the list of blocked keys for a given client.
10365 * The function returns 1 when there are no longer blocking keys after
10366 * the current one was removed (and the client can be unblocked). */
10367 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
10371 struct dictEntry
*de
;
10373 /* Remove the key from the list of keys this client is waiting for. */
10374 listRewind(c
->io_keys
,&li
);
10375 while ((ln
= listNext(&li
)) != NULL
) {
10376 if (equalStringObjects(ln
->value
,key
)) {
10377 listDelNode(c
->io_keys
,ln
);
10381 assert(ln
!= NULL
);
10383 /* Remove the client from the key => waiting clients map. */
10384 de
= dictFind(c
->db
->io_keys
,key
);
10385 assert(de
!= NULL
);
10386 l
= dictGetEntryVal(de
);
10387 ln
= listSearchKey(l
,c
);
10388 assert(ln
!= NULL
);
10390 if (listLength(l
) == 0)
10391 dictDelete(c
->db
->io_keys
,key
);
10393 return listLength(c
->io_keys
) == 0;
10396 /* Every time we know a key was loaded back in memory, we handle clients
10397 * waiting for this key if any. */
10398 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
10399 struct dictEntry
*de
;
10404 de
= dictFind(db
->io_keys
,key
);
10407 l
= dictGetEntryVal(de
);
10408 len
= listLength(l
);
10409 /* Note: we can't use something like while(listLength(l)) as the list
10410 * can be freed by the calling function when we remove the last element. */
10413 redisClient
*c
= ln
->value
;
10415 if (dontWaitForSwappedKey(c
,key
)) {
10416 /* Put the client in the list of clients ready to go as we
10417 * loaded all the keys about it. */
10418 listAddNodeTail(server
.io_ready_clients
,c
);
10423 /* =========================== Remote Configuration ========================= */
10425 static void configSetCommand(redisClient
*c
) {
10426 robj
*o
= getDecodedObject(c
->argv
[3]);
10429 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
10430 zfree(server
.dbfilename
);
10431 server
.dbfilename
= zstrdup(o
->ptr
);
10432 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
10433 zfree(server
.requirepass
);
10434 server
.requirepass
= zstrdup(o
->ptr
);
10435 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
10436 zfree(server
.masterauth
);
10437 server
.masterauth
= zstrdup(o
->ptr
);
10438 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
10439 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10440 ll
< 0) goto badfmt
;
10441 server
.maxmemory
= ll
;
10442 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
10443 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10444 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
10445 server
.maxidletime
= ll
;
10446 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
10447 if (!strcasecmp(o
->ptr
,"no")) {
10448 server
.appendfsync
= APPENDFSYNC_NO
;
10449 } else if (!strcasecmp(o
->ptr
,"everysec")) {
10450 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10451 } else if (!strcasecmp(o
->ptr
,"always")) {
10452 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10456 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10457 int yn
= yesnotoi(o
->ptr
);
10459 if (yn
== -1) goto badfmt
;
10460 server
.no_appendfsync_on_rewrite
= yn
;
10461 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10462 int old
= server
.appendonly
;
10463 int new = yesnotoi(o
->ptr
);
10465 if (new == -1) goto badfmt
;
10470 if (startAppendOnly() == REDIS_ERR
) {
10471 addReplySds(c
,sdscatprintf(sdsempty(),
10472 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10478 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10480 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10482 /* Perform sanity check before setting the new config:
10483 * - Even number of args
10484 * - Seconds >= 1, changes >= 0 */
10486 sdsfreesplitres(v
,vlen
);
10489 for (j
= 0; j
< vlen
; j
++) {
10493 val
= strtoll(v
[j
], &eptr
, 10);
10494 if (eptr
[0] != '\0' ||
10495 ((j
& 1) == 0 && val
< 1) ||
10496 ((j
& 1) == 1 && val
< 0)) {
10497 sdsfreesplitres(v
,vlen
);
10501 /* Finally set the new config */
10502 resetServerSaveParams();
10503 for (j
= 0; j
< vlen
; j
+= 2) {
10507 seconds
= strtoll(v
[j
],NULL
,10);
10508 changes
= strtoll(v
[j
+1],NULL
,10);
10509 appendServerSaveParams(seconds
, changes
);
10511 sdsfreesplitres(v
,vlen
);
10513 addReplySds(c
,sdscatprintf(sdsempty(),
10514 "-ERR not supported CONFIG parameter %s\r\n",
10515 (char*)c
->argv
[2]->ptr
));
10520 addReply(c
,shared
.ok
);
10523 badfmt
: /* Bad format errors */
10524 addReplySds(c
,sdscatprintf(sdsempty(),
10525 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10527 (char*)c
->argv
[2]->ptr
));
10531 static void configGetCommand(redisClient
*c
) {
10532 robj
*o
= getDecodedObject(c
->argv
[2]);
10533 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10534 char *pattern
= o
->ptr
;
10537 addReply(c
,lenobj
);
10538 decrRefCount(lenobj
);
10540 if (stringmatch(pattern
,"dbfilename",0)) {
10541 addReplyBulkCString(c
,"dbfilename");
10542 addReplyBulkCString(c
,server
.dbfilename
);
10545 if (stringmatch(pattern
,"requirepass",0)) {
10546 addReplyBulkCString(c
,"requirepass");
10547 addReplyBulkCString(c
,server
.requirepass
);
10550 if (stringmatch(pattern
,"masterauth",0)) {
10551 addReplyBulkCString(c
,"masterauth");
10552 addReplyBulkCString(c
,server
.masterauth
);
10555 if (stringmatch(pattern
,"maxmemory",0)) {
10558 ll2string(buf
,128,server
.maxmemory
);
10559 addReplyBulkCString(c
,"maxmemory");
10560 addReplyBulkCString(c
,buf
);
10563 if (stringmatch(pattern
,"timeout",0)) {
10566 ll2string(buf
,128,server
.maxidletime
);
10567 addReplyBulkCString(c
,"timeout");
10568 addReplyBulkCString(c
,buf
);
10571 if (stringmatch(pattern
,"appendonly",0)) {
10572 addReplyBulkCString(c
,"appendonly");
10573 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10576 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10577 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10578 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10581 if (stringmatch(pattern
,"appendfsync",0)) {
10584 switch(server
.appendfsync
) {
10585 case APPENDFSYNC_NO
: policy
= "no"; break;
10586 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10587 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10588 default: policy
= "unknown"; break; /* too harmless to panic */
10590 addReplyBulkCString(c
,"appendfsync");
10591 addReplyBulkCString(c
,policy
);
10594 if (stringmatch(pattern
,"save",0)) {
10595 sds buf
= sdsempty();
10598 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10599 buf
= sdscatprintf(buf
,"%ld %d",
10600 server
.saveparams
[j
].seconds
,
10601 server
.saveparams
[j
].changes
);
10602 if (j
!= server
.saveparamslen
-1)
10603 buf
= sdscatlen(buf
," ",1);
10605 addReplyBulkCString(c
,"save");
10606 addReplyBulkCString(c
,buf
);
10611 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10614 static void configCommand(redisClient
*c
) {
10615 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10616 if (c
->argc
!= 4) goto badarity
;
10617 configSetCommand(c
);
10618 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10619 if (c
->argc
!= 3) goto badarity
;
10620 configGetCommand(c
);
10621 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10622 if (c
->argc
!= 2) goto badarity
;
10623 server
.stat_numcommands
= 0;
10624 server
.stat_numconnections
= 0;
10625 server
.stat_expiredkeys
= 0;
10626 server
.stat_starttime
= time(NULL
);
10627 addReply(c
,shared
.ok
);
10629 addReplySds(c
,sdscatprintf(sdsempty(),
10630 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10635 addReplySds(c
,sdscatprintf(sdsempty(),
10636 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10637 (char*) c
->argv
[1]->ptr
));
10640 /* =========================== Pubsub implementation ======================== */
10642 static void freePubsubPattern(void *p
) {
10643 pubsubPattern
*pat
= p
;
10645 decrRefCount(pat
->pattern
);
10649 static int listMatchPubsubPattern(void *a
, void *b
) {
10650 pubsubPattern
*pa
= a
, *pb
= b
;
10652 return (pa
->client
== pb
->client
) &&
10653 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10656 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10657 * 0 if the client was already subscribed to that channel. */
10658 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10659 struct dictEntry
*de
;
10660 list
*clients
= NULL
;
10663 /* Add the channel to the client -> channels hash table */
10664 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10666 incrRefCount(channel
);
10667 /* Add the client to the channel -> list of clients hash table */
10668 de
= dictFind(server
.pubsub_channels
,channel
);
10670 clients
= listCreate();
10671 dictAdd(server
.pubsub_channels
,channel
,clients
);
10672 incrRefCount(channel
);
10674 clients
= dictGetEntryVal(de
);
10676 listAddNodeTail(clients
,c
);
10678 /* Notify the client */
10679 addReply(c
,shared
.mbulk3
);
10680 addReply(c
,shared
.subscribebulk
);
10681 addReplyBulk(c
,channel
);
10682 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10686 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10687 * 0 if the client was not subscribed to the specified channel. */
10688 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10689 struct dictEntry
*de
;
10694 /* Remove the channel from the client -> channels hash table */
10695 incrRefCount(channel
); /* channel may be just a pointer to the same object
10696 we have in the hash tables. Protect it... */
10697 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10699 /* Remove the client from the channel -> clients list hash table */
10700 de
= dictFind(server
.pubsub_channels
,channel
);
10701 assert(de
!= NULL
);
10702 clients
= dictGetEntryVal(de
);
10703 ln
= listSearchKey(clients
,c
);
10704 assert(ln
!= NULL
);
10705 listDelNode(clients
,ln
);
10706 if (listLength(clients
) == 0) {
10707 /* Free the list and associated hash entry altogether if this was
10708 * the last client, so that it will be possible to abuse
10709 * Redis PUBSUB creating millions of channels. */
10710 dictDelete(server
.pubsub_channels
,channel
);
10713 /* Notify the client */
10715 addReply(c
,shared
.mbulk3
);
10716 addReply(c
,shared
.unsubscribebulk
);
10717 addReplyBulk(c
,channel
);
10718 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10719 listLength(c
->pubsub_patterns
));
10722 decrRefCount(channel
); /* it is finally safe to release it */
10726 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
10727 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10730 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10732 pubsubPattern
*pat
;
10733 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10734 incrRefCount(pattern
);
10735 pat
= zmalloc(sizeof(*pat
));
10736 pat
->pattern
= getDecodedObject(pattern
);
10738 listAddNodeTail(server
.pubsub_patterns
,pat
);
10740 /* Notify the client */
10741 addReply(c
,shared
.mbulk3
);
10742 addReply(c
,shared
.psubscribebulk
);
10743 addReplyBulk(c
,pattern
);
10744 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10748 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10749 * 0 if the client was not subscribed to the specified channel. */
10750 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10755 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10756 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10758 listDelNode(c
->pubsub_patterns
,ln
);
10760 pat
.pattern
= pattern
;
10761 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10762 listDelNode(server
.pubsub_patterns
,ln
);
10764 /* Notify the client */
10766 addReply(c
,shared
.mbulk3
);
10767 addReply(c
,shared
.punsubscribebulk
);
10768 addReplyBulk(c
,pattern
);
10769 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10770 listLength(c
->pubsub_patterns
));
10772 decrRefCount(pattern
);
10776 /* Unsubscribe from all the channels. Return the number of channels the
10777 * client was subscribed from. */
10778 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10779 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10783 while((de
= dictNext(di
)) != NULL
) {
10784 robj
*channel
= dictGetEntryKey(de
);
10786 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10788 dictReleaseIterator(di
);
10792 /* Unsubscribe from all the patterns. Return the number of patterns the
10793 * client was subscribed from. */
10794 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10799 listRewind(c
->pubsub_patterns
,&li
);
10800 while ((ln
= listNext(&li
)) != NULL
) {
10801 robj
*pattern
= ln
->value
;
10803 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10808 /* Publish a message */
10809 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10811 struct dictEntry
*de
;
10815 /* Send to clients listening for that channel */
10816 de
= dictFind(server
.pubsub_channels
,channel
);
10818 list
*list
= dictGetEntryVal(de
);
10822 listRewind(list
,&li
);
10823 while ((ln
= listNext(&li
)) != NULL
) {
10824 redisClient
*c
= ln
->value
;
10826 addReply(c
,shared
.mbulk3
);
10827 addReply(c
,shared
.messagebulk
);
10828 addReplyBulk(c
,channel
);
10829 addReplyBulk(c
,message
);
10833 /* Send to clients listening to matching channels */
10834 if (listLength(server
.pubsub_patterns
)) {
10835 listRewind(server
.pubsub_patterns
,&li
);
10836 channel
= getDecodedObject(channel
);
10837 while ((ln
= listNext(&li
)) != NULL
) {
10838 pubsubPattern
*pat
= ln
->value
;
10840 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10841 sdslen(pat
->pattern
->ptr
),
10842 (char*)channel
->ptr
,
10843 sdslen(channel
->ptr
),0)) {
10844 addReply(pat
->client
,shared
.mbulk4
);
10845 addReply(pat
->client
,shared
.pmessagebulk
);
10846 addReplyBulk(pat
->client
,pat
->pattern
);
10847 addReplyBulk(pat
->client
,channel
);
10848 addReplyBulk(pat
->client
,message
);
10852 decrRefCount(channel
);
10857 static void subscribeCommand(redisClient
*c
) {
10860 for (j
= 1; j
< c
->argc
; j
++)
10861 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10864 static void unsubscribeCommand(redisClient
*c
) {
10865 if (c
->argc
== 1) {
10866 pubsubUnsubscribeAllChannels(c
,1);
10871 for (j
= 1; j
< c
->argc
; j
++)
10872 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10876 static void psubscribeCommand(redisClient
*c
) {
10879 for (j
= 1; j
< c
->argc
; j
++)
10880 pubsubSubscribePattern(c
,c
->argv
[j
]);
10883 static void punsubscribeCommand(redisClient
*c
) {
10884 if (c
->argc
== 1) {
10885 pubsubUnsubscribeAllPatterns(c
,1);
10890 for (j
= 1; j
< c
->argc
; j
++)
10891 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10895 static void publishCommand(redisClient
*c
) {
10896 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10897 addReplyLongLong(c
,receivers
);
10900 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10902 * The implementation uses a per-DB hash table mapping keys to list of clients
10903 * WATCHing those keys, so that given a key that is going to be modified
10904 * we can mark all the associated clients as dirty.
10906 * Also every client contains a list of WATCHed keys so that's possible to
10907 * un-watch such keys when the client is freed or when UNWATCH is called. */
10909 /* In the client->watched_keys list we need to use watchedKey structures
10910 * as in order to identify a key in Redis we need both the key name and the
10912 typedef struct watchedKey
{
10917 /* Watch for the specified key */
10918 static void watchForKey(redisClient
*c
, robj
*key
) {
10919 list
*clients
= NULL
;
10924 /* Check if we are already watching for this key */
10925 listRewind(c
->watched_keys
,&li
);
10926 while((ln
= listNext(&li
))) {
10927 wk
= listNodeValue(ln
);
10928 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10929 return; /* Key already watched */
10931 /* This key is not already watched in this DB. Let's add it */
10932 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10934 clients
= listCreate();
10935 dictAdd(c
->db
->watched_keys
,key
,clients
);
10938 listAddNodeTail(clients
,c
);
10939 /* Add the new key to the lits of keys watched by this client */
10940 wk
= zmalloc(sizeof(*wk
));
10944 listAddNodeTail(c
->watched_keys
,wk
);
10947 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10948 * flag is up to the caller. */
10949 static void unwatchAllKeys(redisClient
*c
) {
10953 if (listLength(c
->watched_keys
) == 0) return;
10954 listRewind(c
->watched_keys
,&li
);
10955 while((ln
= listNext(&li
))) {
10959 /* Lookup the watched key -> clients list and remove the client
10961 wk
= listNodeValue(ln
);
10962 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10963 assert(clients
!= NULL
);
10964 listDelNode(clients
,listSearchKey(clients
,c
));
10965 /* Kill the entry at all if this was the only client */
10966 if (listLength(clients
) == 0)
10967 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10968 /* Remove this watched key from the client->watched list */
10969 listDelNode(c
->watched_keys
,ln
);
10970 decrRefCount(wk
->key
);
10975 /* "Touch" a key, so that if this key is being WATCHed by some client the
10976 * next EXEC will fail. */
10977 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10982 if (dictSize(db
->watched_keys
) == 0) return;
10983 clients
= dictFetchValue(db
->watched_keys
, key
);
10984 if (!clients
) return;
10986 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10987 /* Check if we are already watching for this key */
10988 listRewind(clients
,&li
);
10989 while((ln
= listNext(&li
))) {
10990 redisClient
*c
= listNodeValue(ln
);
10992 c
->flags
|= REDIS_DIRTY_CAS
;
10996 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10997 * flush but will be deleted as effect of the flushing operation should
10998 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10999 * a FLUSHALL operation (all the DBs flushed). */
11000 static void touchWatchedKeysOnFlush(int dbid
) {
11004 /* For every client, check all the waited keys */
11005 listRewind(server
.clients
,&li1
);
11006 while((ln
= listNext(&li1
))) {
11007 redisClient
*c
= listNodeValue(ln
);
11008 listRewind(c
->watched_keys
,&li2
);
11009 while((ln
= listNext(&li2
))) {
11010 watchedKey
*wk
= listNodeValue(ln
);
11012 /* For every watched key matching the specified DB, if the
11013 * key exists, mark the client as dirty, as the key will be
11015 if (dbid
== -1 || wk
->db
->id
== dbid
) {
11016 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
)
11017 c
->flags
|= REDIS_DIRTY_CAS
;
11023 static void watchCommand(redisClient
*c
) {
11026 if (c
->flags
& REDIS_MULTI
) {
11027 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
11030 for (j
= 1; j
< c
->argc
; j
++)
11031 watchForKey(c
,c
->argv
[j
]);
11032 addReply(c
,shared
.ok
);
11035 static void unwatchCommand(redisClient
*c
) {
11037 c
->flags
&= (~REDIS_DIRTY_CAS
);
11038 addReply(c
,shared
.ok
);
11041 /* ================================= Debugging ============================== */
11043 /* Compute the sha1 of string at 's' with 'len' bytes long.
11044 * The SHA1 is then xored againt the string pointed by digest.
11045 * Since xor is commutative, this operation is used in order to
11046 * "add" digests relative to unordered elements.
11048 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
11049 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
11051 unsigned char hash
[20], *s
= ptr
;
11055 SHA1Update(&ctx
,s
,len
);
11056 SHA1Final(hash
,&ctx
);
11058 for (j
= 0; j
< 20; j
++)
11059 digest
[j
] ^= hash
[j
];
11062 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
11063 o
= getDecodedObject(o
);
11064 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
11068 /* This function instead of just computing the SHA1 and xoring it
11069 * against diget, also perform the digest of "digest" itself and
11070 * replace the old value with the new one.
11072 * So the final digest will be:
11074 * digest = SHA1(digest xor SHA1(data))
11076 * This function is used every time we want to preserve the order so
11077 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
11079 * Also note that mixdigest("foo") followed by mixdigest("bar")
11080 * will lead to a different digest compared to "fo", "obar".
11082 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
11086 xorDigest(digest
,s
,len
);
11088 SHA1Update(&ctx
,digest
,20);
11089 SHA1Final(digest
,&ctx
);
11092 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
11093 o
= getDecodedObject(o
);
11094 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
11098 /* Compute the dataset digest. Since keys, sets elements, hashes elements
11099 * are not ordered, we use a trick: every aggregate digest is the xor
11100 * of the digests of their elements. This way the order will not change
11101 * the result. For list instead we use a feedback entering the output digest
11102 * as input in order to ensure that a different ordered list will result in
11103 * a different digest. */
11104 static void computeDatasetDigest(unsigned char *final
) {
11105 unsigned char digest
[20];
11107 dictIterator
*di
= NULL
;
11112 memset(final
,0,20); /* Start with a clean result */
11114 for (j
= 0; j
< server
.dbnum
; j
++) {
11115 redisDb
*db
= server
.db
+j
;
11117 if (dictSize(db
->dict
) == 0) continue;
11118 di
= dictGetIterator(db
->dict
);
11120 /* hash the DB id, so the same dataset moved in a different
11121 * DB will lead to a different digest */
11123 mixDigest(final
,&aux
,sizeof(aux
));
11125 /* Iterate this DB writing every entry */
11126 while((de
= dictNext(di
)) != NULL
) {
11131 memset(digest
,0,20); /* This key-val digest */
11132 key
= dictGetEntryKey(de
);
11133 keyobj
= createStringObject(key
,sdslen(key
));
11135 mixDigest(digest
,key
,sdslen(key
));
11137 /* Make sure the key is loaded if VM is active */
11138 o
= lookupKeyRead(db
,keyobj
);
11140 aux
= htonl(o
->type
);
11141 mixDigest(digest
,&aux
,sizeof(aux
));
11142 expiretime
= getExpire(db
,keyobj
);
11144 /* Save the key and associated value */
11145 if (o
->type
== REDIS_STRING
) {
11146 mixObjectDigest(digest
,o
);
11147 } else if (o
->type
== REDIS_LIST
) {
11148 listTypeIterator
*li
= listTypeInitIterator(o
,0,REDIS_TAIL
);
11149 listTypeEntry entry
;
11150 while(listTypeNext(li
,&entry
)) {
11151 robj
*eleobj
= listTypeGet(&entry
);
11152 mixObjectDigest(digest
,eleobj
);
11153 decrRefCount(eleobj
);
11155 listTypeReleaseIterator(li
);
11156 } else if (o
->type
== REDIS_SET
) {
11157 dict
*set
= o
->ptr
;
11158 dictIterator
*di
= dictGetIterator(set
);
11161 while((de
= dictNext(di
)) != NULL
) {
11162 robj
*eleobj
= dictGetEntryKey(de
);
11164 xorObjectDigest(digest
,eleobj
);
11166 dictReleaseIterator(di
);
11167 } else if (o
->type
== REDIS_ZSET
) {
11169 dictIterator
*di
= dictGetIterator(zs
->dict
);
11172 while((de
= dictNext(di
)) != NULL
) {
11173 robj
*eleobj
= dictGetEntryKey(de
);
11174 double *score
= dictGetEntryVal(de
);
11175 unsigned char eledigest
[20];
11177 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
11178 memset(eledigest
,0,20);
11179 mixObjectDigest(eledigest
,eleobj
);
11180 mixDigest(eledigest
,buf
,strlen(buf
));
11181 xorDigest(digest
,eledigest
,20);
11183 dictReleaseIterator(di
);
11184 } else if (o
->type
== REDIS_HASH
) {
11185 hashTypeIterator
*hi
;
11188 hi
= hashTypeInitIterator(o
);
11189 while (hashTypeNext(hi
) != REDIS_ERR
) {
11190 unsigned char eledigest
[20];
11192 memset(eledigest
,0,20);
11193 obj
= hashTypeCurrent(hi
,REDIS_HASH_KEY
);
11194 mixObjectDigest(eledigest
,obj
);
11196 obj
= hashTypeCurrent(hi
,REDIS_HASH_VALUE
);
11197 mixObjectDigest(eledigest
,obj
);
11199 xorDigest(digest
,eledigest
,20);
11201 hashTypeReleaseIterator(hi
);
11203 redisPanic("Unknown object type");
11205 /* If the key has an expire, add it to the mix */
11206 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
11207 /* We can finally xor the key-val digest to the final digest */
11208 xorDigest(final
,digest
,20);
11209 decrRefCount(keyobj
);
11211 dictReleaseIterator(di
);
11215 static void debugCommand(redisClient
*c
) {
11216 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
11217 *((char*)-1) = 'x';
11218 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
11219 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
11220 addReply(c
,shared
.err
);
11224 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
11225 addReply(c
,shared
.err
);
11228 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
11229 addReply(c
,shared
.ok
);
11230 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
11232 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
11233 addReply(c
,shared
.err
);
11236 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
11237 addReply(c
,shared
.ok
);
11238 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
11239 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11243 addReply(c
,shared
.nokeyerr
);
11246 val
= dictGetEntryVal(de
);
11247 if (!server
.vm_enabled
|| (val
->storage
== REDIS_VM_MEMORY
||
11248 val
->storage
== REDIS_VM_SWAPPING
)) {
11252 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
11253 strenc
= strencoding
[val
->encoding
];
11255 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
11258 addReplySds(c
,sdscatprintf(sdsempty(),
11259 "+Value at:%p refcount:%d "
11260 "encoding:%s serializedlength:%lld\r\n",
11261 (void*)val
, val
->refcount
,
11262 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
11264 vmpointer
*vp
= (vmpointer
*) val
;
11265 addReplySds(c
,sdscatprintf(sdsempty(),
11266 "+Value swapped at: page %llu "
11267 "using %llu pages\r\n",
11268 (unsigned long long) vp
->page
,
11269 (unsigned long long) vp
->usedpages
));
11271 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
11272 lookupKeyRead(c
->db
,c
->argv
[2]);
11273 addReply(c
,shared
.ok
);
11274 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
11275 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11279 if (!server
.vm_enabled
) {
11280 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
11284 addReply(c
,shared
.nokeyerr
);
11287 val
= dictGetEntryVal(de
);
11289 if (val
->storage
!= REDIS_VM_MEMORY
) {
11290 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
11291 } else if (val
->refcount
!= 1) {
11292 addReplySds(c
,sdsnew("-ERR Object is shared\r\n"));
11293 } else if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
11294 dictGetEntryVal(de
) = vp
;
11295 addReply(c
,shared
.ok
);
11297 addReply(c
,shared
.err
);
11299 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
11304 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
11306 for (j
= 0; j
< keys
; j
++) {
11307 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
11308 key
= createStringObject(buf
,strlen(buf
));
11309 if (lookupKeyRead(c
->db
,key
) != NULL
) {
11313 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
11314 val
= createStringObject(buf
,strlen(buf
));
11315 dbAdd(c
->db
,key
,val
);
11318 addReply(c
,shared
.ok
);
11319 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
11320 unsigned char digest
[20];
11321 sds d
= sdsnew("+");
11324 computeDatasetDigest(digest
);
11325 for (j
= 0; j
< 20; j
++)
11326 d
= sdscatprintf(d
, "%02x",digest
[j
]);
11328 d
= sdscatlen(d
,"\r\n",2);
11331 addReplySds(c
,sdsnew(
11332 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
11336 static void _redisAssert(char *estr
, char *file
, int line
) {
11337 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
11338 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
11339 #ifdef HAVE_BACKTRACE
11340 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11341 *((char*)-1) = 'x';
11345 static void _redisPanic(char *msg
, char *file
, int line
) {
11346 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
11347 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
11348 #ifdef HAVE_BACKTRACE
11349 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11350 *((char*)-1) = 'x';
11354 /* =================================== Main! ================================ */
11357 int linuxOvercommitMemoryValue(void) {
11358 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
11361 if (!fp
) return -1;
11362 if (fgets(buf
,64,fp
) == NULL
) {
11371 void linuxOvercommitMemoryWarning(void) {
11372 if (linuxOvercommitMemoryValue() == 0) {
11373 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
11376 #endif /* __linux__ */
11378 static void daemonize(void) {
11382 if (fork() != 0) exit(0); /* parent exits */
11383 setsid(); /* create a new session */
11385 /* Every output goes to /dev/null. If Redis is daemonized but
11386 * the 'logfile' is set to 'stdout' in the configuration file
11387 * it will not log at all. */
11388 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
11389 dup2(fd
, STDIN_FILENO
);
11390 dup2(fd
, STDOUT_FILENO
);
11391 dup2(fd
, STDERR_FILENO
);
11392 if (fd
> STDERR_FILENO
) close(fd
);
11394 /* Try to write the pid file */
11395 fp
= fopen(server
.pidfile
,"w");
11397 fprintf(fp
,"%d\n",getpid());
11402 static void version() {
11403 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
11404 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on stderr and exit with status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}

11414 int main(int argc
, char **argv
) {
11417 initServerConfig();
11418 sortCommandTable();
11420 if (strcmp(argv
[1], "-v") == 0 ||
11421 strcmp(argv
[1], "--version") == 0) version();
11422 if (strcmp(argv
[1], "--help") == 0) usage();
11423 resetServerSaveParams();
11424 loadServerConfig(argv
[1]);
11425 } else if ((argc
> 2)) {
11428 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
11430 if (server
.daemonize
) daemonize();
11432 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
11434 linuxOvercommitMemoryWarning();
11436 start
= time(NULL
);
11437 if (server
.appendonly
) {
11438 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
11439 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
11441 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11442 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11444 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11445 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11447 aeDeleteEventLoop(server
.el
);
11451 /* ============================= Backtrace support ========================= */
11453 #ifdef HAVE_BACKTRACE
11454 static char *findFuncName(void *pointer
, unsigned long *offset
);
11456 static void *getMcontextEip(ucontext_t
*uc
) {
11457 #if defined(__FreeBSD__)
11458 return (void*) uc
->uc_mcontext
.mc_eip
;
11459 #elif defined(__dietlibc__)
11460 return (void*) uc
->uc_mcontext
.eip
;
11461 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11463 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11465 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11467 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11468 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11469 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11471 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11473 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11474 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11475 #elif defined(__ia64__) /* Linux IA64 */
11476 return (void*) uc
->uc_mcontext
.sc_ip
;
11482 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11484 char **messages
= NULL
;
11485 int i
, trace_size
= 0;
11486 unsigned long offset
=0;
11487 ucontext_t
*uc
= (ucontext_t
*) secret
;
11489 REDIS_NOTUSED(info
);
11491 redisLog(REDIS_WARNING
,
11492 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11493 infostring
= genRedisInfoString();
11494 redisLog(REDIS_WARNING
, "%s",infostring
);
11495 /* It's not safe to sdsfree() the returned string under memory
11496 * corruption conditions. Let it leak as we are going to abort */
11498 trace_size
= backtrace(trace
, 100);
11499 /* overwrite sigaction with caller's address */
11500 if (getMcontextEip(uc
) != NULL
) {
11501 trace
[1] = getMcontextEip(uc
);
11503 messages
= backtrace_symbols(trace
, trace_size
);
11505 for (i
=1; i
<trace_size
; ++i
) {
11506 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11508 p
= strchr(messages
[i
],'+');
11509 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11510 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11512 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11515 /* free(messages); Don't call free() with possibly corrupted memory. */
11519 static void sigtermHandler(int sig
) {
11520 REDIS_NOTUSED(sig
);
11522 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11523 server
.shutdown_asap
= 1;
11526 static void setupSigSegvAction(void) {
11527 struct sigaction act
;
11529 sigemptyset (&act
.sa_mask
);
11530 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11531 * is used. Otherwise, sa_handler is used */
11532 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11533 act
.sa_sigaction
= segvHandler
;
11534 sigaction (SIGSEGV
, &act
, NULL
);
11535 sigaction (SIGBUS
, &act
, NULL
);
11536 sigaction (SIGFPE
, &act
, NULL
);
11537 sigaction (SIGILL
, &act
, NULL
);
11538 sigaction (SIGBUS
, &act
, NULL
);
11540 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11541 act
.sa_handler
= sigtermHandler
;
11542 sigaction (SIGTERM
, &act
, NULL
);
11546 #include "staticsymbols.h"
11547 /* This function try to convert a pointer into a function name. It's used in
11548 * oreder to provide a backtrace under segmentation fault that's able to
11549 * display functions declared as static (otherwise the backtrace is useless). */
11550 static char *findFuncName(void *pointer
, unsigned long *offset
){
11552 unsigned long off
, minoff
= 0;
11554 /* Try to match against the Symbol with the smallest offset */
11555 for (i
=0; symsTable
[i
].pointer
; i
++) {
11556 unsigned long lp
= (unsigned long) pointer
;
11558 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11559 off
=lp
-symsTable
[i
].pointer
;
11560 if (ret
< 0 || off
< minoff
) {
11566 if (ret
== -1) return NULL
;
11568 return symsTable
[ret
].name
;
11570 #else /* HAVE_BACKTRACE */
/* Without backtrace support no signal handler is installed. */
static void setupSigSegvAction(void) {
}
11573 #endif /* HAVE_BACKTRACE */