2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.10"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
64 #include "solarisfixes.h"
68 #include "ae.h" /* Event driven programming library */
69 #include "sds.h" /* Dynamic safe strings */
70 #include "anet.h" /* Networking the easy way */
71 #include "dict.h" /* Hash tables */
72 #include "adlist.h" /* Linked lists */
73 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
74 #include "lzf.h" /* LZF compression library */
75 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
82 /* Static server configuration */
83 #define REDIS_SERVERPORT 6379 /* TCP port */
84 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
85 #define REDIS_IOBUF_LEN 1024
86 #define REDIS_LOADBUF_LEN 1024
87 #define REDIS_STATIC_ARGS 8
88 #define REDIS_DEFAULT_DBNUM 16
89 #define REDIS_CONFIGLINE_MAX 1024
90 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
91 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
92 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
93 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
94 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
96 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
97 #define REDIS_WRITEV_THRESHOLD 3
98 /* Max number of iovecs used for each writev call */
99 #define REDIS_WRITEV_IOVEC_COUNT 256
101 /* Hash table parameters */
102 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
105 #define REDIS_CMD_BULK 1 /* Bulk write command */
106 #define REDIS_CMD_INLINE 2 /* Inline command */
107 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
108 this flag will return an error when the 'maxmemory' option is set in the
109 config file and the server is using more than maxmemory bytes of memory.
110 In short these commands are denied on low memory conditions. */
111 #define REDIS_CMD_DENYOOM 4
112 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
115 #define REDIS_STRING 0
121 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
122 * internally represented in multiple ways. The 'encoding' field of the object
123 * is set to one of these values for this object. */
124 #define REDIS_ENCODING_RAW 0 /* Raw representation */
125 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
126 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
127 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
129 static char* strencoding
[] = {
130 "raw", "int", "zipmap", "hashtable"
133 /* Object types only used for dumping to disk */
134 #define REDIS_EXPIRETIME 253
135 #define REDIS_SELECTDB 254
136 #define REDIS_EOF 255
138 /* Defines related to the dump file format. To store 32 bits lengths for short
139 * keys requires a lot of space, so we check the most significant 2 bits of
140 * the first byte to interpret the length:
142 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
143 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
144 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
145 * 11|000000 this means: specially encoded object will follow. The six bits
146 * number specify the kind of object that follows.
147 * See the REDIS_RDB_ENC_* defines.
149 * Lengths up to 63 are stored using a single byte, most DB keys, and many
150 * values, will fit inside. */
151 #define REDIS_RDB_6BITLEN 0
152 #define REDIS_RDB_14BITLEN 1
153 #define REDIS_RDB_32BITLEN 2
154 #define REDIS_RDB_ENCVAL 3
155 #define REDIS_RDB_LENERR UINT_MAX
157 /* When a length of a string object stored on disk has the first two bits
158 * set, the remaining six bits specify a special encoding for the object
159 * according to the following defines: */
160 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
161 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
162 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
163 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
165 /* Virtual memory object->where field. */
166 #define REDIS_VM_MEMORY 0 /* The object is on memory */
167 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
168 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
169 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
171 /* Virtual memory static configuration stuff.
172 * Check vmFindContiguousPages() to know more about these magic numbers. */
173 #define REDIS_VM_MAX_NEAR_PAGES 65536
174 #define REDIS_VM_MAX_RANDOM_JUMP 4096
175 #define REDIS_VM_MAX_THREADS 32
176 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
177 /* The following is the *percentage* of completed I/O jobs to process when the
178 * handler is called. While Virtual Memory I/O operations are performed by
179 * threads, these operations must be processed by the main thread when completed
180 * in order to take effect. */
181 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
184 #define REDIS_SLAVE 1 /* This client is a slave server */
185 #define REDIS_MASTER 2 /* This client is a master server */
186 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
187 #define REDIS_MULTI 8 /* This client is in a MULTI context */
188 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
189 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
191 /* Slave replication state - slave side */
192 #define REDIS_REPL_NONE 0 /* No active replication */
193 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
194 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
196 /* Slave replication state - from the point of view of master
197 * Note that in SEND_BULK and ONLINE state the slave receives new updates
198 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
199 * to start the next background saving in order to send updates to it. */
200 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
201 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
202 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
203 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
205 /* List related stuff */
209 /* Sort operations */
210 #define REDIS_SORT_GET 0
211 #define REDIS_SORT_ASC 1
212 #define REDIS_SORT_DESC 2
213 #define REDIS_SORTKEY_MAX 1024
216 #define REDIS_DEBUG 0
217 #define REDIS_VERBOSE 1
218 #define REDIS_NOTICE 2
219 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, marking the value
 * as deliberately unused. The argument is parenthesized so that a compound
 * expression (e.g. a + b) is cast to void as a whole instead of only its
 * first operand. */
#define REDIS_NOTUSED(V) ((void)(V))
224 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
225 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
227 /* Append only defines */
228 #define APPENDFSYNC_NO 0
229 #define APPENDFSYNC_ALWAYS 1
230 #define APPENDFSYNC_EVERYSEC 2
232 /* Hashes related defaults */
233 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
234 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way: the
 * failing expression text, file and line are captured at the call site and
 * handed to _redisAssert() / _redisPanic(), then _exit(1) is called.
 *
 * redisAssert(_e): evaluates _e once and does nothing when it is true.
 * redisPanic(_e):  unconditional; _e is only stringified with #, never
 *                  evaluated.
 *
 * Both replacement lists are fully parenthesized so that each macro expands
 * to a single void expression. Without the outer parentheses the comma
 * expression in redisPanic() would be split by the surrounding context
 * (for instance as the last operand of ?: or as a function argument). */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (PSUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of the last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to keep this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* The last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 #define REDIS_SHARED_INTEGERS 10000
509 struct sharedObjectsStruct
{
510 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
511 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
512 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
513 *outofrangeerr
, *plus
,
514 *select0
, *select1
, *select2
, *select3
, *select4
,
515 *select5
, *select6
, *select7
, *select8
, *select9
,
516 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
517 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
518 *integers
[REDIS_SHARED_INTEGERS
];
/* Global vars that are actually used as constants. The following double
 * values are used for double on-disk serialization, and are initialized
 * at runtime to avoid strange compiler optimizations. */

static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
527 /* VM threaded I/O request message */
528 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
529 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
530 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
531 typedef struct iojob
{
532 int type
; /* Request type, REDIS_IOJOB_* */
533 redisDb
*db
;/* Redis database */
534 robj
*key
; /* This I/O request is about swapping this key */
535 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
536 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
537 off_t page
; /* Swap page where to read/write the object */
538 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
539 int canceled
; /* True if this command was canceled by blocking side of VM */
540 pthread_t thread
; /* ID of the thread processing this entry */
543 /*================================ Prototypes =============================== */
545 static void freeStringObject(robj
*o
);
546 static void freeListObject(robj
*o
);
547 static void freeSetObject(robj
*o
);
548 static void decrRefCount(void *o
);
549 static robj
*createObject(int type
, void *ptr
);
550 static void freeClient(redisClient
*c
);
551 static int rdbLoad(char *filename
);
552 static void addReply(redisClient
*c
, robj
*obj
);
553 static void addReplySds(redisClient
*c
, sds s
);
554 static void incrRefCount(robj
*o
);
555 static int rdbSaveBackground(char *filename
);
556 static robj
*createStringObject(char *ptr
, size_t len
);
557 static robj
*dupStringObject(robj
*o
);
558 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
559 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
560 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
561 static int syncWithMaster(void);
562 static robj
*tryObjectEncoding(robj
*o
);
563 static robj
*getDecodedObject(robj
*o
);
564 static int removeExpire(redisDb
*db
, robj
*key
);
565 static int expireIfNeeded(redisDb
*db
, robj
*key
);
566 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
567 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
568 static int deleteKey(redisDb
*db
, robj
*key
);
569 static time_t getExpire(redisDb
*db
, robj
*key
);
570 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
571 static void updateSlavesWaitingBgsave(int bgsaveerr
);
572 static void freeMemoryIfNeeded(void);
573 static int processCommand(redisClient
*c
);
574 static void setupSigSegvAction(void);
575 static void rdbRemoveTempFile(pid_t childpid
);
576 static void aofRemoveTempFile(pid_t childpid
);
577 static size_t stringObjectLen(robj
*o
);
578 static void processInputBuffer(redisClient
*c
);
579 static zskiplist
*zslCreate(void);
580 static void zslFree(zskiplist
*zsl
);
581 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
582 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
583 static void initClientMultiState(redisClient
*c
);
584 static void freeClientMultiState(redisClient
*c
);
585 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
586 static void unblockClientWaitingData(redisClient
*c
);
587 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
588 static void vmInit(void);
589 static void vmMarkPagesFree(off_t page
, off_t count
);
590 static robj
*vmLoadObject(robj
*key
);
591 static robj
*vmPreviewObject(robj
*key
);
592 static int vmSwapOneObjectBlocking(void);
593 static int vmSwapOneObjectThreaded(void);
594 static int vmCanSwapOut(void);
595 static int tryFreeOneObjectFromFreelist(void);
596 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
597 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
598 static void vmCancelThreadedIOJob(robj
*o
);
599 static void lockThreadedIO(void);
600 static void unlockThreadedIO(void);
601 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
602 static void freeIOJob(iojob
*j
);
603 static void queueIOJob(iojob
*j
);
604 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
605 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
606 static void waitEmptyIOJobsQueue(void);
607 static void vmReopenSwapFile(void);
608 static int vmFreePage(off_t page
);
609 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
610 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
611 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
612 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
613 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
614 static struct redisCommand
*lookupCommand(char *name
);
615 static void call(redisClient
*c
, struct redisCommand
*cmd
);
616 static void resetClient(redisClient
*c
);
617 static void convertToRealHash(robj
*o
);
618 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
619 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
620 static void freePubsubPattern(void *p
);
621 static int listMatchPubsubPattern(void *a
, void *b
);
622 static int compareStringObjects(robj
*a
, robj
*b
);
624 static int rewriteAppendOnlyFileBackground(void);
626 static void authCommand(redisClient
*c
);
627 static void pingCommand(redisClient
*c
);
628 static void echoCommand(redisClient
*c
);
629 static void setCommand(redisClient
*c
);
630 static void setnxCommand(redisClient
*c
);
631 static void setexCommand(redisClient
*c
);
632 static void getCommand(redisClient
*c
);
633 static void delCommand(redisClient
*c
);
634 static void existsCommand(redisClient
*c
);
635 static void incrCommand(redisClient
*c
);
636 static void decrCommand(redisClient
*c
);
637 static void incrbyCommand(redisClient
*c
);
638 static void decrbyCommand(redisClient
*c
);
639 static void selectCommand(redisClient
*c
);
640 static void randomkeyCommand(redisClient
*c
);
641 static void keysCommand(redisClient
*c
);
642 static void dbsizeCommand(redisClient
*c
);
643 static void lastsaveCommand(redisClient
*c
);
644 static void saveCommand(redisClient
*c
);
645 static void bgsaveCommand(redisClient
*c
);
646 static void bgrewriteaofCommand(redisClient
*c
);
647 static void shutdownCommand(redisClient
*c
);
648 static void moveCommand(redisClient
*c
);
649 static void renameCommand(redisClient
*c
);
650 static void renamenxCommand(redisClient
*c
);
651 static void lpushCommand(redisClient
*c
);
652 static void rpushCommand(redisClient
*c
);
653 static void lpopCommand(redisClient
*c
);
654 static void rpopCommand(redisClient
*c
);
655 static void llenCommand(redisClient
*c
);
656 static void lindexCommand(redisClient
*c
);
657 static void lrangeCommand(redisClient
*c
);
658 static void ltrimCommand(redisClient
*c
);
659 static void typeCommand(redisClient
*c
);
660 static void lsetCommand(redisClient
*c
);
661 static void saddCommand(redisClient
*c
);
662 static void sremCommand(redisClient
*c
);
663 static void smoveCommand(redisClient
*c
);
664 static void sismemberCommand(redisClient
*c
);
665 static void scardCommand(redisClient
*c
);
666 static void spopCommand(redisClient
*c
);
667 static void srandmemberCommand(redisClient
*c
);
668 static void sinterCommand(redisClient
*c
);
669 static void sinterstoreCommand(redisClient
*c
);
670 static void sunionCommand(redisClient
*c
);
671 static void sunionstoreCommand(redisClient
*c
);
672 static void sdiffCommand(redisClient
*c
);
673 static void sdiffstoreCommand(redisClient
*c
);
674 static void syncCommand(redisClient
*c
);
675 static void flushdbCommand(redisClient
*c
);
676 static void flushallCommand(redisClient
*c
);
677 static void sortCommand(redisClient
*c
);
678 static void lremCommand(redisClient
*c
);
679 static void rpoplpushcommand(redisClient
*c
);
680 static void infoCommand(redisClient
*c
);
681 static void mgetCommand(redisClient
*c
);
682 static void monitorCommand(redisClient
*c
);
683 static void expireCommand(redisClient
*c
);
684 static void expireatCommand(redisClient
*c
);
685 static void getsetCommand(redisClient
*c
);
686 static void ttlCommand(redisClient
*c
);
687 static void slaveofCommand(redisClient
*c
);
688 static void debugCommand(redisClient
*c
);
689 static void msetCommand(redisClient
*c
);
690 static void msetnxCommand(redisClient
*c
);
691 static void zaddCommand(redisClient
*c
);
692 static void zincrbyCommand(redisClient
*c
);
693 static void zrangeCommand(redisClient
*c
);
694 static void zrangebyscoreCommand(redisClient
*c
);
695 static void zcountCommand(redisClient
*c
);
696 static void zrevrangeCommand(redisClient
*c
);
697 static void zcardCommand(redisClient
*c
);
698 static void zremCommand(redisClient
*c
);
699 static void zscoreCommand(redisClient
*c
);
700 static void zremrangebyscoreCommand(redisClient
*c
);
701 static void multiCommand(redisClient
*c
);
702 static void execCommand(redisClient
*c
);
703 static void discardCommand(redisClient
*c
);
704 static void blpopCommand(redisClient
*c
);
705 static void brpopCommand(redisClient
*c
);
706 static void appendCommand(redisClient
*c
);
707 static void substrCommand(redisClient
*c
);
708 static void zrankCommand(redisClient
*c
);
709 static void zrevrankCommand(redisClient
*c
);
710 static void hsetCommand(redisClient
*c
);
711 static void hsetnxCommand(redisClient
*c
);
712 static void hgetCommand(redisClient
*c
);
713 static void hmsetCommand(redisClient
*c
);
714 static void hmgetCommand(redisClient
*c
);
715 static void hdelCommand(redisClient
*c
);
716 static void hlenCommand(redisClient
*c
);
717 static void zremrangebyrankCommand(redisClient
*c
);
718 static void zunionCommand(redisClient
*c
);
719 static void zinterCommand(redisClient
*c
);
720 static void hkeysCommand(redisClient
*c
);
721 static void hvalsCommand(redisClient
*c
);
722 static void hgetallCommand(redisClient
*c
);
723 static void hexistsCommand(redisClient
*c
);
724 static void configCommand(redisClient
*c
);
725 static void hincrbyCommand(redisClient
*c
);
726 static void subscribeCommand(redisClient
*c
);
727 static void unsubscribeCommand(redisClient
*c
);
728 static void psubscribeCommand(redisClient
*c
);
729 static void punsubscribeCommand(redisClient
*c
);
730 static void publishCommand(redisClient
*c
);
732 /*================================= Globals ================================= */
735 static struct redisServer server
; /* server global state */
736 static struct redisCommand cmdTable
[] = {
737 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
738 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
739 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
740 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
741 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
742 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
743 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
744 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
746 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
747 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
748 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
749 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
750 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
754 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
757 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
758 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
759 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
760 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
761 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
762 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
763 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
764 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
765 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
766 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
767 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
768 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
769 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
770 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
771 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
772 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
773 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
774 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
775 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
776 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
777 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
778 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
781 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
782 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
783 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
784 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
786 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
787 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
788 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
789 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
791 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
792 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
793 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
794 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
795 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
797 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
800 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
801 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
802 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
803 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
804 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
805 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
806 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
807 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
810 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
811 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
812 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
819 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
825 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
827 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
832 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
835 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
838 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
840 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
841 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
842 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
843 {NULL
,NULL
,0,0,NULL
,0,0,0}
846 /*============================ Utility functions ============================ */
848 /* Glob-style pattern matching. */
849 static int stringmatchlen(const char *pattern
, int patternLen
,
850 const char *string
, int stringLen
, int nocase
)
855 while (pattern
[1] == '*') {
860 return 1; /* match */
862 if (stringmatchlen(pattern
+1, patternLen
-1,
863 string
, stringLen
, nocase
))
864 return 1; /* match */
868 return 0; /* no match */
872 return 0; /* no match */
882 not = pattern
[0] == '^';
889 if (pattern
[0] == '\\') {
892 if (pattern
[0] == string
[0])
894 } else if (pattern
[0] == ']') {
896 } else if (patternLen
== 0) {
900 } else if (pattern
[1] == '-' && patternLen
>= 3) {
901 int start
= pattern
[0];
902 int end
= pattern
[2];
910 start
= tolower(start
);
916 if (c
>= start
&& c
<= end
)
920 if (pattern
[0] == string
[0])
923 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
933 return 0; /* no match */
939 if (patternLen
>= 2) {
946 if (pattern
[0] != string
[0])
947 return 0; /* no match */
949 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
950 return 0; /* no match */
958 if (stringLen
== 0) {
959 while(*pattern
== '*') {
966 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match for NUL-terminated strings: forwards to stringmatchlen()
 * with both lengths computed via strlen() and nocase passed through
 * unchanged. The return value is whatever stringmatchlen() returns. */
971 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
972 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
975 /* Convert a string representing an amount of memory into the number of
976 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
979 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
981 static long long memtoll(const char *p
, int *err
) {
984 long mul
; /* unit multiplier */
989 /* Search the first non digit character. */
992 while(*u
&& isdigit(*u
)) u
++;
993 if (*u
== '\0' || !strcasecmp(u
,"b")) {
995 } else if (!strcasecmp(u
,"k")) {
997 } else if (!strcasecmp(u
,"kb")) {
999 } else if (!strcasecmp(u
,"m")) {
1001 } else if (!strcasecmp(u
,"mb")) {
1003 } else if (!strcasecmp(u
,"g")) {
1004 mul
= 1000L*1000*1000;
1005 } else if (!strcasecmp(u
,"gb")) {
1006 mul
= 1024L*1024*1024;
1012 if (digits
>= sizeof(buf
)) {
1016 memcpy(buf
,p
,digits
);
1018 val
= strtoll(buf
,NULL
,10);
/* printf-style logger.
 *
 * Output goes to stdout when server.logfile is NULL; otherwise the log file
 * is opened in append mode for this call and closed again at the end.
 * Only messages with level >= server.verbosity are written. Each message is
 * prefixed with the process id, a local-time timestamp and a marker
 * character selected by c[level].
 *
 * NOTE(review): the handling of a failed fopen() is not visible in this
 * chunk -- confirm fp is checked before it is used. */
1022 static void redisLog(int level
, const char *fmt
, ...) {
1026 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1030 if (level
>= server
.verbosity
) {
1036 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1037 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1038 vfprintf(fp
, fmt
, ap
);
/* stdout must stay open: only close the stream when it is a log file. */
1044 if (server
.logfile
) fclose(fp
);
1047 /*====================== Hash table type implementation ==================== */
1049 /* This is a hash table type that uses the SDS dynamic strings library as
1050 * keys and redis objects as values (objects can hold SDS strings,
1053 static void dictVanillaFree(void *privdata
, void *val
)
1055 DICT_NOTUSED(privdata
);
/* Destructor callback for dictionary entries whose payload is a list:
 * the list is released with listRelease(). privdata is unused. */
1059 static void dictListDestructor(void *privdata
, void *val
)
1061 DICT_NOTUSED(privdata
);
1062 listRelease((list
*)val
);
/* Key comparison callback for sds keys.
 *
 * Returns 1 when both keys have the same sdslen() and identical bytes,
 * 0 otherwise. privdata is unused. */
1065 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1069 DICT_NOTUSED(privdata
);
1071 l1
= sdslen((sds
)key1
);
1072 l2
= sdslen((sds
)key2
);
/* Keys of different length can never be equal: skip the memcmp(). */
1073 if (l1
!= l2
) return 0;
1074 return memcmp(key1
, key2
, l1
) == 0;
1077 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1079 DICT_NOTUSED(privdata
);
1081 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
/* Key comparison callback for robj keys: the ptr field of both objects is
 * handed to sdsDictKeyCompare(), so it is treated as an sds string here.
 * Returns whatever sdsDictKeyCompare() returns. */
1085 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1088 const robj
*o1
= key1
, *o2
= key2
;
1089 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for robj keys: hashes the sdslen() bytes of the sds string
 * stored in the object's ptr field with dictGenHashFunction(). */
1092 static unsigned int dictObjHash(const void *key
) {
1093 const robj
*o
= key
;
1094 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1097 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1100 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1103 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1104 o2
->encoding
== REDIS_ENCODING_INT
&&
1105 o1
->ptr
== o2
->ptr
) return 1;
1107 o1
= getDecodedObject(o1
);
1108 o2
= getDecodedObject(o2
);
1109 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1115 static unsigned int dictEncObjHash(const void *key
) {
1116 robj
*o
= (robj
*) key
;
1118 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1119 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1121 if (o
->encoding
== REDIS_ENCODING_INT
) {
1125 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1126 return dictGenHashFunction((unsigned char*)buf
, len
);
1130 o
= getDecodedObject(o
);
1131 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1138 /* Sets type and expires */
1139 static dictType setDictType
= {
1140 dictEncObjHash
, /* hash function */
1143 dictEncObjKeyCompare
, /* key compare */
1144 dictRedisObjectDestructor
, /* key destructor */
1145 NULL
/* val destructor */
1148 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1149 static dictType zsetDictType
= {
1150 dictEncObjHash
, /* hash function */
1153 dictEncObjKeyCompare
, /* key compare */
1154 dictRedisObjectDestructor
, /* key destructor */
1155 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1159 static dictType dbDictType
= {
1160 dictObjHash
, /* hash function */
1163 dictObjKeyCompare
, /* key compare */
1164 dictRedisObjectDestructor
, /* key destructor */
1165 dictRedisObjectDestructor
/* val destructor */
1169 static dictType keyptrDictType
= {
1170 dictObjHash
, /* hash function */
1173 dictObjKeyCompare
, /* key compare */
1174 dictRedisObjectDestructor
, /* key destructor */
1175 NULL
/* val destructor */
1178 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1179 static dictType hashDictType
= {
1180 dictEncObjHash
, /* hash function */
1183 dictEncObjKeyCompare
, /* key compare */
1184 dictRedisObjectDestructor
, /* key destructor */
1185 dictRedisObjectDestructor
/* val destructor */
1188 /* Keylist hash table type has unencoded redis objects as keys and
1189 * lists as values. It's used for blocking operations (BLPOP) and to
1190 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1191 static dictType keylistDictType
= {
1192 dictObjHash
, /* hash function */
1195 dictObjKeyCompare
, /* key compare */
1196 dictRedisObjectDestructor
, /* key destructor */
1197 dictListDestructor
/* val destructor */
1200 static void version();
1202 /* ========================= Random utility functions ======================= */
1204 /* Redis generally does not try to recover from out of memory conditions
1205 * when allocating objects or strings, it is not clear if it will be possible
1206 * to report this condition to the client since the networking layer itself
1207 * is based on heap allocation for send buffers, so we simply abort.
1208 * At least the code will be simpler to read... */
1209 static void oom(const char *msg
) {
1210 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1215 /* ====================== Redis server networking stuff ===================== */
/* Walk the list of connected clients and handle the ones that timed out.
 *
 * A client whose last interaction is older than server.maxidletime is logged
 * as idle, unless idle timeouts are disabled (maxidletime == 0) or the client
 * is a slave, a master, or has pubsub channel/pattern subscriptions.
 * NOTE(review): the statement that actually closes the idle client is on a
 * line not visible in this chunk.
 *
 * Otherwise, a blocked client whose blockingto deadline has passed is sent a
 * null multi-bulk reply and unblocked. */
1216 static void closeTimedoutClients(void) {
1219 time_t now
= time(NULL
);
1222 listRewind(server
.clients
,&li
);
1223 while ((ln
= listNext(&li
)) != NULL
) {
1224 c
= listNodeValue(ln
);
1225 if (server
.maxidletime
&&
1226 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1227 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1228 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1229 listLength(c
->pubsub_patterns
) == 0 &&
1230 (now
- c
->lastinteraction
> server
.maxidletime
))
1232 redisLog(REDIS_VERBOSE
,"Closing idle client");
1234 } else if (c
->flags
& REDIS_BLOCKED
) {
/* A blockingto of 0 is never treated as expired. */
1235 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1236 addReply(c
,shared
.nullmultibulk
);
1237 unblockClientWaitingData(c
);
/* Return non-zero when the hash table is worth resizing: it has at least one
 * slot and one element, more than DICT_HT_INITIAL_SIZE slots, and its fill
 * percentage (used*100/size) is below REDIS_HT_MINFILL. */
1243 static int htNeedsResize(dict
*dict
) {
1244 long long size
, used
;
1246 size
= dictSlots(dict
);
1247 used
= dictSize(dict
);
1248 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1249 (used
*100/size
< REDIS_HT_MINFILL
));
1252 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1253 * we resize the hash table to save memory */
1254 static void tryResizeHashTables(void) {
1257 for (j
= 0; j
< server
.dbnum
; j
++) {
1258 if (htNeedsResize(server
.db
[j
].dict
))
1259 dictResize(server
.db
[j
].dict
);
1260 if (htNeedsResize(server
.db
[j
].expires
))
1261 dictResize(server
.db
[j
].expires
);
1265 /* Our hash table implementation performs rehashing incrementally while
1266 * we write/read from the hash table. Still if the server is idle, the hash
1267 * table will use two tables for a long time. So we try to use 1 millisecond
1268 * of CPU time at every serverCron() loop in order to rehash some key. */
1269 static void incrementallyRehash(void) {
1272 for (j
= 0; j
< server
.dbnum
; j
++) {
1273 if (dictIsRehashing(server
.db
[j
].dict
)) {
1274 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1275 break; /* already used our millisecond for this loop... */
1280 /* A background saving child (BGSAVE) terminated its work. Handle this. */
/* statloc is the wait status of the child. Three outcomes are logged:
 * a clean exit (server.lastsave is refreshed), a non-zero exit status, or
 * termination by a signal (followed by rdbRemoveTempFile() for the child
 * pid). bgsavechildpid is then reset to -1 and the slaves waiting for this
 * BGSAVE are notified of the result. */
1281 void backgroundSaveDoneHandler(int statloc
) {
1282 int exitcode
= WEXITSTATUS(statloc
);
1283 int bysignal
= WIFSIGNALED(statloc
);
1285 if (!bysignal
&& exitcode
== 0) {
1286 redisLog(REDIS_NOTICE
,
1287 "Background saving terminated with success");
1289 server
.lastsave
= time(NULL
);
1290 } else if (!bysignal
&& exitcode
!= 0) {
1291 redisLog(REDIS_WARNING
, "Background saving error");
1293 redisLog(REDIS_WARNING
,
1294 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1295 rdbRemoveTempFile(server
.bgsavechildpid
);
1297 server
.bgsavechildpid
= -1;
1298 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1299 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
/* NOTE(review): the status passed below depends on exitcode only and
 * ignores bysignal. WEXITSTATUS() is only meaningful for a child that
 * exited normally, so a child killed by a signal could presumably still be
 * reported to the slaves as REDIS_OK. Consider testing
 * (!bysignal && exitcode == 0) instead -- confirm against callers. */
1300 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1303 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1305 void backgroundRewriteDoneHandler(int statloc
) {
1306 int exitcode
= WEXITSTATUS(statloc
);
1307 int bysignal
= WIFSIGNALED(statloc
);
1309 if (!bysignal
&& exitcode
== 0) {
1313 redisLog(REDIS_NOTICE
,
1314 "Background append only file rewriting terminated with success");
1315 /* Now it's time to flush the differences accumulated by the parent */
1316 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1317 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1319 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1322 /* Flush our data... */
1323 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1324 (signed) sdslen(server
.bgrewritebuf
)) {
1325 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1329 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1330 /* Now our work is to rename the temp file into the stable file. And
1331 * switch the file descriptor used by the server for append only. */
1332 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1333 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1337 /* Mission completed... almost */
1338 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1339 if (server
.appendfd
!= -1) {
1340 /* If append only is actually enabled... */
1341 close(server
.appendfd
);
1342 server
.appendfd
= fd
;
1344 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1345 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1347 /* If append only is disabled we just generate a dump in this
1348 * format. Why not? */
1351 } else if (!bysignal
&& exitcode
!= 0) {
1352 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1354 redisLog(REDIS_WARNING
,
1355 "Background append only file rewriting terminated by signal %d",
1359 sdsfree(server
.bgrewritebuf
);
1360 server
.bgrewritebuf
= sdsempty();
1361 aofRemoveTempFile(server
.bgrewritechildpid
);
1362 server
.bgrewritechildpid
= -1;
1365 /* This function is called once a background process of some kind terminates,
1366 * as we want to avoid resizing the hash tables when there is a child in order
1367 * to play well with copy-on-write (otherwise when a resize happens lots of
1368 * memory pages are copied). The goal of this function is to update the ability
1369 * for dict.c to resize the hash tables according to whether or not we have
1370 * running children. */
1371 static void updateDictResizePolicy(void) {
1372 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1375 dictDisableResize();
1378 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1379 int j
, loops
= server
.cronloops
++;
1380 REDIS_NOTUSED(eventLoop
);
1382 REDIS_NOTUSED(clientData
);
1384 /* We take a cached value of the unix time in the global state because
1385 * with virtual memory and aging there is the need to store the current time
1386 * in objects at every object access, and accuracy is not needed.
1387 * To access a global var is faster than calling time(NULL) */
1388 server
.unixtime
= time(NULL
);
1390 /* Show some info about non-empty databases */
1391 for (j
= 0; j
< server
.dbnum
; j
++) {
1392 long long size
, used
, vkeys
;
1394 size
= dictSlots(server
.db
[j
].dict
);
1395 used
= dictSize(server
.db
[j
].dict
);
1396 vkeys
= dictSize(server
.db
[j
].expires
);
1397 if (!(loops
% 50) && (used
|| vkeys
)) {
1398 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1399 /* dictPrintStats(server.dict); */
1403 /* We don't want to resize the hash tables while a background saving
1404 * is in progress: the saving child is created using fork() that is
1405 * implemented with a copy-on-write semantic in most modern systems, so
1406 * if we resize the HT while there is the saving child at work actually
1407 * a lot of memory movements in the parent will cause a lot of pages
1409 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1410 if (!(loops
% 10)) tryResizeHashTables();
1411 if (server
.activerehashing
) incrementallyRehash();
1414 /* Show information about connected clients */
1415 if (!(loops
% 50)) {
1416 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1417 listLength(server
.clients
)-listLength(server
.slaves
),
1418 listLength(server
.slaves
),
1419 zmalloc_used_memory());
1422 /* Close connections of timedout clients */
1423 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1424 closeTimedoutClients();
1426 /* Check if a background saving or AOF rewrite in progress terminated */
1427 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1431 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1432 if (pid
== server
.bgsavechildpid
) {
1433 backgroundSaveDoneHandler(statloc
);
1435 backgroundRewriteDoneHandler(statloc
);
1437 updateDictResizePolicy();
1440 /* If there is not a background saving in progress check if
1441 * we have to save now */
1442 time_t now
= time(NULL
);
1443 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1444 struct saveparam
*sp
= server
.saveparams
+j
;
1446 if (server
.dirty
>= sp
->changes
&&
1447 now
-server
.lastsave
> sp
->seconds
) {
1448 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1449 sp
->changes
, sp
->seconds
);
1450 rdbSaveBackground(server
.dbfilename
);
1456 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1457 * will use few CPU cycles if there are few expiring keys, otherwise
1458 * it will get more aggressive to avoid that too much memory is used by
1459 * keys that can be removed from the keyspace. */
1460 for (j
= 0; j
< server
.dbnum
; j
++) {
1462 redisDb
*db
= server
.db
+j
;
1464 /* Continue to expire if at the end of the cycle more than 25%
1465 * of the keys were expired. */
1467 long num
= dictSize(db
->expires
);
1468 time_t now
= time(NULL
);
1471 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1472 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1477 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1478 t
= (time_t) dictGetEntryVal(de
);
1480 deleteKey(db
,dictGetEntryKey(de
));
1482 server
.stat_expiredkeys
++;
1485 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1488 /* Swap a few keys on disk if we are over the memory limit and VM
1489 * is enabled. Try to free objects from the free list first. */
1490 if (vmCanSwapOut()) {
1491 while (server
.vm_enabled
&& zmalloc_used_memory() >
1492 server
.vm_max_memory
)
1496 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1497 retval
= (server
.vm_max_threads
== 0) ?
1498 vmSwapOneObjectBlocking() :
1499 vmSwapOneObjectThreaded();
1500 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1501 zmalloc_used_memory() >
1502 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1504 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1506 /* Note that when using threaded I/O we free just one object,
1507 * because anyway when the I/O thread in charge to swap this
1508 * object out will finish, the handler of completed jobs
1509 * will try to swap more objects if we are still out of memory. */
1510 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1514 /* Check if we should connect to a MASTER */
1515 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1516 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1517 if (syncWithMaster() == REDIS_OK
) {
1518 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1519 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1525 /* This function gets called every time Redis is entering the
1526 * main loop of the event driven library, that is, before sleeping
1527 * for ready file descriptors. */
1528 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1529 REDIS_NOTUSED(eventLoop
);
1531 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1535 listRewind(server
.io_ready_clients
,&li
);
1536 while((ln
= listNext(&li
))) {
1537 redisClient
*c
= ln
->value
;
1538 struct redisCommand
*cmd
;
1540 /* Resume the client. */
1541 listDelNode(server
.io_ready_clients
,ln
);
1542 c
->flags
&= (~REDIS_IO_WAIT
);
1543 server
.vm_blocked_clients
--;
1544 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1545 readQueryFromClient
, c
);
1546 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1547 assert(cmd
!= NULL
);
1550 /* There may be more data to process in the input buffer. */
1551 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1552 processInputBuffer(c
);
1557 static void createSharedObjects(void) {
1560 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1561 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1562 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1563 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1564 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1565 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1566 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1567 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1568 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1569 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1570 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1571 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1572 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1573 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1574 "-ERR no such key\r\n"));
1575 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1576 "-ERR syntax error\r\n"));
1577 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1578 "-ERR source and destination objects are the same\r\n"));
1579 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1580 "-ERR index out of range\r\n"));
1581 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1582 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1583 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1584 shared
.select0
= createStringObject("select 0\r\n",10);
1585 shared
.select1
= createStringObject("select 1\r\n",10);
1586 shared
.select2
= createStringObject("select 2\r\n",10);
1587 shared
.select3
= createStringObject("select 3\r\n",10);
1588 shared
.select4
= createStringObject("select 4\r\n",10);
1589 shared
.select5
= createStringObject("select 5\r\n",10);
1590 shared
.select6
= createStringObject("select 6\r\n",10);
1591 shared
.select7
= createStringObject("select 7\r\n",10);
1592 shared
.select8
= createStringObject("select 8\r\n",10);
1593 shared
.select9
= createStringObject("select 9\r\n",10);
1594 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1595 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1596 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1597 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1598 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1599 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1600 shared
.mbulk3
= createStringObject("*3\r\n",4);
1601 shared
.mbulk4
= createStringObject("*4\r\n",4);
1602 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1603 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1604 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Append one save point to server.saveparams.
 *
 * The array is grown by one element with zrealloc(), the new last element
 * receives the given seconds/changes pair, and server.saveparamslen is
 * incremented. */
1608 static void appendServerSaveParams(time_t seconds
, int changes
) {
1609 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1610 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1611 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1612 server
.saveparamslen
++;
/* Drop every configured save point: the saveparams array is freed and the
 * pointer/length pair is reset to NULL/0, so later appends start from an
 * empty array. */
1615 static void resetServerSaveParams() {
1616 zfree(server
.saveparams
);
1617 server
.saveparams
= NULL
;
1618 server
.saveparamslen
= 0;
1621 static void initServerConfig() {
1622 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1623 server
.port
= REDIS_SERVERPORT
;
1624 server
.verbosity
= REDIS_VERBOSE
;
1625 server
.maxidletime
= REDIS_MAXIDLETIME
;
1626 server
.saveparams
= NULL
;
1627 server
.logfile
= NULL
; /* NULL = log on standard output */
1628 server
.bindaddr
= NULL
;
1629 server
.glueoutputbuf
= 1;
1630 server
.daemonize
= 0;
1631 server
.appendonly
= 0;
1632 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1633 server
.lastfsync
= time(NULL
);
1634 server
.appendfd
= -1;
1635 server
.appendseldb
= -1; /* Make sure the first time will not match */
1636 server
.pidfile
= zstrdup("/var/run/redis.pid");
1637 server
.dbfilename
= zstrdup("dump.rdb");
1638 server
.appendfilename
= zstrdup("appendonly.aof");
1639 server
.requirepass
= NULL
;
1640 server
.rdbcompression
= 1;
1641 server
.activerehashing
= 1;
1642 server
.maxclients
= 0;
1643 server
.blpop_blocked_clients
= 0;
1644 server
.maxmemory
= 0;
1645 server
.vm_enabled
= 0;
1646 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1647 server
.vm_page_size
= 256; /* 256 bytes per page */
1648 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1649 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1650 server
.vm_max_threads
= 4;
1651 server
.vm_blocked_clients
= 0;
1652 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1653 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1655 resetServerSaveParams();
1657 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1658 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1659 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1660 /* Replication related */
1662 server
.masterauth
= NULL
;
1663 server
.masterhost
= NULL
;
1664 server
.masterport
= 6379;
1665 server
.master
= NULL
;
1666 server
.replstate
= REDIS_REPL_NONE
;
1668 /* Double constants initialization */
1670 R_PosInf
= 1.0/R_Zero
;
1671 R_NegInf
= -1.0/R_Zero
;
1672 R_Nan
= R_Zero
/R_Zero
;
/* Build the runtime state of the server from the configuration already
 * stored in the global `server` structure: signal dispositions, the
 * /dev/null stream, client/slave/monitor lists, shared objects, the event
 * loop, the listening TCP socket, the per-database dictionaries, pubsub
 * structures, counters, the serverCron timer, the accept handler and, when
 * enabled, the append only file and the virtual memory subsystem. */
1675 static void initServer() {
/* SIGHUP and SIGPIPE are ignored. */
1678 signal(SIGHUP
, SIG_IGN
);
1679 signal(SIGPIPE
, SIG_IGN
);
1680 setupSigSegvAction();
1682 server
.devnull
= fopen("/dev/null","w");
1683 if (server
.devnull
== NULL
) {
/* NOTE(review): server.neterr appears to be the error buffer filled in by
 * the anet calls (it is passed to anetTcpServer() further down), and
 * fopen() does not write to it. The reason for this failure is presumably
 * in errno, so strerror(errno) looks like the intended argument. */
1684 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1687 server
.clients
= listCreate();
1688 server
.slaves
= listCreate();
1689 server
.monitors
= listCreate();
1690 server
.objfreelist
= listCreate();
1691 createSharedObjects();
1692 server
.el
= aeCreateEventLoop();
1693 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1694 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1695 if (server
.fd
== -1) {
1696 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* Every database gets its own key space, expires table and blocking-keys
 * table; the io_keys table is only created when VM is enabled. */
1699 for (j
= 0; j
< server
.dbnum
; j
++) {
1700 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1701 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1702 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1703 if (server
.vm_enabled
)
1704 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1705 server
.db
[j
].id
= j
;
1707 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1708 server
.pubsub_patterns
= listCreate();
1709 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1710 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
/* -1 in the child pid fields means that no background child is running. */
1711 server
.cronloops
= 0;
1712 server
.bgsavechildpid
= -1;
1713 server
.bgrewritechildpid
= -1;
1714 server
.bgrewritebuf
= sdsempty();
1715 server
.lastsave
= time(NULL
);
1717 server
.stat_numcommands
= 0;
1718 server
.stat_numconnections
= 0;
1719 server
.stat_expiredkeys
= 0;
1720 server
.stat_starttime
= time(NULL
);
1721 server
.unixtime
= time(NULL
);
/* Register serverCron as a time event and acceptHandler as the readable
 * handler of the listening socket. */
1722 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1723 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1724 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
/* With append only mode on, open the AOF for appending, creating it with
 * mode 0644 if it does not exist yet. */
1726 if (server
.appendonly
) {
1727 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1728 if (server
.appendfd
== -1) {
1729 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1735 if (server
.vm_enabled
) vmInit();
1738 /* Empty the whole database */
/* For each of the server.dbnum databases both the key space and the expires
 * table are emptied with dictEmpty(). `removed` accumulates the number of
 * keys that were in the key spaces before they were emptied; presumably this
 * is the return value, but the return statement is not visible in this
 * chunk -- TODO confirm. */
1739 static long long emptyDb() {
1741 long long removed
= 0;
1743 for (j
= 0; j
< server
.dbnum
; j
++) {
1744 removed
+= dictSize(server
.db
[j
].dict
);
1745 dictEmpty(server
.db
[j
].dict
);
1746 dictEmpty(server
.db
[j
].expires
);
/* Case-insensitive mapping of the string "yes" to 1 and "no" to 0.
 * NOTE(review): the result for any other string is on a line that is not
 * visible in this chunk. */
1751 static int yesnotoi(char *s
) {
1752 if (!strcasecmp(s
,"yes")) return 1;
1753 else if (!strcasecmp(s
,"no")) return 0;
1757 /* I agree, this is a very rudimentary way to load a configuration...
1758 will improve later if the config gets more complex */
1759 static void loadServerConfig(char *filename
) {
1761 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1765 if (filename
[0] == '-' && filename
[1] == '\0')
1768 if ((fp
= fopen(filename
,"r")) == NULL
) {
1769 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1774 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1780 line
= sdstrim(line
," \t\r\n");
1782 /* Skip comments and blank lines*/
1783 if (line
[0] == '#' || line
[0] == '\0') {
1788 /* Split into arguments */
1789 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1790 sdstolower(argv
[0]);
1792 /* Execute config directives */
1793 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1794 server
.maxidletime
= atoi(argv
[1]);
1795 if (server
.maxidletime
< 0) {
1796 err
= "Invalid timeout value"; goto loaderr
;
1798 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1799 server
.port
= atoi(argv
[1]);
1800 if (server
.port
< 1 || server
.port
> 65535) {
1801 err
= "Invalid port"; goto loaderr
;
1803 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1804 server
.bindaddr
= zstrdup(argv
[1]);
1805 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1806 int seconds
= atoi(argv
[1]);
1807 int changes
= atoi(argv
[2]);
1808 if (seconds
< 1 || changes
< 0) {
1809 err
= "Invalid save parameters"; goto loaderr
;
1811 appendServerSaveParams(seconds
,changes
);
1812 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1813 if (chdir(argv
[1]) == -1) {
1814 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1815 argv
[1], strerror(errno
));
1818 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1819 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1820 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1821 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1822 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1824 err
= "Invalid log level. Must be one of debug, notice, warning";
1827 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1830 server
.logfile
= zstrdup(argv
[1]);
1831 if (!strcasecmp(server
.logfile
,"stdout")) {
1832 zfree(server
.logfile
);
1833 server
.logfile
= NULL
;
1835 if (server
.logfile
) {
1836 /* Test if we are able to open the file. The server will not
1837 * be able to abort just for this problem later... */
1838 logfp
= fopen(server
.logfile
,"a");
1839 if (logfp
== NULL
) {
1840 err
= sdscatprintf(sdsempty(),
1841 "Can't open the log file: %s", strerror(errno
));
1846 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1847 server
.dbnum
= atoi(argv
[1]);
1848 if (server
.dbnum
< 1) {
1849 err
= "Invalid number of databases"; goto loaderr
;
1851 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1852 loadServerConfig(argv
[1]);
1853 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1854 server
.maxclients
= atoi(argv
[1]);
1855 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1856 server
.maxmemory
= memtoll(argv
[1],NULL
);
1857 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1858 server
.masterhost
= sdsnew(argv
[1]);
1859 server
.masterport
= atoi(argv
[2]);
1860 server
.replstate
= REDIS_REPL_CONNECT
;
1861 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1862 server
.masterauth
= zstrdup(argv
[1]);
1863 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1864 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1865 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1867 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1868 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1869 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1871 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1872 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1873 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1875 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1876 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1877 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1879 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1880 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1881 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1883 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1884 if (!strcasecmp(argv
[1],"no")) {
1885 server
.appendfsync
= APPENDFSYNC_NO
;
1886 } else if (!strcasecmp(argv
[1],"always")) {
1887 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1888 } else if (!strcasecmp(argv
[1],"everysec")) {
1889 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1891 err
= "argument must be 'no', 'always' or 'everysec'";
1894 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1895 server
.requirepass
= zstrdup(argv
[1]);
1896 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1897 zfree(server
.pidfile
);
1898 server
.pidfile
= zstrdup(argv
[1]);
1899 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1900 zfree(server
.dbfilename
);
1901 server
.dbfilename
= zstrdup(argv
[1]);
1902 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1903 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1904 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1906 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1907 zfree(server
.vm_swap_file
);
1908 server
.vm_swap_file
= zstrdup(argv
[1]);
1909 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1910 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1911 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1912 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1913 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1914 server
.vm_pages
= memtoll(argv
[1], NULL
);
1915 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1916 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1917 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1918 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1919 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1920 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1922 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1924 for (j
= 0; j
< argc
; j
++)
1929 if (fp
!= stdin
) fclose(fp
);
1933 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1934 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1935 fprintf(stderr
, ">>> '%s'\n", line
);
1936 fprintf(stderr
, "%s\n", err
);
1940 static void freeClientArgv(redisClient
*c
) {
1943 for (j
= 0; j
< c
->argc
; j
++)
1944 decrRefCount(c
->argv
[j
]);
1945 for (j
= 0; j
< c
->mbargc
; j
++)
1946 decrRefCount(c
->mbargv
[j
]);
1951 static void freeClient(redisClient
*c
) {
1954 /* Note that if the client we are freeing is blocked into a blocking
1955 * call, we have to set querybuf to NULL *before* to call
1956 * unblockClientWaitingData() to avoid processInputBuffer() will get
1957 * called. Also it is important to remove the file events after
1958 * this, because this call adds the READABLE event. */
1959 sdsfree(c
->querybuf
);
1961 if (c
->flags
& REDIS_BLOCKED
)
1962 unblockClientWaitingData(c
);
1964 /* Unsubscribe from all the pubsub channels */
1965 pubsubUnsubscribeAllChannels(c
,0);
1966 pubsubUnsubscribeAllPatterns(c
,0);
1967 dictRelease(c
->pubsub_channels
);
1968 listRelease(c
->pubsub_patterns
);
1969 /* Obvious cleanup */
1970 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1971 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1972 listRelease(c
->reply
);
1975 /* Remove from the list of clients */
1976 ln
= listSearchKey(server
.clients
,c
);
1977 redisAssert(ln
!= NULL
);
1978 listDelNode(server
.clients
,ln
);
1979 /* Remove from the list of clients waiting for swapped keys */
1980 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1981 ln
= listSearchKey(server
.io_ready_clients
,c
);
1983 listDelNode(server
.io_ready_clients
,ln
);
1984 server
.vm_blocked_clients
--;
1987 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1988 ln
= listFirst(c
->io_keys
);
1989 dontWaitForSwappedKey(c
,ln
->value
);
1991 listRelease(c
->io_keys
);
1992 /* Master/slave cleanup */
1993 if (c
->flags
& REDIS_SLAVE
) {
1994 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1996 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1997 ln
= listSearchKey(l
,c
);
1998 redisAssert(ln
!= NULL
);
2001 if (c
->flags
& REDIS_MASTER
) {
2002 server
.master
= NULL
;
2003 server
.replstate
= REDIS_REPL_CONNECT
;
2005 /* Release memory */
2008 freeClientMultiState(c
);
2012 #define GLUEREPLY_UP_TO (1024)
2013 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2015 char buf
[GLUEREPLY_UP_TO
];
2020 listRewind(c
->reply
,&li
);
2021 while((ln
= listNext(&li
))) {
2025 objlen
= sdslen(o
->ptr
);
2026 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2027 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2029 listDelNode(c
->reply
,ln
);
2031 if (copylen
== 0) return;
2035 /* Now the output buffer is empty, add the new single element */
2036 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2037 listAddNodeHead(c
->reply
,o
);
2040 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2041 redisClient
*c
= privdata
;
2042 int nwritten
= 0, totwritten
= 0, objlen
;
2045 REDIS_NOTUSED(mask
);
2047 /* Use writev() if we have enough buffers to send */
2048 if (!server
.glueoutputbuf
&&
2049 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2050 !(c
->flags
& REDIS_MASTER
))
2052 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2056 while(listLength(c
->reply
)) {
2057 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2058 glueReplyBuffersIfNeeded(c
);
2060 o
= listNodeValue(listFirst(c
->reply
));
2061 objlen
= sdslen(o
->ptr
);
2064 listDelNode(c
->reply
,listFirst(c
->reply
));
2068 if (c
->flags
& REDIS_MASTER
) {
2069 /* Don't reply to a master */
2070 nwritten
= objlen
- c
->sentlen
;
2072 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2073 if (nwritten
<= 0) break;
2075 c
->sentlen
+= nwritten
;
2076 totwritten
+= nwritten
;
2077 /* If we fully sent the object on head go to the next one */
2078 if (c
->sentlen
== objlen
) {
2079 listDelNode(c
->reply
,listFirst(c
->reply
));
2082 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2083 * bytes, in a single threaded server it's a good idea to serve
2084 * other clients as well, even if a very large request comes from
2085 * super fast link that is always able to accept data (in real world
2086 * scenario think about 'KEYS *' against the loopback interface) */
2087 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2089 if (nwritten
== -1) {
2090 if (errno
== EAGAIN
) {
2093 redisLog(REDIS_VERBOSE
,
2094 "Error writing to client: %s", strerror(errno
));
2099 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2100 if (listLength(c
->reply
) == 0) {
2102 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2106 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2108 redisClient
*c
= privdata
;
2109 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2111 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2112 int offset
, ion
= 0;
2114 REDIS_NOTUSED(mask
);
2117 while (listLength(c
->reply
)) {
2118 offset
= c
->sentlen
;
2122 /* fill-in the iov[] array */
2123 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2124 o
= listNodeValue(node
);
2125 objlen
= sdslen(o
->ptr
);
2127 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2130 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2131 break; /* no more iovecs */
2133 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2134 iov
[ion
].iov_len
= objlen
- offset
;
2135 willwrite
+= objlen
- offset
;
2136 offset
= 0; /* just for the first item */
2143 /* write all collected blocks at once */
2144 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2145 if (errno
!= EAGAIN
) {
2146 redisLog(REDIS_VERBOSE
,
2147 "Error writing to client: %s", strerror(errno
));
2154 totwritten
+= nwritten
;
2155 offset
= c
->sentlen
;
2157 /* remove written robjs from c->reply */
2158 while (nwritten
&& listLength(c
->reply
)) {
2159 o
= listNodeValue(listFirst(c
->reply
));
2160 objlen
= sdslen(o
->ptr
);
2162 if(nwritten
>= objlen
- offset
) {
2163 listDelNode(c
->reply
, listFirst(c
->reply
));
2164 nwritten
-= objlen
- offset
;
2168 c
->sentlen
+= nwritten
;
2176 c
->lastinteraction
= time(NULL
);
2178 if (listLength(c
->reply
) == 0) {
2180 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2184 static struct redisCommand
*lookupCommand(char *name
) {
2186 while(cmdTable
[j
].name
!= NULL
) {
2187 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2193 /* resetClient prepares the client to process the next command */
2194 static void resetClient(redisClient
*c
) {
2200 /* Call() is the core of Redis execution of a command */
2201 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2204 dirty
= server
.dirty
;
2206 dirty
= server
.dirty
-dirty
;
2208 if (server
.appendonly
&& dirty
)
2209 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2210 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2211 listLength(server
.slaves
))
2212 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2213 if (listLength(server
.monitors
))
2214 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2215 server
.stat_numcommands
++;
2218 /* If this function gets called we already read a whole
2219 * command, arguments are in the client argv/argc fields.
2220 * processCommand() executes the command or prepares the
2221 * server for a bulk read from the client.
2223 * If 1 is returned the client is still alive and valid and
2224 * other operations can be performed by the caller. Otherwise
2225 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2226 static int processCommand(redisClient
*c
) {
2227 struct redisCommand
*cmd
;
2229 /* Free some memory if needed (maxmemory setting) */
2230 if (server
.maxmemory
) freeMemoryIfNeeded();
2232 /* Handle the multi bulk command type. This is an alternative protocol
2233 * supported by Redis in order to receive commands that are composed of
2234 * multiple binary-safe "bulk" arguments. The latency of processing is
2235 * a bit higher but this allows things like multi-sets, so if this
2236 * protocol is used only for MSET and similar commands this is a big win. */
2237 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2238 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2239 if (c
->multibulk
<= 0) {
2243 decrRefCount(c
->argv
[c
->argc
-1]);
2247 } else if (c
->multibulk
) {
2248 if (c
->bulklen
== -1) {
2249 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2250 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2254 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2255 decrRefCount(c
->argv
[0]);
2256 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2258 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2263 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2267 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2268 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2272 if (c
->multibulk
== 0) {
2276 /* Here we need to swap the multi-bulk argc/argv with the
2277 * normal argc/argv of the client structure. */
2279 c
->argv
= c
->mbargv
;
2280 c
->mbargv
= auxargv
;
2283 c
->argc
= c
->mbargc
;
2284 c
->mbargc
= auxargc
;
2286 /* We need to set bulklen to something different than -1
2287 * in order for the code below to process the command without
2288 * to try to read the last argument of a bulk command as
2289 * a special argument. */
2291 /* continue below and process the command */
2298 /* -- end of multi bulk commands processing -- */
2300 /* The QUIT command is handled as a special case. Normal command
2301 * procs are unable to close the client connection safely */
2302 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2307 /* Now lookup the command and check ASAP about trivial error conditions
2308 * such wrong arity, bad command name and so forth. */
2309 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2312 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2313 (char*)c
->argv
[0]->ptr
));
2316 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2317 (c
->argc
< -cmd
->arity
)) {
2319 sdscatprintf(sdsempty(),
2320 "-ERR wrong number of arguments for '%s' command\r\n",
2324 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2325 /* This is a bulk command, we have to read the last argument yet. */
2326 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2328 decrRefCount(c
->argv
[c
->argc
-1]);
2329 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2331 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2336 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2337 /* It is possible that the bulk read is already in the
2338 * buffer. Check this condition and handle it accordingly.
2339 * This is just a fast path, alternative to call processInputBuffer().
2340 * It's a good idea since the code is small and this condition
2341 * happens most of the times. */
2342 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2343 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2345 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2347 /* Otherwise return... there is to read the last argument
2348 * from the socket. */
2352 /* Let's try to encode the bulk object to save space. */
2353 if (cmd
->flags
& REDIS_CMD_BULK
)
2354 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2356 /* Check if the user is authenticated */
2357 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2358 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2363 /* Handle the maxmemory directive */
2364 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2365 zmalloc_used_memory() > server
.maxmemory
)
2367 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2372 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2373 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2375 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2376 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2377 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2382 /* Exec the command */
2383 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2384 queueMultiCommand(c
,cmd
);
2385 addReply(c
,shared
.queued
);
2387 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2388 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2392 /* Prepare the client for the next command */
2397 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2402 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2403 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2404 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2405 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2408 if (argc
<= REDIS_STATIC_ARGS
) {
2411 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2414 lenobj
= createObject(REDIS_STRING
,
2415 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2416 lenobj
->refcount
= 0;
2417 outv
[outc
++] = lenobj
;
2418 for (j
= 0; j
< argc
; j
++) {
2419 lenobj
= createObject(REDIS_STRING
,
2420 sdscatprintf(sdsempty(),"$%lu\r\n",
2421 (unsigned long) stringObjectLen(argv
[j
])));
2422 lenobj
->refcount
= 0;
2423 outv
[outc
++] = lenobj
;
2424 outv
[outc
++] = argv
[j
];
2425 outv
[outc
++] = shared
.crlf
;
2428 /* Increment all the refcounts at start and decrement at end in order to
2429 * be sure to free objects if there is no slave in a replication state
2430 * able to be feed with commands */
2431 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2432 listRewind(slaves
,&li
);
2433 while((ln
= listNext(&li
))) {
2434 redisClient
*slave
= ln
->value
;
2436 /* Don't feed slaves that are still waiting for BGSAVE to start */
2437 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2439 /* Feed all the other slaves, MONITORs and so on */
2440 if (slave
->slaveseldb
!= dictid
) {
2444 case 0: selectcmd
= shared
.select0
; break;
2445 case 1: selectcmd
= shared
.select1
; break;
2446 case 2: selectcmd
= shared
.select2
; break;
2447 case 3: selectcmd
= shared
.select3
; break;
2448 case 4: selectcmd
= shared
.select4
; break;
2449 case 5: selectcmd
= shared
.select5
; break;
2450 case 6: selectcmd
= shared
.select6
; break;
2451 case 7: selectcmd
= shared
.select7
; break;
2452 case 8: selectcmd
= shared
.select8
; break;
2453 case 9: selectcmd
= shared
.select9
; break;
2455 selectcmd
= createObject(REDIS_STRING
,
2456 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2457 selectcmd
->refcount
= 0;
2460 addReply(slave
,selectcmd
);
2461 slave
->slaveseldb
= dictid
;
2463 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2465 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2466 if (outv
!= static_outv
) zfree(outv
);
2469 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2470 s
= sdscatlen(s
,"\"",1);
2475 s
= sdscatprintf(s
,"\\%c",*p
);
2477 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2478 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2479 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2480 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2481 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2484 s
= sdscatprintf(s
,"%c",*p
);
2486 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2491 return sdscatlen(s
,"\"",1);
2494 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2498 sds cmdrepr
= sdsnew("+");
2502 gettimeofday(&tv
,NULL
);
2503 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2504 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2506 for (j
= 0; j
< argc
; j
++) {
2507 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2508 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2510 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2511 sdslen(argv
[j
]->ptr
));
2514 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2516 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2517 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2519 listRewind(monitors
,&li
);
2520 while((ln
= listNext(&li
))) {
2521 redisClient
*monitor
= ln
->value
;
2522 addReply(monitor
,cmdobj
);
2524 decrRefCount(cmdobj
);
2527 static void processInputBuffer(redisClient
*c
) {
2529 /* Before processing the input buffer, make sure the client is not
2530 * waiting for a blocking operation such as BLPOP. Note that on the first
2531 * iteration the client is never blocked, otherwise the processInputBuffer
2532 * would not be called at all, but after the execution of the first commands
2533 * in the input buffer the client may be blocked, and the "goto again"
2534 * will try to reiterate. The following line will make it return asap. */
2535 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2536 if (c
->bulklen
== -1) {
2537 /* Read the first line of the query */
2538 char *p
= strchr(c
->querybuf
,'\n');
2545 query
= c
->querybuf
;
2546 c
->querybuf
= sdsempty();
2547 querylen
= 1+(p
-(query
));
2548 if (sdslen(query
) > querylen
) {
2549 /* leave data after the first line of the query in the buffer */
2550 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2552 *p
= '\0'; /* remove "\n" */
2553 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2554 sdsupdatelen(query
);
2556 /* Now we can split the query in arguments */
2557 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2560 if (c
->argv
) zfree(c
->argv
);
2561 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2563 for (j
= 0; j
< argc
; j
++) {
2564 if (sdslen(argv
[j
])) {
2565 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2573 /* Execute the command. If the client is still valid
2574 * after processCommand() return and there is something
2575 * on the query buffer try to process the next command. */
2576 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2578 /* Nothing to process, argc == 0. Just process the query
2579 * buffer if it's not empty or return to the caller */
2580 if (sdslen(c
->querybuf
)) goto again
;
2583 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2584 redisLog(REDIS_VERBOSE
, "Client protocol error");
2589 /* Bulk read handling. Note that if we are at this point
2590 the client already sent a command terminated with a newline,
2591 we are reading the bulk data that is actually the last
2592 argument of the command. */
2593 int qbl
= sdslen(c
->querybuf
);
2595 if (c
->bulklen
<= qbl
) {
2596 /* Copy everything but the final CRLF as final argument */
2597 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2599 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2600 /* Process the command. If the client is still valid after
2601 * the processing and there is more data in the buffer
2602 * try to parse it. */
2603 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2609 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2610 redisClient
*c
= (redisClient
*) privdata
;
2611 char buf
[REDIS_IOBUF_LEN
];
2614 REDIS_NOTUSED(mask
);
2616 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2618 if (errno
== EAGAIN
) {
2621 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2625 } else if (nread
== 0) {
2626 redisLog(REDIS_VERBOSE
, "Client closed connection");
2631 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2632 c
->lastinteraction
= time(NULL
);
2636 processInputBuffer(c
);
2639 static int selectDb(redisClient
*c
, int id
) {
2640 if (id
< 0 || id
>= server
.dbnum
)
2642 c
->db
= &server
.db
[id
];
2646 static void *dupClientReplyValue(void *o
) {
2647 incrRefCount((robj
*)o
);
2651 static int listMatchObjects(void *a
, void *b
) {
2652 return compareStringObjects(a
,b
) == 0;
2655 static redisClient
*createClient(int fd
) {
2656 redisClient
*c
= zmalloc(sizeof(*c
));
2658 anetNonBlock(NULL
,fd
);
2659 anetTcpNoDelay(NULL
,fd
);
2660 if (!c
) return NULL
;
2663 c
->querybuf
= sdsempty();
2672 c
->lastinteraction
= time(NULL
);
2673 c
->authenticated
= 0;
2674 c
->replstate
= REDIS_REPL_NONE
;
2675 c
->reply
= listCreate();
2676 listSetFreeMethod(c
->reply
,decrRefCount
);
2677 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2678 c
->blockingkeys
= NULL
;
2679 c
->blockingkeysnum
= 0;
2680 c
->io_keys
= listCreate();
2681 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2682 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2683 c
->pubsub_patterns
= listCreate();
2684 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2685 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2686 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2687 readQueryFromClient
, c
) == AE_ERR
) {
2691 listAddNodeTail(server
.clients
,c
);
2692 initClientMultiState(c
);
2696 static void addReply(redisClient
*c
, robj
*obj
) {
2697 if (listLength(c
->reply
) == 0 &&
2698 (c
->replstate
== REDIS_REPL_NONE
||
2699 c
->replstate
== REDIS_REPL_ONLINE
) &&
2700 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2701 sendReplyToClient
, c
) == AE_ERR
) return;
2703 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2704 obj
= dupStringObject(obj
);
2705 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2707 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2710 static void addReplySds(redisClient
*c
, sds s
) {
2711 robj
*o
= createObject(REDIS_STRING
,s
);
2716 static void addReplyDouble(redisClient
*c
, double d
) {
2719 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2720 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2721 (unsigned long) strlen(buf
),buf
));
2724 static void addReplyLong(redisClient
*c
, long l
) {
2729 addReply(c
,shared
.czero
);
2731 } else if (l
== 1) {
2732 addReply(c
,shared
.cone
);
2735 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2736 addReplySds(c
,sdsnewlen(buf
,len
));
2739 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2744 addReply(c
,shared
.czero
);
2746 } else if (ll
== 1) {
2747 addReply(c
,shared
.cone
);
2750 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2751 addReplySds(c
,sdsnewlen(buf
,len
));
2754 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2759 addReply(c
,shared
.czero
);
2761 } else if (ul
== 1) {
2762 addReply(c
,shared
.cone
);
2765 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2766 addReplySds(c
,sdsnewlen(buf
,len
));
2769 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2772 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2773 len
= sdslen(obj
->ptr
);
2775 long n
= (long)obj
->ptr
;
2777 /* Compute how many bytes will take this integer as a radix 10 string */
2783 while((n
= n
/10) != 0) {
2787 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2790 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2791 addReplyBulkLen(c
,obj
);
2793 addReply(c
,shared
.crlf
);
2796 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2797 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2799 addReply(c
,shared
.nullbulk
);
2801 robj
*o
= createStringObject(s
,strlen(s
));
2807 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2812 REDIS_NOTUSED(mask
);
2813 REDIS_NOTUSED(privdata
);
2815 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2816 if (cfd
== AE_ERR
) {
2817 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2820 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2821 if ((c
= createClient(cfd
)) == NULL
) {
2822 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2823 close(cfd
); /* May be already closed, just ingore errors */
2826 /* If maxclient directive is set and this is one client more... close the
2827 * connection. Note that we create the client instead to check before
2828 * for this condition, since now the socket is already set in nonblocking
2829 * mode and we can send an error for free using the Kernel I/O */
2830 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2831 char *err
= "-ERR max number of clients reached\r\n";
2833 /* That's a best effort error message, don't check write errors */
2834 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2835 /* Nothing to do, Just to avoid the warning... */
2840 server
.stat_numconnections
++;
2843 /* ======================= Redis objects implementation ===================== */
2845 static robj
*createObject(int type
, void *ptr
) {
2848 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2849 if (listLength(server
.objfreelist
)) {
2850 listNode
*head
= listFirst(server
.objfreelist
);
2851 o
= listNodeValue(head
);
2852 listDelNode(server
.objfreelist
,head
);
2853 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2855 if (server
.vm_enabled
) {
2856 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2857 o
= zmalloc(sizeof(*o
));
2859 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2863 o
->encoding
= REDIS_ENCODING_RAW
;
2866 if (server
.vm_enabled
) {
2867 /* Note that this code may run in the context of an I/O thread
2868 * and accessing to server.unixtime in theory is an error
2869 * (no locks). But in practice this is safe, and even if we read
2870 * garbage Redis will not fail, as it's just a statistical info */
2871 o
->vm
.atime
= server
.unixtime
;
2872 o
->storage
= REDIS_VM_MEMORY
;
2877 static robj
*createStringObject(char *ptr
, size_t len
) {
2878 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2881 static robj
*createStringObjectFromLongLong(long long value
) {
2883 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2884 incrRefCount(shared
.integers
[value
]);
2885 o
= shared
.integers
[value
];
2887 o
= createObject(REDIS_STRING
, NULL
);
2888 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2889 o
->encoding
= REDIS_ENCODING_INT
;
2890 o
->ptr
= (void*)((long)value
);
2892 o
->ptr
= sdscatprintf(sdsempty(),"%lld",value
);
2898 static robj
*dupStringObject(robj
*o
) {
2899 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2900 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2903 static robj
*createListObject(void) {
2904 list
*l
= listCreate();
2906 listSetFreeMethod(l
,decrRefCount
);
2907 return createObject(REDIS_LIST
,l
);
2910 static robj
*createSetObject(void) {
2911 dict
*d
= dictCreate(&setDictType
,NULL
);
2912 return createObject(REDIS_SET
,d
);
2915 static robj
*createHashObject(void) {
2916 /* All the Hashes start as zipmaps. Will be automatically converted
2917 * into hash tables if there are enough elements or big elements
2919 unsigned char *zm
= zipmapNew();
2920 robj
*o
= createObject(REDIS_HASH
,zm
);
2921 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2925 static robj
*createZsetObject(void) {
2926 zset
*zs
= zmalloc(sizeof(*zs
));
2928 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2929 zs
->zsl
= zslCreate();
2930 return createObject(REDIS_ZSET
,zs
);
2933 static void freeStringObject(robj
*o
) {
2934 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2939 static void freeListObject(robj
*o
) {
2940 listRelease((list
*) o
->ptr
);
2943 static void freeSetObject(robj
*o
) {
2944 dictRelease((dict
*) o
->ptr
);
2947 static void freeZsetObject(robj
*o
) {
2950 dictRelease(zs
->dict
);
2955 static void freeHashObject(robj
*o
) {
2956 switch (o
->encoding
) {
2957 case REDIS_ENCODING_HT
:
2958 dictRelease((dict
*) o
->ptr
);
2960 case REDIS_ENCODING_ZIPMAP
:
2964 redisPanic("Unknown hash encoding type");
2969 static void incrRefCount(robj
*o
) {
2973 static void decrRefCount(void *obj
) {
2976 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
2977 /* Object is a key of a swapped out value, or in the process of being
2979 if (server
.vm_enabled
&&
2980 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2982 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2983 redisAssert(o
->type
== REDIS_STRING
);
2984 freeStringObject(o
);
2985 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2986 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2987 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2988 !listAddNodeHead(server
.objfreelist
,o
))
2990 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2991 server
.vm_stats_swapped_objects
--;
2994 /* Object is in memory, or in the process of being swapped out. */
2995 if (--(o
->refcount
) == 0) {
2996 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2997 vmCancelThreadedIOJob(obj
);
2999 case REDIS_STRING
: freeStringObject(o
); break;
3000 case REDIS_LIST
: freeListObject(o
); break;
3001 case REDIS_SET
: freeSetObject(o
); break;
3002 case REDIS_ZSET
: freeZsetObject(o
); break;
3003 case REDIS_HASH
: freeHashObject(o
); break;
3004 default: redisPanic("Unknown object type"); break;
3006 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3007 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3008 !listAddNodeHead(server
.objfreelist
,o
))
3010 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3014 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3015 dictEntry
*de
= dictFind(db
->dict
,key
);
3017 robj
*key
= dictGetEntryKey(de
);
3018 robj
*val
= dictGetEntryVal(de
);
3020 if (server
.vm_enabled
) {
3021 if (key
->storage
== REDIS_VM_MEMORY
||
3022 key
->storage
== REDIS_VM_SWAPPING
)
3024 /* If we were swapping the object out, stop it, this key
3026 if (key
->storage
== REDIS_VM_SWAPPING
)
3027 vmCancelThreadedIOJob(key
);
3028 /* Update the access time of the key for the aging algorithm. */
3029 key
->vm
.atime
= server
.unixtime
;
3031 int notify
= (key
->storage
== REDIS_VM_LOADING
);
3033 /* Our value was swapped on disk. Bring it at home. */
3034 redisAssert(val
== NULL
);
3035 val
= vmLoadObject(key
);
3036 dictGetEntryVal(de
) = val
;
3038 /* Clients blocked by the VM subsystem may be waiting for
3040 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3049 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3050 expireIfNeeded(db
,key
);
3051 return lookupKey(db
,key
);
3054 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3055 deleteIfVolatile(db
,key
);
3056 return lookupKey(db
,key
);
3059 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3060 robj
*o
= lookupKeyRead(c
->db
, key
);
3061 if (!o
) addReply(c
,reply
);
3065 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3066 robj
*o
= lookupKeyWrite(c
->db
, key
);
3067 if (!o
) addReply(c
,reply
);
3071 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3072 if (o
->type
!= type
) {
3073 addReply(c
,shared
.wrongtypeerr
);
3079 static int deleteKey(redisDb
*db
, robj
*key
) {
3082 /* We need to protect key from destruction: after the first dictDelete()
3083 * it may happen that 'key' is no longer valid if we don't increment
3084 * it's count. This may happen when we get the object reference directly
3085 * from the hash table with dictRandomKey() or dict iterators */
3087 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3088 retval
= dictDelete(db
->dict
,key
);
3091 return retval
== DICT_OK
;
3094 /* Check if the nul-terminated string 's' can be represented by a long
3095 * (that is, is a number that fits into long without any other space or
3096 * character before or after the digits).
3098 * If so, the function returns REDIS_OK and *longval is set to the value
3099 * of the number. Otherwise REDIS_ERR is returned */
3100 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3101 char buf
[32], *endptr
;
3105 value
= strtol(s
, &endptr
, 10);
3106 if (endptr
[0] != '\0') return REDIS_ERR
;
3107 slen
= snprintf(buf
,32,"%ld",value
);
3109 /* If the number converted back into a string is not identical
3110 * then it's not possible to encode the string as integer */
3111 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3112 if (longval
) *longval
= value
;
3116 /* Try to encode a string object in order to save space */
3117 static robj
*tryObjectEncoding(robj
*o
) {
3121 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3122 return o
; /* Already encoded */
3124 /* It's not safe to encode shared objects: shared objects can be shared
3125 * everywhere in the "object space" of Redis. Encoded objects can only
3126 * appear as "values" (and not, for instance, as keys) */
3127 if (o
->refcount
> 1) return o
;
3129 /* Currently we try to encode only strings */
3130 redisAssert(o
->type
== REDIS_STRING
);
3132 /* Check if we can represent this string as a long integer */
3133 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3135 /* Ok, this object can be encoded */
3136 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3138 incrRefCount(shared
.integers
[value
]);
3139 return shared
.integers
[value
];
3141 o
->encoding
= REDIS_ENCODING_INT
;
3143 o
->ptr
= (void*) value
;
3148 /* Get a decoded version of an encoded object (returned as a new object).
3149 * If the object is already raw-encoded just increment the ref count. */
3150 static robj
*getDecodedObject(robj
*o
) {
3153 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3157 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3160 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3161 dec
= createStringObject(buf
,strlen(buf
));
3164 redisPanic("Unknown encoding type");
3168 /* Compare two string objects via strcmp() or alike.
3169 * Note that the objects may be integer-encoded. In such a case we
3170 * use snprintf() to get a string representation of the numbers on the stack
3171 * and compare the strings, it's much faster than calling getDecodedObject().
3173 * Important note: if objects are not integer encoded, but binary-safe strings,
3174 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3176 static int compareStringObjects(robj
*a
, robj
*b
) {
3177 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3178 char bufa
[128], bufb
[128], *astr
, *bstr
;
3181 if (a
== b
) return 0;
3182 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3183 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3189 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3190 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3196 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3199 static size_t stringObjectLen(robj
*o
) {
3200 redisAssert(o
->type
== REDIS_STRING
);
3201 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3202 return sdslen(o
->ptr
);
3206 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3210 static int getDoubleFromObject(robj
*o
, double *target
) {
3217 redisAssert(o
->type
== REDIS_STRING
);
3218 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3219 value
= strtod(o
->ptr
, &eptr
);
3220 if (eptr
[0] != '\0') return REDIS_ERR
;
3221 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3222 value
= (long)o
->ptr
;
3224 redisAssert(1 != 1);
3232 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3234 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3236 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3238 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3247 static int getLongLongFromObject(robj
*o
, long long *target
) {
3254 redisAssert(o
->type
== REDIS_STRING
);
3255 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3256 value
= strtoll(o
->ptr
, &eptr
, 10);
3257 if (eptr
[0] != '\0') return REDIS_ERR
;
3258 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3259 value
= (long)o
->ptr
;
3261 redisAssert(1 != 1);
3269 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3271 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3273 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3275 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3284 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3287 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3288 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3290 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3292 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3301 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte value: the time is converted to
 * int32_t and its in-memory bytes are written as they are.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;
    size_t written = fwrite(&seconds,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3314 /* check rdbLoadLen() comments for more info */
3315 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3316 unsigned char buf
[2];
3319 /* Save a 6 bit len */
3320 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3321 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3322 } else if (len
< (1<<14)) {
3323 /* Save a 14 bit len */
3324 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3326 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3328 /* Save a 32 bit len */
3329 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3330 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3332 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3337 /* String objects in the form "2391" "-100" without any space and with a
3338 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3339 * encoded as integers to save space */
3340 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3342 char *endptr
, buf
[32];
3344 /* Check if it's possible to encode this value as a number */
3345 value
= strtoll(s
, &endptr
, 10);
3346 if (endptr
[0] != '\0') return 0;
3347 snprintf(buf
,32,"%lld",value
);
3349 /* If the number converted back into a string is not identical
3350 * then it's not possible to encode the string as integer */
3351 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3353 /* Finally check if it fits in our ranges */
3354 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3355 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3356 enc
[1] = value
&0xFF;
3358 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3359 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3360 enc
[1] = value
&0xFF;
3361 enc
[2] = (value
>>8)&0xFF;
3363 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3364 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3365 enc
[1] = value
&0xFF;
3366 enc
[2] = (value
>>8)&0xFF;
3367 enc
[3] = (value
>>16)&0xFF;
3368 enc
[4] = (value
>>24)&0xFF;
3375 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3376 size_t comprlen
, outlen
;
3380 /* We require at least four bytes compression for this to be worth it */
3381 if (len
<= 4) return 0;
3383 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3384 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3385 if (comprlen
== 0) {
3389 /* Data compressed! Let's save it on disk */
3390 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3391 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3392 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3393 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3394 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
/* Save a string object as [len][data] on disk. If the object is a string
 * representation of an integer value we try to save it in a special form */
3405 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3408 /* Try integer encoding */
3410 unsigned char buf
[5];
3411 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3412 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3417 /* Try LZF compression - under 20 bytes it's unable to compress even
3418 * aaaaaaaaaaaaaaaaaa so skip it */
3419 if (server
.rdbcompression
&& len
> 20) {
3422 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3423 if (retval
== -1) return -1;
3424 if (retval
> 0) return 0;
3425 /* retval == 0 means data can't be compressed, save the old way */
3428 /* Store verbatim */
3429 if (rdbSaveLen(fp
,len
) == -1) return -1;
3430 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
/* Like rdbSaveRawString() but handle encoded objects */
3435 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3438 /* Avoid incr/decr ref count business when possible.
3439 * This plays well with copy-on-write given that we are probably
3440 * in a child process (BGSAVE). Also this makes sure key objects
3441 * of swapped objects are not incRefCount-ed (an assert does not allow
3442 * this in order to avoid bugs) */
3443 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3444 obj
= getDecodedObject(obj
);
3445 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3448 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3453 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3454 * 8 bit integer specifing the length of the representation.
3455 * This 8 bit integer has special values in order to specify the following
3461 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3462 unsigned char buf
[128];
3468 } else if (!isfinite(val
)) {
3470 buf
[0] = (val
< 0) ? 255 : 254;
3472 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3473 buf
[0] = strlen((char*)buf
+1);
3476 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3480 /* Save a Redis object. */
3481 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3482 if (o
->type
== REDIS_STRING
) {
3483 /* Save a string value */
3484 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3485 } else if (o
->type
== REDIS_LIST
) {
3486 /* Save a list value */
3487 list
*list
= o
->ptr
;
3491 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3492 listRewind(list
,&li
);
3493 while((ln
= listNext(&li
))) {
3494 robj
*eleobj
= listNodeValue(ln
);
3496 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3498 } else if (o
->type
== REDIS_SET
) {
3499 /* Save a set value */
3501 dictIterator
*di
= dictGetIterator(set
);
3504 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3505 while((de
= dictNext(di
)) != NULL
) {
3506 robj
*eleobj
= dictGetEntryKey(de
);
3508 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3510 dictReleaseIterator(di
);
3511 } else if (o
->type
== REDIS_ZSET
) {
3512 /* Save a set value */
3514 dictIterator
*di
= dictGetIterator(zs
->dict
);
3517 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3518 while((de
= dictNext(di
)) != NULL
) {
3519 robj
*eleobj
= dictGetEntryKey(de
);
3520 double *score
= dictGetEntryVal(de
);
3522 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3523 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3525 dictReleaseIterator(di
);
3526 } else if (o
->type
== REDIS_HASH
) {
3527 /* Save a hash value */
3528 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3529 unsigned char *p
= zipmapRewind(o
->ptr
);
3530 unsigned int count
= zipmapLen(o
->ptr
);
3531 unsigned char *key
, *val
;
3532 unsigned int klen
, vlen
;
3534 if (rdbSaveLen(fp
,count
) == -1) return -1;
3535 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3536 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3537 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3540 dictIterator
*di
= dictGetIterator(o
->ptr
);
3543 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3544 while((de
= dictNext(di
)) != NULL
) {
3545 robj
*key
= dictGetEntryKey(de
);
3546 robj
*val
= dictGetEntryVal(de
);
3548 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3549 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3551 dictReleaseIterator(di
);
3554 redisPanic("Unknown object type");
3559 /* Return the length the object will have on disk if saved with
3560 * the rdbSaveObject() function. Currently we use a trick to get
3561 * this length with very little changes to the code. In the future
3562 * we could switch to a faster solution. */
3563 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3564 if (fp
== NULL
) fp
= server
.devnull
;
3566 assert(rdbSaveObject(fp
,o
) != 1);
3570 /* Return the number of pages required to save this object in the swap file */
3571 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3572 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3574 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3577 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3578 static int rdbSave(char *filename
) {
3579 dictIterator
*di
= NULL
;
3584 time_t now
= time(NULL
);
3586 /* Wait for I/O therads to terminate, just in case this is a
3587 * foreground-saving, to avoid seeking the swap file descriptor at the
3589 if (server
.vm_enabled
)
3590 waitEmptyIOJobsQueue();
3592 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3593 fp
= fopen(tmpfile
,"w");
3595 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3598 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3599 for (j
= 0; j
< server
.dbnum
; j
++) {
3600 redisDb
*db
= server
.db
+j
;
3602 if (dictSize(d
) == 0) continue;
3603 di
= dictGetIterator(d
);
3609 /* Write the SELECT DB opcode */
3610 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3611 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3613 /* Iterate this DB writing every entry */
3614 while((de
= dictNext(di
)) != NULL
) {
3615 robj
*key
= dictGetEntryKey(de
);
3616 robj
*o
= dictGetEntryVal(de
);
3617 time_t expiretime
= getExpire(db
,key
);
3619 /* Save the expire time */
3620 if (expiretime
!= -1) {
3621 /* If this key is already expired skip it */
3622 if (expiretime
< now
) continue;
3623 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3624 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3626 /* Save the key and associated value. This requires special
3627 * handling if the value is swapped out. */
3628 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3629 key
->storage
== REDIS_VM_SWAPPING
) {
3630 /* Save type, key, value */
3631 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3632 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3633 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3635 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3637 /* Get a preview of the object in memory */
3638 po
= vmPreviewObject(key
);
3639 /* Save type, key, value */
3640 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3641 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3642 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3643 /* Remove the loaded object from memory */
3647 dictReleaseIterator(di
);
3650 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3652 /* Make sure data will not remain on the OS's output buffers */
3657 /* Use RENAME to make sure the DB file is changed atomically only
3658 * if the generate DB file is ok. */
3659 if (rename(tmpfile
,filename
) == -1) {
3660 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3664 redisLog(REDIS_NOTICE
,"DB saved on disk");
3666 server
.lastsave
= time(NULL
);
3672 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3673 if (di
) dictReleaseIterator(di
);
3677 static int rdbSaveBackground(char *filename
) {
3680 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3681 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3682 if ((childpid
= fork()) == 0) {
3684 if (server
.vm_enabled
) vmReopenSwapFile();
3686 if (rdbSave(filename
) == REDIS_OK
) {
3693 if (childpid
== -1) {
3694 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3698 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3699 server
.bgsavechildpid
= childpid
;
3700 updateDictResizePolicy();
3703 return REDIS_OK
; /* unreached */
/* Remove the temporary file "temp-<pid>.rdb" associated with the saving
 * child whose pid is 'childpid'. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one byte type from 'fp'.
 * Returns the byte value (0-255), or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;
    size_t got = fread(&byte,1,1,fp);

    if (got == 0) return -1;
    return byte;
}
/* Read a 4 byte time value from 'fp' into an int32_t and return it as a
 * time_t. Returns -1 if the four bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;
    size_t got = fread(&seconds,4,1,fp);

    if (got == 0) return -1;
    return (time_t) seconds;
}
/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
 * of this file for a description of how these are stored on disk.
 *
 * isencoded is set to 1 if the value read is not actually a length but
 * an "encoding type", check the above comments for more info */
3730 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3731 unsigned char buf
[2];
3735 if (isencoded
) *isencoded
= 0;
3736 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3737 type
= (buf
[0]&0xC0)>>6;
3738 if (type
== REDIS_RDB_6BITLEN
) {
3739 /* Read a 6 bit len */
3741 } else if (type
== REDIS_RDB_ENCVAL
) {
3742 /* Read a 6 bit len encoding type */
3743 if (isencoded
) *isencoded
= 1;
3745 } else if (type
== REDIS_RDB_14BITLEN
) {
3746 /* Read a 14 bit len */
3747 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3748 return ((buf
[0]&0x3F)<<8)|buf
[1];
3750 /* Read a 32 bit len */
3751 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3756 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3757 unsigned char enc
[4];
3760 if (enctype
== REDIS_RDB_ENC_INT8
) {
3761 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3762 val
= (signed char)enc
[0];
3763 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3765 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3766 v
= enc
[0]|(enc
[1]<<8);
3768 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3770 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3771 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3774 val
= 0; /* anti-warning */
3775 redisPanic("Unknown RDB integer encoding type");
3777 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3780 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3781 unsigned int len
, clen
;
3782 unsigned char *c
= NULL
;
3785 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3786 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3787 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3788 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3789 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3790 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3792 return createObject(REDIS_STRING
,val
);
3799 static robj
*rdbLoadStringObject(FILE*fp
) {
3804 len
= rdbLoadLen(fp
,&isencoded
);
3807 case REDIS_RDB_ENC_INT8
:
3808 case REDIS_RDB_ENC_INT16
:
3809 case REDIS_RDB_ENC_INT32
:
3810 return rdbLoadIntegerObject(fp
,len
);
3811 case REDIS_RDB_ENC_LZF
:
3812 return rdbLoadLzfStringObject(fp
);
3814 redisPanic("Unknown RDB encoding type");
3818 if (len
== REDIS_RDB_LENERR
) return NULL
;
3819 val
= sdsnewlen(NULL
,len
);
3820 if (len
&& fread(val
,len
,1,fp
) == 0) {
3824 return createObject(REDIS_STRING
,val
);
3827 /* For information about double serialization check rdbSaveDoubleValue() */
3828 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3832 if (fread(&len
,1,1,fp
) == 0) return -1;
3834 case 255: *val
= R_NegInf
; return 0;
3835 case 254: *val
= R_PosInf
; return 0;
3836 case 253: *val
= R_Nan
; return 0;
3838 if (fread(buf
,len
,1,fp
) == 0) return -1;
3840 sscanf(buf
, "%lg", val
);
3845 /* Load a Redis object of the specified type from the specified file.
3846 * On success a newly allocated object is returned, otherwise NULL. */
3847 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3850 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3851 if (type
== REDIS_STRING
) {
3852 /* Read string value */
3853 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3854 o
= tryObjectEncoding(o
);
3855 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3856 /* Read list/set value */
3859 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3860 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3861 /* It's faster to expand the dict to the right size asap in order
3862 * to avoid rehashing */
3863 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3864 dictExpand(o
->ptr
,listlen
);
3865 /* Load every single element of the list/set */
3869 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3870 ele
= tryObjectEncoding(ele
);
3871 if (type
== REDIS_LIST
) {
3872 listAddNodeTail((list
*)o
->ptr
,ele
);
3874 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3877 } else if (type
== REDIS_ZSET
) {
3878 /* Read list/set value */
3882 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3883 o
= createZsetObject();
3885 /* Load every single element of the list/set */
3888 double *score
= zmalloc(sizeof(double));
3890 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3891 ele
= tryObjectEncoding(ele
);
3892 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3893 dictAdd(zs
->dict
,ele
,score
);
3894 zslInsert(zs
->zsl
,*score
,ele
);
3895 incrRefCount(ele
); /* added to skiplist */
3897 } else if (type
== REDIS_HASH
) {
3900 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3901 o
= createHashObject();
3902 /* Too many entries? Use an hash table. */
3903 if (hashlen
> server
.hash_max_zipmap_entries
)
3904 convertToRealHash(o
);
3905 /* Load every key/value, then set it into the zipmap or hash
3906 * table, as needed. */
3910 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3911 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3912 /* If we are using a zipmap and there are too big values
3913 * the object is converted to real hash table encoding. */
3914 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3915 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3916 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3918 convertToRealHash(o
);
3921 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3922 unsigned char *zm
= o
->ptr
;
3924 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3925 val
->ptr
,sdslen(val
->ptr
),NULL
);
3930 key
= tryObjectEncoding(key
);
3931 val
= tryObjectEncoding(val
);
3932 dictAdd((dict
*)o
->ptr
,key
,val
);
3936 redisPanic("Unknown object type");
3941 static int rdbLoad(char *filename
) {
3943 robj
*keyobj
= NULL
;
3945 int type
, retval
, rdbver
;
3946 dict
*d
= server
.db
[0].dict
;
3947 redisDb
*db
= server
.db
+0;
3949 time_t expiretime
= -1, now
= time(NULL
);
3950 long long loadedkeys
= 0;
3952 fp
= fopen(filename
,"r");
3953 if (!fp
) return REDIS_ERR
;
3954 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3956 if (memcmp(buf
,"REDIS",5) != 0) {
3958 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3961 rdbver
= atoi(buf
+5);
3964 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3971 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3972 if (type
== REDIS_EXPIRETIME
) {
3973 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3974 /* We read the time so we need to read the object type again */
3975 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3977 if (type
== REDIS_EOF
) break;
3978 /* Handle SELECT DB opcode as a special case */
3979 if (type
== REDIS_SELECTDB
) {
3980 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3982 if (dbid
>= (unsigned)server
.dbnum
) {
3983 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3986 db
= server
.db
+dbid
;
3991 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3993 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3994 /* Add the new object in the hash table */
3995 retval
= dictAdd(d
,keyobj
,o
);
3996 if (retval
== DICT_ERR
) {
3997 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
4000 /* Set the expire time if needed */
4001 if (expiretime
!= -1) {
4002 setExpire(db
,keyobj
,expiretime
);
4003 /* Delete this key if already expired */
4004 if (expiretime
< now
) deleteKey(db
,keyobj
);
4008 /* Handle swapping while loading big datasets when VM is on */
4010 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
4011 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4012 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4019 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4020 if (keyobj
) decrRefCount(keyobj
);
4021 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4023 return REDIS_ERR
; /* Just to avoid warning */
4026 /*================================== Commands =============================== */
4028 static void authCommand(redisClient
*c
) {
4029 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4030 c
->authenticated
= 1;
4031 addReply(c
,shared
.ok
);
4033 c
->authenticated
= 0;
4034 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4038 static void pingCommand(redisClient
*c
) {
4039 addReply(c
,shared
.pong
);
4042 static void echoCommand(redisClient
*c
) {
4043 addReplyBulk(c
,c
->argv
[1]);
4046 /*=================================== Strings =============================== */
4048 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4050 long seconds
= 0; /* initialized to avoid an harmness warning */
4053 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4056 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4061 if (nx
) deleteIfVolatile(c
->db
,key
);
4062 retval
= dictAdd(c
->db
->dict
,key
,val
);
4063 if (retval
== DICT_ERR
) {
4065 /* If the key is about a swapped value, we want a new key object
4066 * to overwrite the old. So we delete the old key in the database.
4067 * This will also make sure that swap pages about the old object
4068 * will be marked as free. */
4069 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,key
))
4071 dictReplace(c
->db
->dict
,key
,val
);
4074 addReply(c
,shared
.czero
);
4082 removeExpire(c
->db
,key
);
4083 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4084 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4087 static void setCommand(redisClient
*c
) {
4088 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4091 static void setnxCommand(redisClient
*c
) {
4092 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4095 static void setexCommand(redisClient
*c
) {
4096 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4099 static int getGenericCommand(redisClient
*c
) {
4102 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4105 if (o
->type
!= REDIS_STRING
) {
4106 addReply(c
,shared
.wrongtypeerr
);
4114 static void getCommand(redisClient
*c
) {
4115 getGenericCommand(c
);
/* Get the old value of argv[1] and store argv[2] in its place.
 *
 * The reply is produced by getGenericCommand(); when that call reports
 * REDIS_ERR nothing is stored. Otherwise argv[2] is inserted under argv[1]
 * with dictAdd(), falling back to dictReplace() when dictAdd() reports
 * DICT_ERR, references are taken on the argv objects now held by the
 * dictionary, and any expire set on the key is removed.
 *
 * NOTE(review): some source lines of this function (between the visible
 * statements) are not part of this excerpt; confirm the exact branch
 * structure against the full file. */
4118 static void getsetCommand(redisClient
*c
) {
4119 if (getGenericCommand(c
) == REDIS_ERR
) return;
/* Try a plain insert first; DICT_ERR is handled by replacing the value. */
4120 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4121 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4123 incrRefCount(c
->argv
[1]);
4125 incrRefCount(c
->argv
[2]);
/* The key now holds a brand new value: drop any expire attached to it. */
4127 removeExpire(c
->db
,c
->argv
[1]);
4130 static void mgetCommand(redisClient
*c
) {
4133 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4134 for (j
= 1; j
< c
->argc
; j
++) {
4135 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4137 addReply(c
,shared
.nullbulk
);
4139 if (o
->type
!= REDIS_STRING
) {
4140 addReply(c
,shared
.nullbulk
);
4148 static void msetGenericCommand(redisClient
*c
, int nx
) {
4149 int j
, busykeys
= 0;
4151 if ((c
->argc
% 2) == 0) {
4152 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4155 /* Handle the NX flag. The MSETNX semantic is to return zero and set
4156 * nothing at all if at least one of the keys already exists. */
4158 for (j
= 1; j
< c
->argc
; j
+= 2) {
4159 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4165 addReply(c
, shared
.czero
);
4169 for (j
= 1; j
< c
->argc
; j
+= 2) {
4172 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4173 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4174 if (retval
== DICT_ERR
) {
4175 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4176 incrRefCount(c
->argv
[j
+1]);
4178 incrRefCount(c
->argv
[j
]);
4179 incrRefCount(c
->argv
[j
+1]);
4181 removeExpire(c
->db
,c
->argv
[j
]);
4183 server
.dirty
+= (c
->argc
-1)/2;
4184 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4187 static void msetCommand(redisClient
*c
) {
4188 msetGenericCommand(c
,0);
4191 static void msetnxCommand(redisClient
*c
) {
4192 msetGenericCommand(c
,1);
4195 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4200 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4202 if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return;
4205 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4206 o
= tryObjectEncoding(o
);
4207 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4208 if (retval
== DICT_ERR
) {
4209 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4210 removeExpire(c
->db
,c
->argv
[1]);
4212 incrRefCount(c
->argv
[1]);
4215 addReply(c
,shared
.colon
);
4217 addReply(c
,shared
.crlf
);
4220 static void incrCommand(redisClient
*c
) {
4221 incrDecrCommand(c
,1);
4224 static void decrCommand(redisClient
*c
) {
4225 incrDecrCommand(c
,-1);
4228 static void incrbyCommand(redisClient
*c
) {
4231 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4232 incrDecrCommand(c
,incr
);
4235 static void decrbyCommand(redisClient
*c
) {
4238 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4239 incrDecrCommand(c
,-incr
);
4242 static void appendCommand(redisClient
*c
) {
4247 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4249 /* Create the key */
4250 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4251 incrRefCount(c
->argv
[1]);
4252 incrRefCount(c
->argv
[2]);
4253 totlen
= stringObjectLen(c
->argv
[2]);
4257 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4260 o
= dictGetEntryVal(de
);
4261 if (o
->type
!= REDIS_STRING
) {
4262 addReply(c
,shared
.wrongtypeerr
);
4265 /* If the object is specially encoded or shared we have to make
4267 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4268 robj
*decoded
= getDecodedObject(o
);
4270 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4271 decrRefCount(decoded
);
4272 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4275 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4276 o
->ptr
= sdscatlen(o
->ptr
,
4277 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4279 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4280 (unsigned long) c
->argv
[2]->ptr
);
4282 totlen
= sdslen(o
->ptr
);
4285 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4288 static void substrCommand(redisClient
*c
) {
4290 long start
= atoi(c
->argv
[2]->ptr
);
4291 long end
= atoi(c
->argv
[3]->ptr
);
4292 size_t rangelen
, strlen
;
4295 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4296 checkType(c
,o
,REDIS_STRING
)) return;
4298 o
= getDecodedObject(o
);
4299 strlen
= sdslen(o
->ptr
);
4301 /* convert negative indexes */
4302 if (start
< 0) start
= strlen
+start
;
4303 if (end
< 0) end
= strlen
+end
;
4304 if (start
< 0) start
= 0;
4305 if (end
< 0) end
= 0;
4307 /* indexes sanity checks */
4308 if (start
> end
|| (size_t)start
>= strlen
) {
4309 /* Out of range start or start > end result in null reply */
4310 addReply(c
,shared
.nullbulk
);
4314 if ((size_t)end
>= strlen
) end
= strlen
-1;
4315 rangelen
= (end
-start
)+1;
4317 /* Return the result */
4318 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4319 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4320 addReplySds(c
,range
);
4321 addReply(c
,shared
.crlf
);
4325 /* ========================= Type agnostic commands ========================= */
4327 static void delCommand(redisClient
*c
) {
4330 for (j
= 1; j
< c
->argc
; j
++) {
4331 if (deleteKey(c
->db
,c
->argv
[j
])) {
4336 addReplyLong(c
,deleted
);
4339 static void existsCommand(redisClient
*c
) {
4340 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4343 static void selectCommand(redisClient
*c
) {
4344 int id
= atoi(c
->argv
[1]->ptr
);
4346 if (selectDb(c
,id
) == REDIS_ERR
) {
4347 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4349 addReply(c
,shared
.ok
);
4353 static void randomkeyCommand(redisClient
*c
) {
4358 de
= dictGetRandomKey(c
->db
->dict
);
4359 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4363 addReply(c
,shared
.nullbulk
);
4367 key
= dictGetEntryKey(de
);
4368 if (server
.vm_enabled
) {
4369 key
= dupStringObject(key
);
4370 addReplyBulk(c
,key
);
4373 addReplyBulk(c
,key
);
4377 static void keysCommand(redisClient
*c
) {
4380 sds pattern
= c
->argv
[1]->ptr
;
4381 int plen
= sdslen(pattern
);
4382 unsigned long numkeys
= 0;
4383 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4385 di
= dictGetIterator(c
->db
->dict
);
4387 decrRefCount(lenobj
);
4388 while((de
= dictNext(di
)) != NULL
) {
4389 robj
*keyobj
= dictGetEntryKey(de
);
4391 sds key
= keyobj
->ptr
;
4392 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4393 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4394 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4395 addReplyBulk(c
,keyobj
);
4400 dictReleaseIterator(di
);
4401 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
/* Report the number of entries in the client's current database
 * dictionary, formatted as an integer reply (":<n>\r\n").
 *
 * NOTE(review): the source line that opens the call receiving this
 * formatted string is not part of this excerpt. */
4404 static void dbsizeCommand(redisClient
*c
) {
4406 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
/* Report server.lastsave formatted as an integer reply (":<n>\r\n").
 *
 * NOTE(review): the source line that opens the call receiving this
 * formatted string is not part of this excerpt.
 * NOTE(review): the value is printed with %lu; confirm that the type of
 * server.lastsave is unsigned long, or cast it, so the format matches. */
4409 static void lastsaveCommand(redisClient
*c
) {
4411 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4414 static void typeCommand(redisClient
*c
) {
4418 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4423 case REDIS_STRING
: type
= "+string"; break;
4424 case REDIS_LIST
: type
= "+list"; break;
4425 case REDIS_SET
: type
= "+set"; break;
4426 case REDIS_ZSET
: type
= "+zset"; break;
4427 case REDIS_HASH
: type
= "+hash"; break;
4428 default: type
= "+unknown"; break;
4431 addReplySds(c
,sdsnew(type
));
4432 addReply(c
,shared
.crlf
);
/* Synchronous save of the dataset.
 *
 * When a background save child is registered (server.bgsavechildpid is
 * not -1) an "-ERR background save in progress" reply is sent. Otherwise
 * rdbSave() is called on server.dbfilename and shared.ok is sent when it
 * returns REDIS_OK; shared.err is the other reply present here.
 *
 * NOTE(review): the return/else/closing lines of this function are not
 * part of this excerpt; presumably the first branch returns early and
 * shared.err is the failure branch - confirm against the full file. */
4435 static void saveCommand(redisClient
*c
) {
4436 if (server
.bgsavechildpid
!= -1) {
4437 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4440 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4441 addReply(c
,shared
.ok
);
4443 addReply(c
,shared
.err
);
/* Start a background save of the dataset.
 *
 * When a background save child is already registered
 * (server.bgsavechildpid is not -1) an "-ERR background save already in
 * progress" reply is sent. Otherwise rdbSaveBackground() is called on
 * server.dbfilename and a "+Background saving started" status is sent
 * when it returns REDIS_OK; shared.err is the other reply present here.
 *
 * NOTE(review): the return/else/closing lines of this function are not
 * part of this excerpt; presumably the first branch returns early and
 * shared.err is the failure branch - confirm against the full file. */
4447 static void bgsaveCommand(redisClient
*c
) {
4448 if (server
.bgsavechildpid
!= -1) {
4449 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4452 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4453 char *status
= "+Background saving started\r\n";
4454 addReplySds(c
,sdsnew(status
));
4456 addReply(c
,shared
.err
);
4460 static void shutdownCommand(redisClient
*c
) {
4461 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4462 /* Kill the saving child if there is a background saving in progress.
4463 We want to avoid race conditions, for instance our saving child may
4464 overwrite the synchronous save done by SHUTDOWN. */
4465 if (server
.bgsavechildpid
!= -1) {
4466 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4467 kill(server
.bgsavechildpid
,SIGKILL
);
4468 rdbRemoveTempFile(server
.bgsavechildpid
);
4470 if (server
.appendonly
) {
4471 /* Append only file: fsync() the AOF and exit */
4472 fsync(server
.appendfd
);
4473 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4476 /* Snapshotting. Perform a SYNC SAVE and exit */
4477 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4478 if (server
.daemonize
)
4479 unlink(server
.pidfile
);
4480 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4481 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4482 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4485 /* Ooops.. error saving! The best we can do is to continue
4486 * operating. Note that if there was a background saving process,
4487 * in the next cron() Redis will be notified that the background
4488 * saving aborted, handling special stuff like slaves pending for
4489 * synchronization... */
4490 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4492 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4497 static void renameGenericCommand(redisClient
*c
, int nx
) {
4500 /* To use the same key as src and dst is probably an error */
4501 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4502 addReply(c
,shared
.sameobjecterr
);
4506 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4510 deleteIfVolatile(c
->db
,c
->argv
[2]);
4511 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4514 addReply(c
,shared
.czero
);
4517 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4519 incrRefCount(c
->argv
[2]);
4521 deleteKey(c
->db
,c
->argv
[1]);
4523 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4526 static void renameCommand(redisClient
*c
) {
4527 renameGenericCommand(c
,0);
4530 static void renamenxCommand(redisClient
*c
) {
4531 renameGenericCommand(c
,1);
4534 static void moveCommand(redisClient
*c
) {
4539 /* Obtain source and target DB pointers */
4542 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4543 addReply(c
,shared
.outofrangeerr
);
4547 selectDb(c
,srcid
); /* Back to the source DB */
4549 /* If the user is moving using as target the same
4550 * DB as the source DB it is probably an error. */
4552 addReply(c
,shared
.sameobjecterr
);
4556 /* Check if the element exists and get a reference */
4557 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4559 addReply(c
,shared
.czero
);
4563 /* Try to add the element to the target DB */
4564 deleteIfVolatile(dst
,c
->argv
[1]);
4565 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4566 addReply(c
,shared
.czero
);
4569 incrRefCount(c
->argv
[1]);
4572 /* OK! key moved, free the entry in the source DB */
4573 deleteKey(src
,c
->argv
[1]);
4575 addReply(c
,shared
.cone
);
4578 /* =================================== Lists ================================ */
4579 static void pushGenericCommand(redisClient
*c
, int where
) {
4583 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4585 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4586 addReply(c
,shared
.cone
);
4589 lobj
= createListObject();
4591 if (where
== REDIS_HEAD
) {
4592 listAddNodeHead(list
,c
->argv
[2]);
4594 listAddNodeTail(list
,c
->argv
[2]);
4596 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4597 incrRefCount(c
->argv
[1]);
4598 incrRefCount(c
->argv
[2]);
4600 if (lobj
->type
!= REDIS_LIST
) {
4601 addReply(c
,shared
.wrongtypeerr
);
4604 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4605 addReply(c
,shared
.cone
);
4609 if (where
== REDIS_HEAD
) {
4610 listAddNodeHead(list
,c
->argv
[2]);
4612 listAddNodeTail(list
,c
->argv
[2]);
4614 incrRefCount(c
->argv
[2]);
4617 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4620 static void lpushCommand(redisClient
*c
) {
4621 pushGenericCommand(c
,REDIS_HEAD
);
4624 static void rpushCommand(redisClient
*c
) {
4625 pushGenericCommand(c
,REDIS_TAIL
);
4628 static void llenCommand(redisClient
*c
) {
4632 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4633 checkType(c
,o
,REDIS_LIST
)) return;
4636 addReplyUlong(c
,listLength(l
));
4639 static void lindexCommand(redisClient
*c
) {
4641 int index
= atoi(c
->argv
[2]->ptr
);
4645 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4646 checkType(c
,o
,REDIS_LIST
)) return;
4649 ln
= listIndex(list
, index
);
4651 addReply(c
,shared
.nullbulk
);
4653 robj
*ele
= listNodeValue(ln
);
4654 addReplyBulk(c
,ele
);
4658 static void lsetCommand(redisClient
*c
) {
4660 int index
= atoi(c
->argv
[2]->ptr
);
4664 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4665 checkType(c
,o
,REDIS_LIST
)) return;
4668 ln
= listIndex(list
, index
);
4670 addReply(c
,shared
.outofrangeerr
);
4672 robj
*ele
= listNodeValue(ln
);
4675 listNodeValue(ln
) = c
->argv
[3];
4676 incrRefCount(c
->argv
[3]);
4677 addReply(c
,shared
.ok
);
4682 static void popGenericCommand(redisClient
*c
, int where
) {
4687 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4688 checkType(c
,o
,REDIS_LIST
)) return;
4691 if (where
== REDIS_HEAD
)
4692 ln
= listFirst(list
);
4694 ln
= listLast(list
);
4697 addReply(c
,shared
.nullbulk
);
4699 robj
*ele
= listNodeValue(ln
);
4700 addReplyBulk(c
,ele
);
4701 listDelNode(list
,ln
);
4702 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4707 static void lpopCommand(redisClient
*c
) {
4708 popGenericCommand(c
,REDIS_HEAD
);
4711 static void rpopCommand(redisClient
*c
) {
4712 popGenericCommand(c
,REDIS_TAIL
);
4715 static void lrangeCommand(redisClient
*c
) {
4717 int start
= atoi(c
->argv
[2]->ptr
);
4718 int end
= atoi(c
->argv
[3]->ptr
);
4725 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4726 || checkType(c
,o
,REDIS_LIST
)) return;
4728 llen
= listLength(list
);
4730 /* convert negative indexes */
4731 if (start
< 0) start
= llen
+start
;
4732 if (end
< 0) end
= llen
+end
;
4733 if (start
< 0) start
= 0;
4734 if (end
< 0) end
= 0;
4736 /* indexes sanity checks */
4737 if (start
> end
|| start
>= llen
) {
4738 /* Out of range start or start > end result in empty list */
4739 addReply(c
,shared
.emptymultibulk
);
4742 if (end
>= llen
) end
= llen
-1;
4743 rangelen
= (end
-start
)+1;
4745 /* Return the result in form of a multi-bulk reply */
4746 ln
= listIndex(list
, start
);
4747 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4748 for (j
= 0; j
< rangelen
; j
++) {
4749 ele
= listNodeValue(ln
);
4750 addReplyBulk(c
,ele
);
4755 static void ltrimCommand(redisClient
*c
) {
4757 int start
= atoi(c
->argv
[2]->ptr
);
4758 int end
= atoi(c
->argv
[3]->ptr
);
4760 int j
, ltrim
, rtrim
;
4764 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4765 checkType(c
,o
,REDIS_LIST
)) return;
4767 llen
= listLength(list
);
4769 /* convert negative indexes */
4770 if (start
< 0) start
= llen
+start
;
4771 if (end
< 0) end
= llen
+end
;
4772 if (start
< 0) start
= 0;
4773 if (end
< 0) end
= 0;
4775 /* indexes sanity checks */
4776 if (start
> end
|| start
>= llen
) {
4777 /* Out of range start or start > end result in empty list */
4781 if (end
>= llen
) end
= llen
-1;
4786 /* Remove list elements to perform the trim */
4787 for (j
= 0; j
< ltrim
; j
++) {
4788 ln
= listFirst(list
);
4789 listDelNode(list
,ln
);
4791 for (j
= 0; j
< rtrim
; j
++) {
4792 ln
= listLast(list
);
4793 listDelNode(list
,ln
);
4795 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4797 addReply(c
,shared
.ok
);
4800 static void lremCommand(redisClient
*c
) {
4803 listNode
*ln
, *next
;
4804 int toremove
= atoi(c
->argv
[2]->ptr
);
4808 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4809 checkType(c
,o
,REDIS_LIST
)) return;
4813 toremove
= -toremove
;
4816 ln
= fromtail
? list
->tail
: list
->head
;
4818 robj
*ele
= listNodeValue(ln
);
4820 next
= fromtail
? ln
->prev
: ln
->next
;
4821 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4822 listDelNode(list
,ln
);
4825 if (toremove
&& removed
== toremove
) break;
4829 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4830 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4833 /* This is the semantic of this command:
4834 * RPOPLPUSH srclist dstlist:
4835 * IF LLEN(srclist) > 0
4836 * element = RPOP srclist
4837 * LPUSH dstlist element
4844 * The idea is to be able to get an element from a list in a reliable way
4845 * since the element is not just returned but pushed against another list
4846 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4848 static void rpoplpushcommand(redisClient
*c
) {
4853 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4854 checkType(c
,sobj
,REDIS_LIST
)) return;
4855 srclist
= sobj
->ptr
;
4856 ln
= listLast(srclist
);
4859 addReply(c
,shared
.nullbulk
);
4861 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4862 robj
*ele
= listNodeValue(ln
);
4865 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4866 addReply(c
,shared
.wrongtypeerr
);
4870 /* Add the element to the target list (unless it's directly
4871 * passed to some BLPOP-ing client */
4872 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4874 /* Create the list if the key does not exist */
4875 dobj
= createListObject();
4876 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4877 incrRefCount(c
->argv
[2]);
4879 dstlist
= dobj
->ptr
;
4880 listAddNodeHead(dstlist
,ele
);
4884 /* Send the element to the client as reply as well */
4885 addReplyBulk(c
,ele
);
4887 /* Finally remove the element from the source list */
4888 listDelNode(srclist
,ln
);
4889 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4894 /* ==================================== Sets ================================ */
4896 static void saddCommand(redisClient
*c
) {
4899 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4901 set
= createSetObject();
4902 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4903 incrRefCount(c
->argv
[1]);
4905 if (set
->type
!= REDIS_SET
) {
4906 addReply(c
,shared
.wrongtypeerr
);
4910 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4911 incrRefCount(c
->argv
[2]);
4913 addReply(c
,shared
.cone
);
4915 addReply(c
,shared
.czero
);
4919 static void sremCommand(redisClient
*c
) {
4922 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4923 checkType(c
,set
,REDIS_SET
)) return;
4925 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4927 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4928 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4929 addReply(c
,shared
.cone
);
4931 addReply(c
,shared
.czero
);
4935 static void smoveCommand(redisClient
*c
) {
4936 robj
*srcset
, *dstset
;
4938 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4939 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4941 /* If the source key does not exist return 0, if it's of the wrong type
4943 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4944 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4947 /* Error if the destination key is not a set as well */
4948 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4949 addReply(c
,shared
.wrongtypeerr
);
4952 /* Remove the element from the source set */
4953 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4954 /* Key not found in the src set! return zero */
4955 addReply(c
,shared
.czero
);
4958 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4959 deleteKey(c
->db
,c
->argv
[1]);
4961 /* Add the element to the destination set */
4963 dstset
= createSetObject();
4964 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4965 incrRefCount(c
->argv
[2]);
4967 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4968 incrRefCount(c
->argv
[3]);
4969 addReply(c
,shared
.cone
);
4972 static void sismemberCommand(redisClient
*c
) {
4975 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4976 checkType(c
,set
,REDIS_SET
)) return;
4978 if (dictFind(set
->ptr
,c
->argv
[2]))
4979 addReply(c
,shared
.cone
);
4981 addReply(c
,shared
.czero
);
4984 static void scardCommand(redisClient
*c
) {
4988 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4989 checkType(c
,o
,REDIS_SET
)) return;
4992 addReplyUlong(c
,dictSize(s
));
4995 static void spopCommand(redisClient
*c
) {
4999 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5000 checkType(c
,set
,REDIS_SET
)) return;
5002 de
= dictGetRandomKey(set
->ptr
);
5004 addReply(c
,shared
.nullbulk
);
5006 robj
*ele
= dictGetEntryKey(de
);
5008 addReplyBulk(c
,ele
);
5009 dictDelete(set
->ptr
,ele
);
5010 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5011 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5016 static void srandmemberCommand(redisClient
*c
) {
5020 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5021 checkType(c
,set
,REDIS_SET
)) return;
5023 de
= dictGetRandomKey(set
->ptr
);
5025 addReply(c
,shared
.nullbulk
);
5027 robj
*ele
= dictGetEntryKey(de
);
5029 addReplyBulk(c
,ele
);
5033 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5034 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5036 return dictSize(*d1
)-dictSize(*d2
);
5039 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5040 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5043 robj
*lenobj
= NULL
, *dstset
= NULL
;
5044 unsigned long j
, cardinality
= 0;
5046 for (j
= 0; j
< setsnum
; j
++) {
5050 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5051 lookupKeyRead(c
->db
,setskeys
[j
]);
5055 if (deleteKey(c
->db
,dstkey
))
5057 addReply(c
,shared
.czero
);
5059 addReply(c
,shared
.emptymultibulk
);
5063 if (setobj
->type
!= REDIS_SET
) {
5065 addReply(c
,shared
.wrongtypeerr
);
5068 dv
[j
] = setobj
->ptr
;
5070 /* Sort sets from the smallest to largest, this will improve our
5071 * algorithm's performance */
5072 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5074 /* The first thing we should output is the total number of elements...
5075 * since this is a multi-bulk write, but at this stage we don't know
5076 * the intersection set size, so we use a trick, append an empty object
5077 * to the output list and save the pointer to later modify it with the
5080 lenobj
= createObject(REDIS_STRING
,NULL
);
5082 decrRefCount(lenobj
);
5084 /* If we have a target key where to store the resulting set
5085 * create this key with an empty set inside */
5086 dstset
= createSetObject();
5089 /* Iterate all the elements of the first (smallest) set, and test
5090 * the element against all the other sets, if at least one set does
5091 * not include the element it is discarded */
5092 di
= dictGetIterator(dv
[0]);
5094 while((de
= dictNext(di
)) != NULL
) {
5097 for (j
= 1; j
< setsnum
; j
++)
5098 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5100 continue; /* at least one set does not contain the member */
5101 ele
= dictGetEntryKey(de
);
5103 addReplyBulk(c
,ele
);
5106 dictAdd(dstset
->ptr
,ele
,NULL
);
5110 dictReleaseIterator(di
);
5113 /* Store the resulting set into the target, if the intersection
5114 * is not an empty set. */
5115 deleteKey(c
->db
,dstkey
);
5116 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5117 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5118 incrRefCount(dstkey
);
5119 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5121 decrRefCount(dstset
);
5122 addReply(c
,shared
.czero
);
5126 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5131 static void sinterCommand(redisClient
*c
) {
5132 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5135 static void sinterstoreCommand(redisClient
*c
) {
5136 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5139 #define REDIS_OP_UNION 0
5140 #define REDIS_OP_DIFF 1
5141 #define REDIS_OP_INTER 2
5143 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5144 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5147 robj
*dstset
= NULL
;
5148 int j
, cardinality
= 0;
5150 for (j
= 0; j
< setsnum
; j
++) {
5154 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5155 lookupKeyRead(c
->db
,setskeys
[j
]);
5160 if (setobj
->type
!= REDIS_SET
) {
5162 addReply(c
,shared
.wrongtypeerr
);
5165 dv
[j
] = setobj
->ptr
;
5168 /* We need a temp set object to store our union. If the dstkey
5169 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5170 * this set object will be the resulting object to set into the target key*/
5171 dstset
= createSetObject();
5173 /* Iterate all the elements of all the sets, add every element a single
5174 * time to the result set */
5175 for (j
= 0; j
< setsnum
; j
++) {
5176 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5177 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5179 di
= dictGetIterator(dv
[j
]);
5181 while((de
= dictNext(di
)) != NULL
) {
5184 /* dictAdd will not add the same element multiple times */
5185 ele
= dictGetEntryKey(de
);
5186 if (op
== REDIS_OP_UNION
|| j
== 0) {
5187 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5191 } else if (op
== REDIS_OP_DIFF
) {
5192 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5197 dictReleaseIterator(di
);
5199 /* result set is empty? Exit asap. */
5200 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5203 /* Output the content of the resulting set, if not in STORE mode */
5205 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5206 di
= dictGetIterator(dstset
->ptr
);
5207 while((de
= dictNext(di
)) != NULL
) {
5210 ele
= dictGetEntryKey(de
);
5211 addReplyBulk(c
,ele
);
5213 dictReleaseIterator(di
);
5214 decrRefCount(dstset
);
5216 /* If we have a target key where to store the resulting set
5217 * create this key with the result set inside */
5218 deleteKey(c
->db
,dstkey
);
5219 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5220 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5221 incrRefCount(dstkey
);
5222 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5224 decrRefCount(dstset
);
5225 addReply(c
,shared
.czero
);
5232 static void sunionCommand(redisClient
*c
) {
5233 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5236 static void sunionstoreCommand(redisClient
*c
) {
5237 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5240 static void sdiffCommand(redisClient
*c
) {
5241 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5244 static void sdiffstoreCommand(redisClient
*c
) {
5245 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5248 /* ==================================== ZSets =============================== */
5250 /* ZSETs are ordered sets using two data structures to hold the same elements
5251 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5254 * The elements are added to an hash table mapping Redis objects to scores.
5255 * At the same time the elements are added to a skip list mapping scores
5256 * to Redis objects (so objects are sorted by scores in this "view"). */
5258 /* This skiplist implementation is almost a C translation of the original
5259 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5260 * Alternative to Balanced Trees", modified in three ways:
5261 * a) this implementation allows for repeated values.
5262 * b) the comparison is not just by key (our 'score') but by satellite data.
5263 * c) there is a back pointer, so it's a doubly linked list with the back
5264 * pointers being only at "level 1". This allows to traverse the list
5265 * from tail to head, useful for ZREVRANGE. */
5267 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5268 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5270 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5272 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5278 static zskiplist
*zslCreate(void) {
5282 zsl
= zmalloc(sizeof(*zsl
));
5285 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5286 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5287 zsl
->header
->forward
[j
] = NULL
;
5289 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5290 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5291 zsl
->header
->span
[j
] = 0;
5293 zsl
->header
->backward
= NULL
;
5298 static void zslFreeNode(zskiplistNode
*node
) {
5299 decrRefCount(node
->obj
);
5300 zfree(node
->forward
);
5305 static void zslFree(zskiplist
*zsl
) {
5306 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5308 zfree(zsl
->header
->forward
);
5309 zfree(zsl
->header
->span
);
5312 next
= node
->forward
[0];
/* Pick the level for a new skiplist node.
 *
 * The loop keeps going while a 16-bit random value is below
 * ZSKIPLIST_P * 0xFFFF, and the returned level is capped at
 * ZSKIPLIST_MAXLEVEL.
 *
 * NOTE(review): the declaration of 'level' and the loop body are not part
 * of this excerpt - presumably level starts at 1 and grows by one per
 * iteration; confirm against the full file. */
5319 static int zslRandomLevel(void) {
5321 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
/* Never hand out more levels than the skiplist header provides. */
5323 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5326 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5327 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5328 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5332 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5333 /* store rank that is crossed to reach the insert position */
5334 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5336 while (x
->forward
[i
] &&
5337 (x
->forward
[i
]->score
< score
||
5338 (x
->forward
[i
]->score
== score
&&
5339 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5340 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5345 /* we assume the key is not already inside, since we allow duplicated
5346 * scores, and the re-insertion of score and redis object should never
5347 * happen since the caller of zslInsert() should test in the hash table
5348 * if the element is already inside or not. */
5349 level
= zslRandomLevel();
5350 if (level
> zsl
->level
) {
5351 for (i
= zsl
->level
; i
< level
; i
++) {
5353 update
[i
] = zsl
->header
;
5354 update
[i
]->span
[i
-1] = zsl
->length
;
5358 x
= zslCreateNode(level
,score
,obj
);
5359 for (i
= 0; i
< level
; i
++) {
5360 x
->forward
[i
] = update
[i
]->forward
[i
];
5361 update
[i
]->forward
[i
] = x
;
5363 /* update span covered by update[i] as x is inserted here */
5365 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5366 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5370 /* increment span for untouched levels */
5371 for (i
= level
; i
< zsl
->level
; i
++) {
5372 update
[i
]->span
[i
-1]++;
5375 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5377 x
->forward
[0]->backward
= x
;
5383 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5384 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5386 for (i
= 0; i
< zsl
->level
; i
++) {
5387 if (update
[i
]->forward
[i
] == x
) {
5389 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5391 update
[i
]->forward
[i
] = x
->forward
[i
];
5393 /* invariant: i > 0, because update[0]->forward[0]
5394 * is always equal to x */
5395 update
[i
]->span
[i
-1] -= 1;
5398 if (x
->forward
[0]) {
5399 x
->forward
[0]->backward
= x
->backward
;
5401 zsl
->tail
= x
->backward
;
5403 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5408 /* Delete an element with matching score/object from the skiplist. */
5409 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5410 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5414 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5415 while (x
->forward
[i
] &&
5416 (x
->forward
[i
]->score
< score
||
5417 (x
->forward
[i
]->score
== score
&&
5418 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5422 /* We may have multiple elements with the same score, what we need
5423 * is to find the element with both the right score and object. */
5425 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5426 zslDeleteNode(zsl
, x
, update
);
5430 return 0; /* not found */
5432 return 0; /* not found */
5435 /* Delete all the elements with score between min and max from the skiplist.
5436 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
5437 * Note that this function takes the reference to the hash table view of the
5438 * sorted set, in order to remove the elements from the hash table too. */
5439 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5440 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5441 unsigned long removed
= 0;
5445 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5446 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5450 /* We may have multiple elements with the same score, what we need
5451 * is to find the element with both the right score and object. */
5453 while (x
&& x
->score
<= max
) {
5454 zskiplistNode
*next
= x
->forward
[0];
5455 zslDeleteNode(zsl
, x
, update
);
5456 dictDelete(dict
,x
->obj
);
5461 return removed
; /* not found */
5464 /* Delete all the elements with rank between start and end from the skiplist.
5465 * Start and end are inclusive. Note that start and end need to be 1-based */
5466 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5467 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5468 unsigned long traversed
= 0, removed
= 0;
5472 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5473 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5474 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5482 while (x
&& traversed
<= end
) {
5483 zskiplistNode
*next
= x
->forward
[0];
5484 zslDeleteNode(zsl
, x
, update
);
5485 dictDelete(dict
,x
->obj
);
5494 /* Find the first node having a score equal or greater than the specified one.
5495 * Returns NULL if there is no match. */
5496 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5501 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5502 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5505 /* We may have multiple elements with the same score, what we need
5506 * is to find the element with both the right score and object. */
5507 return x
->forward
[0];
5510 /* Find the rank for an element by both score and key.
5511 * Returns 0 when the element cannot be found, rank otherwise.
5512 * Note that the rank is 1-based due to the span of zsl->header to the
5514 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5516 unsigned long rank
= 0;
5520 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5521 while (x
->forward
[i
] &&
5522 (x
->forward
[i
]->score
< score
||
5523 (x
->forward
[i
]->score
== score
&&
5524 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5525 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5529 /* x might be equal to zsl->header, so test if obj is non-NULL */
5530 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5537 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5538 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5540 unsigned long traversed
= 0;
5544 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5545 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5547 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5550 if (traversed
== rank
) {
5557 /* The actual Z-commands implementations */
5559 /* This generic command implements both ZADD and ZINCRBY.
5560 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5561 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5562 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5567 zsetobj
= lookupKeyWrite(c
->db
,key
);
5568 if (zsetobj
== NULL
) {
5569 zsetobj
= createZsetObject();
5570 dictAdd(c
->db
->dict
,key
,zsetobj
);
5573 if (zsetobj
->type
!= REDIS_ZSET
) {
5574 addReply(c
,shared
.wrongtypeerr
);
5580 /* Ok now since we implement both ZADD and ZINCRBY here the code
5581 * needs to handle the two different conditions. It's all about setting
5582 * '*score', that is, the new score to set, to the right value. */
5583 score
= zmalloc(sizeof(double));
5587 /* Read the old score. If the element was not present starts from 0 */
5588 de
= dictFind(zs
->dict
,ele
);
5590 double *oldscore
= dictGetEntryVal(de
);
5591 *score
= *oldscore
+ scoreval
;
5599 /* What follows is a simple remove and re-insert operation that is common
5600 * to both ZADD and ZINCRBY... */
5601 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5602 /* case 1: New element */
5603 incrRefCount(ele
); /* added to hash */
5604 zslInsert(zs
->zsl
,*score
,ele
);
5605 incrRefCount(ele
); /* added to skiplist */
5608 addReplyDouble(c
,*score
);
5610 addReply(c
,shared
.cone
);
5615 /* case 2: Score update operation */
5616 de
= dictFind(zs
->dict
,ele
);
5617 redisAssert(de
!= NULL
);
5618 oldscore
= dictGetEntryVal(de
);
5619 if (*score
!= *oldscore
) {
5622 /* Remove and insert the element in the skip list with new score */
5623 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5624 redisAssert(deleted
!= 0);
5625 zslInsert(zs
->zsl
,*score
,ele
);
5627 /* Update the score in the hash table */
5628 dictReplace(zs
->dict
,ele
,score
);
5634 addReplyDouble(c
,*score
);
5636 addReply(c
,shared
.czero
);
5640 static void zaddCommand(redisClient
*c
) {
5643 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5644 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5647 static void zincrbyCommand(redisClient
*c
) {
5650 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5651 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5654 static void zremCommand(redisClient
*c
) {
5661 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5662 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5665 de
= dictFind(zs
->dict
,c
->argv
[2]);
5667 addReply(c
,shared
.czero
);
5670 /* Delete from the skiplist */
5671 oldscore
= dictGetEntryVal(de
);
5672 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5673 redisAssert(deleted
!= 0);
5675 /* Delete from the hash table */
5676 dictDelete(zs
->dict
,c
->argv
[2]);
5677 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5678 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5680 addReply(c
,shared
.cone
);
5683 static void zremrangebyscoreCommand(redisClient
*c
) {
5690 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5691 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5693 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5694 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5697 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5698 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5699 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5700 server
.dirty
+= deleted
;
5701 addReplyLong(c
,deleted
);
5704 static void zremrangebyrankCommand(redisClient
*c
) {
5712 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5713 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5715 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5716 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5718 llen
= zs
->zsl
->length
;
5720 /* convert negative indexes */
5721 if (start
< 0) start
= llen
+start
;
5722 if (end
< 0) end
= llen
+end
;
5723 if (start
< 0) start
= 0;
5724 if (end
< 0) end
= 0;
5726 /* indexes sanity checks */
5727 if (start
> end
|| start
>= llen
) {
5728 addReply(c
,shared
.czero
);
5731 if (end
>= llen
) end
= llen
-1;
5733 /* increment start and end because zsl*Rank functions
5734 * use 1-based rank */
5735 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5736 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5737 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5738 server
.dirty
+= deleted
;
5739 addReplyLong(c
, deleted
);
5747 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5748 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5749 unsigned long size1
, size2
;
5750 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5751 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5752 return size1
- size2
;
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3

/* Combine 'val' into '*target' according to the aggregation mode: sum of
 * the two values, or the smaller/larger of the two. An unknown mode is a
 * programming error and triggers redisPanic(). */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    switch (aggregate) {
    case REDIS_AGGR_SUM:
        *target = *target + val;
        break;
    case REDIS_AGGR_MIN:
        if (val < *target) *target = val;
        break;
    case REDIS_AGGR_MAX:
        if (val > *target) *target = val;
        break;
    default:
        /* safety net */
        redisPanic("Unknown ZUNION/INTER aggregate type");
    }
}
5772 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5774 int aggregate
= REDIS_AGGR_SUM
;
5781 /* expect zsetnum input keys to be given */
5782 zsetnum
= atoi(c
->argv
[2]->ptr
);
5784 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5788 /* test if the expected number of keys would overflow */
5789 if (3+zsetnum
> c
->argc
) {
5790 addReply(c
,shared
.syntaxerr
);
5794 /* read keys to be used for input */
5795 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5796 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5797 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5801 if (zsetobj
->type
!= REDIS_ZSET
) {
5803 addReply(c
,shared
.wrongtypeerr
);
5806 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5809 /* default all weights to 1 */
5810 src
[i
].weight
= 1.0;
5813 /* parse optional extra arguments */
5815 int remaining
= c
->argc
- j
;
5818 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5820 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5821 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5824 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5826 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5827 aggregate
= REDIS_AGGR_SUM
;
5828 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5829 aggregate
= REDIS_AGGR_MIN
;
5830 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5831 aggregate
= REDIS_AGGR_MAX
;
5834 addReply(c
,shared
.syntaxerr
);
5840 addReply(c
,shared
.syntaxerr
);
5846 /* sort sets from the smallest to largest, this will improve our
5847 * algorithm's performance */
5848 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5850 dstobj
= createZsetObject();
5851 dstzset
= dstobj
->ptr
;
5853 if (op
== REDIS_OP_INTER
) {
5854 /* skip going over all entries if the smallest zset is NULL or empty */
5855 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5856 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5857 * from small to large, all src[i > 0].dict are non-empty too */
5858 di
= dictGetIterator(src
[0].dict
);
5859 while((de
= dictNext(di
)) != NULL
) {
5860 double *score
= zmalloc(sizeof(double)), value
;
5861 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5863 for (j
= 1; j
< zsetnum
; j
++) {
5864 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5866 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5867 zunionInterAggregate(score
, value
, aggregate
);
5873 /* skip entry when not present in every source dict */
5877 robj
*o
= dictGetEntryKey(de
);
5878 dictAdd(dstzset
->dict
,o
,score
);
5879 incrRefCount(o
); /* added to dictionary */
5880 zslInsert(dstzset
->zsl
,*score
,o
);
5881 incrRefCount(o
); /* added to skiplist */
5884 dictReleaseIterator(di
);
5886 } else if (op
== REDIS_OP_UNION
) {
5887 for (i
= 0; i
< zsetnum
; i
++) {
5888 if (!src
[i
].dict
) continue;
5890 di
= dictGetIterator(src
[i
].dict
);
5891 while((de
= dictNext(di
)) != NULL
) {
5892 /* skip key when already processed */
5893 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5895 double *score
= zmalloc(sizeof(double)), value
;
5896 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5898 /* because the zsets are sorted by size, its only possible
5899 * for sets at larger indices to hold this entry */
5900 for (j
= (i
+1); j
< zsetnum
; j
++) {
5901 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5903 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5904 zunionInterAggregate(score
, value
, aggregate
);
5908 robj
*o
= dictGetEntryKey(de
);
5909 dictAdd(dstzset
->dict
,o
,score
);
5910 incrRefCount(o
); /* added to dictionary */
5911 zslInsert(dstzset
->zsl
,*score
,o
);
5912 incrRefCount(o
); /* added to skiplist */
5914 dictReleaseIterator(di
);
5917 /* unknown operator */
5918 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5921 deleteKey(c
->db
,dstkey
);
5922 if (dstzset
->zsl
->length
) {
5923 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5924 incrRefCount(dstkey
);
5925 addReplyLong(c
, dstzset
->zsl
->length
);
5928 decrRefCount(dstobj
);
5929 addReply(c
, shared
.czero
);
5934 static void zunionCommand(redisClient
*c
) {
5935 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5938 static void zinterCommand(redisClient
*c
) {
5939 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5942 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5954 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5955 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5957 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5959 } else if (c
->argc
>= 5) {
5960 addReply(c
,shared
.syntaxerr
);
5964 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5965 || checkType(c
,o
,REDIS_ZSET
)) return;
5970 /* convert negative indexes */
5971 if (start
< 0) start
= llen
+start
;
5972 if (end
< 0) end
= llen
+end
;
5973 if (start
< 0) start
= 0;
5974 if (end
< 0) end
= 0;
5976 /* indexes sanity checks */
5977 if (start
> end
|| start
>= llen
) {
5978 /* Out of range start or start > end result in empty list */
5979 addReply(c
,shared
.emptymultibulk
);
5982 if (end
>= llen
) end
= llen
-1;
5983 rangelen
= (end
-start
)+1;
5985 /* check if starting point is trivial, before searching
5986 * the element in log(N) time */
5988 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5991 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5994 /* Return the result in form of a multi-bulk reply */
5995 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5996 withscores
? (rangelen
*2) : rangelen
));
5997 for (j
= 0; j
< rangelen
; j
++) {
5999 addReplyBulk(c
,ele
);
6001 addReplyDouble(c
,ln
->score
);
6002 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6006 static void zrangeCommand(redisClient
*c
) {
6007 zrangeGenericCommand(c
,0);
6010 static void zrevrangeCommand(redisClient
*c
) {
6011 zrangeGenericCommand(c
,1);
6014 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6015 * If justcount is non-zero, just the count is returned. */
6016 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6019 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6020 int offset
= 0, limit
= -1;
6024 /* Parse the min-max interval. If one of the values is prefixed
6025 * by the "(" character, it's considered "open". For instance
6026 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6027 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6028 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6029 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6032 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6034 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6035 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6038 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6041 /* Parse "WITHSCORES": note that if the command was called with
6042 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6043 * enter the following paths to parse WITHSCORES and LIMIT. */
6044 if (c
->argc
== 5 || c
->argc
== 8) {
6045 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6050 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6054 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6059 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6060 addReply(c
,shared
.syntaxerr
);
6062 } else if (c
->argc
== (7 + withscores
)) {
6063 offset
= atoi(c
->argv
[5]->ptr
);
6064 limit
= atoi(c
->argv
[6]->ptr
);
6065 if (offset
< 0) offset
= 0;
6068 /* Ok, lookup the key and get the range */
6069 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6071 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6073 if (o
->type
!= REDIS_ZSET
) {
6074 addReply(c
,shared
.wrongtypeerr
);
6076 zset
*zsetobj
= o
->ptr
;
6077 zskiplist
*zsl
= zsetobj
->zsl
;
6079 robj
*ele
, *lenobj
= NULL
;
6080 unsigned long rangelen
= 0;
6082 /* Get the first node with the score >= min, or with
6083 * score > min if 'minex' is true. */
6084 ln
= zslFirstWithScore(zsl
,min
);
6085 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6088 /* No element matching the speciifed interval */
6089 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6093 /* We don't know in advance how many matching elements there
6094 * are in the list, so we push this object that will represent
6095 * the multi-bulk length in the output buffer, and will "fix"
6098 lenobj
= createObject(REDIS_STRING
,NULL
);
6100 decrRefCount(lenobj
);
6103 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6106 ln
= ln
->forward
[0];
6109 if (limit
== 0) break;
6112 addReplyBulk(c
,ele
);
6114 addReplyDouble(c
,ln
->score
);
6116 ln
= ln
->forward
[0];
6118 if (limit
> 0) limit
--;
6121 addReplyLong(c
,(long)rangelen
);
6123 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6124 withscores
? (rangelen
*2) : rangelen
);
6130 static void zrangebyscoreCommand(redisClient
*c
) {
6131 genericZrangebyscoreCommand(c
,0);
6134 static void zcountCommand(redisClient
*c
) {
6135 genericZrangebyscoreCommand(c
,1);
6138 static void zcardCommand(redisClient
*c
) {
6142 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6143 checkType(c
,o
,REDIS_ZSET
)) return;
6146 addReplyUlong(c
,zs
->zsl
->length
);
6149 static void zscoreCommand(redisClient
*c
) {
6154 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6155 checkType(c
,o
,REDIS_ZSET
)) return;
6158 de
= dictFind(zs
->dict
,c
->argv
[2]);
6160 addReply(c
,shared
.nullbulk
);
6162 double *score
= dictGetEntryVal(de
);
6164 addReplyDouble(c
,*score
);
6168 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6176 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6177 checkType(c
,o
,REDIS_ZSET
)) return;
6181 de
= dictFind(zs
->dict
,c
->argv
[2]);
6183 addReply(c
,shared
.nullbulk
);
6187 score
= dictGetEntryVal(de
);
6188 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6191 addReplyLong(c
, zsl
->length
- rank
);
6193 addReplyLong(c
, rank
-1);
6196 addReply(c
,shared
.nullbulk
);
6200 static void zrankCommand(redisClient
*c
) {
6201 zrankGenericCommand(c
, 0);
6204 static void zrevrankCommand(redisClient
*c
) {
6205 zrankGenericCommand(c
, 1);
6208 /* ========================= Hashes utility functions ======================= */
6209 #define REDIS_HASH_KEY 1
6210 #define REDIS_HASH_VALUE 2
6212 /* Check the length of a number of objects to see if we need to convert a
6213 * zipmap to a real hash. Note that we only check string encoded objects
6214 * as their string length can be queried in constant time. */
6215 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6217 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6219 for (i
= start
; i
<= end
; i
++) {
6220 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6221 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6223 convertToRealHash(subject
);
6229 /* Encode given objects in-place when the hash uses a dict. */
6230 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6231 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6232 if (o1
) *o1
= tryObjectEncoding(*o1
);
6233 if (o2
) *o2
= tryObjectEncoding(*o2
);
6237 /* Get the value from a hash identified by key. Returns either a string
6238 * object or NULL if the value cannot be found. The refcount of the object
6239 * is always increased by 1 when the value was found. */
6240 static robj
*hashGet(robj
*o
, robj
*key
) {
6242 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6245 key
= getDecodedObject(key
);
6246 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6247 value
= createStringObject((char*)v
,vlen
);
6251 dictEntry
*de
= dictFind(o
->ptr
,key
);
6253 value
= dictGetEntryVal(de
);
6254 incrRefCount(value
);
6260 /* Test if the key exists in the given hash. Returns 1 if the key
6261 * exists and 0 when it doesn't. */
6262 static int hashExists(robj
*o
, robj
*key
) {
6263 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6264 key
= getDecodedObject(key
);
6265 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6271 if (dictFind(o
->ptr
,key
) != NULL
) {
6278 /* Add an element, discard the old if the key already exists.
6279 * Return 0 on insert and 1 on update. */
6280 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6282 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6283 key
= getDecodedObject(key
);
6284 value
= getDecodedObject(value
);
6285 o
->ptr
= zipmapSet(o
->ptr
,
6286 key
->ptr
,sdslen(key
->ptr
),
6287 value
->ptr
,sdslen(value
->ptr
), &update
);
6289 decrRefCount(value
);
6291 /* Check if the zipmap needs to be upgraded to a real hash table */
6292 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6293 convertToRealHash(o
);
6295 if (dictReplace(o
->ptr
,key
,value
)) {
6302 incrRefCount(value
);
6307 /* Delete an element from a hash.
6308 * Return 1 on deleted and 0 on not found. */
6309 static int hashDelete(robj
*o
, robj
*key
) {
6311 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6312 key
= getDecodedObject(key
);
6313 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6316 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6317 /* Always check if the dictionary needs a resize after a delete. */
6318 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6323 /* Return the number of elements in a hash. */
6324 static unsigned long hashLength(robj
*o
) {
6325 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6326 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6329 /* Structure to hold hash iteration abstration. Note that iteration over
6330 * hashes involves both fields and values. Because it is possible that
6331 * not both are required, store pointers in the iterator to avoid
6332 * unnecessary memory allocation for fields/values. */
6336 unsigned char *zk
, *zv
;
6337 unsigned int zklen
, zvlen
;
6343 static hashIterator
*hashInitIterator(robj
*subject
) {
6344 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6345 hi
->encoding
= subject
->encoding
;
6346 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6347 hi
->zi
= zipmapRewind(subject
->ptr
);
6348 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6349 hi
->di
= dictGetIterator(subject
->ptr
);
6356 static void hashReleaseIterator(hashIterator
*hi
) {
6357 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6358 dictReleaseIterator(hi
->di
);
6363 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6364 * could be found and REDIS_ERR when the iterator reaches the end. */
6365 static int hashNext(hashIterator
*hi
) {
6366 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6367 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6368 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6370 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6375 /* Get key or value object at current iteration position.
6376 * This increases the refcount of the field object by 1. */
6377 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6379 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6380 if (what
& REDIS_HASH_KEY
) {
6381 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6383 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6386 if (what
& REDIS_HASH_KEY
) {
6387 o
= dictGetEntryKey(hi
->de
);
6389 o
= dictGetEntryVal(hi
->de
);
6396 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6397 robj
*o
= lookupKeyWrite(c
->db
,key
);
6399 o
= createHashObject();
6400 dictAdd(c
->db
->dict
,key
,o
);
6403 if (o
->type
!= REDIS_HASH
) {
6404 addReply(c
,shared
.wrongtypeerr
);
6411 /* ============================= Hash commands ============================== */
6412 static void hsetCommand(redisClient
*c
) {
6416 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6417 hashTryConversion(o
,c
->argv
,2,3);
6418 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6419 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6420 addReply(c
, update
? shared
.czero
: shared
.cone
);
6424 static void hsetnxCommand(redisClient
*c
) {
6426 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6427 hashTryConversion(o
,c
->argv
,2,3);
6429 if (hashExists(o
, c
->argv
[2])) {
6430 addReply(c
, shared
.czero
);
6432 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6433 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6434 addReply(c
, shared
.cone
);
6439 static void hmsetCommand(redisClient
*c
) {
6443 if ((c
->argc
% 2) == 1) {
6444 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6448 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6449 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6450 for (i
= 2; i
< c
->argc
; i
+= 2) {
6451 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6452 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6454 addReply(c
, shared
.ok
);
6458 static void hincrbyCommand(redisClient
*c
) {
6459 long long value
, incr
;
6460 robj
*o
, *current
, *new;
6462 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6463 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6464 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6465 if (current
->encoding
== REDIS_ENCODING_RAW
)
6466 value
= strtoll(current
->ptr
,NULL
,10);
6467 else if (current
->encoding
== REDIS_ENCODING_INT
)
6468 value
= (long)current
->ptr
;
6470 redisAssert(1 != 1);
6471 decrRefCount(current
);
6477 new = createStringObjectFromLongLong(value
);
6478 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6479 hashSet(o
,c
->argv
[2],new);
6481 addReplyLongLong(c
,value
);
6485 static void hgetCommand(redisClient
*c
) {
6487 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6488 checkType(c
,o
,REDIS_HASH
)) return;
6490 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6491 addReplyBulk(c
,value
);
6492 decrRefCount(value
);
6494 addReply(c
,shared
.nullbulk
);
6498 static void hmgetCommand(redisClient
*c
) {
6501 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6502 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6503 addReply(c
,shared
.wrongtypeerr
);
6506 /* Note the check for o != NULL happens inside the loop. This is
6507 * done because objects that cannot be found are considered to be
6508 * an empty hash. The reply should then be a series of NULLs. */
6509 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6510 for (i
= 2; i
< c
->argc
; i
++) {
6511 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6512 addReplyBulk(c
,value
);
6513 decrRefCount(value
);
6515 addReply(c
,shared
.nullbulk
);
6520 static void hdelCommand(redisClient
*c
) {
6522 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6523 checkType(c
,o
,REDIS_HASH
)) return;
6525 if (hashDelete(o
,c
->argv
[2])) {
6526 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6527 addReply(c
,shared
.cone
);
6530 addReply(c
,shared
.czero
);
6534 static void hlenCommand(redisClient
*c
) {
6536 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6537 checkType(c
,o
,REDIS_HASH
)) return;
6539 addReplyUlong(c
,hashLength(o
));
6542 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6543 robj
*o
, *lenobj
, *obj
;
6544 unsigned long count
= 0;
6547 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6548 || checkType(c
,o
,REDIS_HASH
)) return;
6550 lenobj
= createObject(REDIS_STRING
,NULL
);
6552 decrRefCount(lenobj
);
6554 hi
= hashInitIterator(o
);
6555 while (hashNext(hi
) != REDIS_ERR
) {
6556 if (flags
& REDIS_HASH_KEY
) {
6557 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6558 addReplyBulk(c
,obj
);
6562 if (flags
& REDIS_HASH_VALUE
) {
6563 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6564 addReplyBulk(c
,obj
);
6569 hashReleaseIterator(hi
);
6571 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6574 static void hkeysCommand(redisClient
*c
) {
6575 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6578 static void hvalsCommand(redisClient
*c
) {
6579 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6582 static void hgetallCommand(redisClient
*c
) {
6583 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6586 static void hexistsCommand(redisClient
*c
) {
6588 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6589 checkType(c
,o
,REDIS_HASH
)) return;
6591 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6594 static void convertToRealHash(robj
*o
) {
6595 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6596 unsigned int klen
, vlen
;
6597 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6599 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6600 p
= zipmapRewind(zm
);
6601 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6602 robj
*keyobj
, *valobj
;
6604 keyobj
= createStringObject((char*)key
,klen
);
6605 valobj
= createStringObject((char*)val
,vlen
);
6606 keyobj
= tryObjectEncoding(keyobj
);
6607 valobj
= tryObjectEncoding(valobj
);
6608 dictAdd(dict
,keyobj
,valobj
);
6610 o
->encoding
= REDIS_ENCODING_HT
;
6615 /* ========================= Non type-specific commands ==================== */
6617 static void flushdbCommand(redisClient
*c
) {
6618 server
.dirty
+= dictSize(c
->db
->dict
);
6619 dictEmpty(c
->db
->dict
);
6620 dictEmpty(c
->db
->expires
);
6621 addReply(c
,shared
.ok
);
6624 static void flushallCommand(redisClient
*c
) {
6625 server
.dirty
+= emptyDb();
6626 addReply(c
,shared
.ok
);
6627 if (server
.bgsavechildpid
!= -1) {
6628 kill(server
.bgsavechildpid
,SIGKILL
);
6629 rdbRemoveTempFile(server
.bgsavechildpid
);
6631 rdbSave(server
.dbfilename
);
/* createSortOperation: allocates a redisSortOperation with zmalloc() and
 * stores 'pattern' in it (the pointer is stored as-is; no refcount change is
 * visible here).
 * NOTE(review): the lines storing 'type' and returning the new object are not
 * visible in this listing -- presumably present in the full source. */
6635 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6636 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6638 so
->pattern
= pattern
;
6642 /* Return the value associated to the key with a name obtained
6643 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6644 * The returned object will always have its refcount increased by 1
6645 * when it is non-NULL.
 *
 * Special cases visible below: a pattern equal to "#" yields 'subst' itself;
 * a "->" found after the '*' makes the rest of the pattern a hash field
 * name, in which case the looked-up key must be a REDIS_HASH and the field
 * is fetched with hashGet(); otherwise the looked-up key must be a
 * REDIS_STRING. NULL is returned when the key is missing or of the wrong
 * type, or when pattern plus substitution exceed REDIS_SORTKEY_MAX. */
6646 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6649 robj keyobj
, fieldobj
, *o
;
6650 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6651 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6655 char buf
[REDIS_SORTKEY_MAX
+1];
6656 } keyname
, fieldname
;
6658 /* If the pattern is "#" return the substitution object itself in order
6659 * to implement the "SORT ... GET #" feature. */
6660 spat
= pattern
->ptr
;
6661 if (spat
[0] == '#' && spat
[1] == '\0') {
6662 incrRefCount(subst
);
6666 /* The substitution object may be specially encoded. If so we create
6667 * a decoded object on the fly. Otherwise getDecodedObject will just
6668 * increment the ref count, that we'll decrement later. */
6669 subst
= getDecodedObject(subst
);
/* NOTE(review): the early return below does not call decrRefCount(subst),
 * unlike the other exit paths after getDecodedObject() -- looks like a
 * reference leak when the resulting key name is too long; confirm. */
6672 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6673 p
= strchr(spat
,'*');
6675 decrRefCount(subst
);
6679 /* Find out if we're dealing with a hash dereference. */
6680 if ((f
= strstr(p
+1, "->")) != NULL
) {
6681 fieldlen
= sdslen(spat
)-(f
-spat
);
6682 /* this also copies \0 character */
6683 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6684 fieldname
.len
= fieldlen
-2;
/* Assemble the key name as: prefix before '*', then the substitution, then
 * whatever follows '*' up to (not including) the "->field" part. */
6690 sublen
= sdslen(ssub
);
6691 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6692 memcpy(keyname
.buf
,spat
,prefixlen
);
6693 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6694 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6695 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6696 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6697 decrRefCount(subst
);
6699 /* Lookup substituted key */
6700 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6701 o
= lookupKeyRead(db
,&keyobj
);
6702 if (o
== NULL
) return NULL
;
6705 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6707 /* Retrieve value from hash by the field name. This operation
6708 * already increases the refcount of the returned object. */
6709 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6710 o
= hashGet(o
, &fieldobj
);
6712 if (o
->type
!= REDIS_STRING
) return NULL
;
6714 /* Every object that this function returns needs to have its refcount
6715 * increased. sortCommand decreases it again. */
6722 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6723 * the additional parameter is not standard but a BSD-specific we have to
6724 * pass sorting parameters via the global 'server' structure.
 *
 * Reads server.sort_alpha, server.sort_bypattern and server.sort_desc.
 * Numeric mode compares the precomputed u.score fields; alpha mode compares
 * u.cmpobj with strcoll() when sorting by pattern, otherwise the elements
 * themselves with compareStringObjects(). The result is negated when
 * server.sort_desc is set. */
6725 static int sortCompare(const void *s1
, const void *s2
) {
6726 const redisSortObject
*so1
= s1
, *so2
= s2
;
6729 if (!server
.sort_alpha
) {
6730 /* Numeric sorting. Here it's trivial as we precomputed scores */
6731 if (so1
->u
.score
> so2
->u
.score
) {
6733 } else if (so1
->u
.score
< so2
->u
.score
) {
6739 /* Alphanumeric sorting */
6740 if (server
.sort_bypattern
) {
6741 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6742 /* At least one compare object is NULL */
6743 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6745 else if (so1
->u
.cmpobj
== NULL
)
6750 /* We have both the objects, use strcoll */
6751 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6754 /* Compare elements directly. */
6755 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6758 return server
.sort_desc
? -cmp
: cmp
;
6761 /* The SORT command is the most complex command in Redis. Warning: this code
6762 * is optimized for speed and a bit less for readability.
 *
 * Outline of the visible steps: look up c->argv[1] (must be a list, set or
 * sorted set), parse the ASC/DESC/ALPHA/LIMIT/STORE/BY/GET options, load the
 * elements into 'vector', compute per-element scores or compare objects
 * unless 'dontsort' is set, sort with qsort()/pqsort() using sortCompare(),
 * then either stream the result to the client or, with STORE, build a list
 * object and place it at 'storekey'. */
6763 static void sortCommand(redisClient
*c
) {
6766 int desc
= 0, alpha
= 0;
6767 int limit_start
= 0, limit_count
= -1, start
, end
;
6768 int j
, dontsort
= 0, vectorlen
;
6769 int getop
= 0; /* GET operation counter */
6770 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6771 redisSortObject
*vector
; /* Resulting vector to sort */
6773 /* Lookup the key to sort. It must be of the right types */
6774 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6775 if (sortval
== NULL
) {
6776 addReply(c
,shared
.emptymultibulk
);
6779 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6780 sortval
->type
!= REDIS_ZSET
)
6782 addReply(c
,shared
.wrongtypeerr
);
6786 /* Create a list of operations to perform for every sorted element.
6787 * Operations can be GET/DEL/INCR/DECR */
6788 operations
= listCreate();
6789 listSetFreeMethod(operations
,zfree
);
6792 /* Now we need to protect sortval incrementing its count, in the future
6793 * SORT may have options able to overwrite/delete keys during the sorting
6794 * and the sorted key itself may get destroyed */
6795 incrRefCount(sortval
);
6797 /* The SORT command has an SQL-alike syntax, parse it */
6798 while(j
< c
->argc
) {
6799 int leftargs
= c
->argc
-j
-1;
6800 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6802 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6804 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6806 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6807 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6808 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6810 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6811 storekey
= c
->argv
[j
+1];
6813 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6814 sortby
= c
->argv
[j
+1];
6815 /* If the BY pattern does not contain '*', i.e. it is constant,
6816 * we don't need to sort nor to lookup the weight keys. */
6817 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6819 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6820 listAddNodeTail(operations
,createSortOperation(
6821 REDIS_SORT_GET
,c
->argv
[j
+1]));
6825 decrRefCount(sortval
);
6826 listRelease(operations
);
6827 addReply(c
,shared
.syntaxerr
);
6833 /* Load the sorting vector with all the objects to sort */
6834 switch(sortval
->type
) {
6835 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6836 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6837 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6838 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
6840 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6843 if (sortval
->type
== REDIS_LIST
) {
6844 list
*list
= sortval
->ptr
;
6848 listRewind(list
,&li
);
6849 while((ln
= listNext(&li
))) {
6850 robj
*ele
= ln
->value
;
6851 vector
[j
].obj
= ele
;
6852 vector
[j
].u
.score
= 0;
6853 vector
[j
].u
.cmpobj
= NULL
;
6861 if (sortval
->type
== REDIS_SET
) {
6864 zset
*zs
= sortval
->ptr
;
6868 di
= dictGetIterator(set
);
6869 while((setele
= dictNext(di
)) != NULL
) {
6870 vector
[j
].obj
= dictGetEntryKey(setele
);
6871 vector
[j
].u
.score
= 0;
6872 vector
[j
].u
.cmpobj
= NULL
;
6875 dictReleaseIterator(di
);
6877 redisAssert(j
== vectorlen
);
6879 /* Now it's time to load the right scores in the sorting vector */
6880 if (dontsort
== 0) {
6881 for (j
= 0; j
< vectorlen
; j
++) {
6884 /* lookup value to sort by */
6885 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6886 if (!byval
) continue;
6888 /* use object itself to sort by */
6889 byval
= vector
[j
].obj
;
6893 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6895 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6896 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6897 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
6898 /* Don't need to decode the object if it's
6899 * integer-encoded (the only encoding supported) so
6900 * far. We can just cast it */
6901 vector
[j
].u
.score
= (long)byval
->ptr
;
6903 redisAssert(1 != 1);
6907 /* when the object was retrieved using lookupKeyByPattern,
6908 * its refcount needs to be decreased. */
6910 decrRefCount(byval
);
6915 /* We are ready to sort the vector... perform a bit of sanity check
6916 * on the LIMIT option too. We'll use a partial version of quicksort. */
6917 start
= (limit_start
< 0) ? 0 : limit_start
;
6918 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6919 if (start
>= vectorlen
) {
6920 start
= vectorlen
-1;
6923 if (end
>= vectorlen
) end
= vectorlen
-1;
6925 if (dontsort
== 0) {
6926 server
.sort_desc
= desc
;
6927 server
.sort_alpha
= alpha
;
6928 server
.sort_bypattern
= sortby
? 1 : 0;
6929 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6930 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6932 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6935 /* Send command output to the output buffer, performing the specified
6936 * GET/DEL/INCR/DECR operations if any. */
6937 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6938 if (storekey
== NULL
) {
6939 /* STORE option not specified, send the sorting result to the client */
6940 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6941 for (j
= start
; j
<= end
; j
++) {
6945 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6946 listRewind(operations
,&li
);
6947 while((ln
= listNext(&li
))) {
6948 redisSortOperation
*sop
= ln
->value
;
6949 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6952 if (sop
->type
== REDIS_SORT_GET
) {
6954 addReply(c
,shared
.nullbulk
);
6956 addReplyBulk(c
,val
);
6960 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6965 robj
*listObject
= createListObject();
6966 list
*listPtr
= (list
*) listObject
->ptr
;
6968 /* STORE option specified, set the sorting result as a List object */
6969 for (j
= start
; j
<= end
; j
++) {
6974 listAddNodeTail(listPtr
,vector
[j
].obj
);
6975 incrRefCount(vector
[j
].obj
);
6977 listRewind(operations
,&li
);
6978 while((ln
= listNext(&li
))) {
6979 redisSortOperation
*sop
= ln
->value
;
6980 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6983 if (sop
->type
== REDIS_SORT_GET
) {
6985 listAddNodeTail(listPtr
,createStringObject("",0));
6987 /* We should do a incrRefCount on val because it is
6988 * added to the list, but also a decrRefCount because
6989 * it is returned by lookupKeyByPattern. This results
6990 * in doing nothing at all. */
6991 listAddNodeTail(listPtr
,val
);
6994 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6998 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6999 incrRefCount(storekey
);
7001 /* Note: we add 1 because the DB is dirty anyway since even if the
7002 * SORT result is empty a new key is set and maybe the old content
7004 server
.dirty
+= 1+outputlen
;
7005 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7009 decrRefCount(sortval
);
7010 listRelease(operations
);
7011 for (j
= 0; j
< vectorlen
; j
++) {
7012 if (alpha
&& vector
[j
].u
.cmpobj
)
7013 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the caller-provided output buffer (it must be large enough for the
 * longest result, i.e. a 20-digit byte count plus suffix and terminator).
 * 'n' is the amount of bytes to format.
 *
 * Values below 1024 are printed exactly with a 'B' suffix. Larger values are
 * scaled by powers of 1024 and printed with two decimals and a K, M, G, T or
 * P suffix. Values too large even for the P range fall back to the exact
 * byte count, so 's' is always written whatever the value of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Beyond the P range: report the raw byte count rather than
         * leaving the output buffer untouched. */
        sprintf(s,"%lluB",n);
    }
}
7039 /* Create the string returned by the INFO command. This is decoupled
7040 * by the INFO command itself as we need to report the same information
7041 * on memory corruption problems.
 *
 * The result is an sds string built with sdscatprintf(): a fixed set of
 * "name:value\r\n" lines, followed by master link fields when
 * server.masterhost is set, VM statistics when server.vm_enabled is set, and
 * one "dbN:keys=...,expires=..." line for each database that has keys or
 * expires. */
7042 static sds
genRedisInfoString(void) {
7044 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7048 bytesToHuman(hmem
,zmalloc_used_memory());
7049 info
= sdscatprintf(sdsempty(),
7050 "redis_version:%s\r\n"
7052 "multiplexing_api:%s\r\n"
7053 "process_id:%ld\r\n"
7054 "uptime_in_seconds:%ld\r\n"
7055 "uptime_in_days:%ld\r\n"
7056 "connected_clients:%d\r\n"
7057 "connected_slaves:%d\r\n"
7058 "blocked_clients:%d\r\n"
7059 "used_memory:%zu\r\n"
7060 "used_memory_human:%s\r\n"
7061 "changes_since_last_save:%lld\r\n"
7062 "bgsave_in_progress:%d\r\n"
7063 "last_save_time:%ld\r\n"
7064 "bgrewriteaof_in_progress:%d\r\n"
7065 "total_connections_received:%lld\r\n"
7066 "total_commands_processed:%lld\r\n"
7067 "expired_keys:%lld\r\n"
7068 "hash_max_zipmap_entries:%ld\r\n"
7069 "hash_max_zipmap_value:%ld\r\n"
7070 "pubsub_channels:%ld\r\n"
7071 "pubsub_patterns:%u\r\n"
7075 (sizeof(long) == 8) ? "64" : "32",
7080 listLength(server
.clients
)-listLength(server
.slaves
),
7081 listLength(server
.slaves
),
7082 server
.blpop_blocked_clients
,
7083 zmalloc_used_memory(),
7086 server
.bgsavechildpid
!= -1,
7088 server
.bgrewritechildpid
!= -1,
7089 server
.stat_numconnections
,
7090 server
.stat_numcommands
,
7091 server
.stat_expiredkeys
,
7092 server
.hash_max_zipmap_entries
,
7093 server
.hash_max_zipmap_value
,
7094 dictSize(server
.pubsub_channels
),
7095 listLength(server
.pubsub_patterns
),
7096 server
.vm_enabled
!= 0,
7097 server
.masterhost
== NULL
? "master" : "slave"
/* Replication details are only appended when this instance has a master
 * host configured. */
7099 if (server
.masterhost
) {
7100 info
= sdscatprintf(info
,
7101 "master_host:%s\r\n"
7102 "master_port:%d\r\n"
7103 "master_link_status:%s\r\n"
7104 "master_last_io_seconds_ago:%d\r\n"
7107 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7109 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7112 if (server
.vm_enabled
) {
7114 info
= sdscatprintf(info
,
7115 "vm_conf_max_memory:%llu\r\n"
7116 "vm_conf_page_size:%llu\r\n"
7117 "vm_conf_pages:%llu\r\n"
7118 "vm_stats_used_pages:%llu\r\n"
7119 "vm_stats_swapped_objects:%llu\r\n"
7120 "vm_stats_swappin_count:%llu\r\n"
7121 "vm_stats_swappout_count:%llu\r\n"
7122 "vm_stats_io_newjobs_len:%lu\r\n"
7123 "vm_stats_io_processing_len:%lu\r\n"
7124 "vm_stats_io_processed_len:%lu\r\n"
7125 "vm_stats_io_active_threads:%lu\r\n"
7126 "vm_stats_blocked_clients:%lu\r\n"
7127 ,(unsigned long long) server
.vm_max_memory
,
7128 (unsigned long long) server
.vm_page_size
,
7129 (unsigned long long) server
.vm_pages
,
7130 (unsigned long long) server
.vm_stats_used_pages
,
7131 (unsigned long long) server
.vm_stats_swapped_objects
,
7132 (unsigned long long) server
.vm_stats_swapins
,
7133 (unsigned long long) server
.vm_stats_swapouts
,
7134 (unsigned long) listLength(server
.io_newjobs
),
7135 (unsigned long) listLength(server
.io_processing
),
7136 (unsigned long) listLength(server
.io_processed
),
7137 (unsigned long) server
.io_active_threads
,
7138 (unsigned long) server
.vm_blocked_clients
/* Per-database summary: databases with neither keys nor expires are
 * omitted from the output. */
7142 for (j
= 0; j
< server
.dbnum
; j
++) {
7143 long long keys
, vkeys
;
7145 keys
= dictSize(server
.db
[j
].dict
);
7146 vkeys
= dictSize(server
.db
[j
].expires
);
7147 if (keys
|| vkeys
) {
7148 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7155 static void infoCommand(redisClient
*c
) {
7156 sds info
= genRedisInfoString();
7157 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7158 (unsigned long)sdslen(info
)));
7159 addReplySds(c
,info
);
7160 addReply(c
,shared
.crlf
);
/* monitorCommand: does nothing if the client already has the REDIS_SLAVE
 * flag. Otherwise sets both REDIS_SLAVE and REDIS_MONITOR on the client,
 * appends it to server.monitors and replies OK. */
7163 static void monitorCommand(redisClient
*c
) {
7164 /* ignore MONITOR if already slave or in monitor mode */
7165 if (c
->flags
& REDIS_SLAVE
) return;
7167 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7169 listAddNodeTail(server
.monitors
,c
);
7170 addReply(c
,shared
.ok
);
7173 /* ================================= Expire ================================= */
/* removeExpire: tries to delete 'key' from db->expires and branches on
 * whether dictDelete() returned DICT_OK.
 * NOTE(review): the returned values are on lines not visible in this
 * listing -- presumably 1 on success and 0 otherwise; confirm. */
7174 static int removeExpire(redisDb
*db
, robj
*key
) {
7175 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
/* setExpire: tries to add 'key' to db->expires with the expire time 'when'
 * stored directly in the value pointer (cast to void*), and branches on
 * dictAdd() returning DICT_ERR.
 * NOTE(review): the returned values and any refcount handling are on lines
 * not visible in this listing -- confirm against the full source. */
7182 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7183 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7191 /* Return the expire time of the specified key, or -1 if no expire
7192 * is associated with this key (i.e. the key is non volatile).
 *
 * The time is read back from the db->expires entry value, which holds the
 * time_t directly in the pointer-sized slot. */
7193 static time_t getExpire(redisDb
*db
, robj
*key
) {
7196 /* No expire? return ASAP */
7197 if (dictSize(db
->expires
) == 0 ||
7198 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7200 return (time_t) dictGetEntryVal(de
);
/* expireIfNeeded: deletes 'key' from the database if it has an expire time
 * that is strictly in the past. Returns 0 when the key has no expire or the
 * expire time has not passed yet (time(NULL) <= when); otherwise removes the
 * key from db->expires, increments server.stat_expiredkeys and returns
 * whether deleting it from db->dict succeeded (1) or not (0). */
7203 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7207 /* No expire? return ASAP */
7208 if (dictSize(db
->expires
) == 0 ||
7209 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7211 /* Lookup the expire */
7212 when
= (time_t) dictGetEntryVal(de
);
7213 if (time(NULL
) <= when
) return 0;
7215 /* Delete the key */
7216 dictDelete(db
->expires
,key
);
7217 server
.stat_expiredkeys
++;
7218 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* deleteIfVolatile: deletes 'key' if it has any expire set, regardless of
 * whether the expire time has passed. Returns 0 when the key has no entry in
 * db->expires; otherwise increments server.stat_expiredkeys, removes the key
 * from db->expires and returns whether deleting it from db->dict succeeded
 * (1) or not (0). */
7221 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7224 /* No expire? return ASAP */
7225 if (dictSize(db
->expires
) == 0 ||
7226 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7228 /* Delete the key */
7230 server
.stat_expiredkeys
++;
7231 dictDelete(db
->expires
,key
);
7232 return dictDelete(db
->dict
,key
) == DICT_OK
;
/* expireGenericCommand: shared body of expireCommand() and
 * expireatCommand(). Parses 'param' as a long with
 * getLongFromObjectOrReply() (returning if that does not yield REDIS_OK) and
 * looks 'key' up in c->db->dict. The visible reply paths are: shared.czero;
 * deleting the key with deleteKey() (incrementing server.dirty on success)
 * and replying shared.cone; or computing when = time(NULL)+seconds and
 * replying shared.cone / shared.czero depending on setExpire().
 * NOTE(review): the conditions selecting between these paths, and how
 * 'offset' is applied to 'seconds', are on lines not visible in this
 * listing -- confirm against the full source. */
7235 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7239 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7243 de
= dictFind(c
->db
->dict
,key
);
7245 addReply(c
,shared
.czero
);
7249 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7250 addReply(c
, shared
.cone
);
7253 time_t when
= time(NULL
)+seconds
;
7254 if (setExpire(c
->db
,key
,when
)) {
7255 addReply(c
,shared
.cone
);
7258 addReply(c
,shared
.czero
);
/* expireCommand: calls expireGenericCommand() with key c->argv[1], parameter
 * c->argv[2] and an offset of 0. */
7264 static void expireCommand(redisClient
*c
) {
7265 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
/* expireatCommand: calls expireGenericCommand() with key c->argv[1],
 * parameter c->argv[2] and the current time (time(NULL)) as offset. */
7268 static void expireatCommand(redisClient
*c
) {
7269 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
/* ttlCommand: replies with an integer (":%d\r\n"). The value is derived from
 * getExpire() for c->argv[1] minus the current time, forced to -1 when the
 * difference is negative.
 * NOTE(review): the initial value of 'ttl' and the guard around the
 * subtraction are on lines not visible in this listing -- presumably -1 is
 * also reported for keys without an expire; confirm. */
7272 static void ttlCommand(redisClient
*c
) {
7276 expire
= getExpire(c
->db
,c
->argv
[1]);
7278 ttl
= (int) (expire
-time(NULL
));
7279 if (ttl
< 0) ttl
= -1;
7281 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7284 /* ================================ MULTI/EXEC ============================== */
7286 /* Client state initialization for MULTI/EXEC: resets the queued command
 * array pointer to NULL and the queued command count to 0. Nothing is freed
 * here. */
7287 static void initClientMultiState(redisClient
*c
) {
7288 c
->mstate
.commands
= NULL
;
7289 c
->mstate
.count
= 0;
7292 /* Release all the resources associated with MULTI/EXEC state: drops one
 * reference on every argument of every queued command, then frees the
 * queued command array itself. The mstate fields are not reset here. */
7293 static void freeClientMultiState(redisClient
*c
) {
7296 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7298 multiCmd
*mc
= c
->mstate
.commands
+j
;
7300 for (i
= 0; i
< mc
->argc
; i
++)
7301 decrRefCount(mc
->argv
[i
]);
7304 zfree(c
->mstate
.commands
);
7307 /* Add a new command into the MULTI commands queue: grows the queued
 * command array by one slot with zrealloc(), copies the client's current
 * argument vector into a freshly allocated array for that slot, and takes
 * one reference on each copied argument.
 * NOTE(review): the lines storing 'cmd' and the argument count in the slot
 * and incrementing mstate.count are not visible in this listing. */
7308 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7312 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7313 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7314 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7317 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7318 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7319 for (j
= 0; j
< c
->argc
; j
++)
7320 incrRefCount(mc
->argv
[j
]);
/* multiCommand: sets the REDIS_MULTI flag on the client and replies OK. */
7324 static void multiCommand(redisClient
*c
) {
7325 c
->flags
|= REDIS_MULTI
;
7326 addReply(c
,shared
.ok
);
/* discardCommand: when the client is not flagged REDIS_MULTI an
 * "-ERR DISCARD without MULTI" error is queued. The visible normal path
 * frees and re-initializes the client's MULTI state, clears the REDIS_MULTI
 * flag and replies OK. */
7329 static void discardCommand(redisClient
*c
) {
7330 if (!(c
->flags
& REDIS_MULTI
)) {
7331 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7335 freeClientMultiState(c
);
7336 initClientMultiState(c
);
7337 c
->flags
&= (~REDIS_MULTI
);
7338 addReply(c
,shared
.ok
);
7341 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7342 * implememntation for more information. */
7343 static void execCommandReplicateMulti(redisClient
*c
) {
7344 struct redisCommand
*cmd
;
7345 robj
*multistring
= createStringObject("MULTI",5);
7347 cmd
= lookupCommand("multi");
7348 if (server
.appendonly
)
7349 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7350 if (listLength(server
.slaves
))
7351 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7352 decrRefCount(multistring
);
/* execCommand: when the client is not flagged REDIS_MULTI an
 * "-ERR EXEC without MULTI" error is queued. The visible normal path
 * propagates a MULTI via execCommandReplicateMulti(), emits a multi-bulk
 * header with the number of queued commands, runs each queued command
 * through call() with the client's argc/argv temporarily pointed at the
 * queued arguments, restores the original argc/argv, and finally frees and
 * resets the MULTI state and clears the REDIS_MULTI flag. */
7355 static void execCommand(redisClient
*c
) {
7360 if (!(c
->flags
& REDIS_MULTI
)) {
7361 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7365 /* Replicate a MULTI request now that we are sure the block is executed.
7366 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7367 * both the AOF and the replication link will have the same consistency
7368 * and atomicity guarantees. */
7369 execCommandReplicateMulti(c
);
7371 /* Exec all the queued commands */
7372 orig_argv
= c
->argv
;
7373 orig_argc
= c
->argc
;
7374 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7375 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7376 c
->argc
= c
->mstate
.commands
[j
].argc
;
7377 c
->argv
= c
->mstate
.commands
[j
].argv
;
7378 call(c
,c
->mstate
.commands
[j
].cmd
);
7380 c
->argv
= orig_argv
;
7381 c
->argc
= orig_argc
;
7382 freeClientMultiState(c
);
7383 initClientMultiState(c
);
7384 c
->flags
&= (~REDIS_MULTI
);
7385 /* Make sure the EXEC command is always replicated / AOF, since we
7386 * always send the MULTI command (we can't know beforehand if the
7387 * next operations will contain at least a modification to the DB). */
7391 /* =========================== Blocking Operations ========================= */
7393 /* Currently Redis blocking operations support is limited to list POP ops,
7394 * so the current implementation is not fully generic, but it is also not
7395 * completely specific so it will not require a rewrite to support new
7396 * kind of blocking operations in the future.
7398 * Still it's important to note that list blocking operations can be already
7399 * used as a notification mechanism in order to implement other blocking
7400 * operations at application level, so there must be a very strong evidence
7401 * of usefulness and generality before new blocking operations are implemented.
7403 * This is how the current blocking POP works, we use BLPOP as example:
7404 * - If the user calls BLPOP and the key exists and contains a non empty list
7405 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7406 * if there is no need to block.
7407 * - If instead BLPOP is called and the key does not exist or the list is
7408 * empty we need to block. In order to do so we remove the notification for
7409 * new data to read in the client socket (so that we'll not serve new
7410 * requests if the blocking request is not served). Also we put the client
7411 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7412 * blocking for these keys.
7413 * - If a PUSH operation against a key with blocked clients waiting is
7414 * performed, we serve the first in the list: basically instead of pushing
7415 * the new element inside the list we return it to the (first / oldest)
7416 * blocking client, unblock the client, and remove it from the list.
7418 * The above comment and the source code should be enough in order to understand
7419 * the implementation and modify / fix it later. */
7422 /* Set a client in blocking mode for the specified keys, with the specified timeout. */
/* blockForKeys: records in the client the 'numkeys' keys it is waiting on
 * (c->blockingkeys, c->blockingkeysnum) and the timeout (c->blockingto),
 * taking a reference on each key. For every key the client is appended to
 * the list of waiting clients stored in c->db->blockingkeys; when a new
 * entry is added to that dict another reference is taken on the key.
 * Finally the client is flagged REDIS_BLOCKED and
 * server.blpop_blocked_clients is incremented. */
7424 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7429 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7430 c
->blockingkeysnum
= numkeys
;
7431 c
->blockingto
= timeout
;
7432 for (j
= 0; j
< numkeys
; j
++) {
7433 /* Add the key in the client structure, to map clients -> keys */
7434 c
->blockingkeys
[j
] = keys
[j
];
7435 incrRefCount(keys
[j
]);
7437 /* And in the other "side", to map keys -> clients */
7438 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7442 /* For every key we take a list of clients blocked for it */
7444 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7445 incrRefCount(keys
[j
]);
7446 assert(retval
== DICT_OK
);
7448 l
= dictGetEntryVal(de
);
7450 listAddNodeTail(l
,c
);
7452 /* Mark the client as a blocked client */
7453 c
->flags
|= REDIS_BLOCKED
;
7454 server
.blpop_blocked_clients
++;
7457 /* Unblock a client that's waiting in a blocking operation such as BLPOP.
 *
 * c->blockingkeys must be non-NULL (asserted). The client is removed from
 * the waiting list of every key it was blocked on, empty lists are deleted
 * from c->db->blockingkeys, and the reference held on each key is dropped.
 * The key array is then freed, REDIS_BLOCKED is cleared,
 * server.blpop_blocked_clients is decremented and any pending input in
 * c->querybuf is processed. */
7458 static void unblockClientWaitingData(redisClient
*c
) {
7463 assert(c
->blockingkeys
!= NULL
);
7464 /* The client may wait for multiple keys, so unblock it for every key. */
7465 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7466 /* Remove this client from the list of clients waiting for this key. */
7467 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7469 l
= dictGetEntryVal(de
);
7470 listDelNode(l
,listSearchKey(l
,c
));
7471 /* If the list is empty we need to remove it to avoid wasting memory */
7472 if (listLength(l
) == 0)
7473 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7474 decrRefCount(c
->blockingkeys
[j
]);
7476 /* Cleanup the client structure */
7477 zfree(c
->blockingkeys
);
7478 c
->blockingkeys
= NULL
;
7479 c
->flags
&= (~REDIS_BLOCKED
);
7480 server
.blpop_blocked_clients
--;
7481 /* We want to process data if there is some command waiting
7482 * in the input buffer. Note that this is safe even if
7483 * unblockClientWaitingData() gets called from freeClient() because
7484 * freeClient() will be smart enough to call this function
7485 * *after* c->querybuf was set to NULL. */
7486 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7489 /* This should be called from any function PUSHing into lists.
7490 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7491 * 'ele' is the element pushed.
7493 * If the function returns 0 there was no client waiting for a list push
7496 * If the function returns 1 there was a client waiting for a list push
7497 * against this key, the element was passed to this client thus it's not
7498 * needed to actually add it to the list and the caller should return asap. */
7499 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7500 struct dictEntry
*de
;
7501 redisClient
*receiver
;
/* No entry for this key in the blocking-keys dict: nobody is waiting. */
7505 de
= dictFind(c
->db
->blockingkeys
,key
);
7506 if (de
== NULL
) return 0;
7507 l
= dictGetEntryVal(de
);
7510 receiver
= ln
->value
;
/* The receiver gets a two-element multi bulk reply: key name, then the
 * pushed element; after that it is unblocked. */
7512 addReplySds(receiver
,sdsnew("*2\r\n"));
7513 addReplyBulk(receiver
,key
);
7514 addReplyBulk(receiver
,ele
);
7515 unblockClientWaitingData(receiver
);
7519 /* Blocking RPOP/LPOP.
 *
 * Arguments 1..argc-2 are keys and the last argument is the timeout. Each
 * key is looked up in order: a non-list value yields a wrong-type error, and
 * the first non-empty list is served by temporarily rewriting the client's
 * argument vector and calling popGenericCommand() with 'where'. The visible
 * final path parses the timeout with strtol(), turns a positive value into
 * an absolute time by adding time(NULL), and calls blockForKeys(). */
7520 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7525 for (j
= 1; j
< c
->argc
-1; j
++) {
7526 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7528 if (o
->type
!= REDIS_LIST
) {
7529 addReply(c
,shared
.wrongtypeerr
);
7532 list
*list
= o
->ptr
;
7533 if (listLength(list
) != 0) {
7534 /* If the list contains elements fall back to the usual
7535 * non-blocking POP operation */
7536 robj
*argv
[2], **orig_argv
;
7539 /* We need to alter the command arguments before to call
7540 * popGenericCommand() as the command takes a single key. */
7541 orig_argv
= c
->argv
;
7542 orig_argc
= c
->argc
;
7543 argv
[1] = c
->argv
[j
];
7547 /* Also the return value is different, we need to output
7548 * the multi bulk reply header and the key name. The
7549 * "real" command will add the last element (the value)
7550 * for us. If this sounds like a hack to you it's just
7551 * because it is... */
7552 addReplySds(c
,sdsnew("*2\r\n"));
7553 addReplyBulk(c
,argv
[1]);
7554 popGenericCommand(c
,where
);
7556 /* Fix the client structure with the original stuff */
7557 c
->argv
= orig_argv
;
7558 c
->argc
= orig_argc
;
7564 /* If the list is empty or the key does not exist we must block */
7565 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7566 if (timeout
> 0) timeout
+= time(NULL
);
7567 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
/* blpopCommand: calls blockingPopGenericCommand() with REDIS_HEAD. */
7570 static void blpopCommand(redisClient
*c
) {
7571 blockingPopGenericCommand(c
,REDIS_HEAD
);
/* brpopCommand: calls blockingPopGenericCommand() with REDIS_TAIL. */
7574 static void brpopCommand(redisClient
*c
) {
7575 blockingPopGenericCommand(c
,REDIS_TAIL
);
7578 /* =============================== Replication ============================= */
/* syncWrite: writes to 'fd' from 'ptr', waiting for writability with
 * aeWait() in slices of 1000 (milliseconds, presumably -- confirm against
 * ae.h). Returns -1 as soon as write() fails. 'timeout' is compared against
 * the seconds elapsed since the call started.
 * NOTE(review): the loop structure, the handling of partial writes and the
 * success return value ('ret' is initialised to 'size') are on lines not
 * visible in this listing. */
7580 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7581 ssize_t nwritten
, ret
= size
;
7582 time_t start
= time(NULL
);
7586 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7587 nwritten
= write(fd
,ptr
,size
);
7588 if (nwritten
== -1) return -1;
7592 if ((time(NULL
)-start
) > timeout
) {
/* syncRead: reads from 'fd' into 'ptr', waiting for readability with
 * aeWait() in slices of 1000 (milliseconds, presumably -- confirm against
 * ae.h). Returns -1 as soon as read() fails. 'timeout' is compared against
 * the seconds elapsed since the call started.
 * NOTE(review): the loop structure, the accumulation into 'totread' and the
 * success return value are on lines not visible in this listing. */
7600 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7601 ssize_t nread
, totread
= 0;
7602 time_t start
= time(NULL
);
7606 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7607 nread
= read(fd
,ptr
,size
);
7608 if (nread
== -1) return -1;
7613 if ((time(NULL
)-start
) > timeout
) {
/* syncReadLine: reads from 'fd' one byte at a time through syncRead(),
 * returning -1 if any single-byte read fails. A '\r' stored immediately
 * before the current write position is overwritten with a terminator, so a
 * CRLF line ending is stripped.
 * NOTE(review): the loop, the newline test and the return value are on
 * lines not visible in this listing. */
7621 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7628 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7631 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
/* syncCommand: turns the issuing client into a slave. Clients already
 * flagged REDIS_SLAVE are ignored, and a client with pending replies gets an
 * error. If a background save is already running, the slave list is scanned
 * for one in REDIS_REPL_WAIT_BGSAVE_END state: when found, its reply list is
 * duplicated into this client, which then waits for the end of that save;
 * otherwise the client waits for the next save to start. If no save is
 * running one is started with rdbSaveBackground(). Finally the client is
 * flagged REDIS_SLAVE and appended to server.slaves. */
7642 static void syncCommand(redisClient
*c
) {
7643 /* ignore SYNC if already slave or in monitor mode */
7644 if (c
->flags
& REDIS_SLAVE
) return;
7646 /* SYNC can't be issued when the server has pending data to send to
7647 * the client about already issued commands. We need a fresh reply
7648 * buffer registering the differences between the BGSAVE and the current
7649 * dataset, so that we can copy to other slaves if needed. */
7650 if (listLength(c
->reply
) != 0) {
7651 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7655 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7656 /* Here we need to check if there is a background saving operation
7657 * in progress, or if it is required to start one */
7658 if (server
.bgsavechildpid
!= -1) {
7659 /* Ok a background save is in progress. Let's check if it is a good
7660 * one for replication, i.e. if there is another slave that is
7661 * registering differences since the server forked to save */
7666 listRewind(server
.slaves
,&li
);
7667 while((ln
= listNext(&li
))) {
7669 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7672 /* Perfect, the server is already registering differences for
7673 * another slave. Set the right state, and copy the buffer. */
7674 listRelease(c
->reply
);
7675 c
->reply
= listDup(slave
->reply
);
7676 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7677 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7679 /* No way, we need to wait for the next BGSAVE in order to
7680 * register differences */
7681 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7682 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7685 /* Ok we don't have a BGSAVE in progress, let's start one */
7686 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7687 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7688 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7689 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7692 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7695 c
->flags
|= REDIS_SLAVE
;
7697 listAddNodeTail(server
.slaves
,c
);
/* sendBulkToSlave: writable-event handler that streams the dump file to a
 * slave ('privdata' is the slave client). When slave->repldboff is 0 a
 * "$<count>\r\n" bulk header is written first. Each call then seeks the dump
 * file descriptor to slave->repldboff, reads up to REDIS_IOBUF_LEN bytes,
 * writes them to 'fd' and advances repldboff by the bytes actually written.
 * Once repldboff equals repldbsize the dump file is closed, this handler is
 * removed, the slave is marked REDIS_REPL_ONLINE and sendReplyToClient is
 * installed as the writable handler. */
7701 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7702 redisClient
*slave
= privdata
;
7704 REDIS_NOTUSED(mask
);
7705 char buf
[REDIS_IOBUF_LEN
];
7706 ssize_t nwritten
, buflen
;
7708 if (slave
->repldboff
== 0) {
7709 /* Write the bulk write count before to transfer the DB. In theory here
7710 * we don't know how much room there is in the output buffer of the
7711 * socket, but in practice SO_SNDLOWAT (the minimum count for output
7712 * operations) will never be smaller than the few bytes we need. */
7715 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7717 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7725 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7726 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7728 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7729 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7733 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7734 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7739 slave
->repldboff
+= nwritten
;
7740 if (slave
->repldboff
== slave
->repldbsize
) {
7741 close(slave
->repldbfd
);
7742 slave
->repldbfd
= -1;
7743 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7744 slave
->replstate
= REDIS_REPL_ONLINE
;
7745 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7746 sendReplyToClient
, slave
) == AE_ERR
) {
7750 addReplySds(slave
,sdsempty());
7751 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7755 /* This function is called at the end of every background saving.
7756 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7757 * otherwise REDIS_ERR is passed to the function.
7759 * The goal of this function is to handle slaves waiting for a successful
7760 * background saving in order to perform non-blocking synchronization. */
7761 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7763 int startbgsave
= 0;
7766 listRewind(server
.slaves
,&li
);
7767 while((ln
= listNext(&li
))) {
7768 redisClient
*slave
= ln
->value
;
7770 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7772 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7773 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7774 struct redis_stat buf
;
7776 if (bgsaveerr
!= REDIS_OK
) {
7778 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7781 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7782 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7784 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7787 slave
->repldboff
= 0;
7788 slave
->repldbsize
= buf
.st_size
;
7789 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7790 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7791 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7798 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7801 listRewind(server
.slaves
,&li
);
7802 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7803 while((ln
= listNext(&li
))) {
7804 redisClient
*slave
= ln
->value
;
7806 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7813 static int syncWithMaster(void) {
7814 char buf
[1024], tmpfile
[256], authcmd
[1024];
7816 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7817 int dfd
, maxtries
= 5;
7820 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7825 /* AUTH with the master if required. */
7826 if(server
.masterauth
) {
7827 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7828 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7830 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7834 /* Read the AUTH result. */
7835 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7837 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7841 if (buf
[0] != '+') {
7843 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7848 /* Issue the SYNC command */
7849 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7851 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7855 /* Read the bulk write count */
7856 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7858 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7862 if (buf
[0] != '$') {
7864 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7867 dumpsize
= strtol(buf
+1,NULL
,10);
7868 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7869 /* Read the bulk write data on a temp file */
7871 snprintf(tmpfile
,256,
7872 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7873 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7874 if (dfd
!= -1) break;
7879 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7883 int nread
, nwritten
;
7885 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7887 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7893 nwritten
= write(dfd
,buf
,nread
);
7894 if (nwritten
== -1) {
7895 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7903 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7904 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7910 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7911 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7915 server
.master
= createClient(fd
);
7916 server
.master
->flags
|= REDIS_MASTER
;
7917 server
.master
->authenticated
= 1;
7918 server
.replstate
= REDIS_REPL_CONNECTED
;
7922 static void slaveofCommand(redisClient
*c
) {
7923 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7924 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7925 if (server
.masterhost
) {
7926 sdsfree(server
.masterhost
);
7927 server
.masterhost
= NULL
;
7928 if (server
.master
) freeClient(server
.master
);
7929 server
.replstate
= REDIS_REPL_NONE
;
7930 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7933 sdsfree(server
.masterhost
);
7934 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7935 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7936 if (server
.master
) freeClient(server
.master
);
7937 server
.replstate
= REDIS_REPL_CONNECT
;
7938 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7939 server
.masterhost
, server
.masterport
);
7941 addReply(c
,shared
.ok
);
7944 /* ============================ Maxmemory directive ======================== */
7946 /* Try to free one object from the pre-allocated objects free list.
7947 * This is useful under low mem conditions as by default we take 1 million
7948 * free objects allocated. On success REDIS_OK is returned, otherwise
7950 static int tryFreeOneObjectFromFreelist(void) {
7953 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7954 if (listLength(server
.objfreelist
)) {
7955 listNode
*head
= listFirst(server
.objfreelist
);
7956 o
= listNodeValue(head
);
7957 listDelNode(server
.objfreelist
,head
);
7958 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7962 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7967 /* This function gets called when 'maxmemory' is set on the config file to limit
7968 * the max memory used by the server, and we are out of memory.
7969 * This function will try to, in order:
7971 * - Free objects from the free list
7972 * - Try to remove keys with an EXPIRE set
7974 * If it is not possible to free enough memory to reach used-memory < maxmemory
7975 * the server will start refusing commands that will enlarge even more the
7978 static void freeMemoryIfNeeded(void) {
7979 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7980 int j
, k
, freed
= 0;
7982 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7983 for (j
= 0; j
< server
.dbnum
; j
++) {
7985 robj
*minkey
= NULL
;
7986 struct dictEntry
*de
;
7988 if (dictSize(server
.db
[j
].expires
)) {
7990 /* From a sample of three keys drop the one nearest to
7991 * the natural expire */
7992 for (k
= 0; k
< 3; k
++) {
7995 de
= dictGetRandomKey(server
.db
[j
].expires
);
7996 t
= (time_t) dictGetEntryVal(de
);
7997 if (minttl
== -1 || t
< minttl
) {
7998 minkey
= dictGetEntryKey(de
);
8002 deleteKey(server
.db
+j
,minkey
);
8005 if (!freed
) return; /* nothing to free... */
8009 /* ============================== Append Only file ========================== */
8011 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8012 sds buf
= sdsempty();
8018 /* The DB this command was targeting is not the same as the last command
8019 * we appended. A SELECT command needs to be issued first. */
8020 if (dictid
!= server
.appendseldb
) {
8023 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8024 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8025 (unsigned long)strlen(seldb
),seldb
);
8026 server
.appendseldb
= dictid
;
8029 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
8030 * EXPIREs into EXPIREATs calls */
8031 if (cmd
->proc
== expireCommand
) {
8034 tmpargv
[0] = createStringObject("EXPIREAT",8);
8035 tmpargv
[1] = argv
[1];
8036 incrRefCount(argv
[1]);
8037 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
8038 tmpargv
[2] = createObject(REDIS_STRING
,
8039 sdscatprintf(sdsempty(),"%ld",when
));
8043 /* Append the actual command */
8044 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8045 for (j
= 0; j
< argc
; j
++) {
8048 o
= getDecodedObject(o
);
8049 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8050 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8051 buf
= sdscatlen(buf
,"\r\n",2);
8055 /* Free the objects from the modified argv for EXPIREAT */
8056 if (cmd
->proc
== expireCommand
) {
8057 for (j
= 0; j
< 3; j
++)
8058 decrRefCount(argv
[j
]);
8061 /* We want to perform a single write. This should be guaranteed atomic
8062 * at least if the filesystem we are writing is a real physical one.
8063 * While this will save us against the server being killed I don't think
8064 * there is much to do about the whole server stopping for power problems
8066 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
8067 if (nwritten
!= (signed)sdslen(buf
)) {
8068 /* Ooops, we are in trouble. The best thing to do for now is
8069 * to simply exit instead of giving the illusion that everything is
8070 * working as expected. */
8071 if (nwritten
== -1) {
8072 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8074 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8078 /* If a background append only file rewriting is in progress we want to
8079 * accumulate the differences between the child DB and the current one
8080 * in a buffer, so that when the child process will do its work we
8081 * can append the differences to the new append only file. */
8082 if (server
.bgrewritechildpid
!= -1)
8083 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8087 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8088 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8089 now
-server
.lastfsync
> 1))
8091 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8092 * flushing metadata. */
8093 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8094 server
.lastfsync
= now
;
8098 /* In Redis commands are always executed in the context of a client, so in
8099 * order to load the append only file we need to create a fake client. */
8100 static struct redisClient
*createFakeClient(void) {
8101 struct redisClient
*c
= zmalloc(sizeof(*c
));
8105 c
->querybuf
= sdsempty();
8109 /* We set the fake client as a slave waiting for the synchronization
8110 * so that Redis will not try to send replies to this client. */
8111 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8112 c
->reply
= listCreate();
8113 listSetFreeMethod(c
->reply
,decrRefCount
);
8114 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8118 static void freeFakeClient(struct redisClient
*c
) {
8119 sdsfree(c
->querybuf
);
8120 listRelease(c
->reply
);
8124 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
8125 * error (the append only file is zero-length) REDIS_ERR is returned. On
8126 * fatal error an error message is logged and the program exits. */
8127 int loadAppendOnlyFile(char *filename
) {
8128 struct redisClient
*fakeClient
;
8129 FILE *fp
= fopen(filename
,"r");
8130 struct redis_stat sb
;
8131 unsigned long long loadedkeys
= 0;
8133 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8137 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8141 fakeClient
= createFakeClient();
8148 struct redisCommand
*cmd
;
8150 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8156 if (buf
[0] != '*') goto fmterr
;
8158 argv
= zmalloc(sizeof(robj
*)*argc
);
8159 for (j
= 0; j
< argc
; j
++) {
8160 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8161 if (buf
[0] != '$') goto fmterr
;
8162 len
= strtol(buf
+1,NULL
,10);
8163 argsds
= sdsnewlen(NULL
,len
);
8164 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8165 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8166 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8169 /* Command lookup */
8170 cmd
= lookupCommand(argv
[0]->ptr
);
8172 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8175 /* Try object encoding */
8176 if (cmd
->flags
& REDIS_CMD_BULK
)
8177 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8178 /* Run the command in the context of a fake client */
8179 fakeClient
->argc
= argc
;
8180 fakeClient
->argv
= argv
;
8181 cmd
->proc(fakeClient
);
8182 /* Discard the reply objects list from the fake client */
8183 while(listLength(fakeClient
->reply
))
8184 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8185 /* Clean up, ready for the next command */
8186 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8188 /* Handle swapping while loading big datasets when VM is on */
8190 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8191 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8192 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8197 freeFakeClient(fakeClient
);
8202 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8204 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8208 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8212 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8213 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8217 /* Avoid the incr/decr ref count business if possible to help
8218 * copy-on-write (we are often in a child process when this function
8220 * Also makes sure that key objects don't get incrRefCount-ed when VM
8222 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8223 obj
= getDecodedObject(obj
);
8226 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8227 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8228 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8230 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8231 if (decrrc
) decrRefCount(obj
);
8234 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload and is not dereferenced when 'len'
 * is zero. Returns 1 on success, 0 as soon as one fwrite() fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is an unsigned long, so it has to be printed with %lu: with
     * %ld a length above LONG_MAX would be emitted as a negative count. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes returns 0, which would be mistaken for an
     * error, so an empty payload is not written at all. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double into 'fp' in the bulk format $<count>\r\n<payload>\r\n.
 * The value is formatted with "%.17g". Returns 1 on success, 0 if one of
 * the fwrite() calls fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    const char *chunks[2];
    int i;

    /* The payload buffer already carries its trailing CRLF, so the count
     * announced in the header is its length minus those two bytes. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    snprintf(header,sizeof(header),"$%lu\r\n",
        (unsigned long)strlen(payload)-2);

    chunks[0] = header;
    chunks[1] = payload;
    for (i = 0; i < 2; i++) {
        if (fwrite(chunks[i],strlen(chunks[i]),1,fp) == 0) return 0;
    }
    return 1;
}
/* Write a long into 'fp' in the bulk format $<count>\r\n<payload>\r\n.
 * Returns 1 on success, 0 if one of the fwrite() calls fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], digits[128];
    size_t ndigits;

    /* 'digits' holds the number followed by CRLF; only the number itself
     * is counted in the header. */
    snprintf(digits,sizeof(digits),"%ld\r\n",l);
    ndigits = strlen(digits)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)ndigits);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(digits,ndigits+2,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
8272 /* Write a sequence of commands able to fully rebuild the dataset into
8273 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8274 static int rewriteAppendOnlyFile(char *filename
) {
8275 dictIterator
*di
= NULL
;
8280 time_t now
= time(NULL
);
8282 /* Note that we have to use a different temp name here compared to the
8283 * one used by rewriteAppendOnlyFileBackground() function. */
8284 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8285 fp
= fopen(tmpfile
,"w");
8287 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8290 for (j
= 0; j
< server
.dbnum
; j
++) {
8291 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8292 redisDb
*db
= server
.db
+j
;
8294 if (dictSize(d
) == 0) continue;
8295 di
= dictGetIterator(d
);
8301 /* SELECT the new DB */
8302 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8303 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8305 /* Iterate this DB writing every entry */
8306 while((de
= dictNext(di
)) != NULL
) {
8311 key
= dictGetEntryKey(de
);
8312 /* If the value for this key is swapped, load a preview in memory.
8313 * We use a "swapped" flag to remember if we need to free the
8314 * value object instead of just incrementing the ref count anyway
8315 * in order to avoid copy-on-write of pages if we are forked() */
8316 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8317 key
->storage
== REDIS_VM_SWAPPING
) {
8318 o
= dictGetEntryVal(de
);
8321 o
= vmPreviewObject(key
);
8324 expiretime
= getExpire(db
,key
);
8326 /* Save the key and associated value */
8327 if (o
->type
== REDIS_STRING
) {
8328 /* Emit a SET command */
8329 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8330 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8332 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8333 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8334 } else if (o
->type
== REDIS_LIST
) {
8335 /* Emit the RPUSHes needed to rebuild the list */
8336 list
*list
= o
->ptr
;
8340 listRewind(list
,&li
);
8341 while((ln
= listNext(&li
))) {
8342 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8343 robj
*eleobj
= listNodeValue(ln
);
8345 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8346 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8347 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8349 } else if (o
->type
== REDIS_SET
) {
8350 /* Emit the SADDs needed to rebuild the set */
8352 dictIterator
*di
= dictGetIterator(set
);
8355 while((de
= dictNext(di
)) != NULL
) {
8356 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8357 robj
*eleobj
= dictGetEntryKey(de
);
8359 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8360 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8361 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8363 dictReleaseIterator(di
);
8364 } else if (o
->type
== REDIS_ZSET
) {
8365 /* Emit the ZADDs needed to rebuild the sorted set */
8367 dictIterator
*di
= dictGetIterator(zs
->dict
);
8370 while((de
= dictNext(di
)) != NULL
) {
8371 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8372 robj
*eleobj
= dictGetEntryKey(de
);
8373 double *score
= dictGetEntryVal(de
);
8375 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8376 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8377 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8378 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8380 dictReleaseIterator(di
);
8381 } else if (o
->type
== REDIS_HASH
) {
8382 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8384 /* Emit the HSETs needed to rebuild the hash */
8385 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8386 unsigned char *p
= zipmapRewind(o
->ptr
);
8387 unsigned char *field
, *val
;
8388 unsigned int flen
, vlen
;
8390 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8391 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8392 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8393 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8395 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8399 dictIterator
*di
= dictGetIterator(o
->ptr
);
8402 while((de
= dictNext(di
)) != NULL
) {
8403 robj
*field
= dictGetEntryKey(de
);
8404 robj
*val
= dictGetEntryVal(de
);
8406 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8407 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8408 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8409 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8411 dictReleaseIterator(di
);
8414 redisPanic("Unknown object type");
8416 /* Save the expire time */
8417 if (expiretime
!= -1) {
8418 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8419 /* If this key is already expired skip it */
8420 if (expiretime
< now
) continue;
8421 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8422 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8423 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8425 if (swapped
) decrRefCount(o
);
8427 dictReleaseIterator(di
);
8430 /* Make sure data will not remain on the OS's output buffers */
8435 /* Use RENAME to make sure the DB file is changed atomically only
8436 * if the generated DB file is ok. */
8437 if (rename(tmpfile
,filename
) == -1) {
8438 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8442 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8448 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8449 if (di
) dictReleaseIterator(di
);
8453 /* This is how rewriting of the append only file in background works:
8455 * 1) The user calls BGREWRITEAOF
8456 * 2) Redis calls this function, that forks():
8457 * 2a) the child rewrites the append only file in a temp file.
8458 * 2b) the parent accumulates differences in server.bgrewritebuf.
8459 * 3) When the child finishes '2a' it exits.
8460 * 4) The parent will trap the exit code, if it's OK, will append the
8461 * data accumulated into server.bgrewritebuf into the temp file, and
8462 * finally will rename(2) the temp file in the actual file name.
8463 * Then the new file is reopened as the new append only file. Profit!
8465 static int rewriteAppendOnlyFileBackground(void) {
8468 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8469 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8470 if ((childpid
= fork()) == 0) {
8474 if (server
.vm_enabled
) vmReopenSwapFile();
8476 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8477 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8484 if (childpid
== -1) {
8485 redisLog(REDIS_WARNING
,
8486 "Can't rewrite append only file in background: fork: %s",
8490 redisLog(REDIS_NOTICE
,
8491 "Background append only file rewriting started by pid %d",childpid
);
8492 server
.bgrewritechildpid
= childpid
;
8493 updateDictResizePolicy();
8494 /* We set appendseldb to -1 in order to force the next call to the
8495 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8496 * accumulated by the parent into server.bgrewritebuf will start
8497 * with a SELECT statement and it will be safe to merge. */
8498 server
.appendseldb
= -1;
8501 return REDIS_OK
; /* unreached */
8504 static void bgrewriteaofCommand(redisClient
*c
) {
8505 if (server
.bgrewritechildpid
!= -1) {
8506 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8509 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8510 char *status
= "+Background append only file rewriting started\r\n";
8511 addReplySds(c
,sdsnew(status
));
8513 addReply(c
,shared
.err
);
8517 static void aofRemoveTempFile(pid_t childpid
) {
8520 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
8524 /* Virtual Memory is composed mainly of two subsystems:
8525 * - Blocking Virtual Memory
8526 * - Threaded Virtual Memory I/O
8527 * The two parts are not fully decoupled, but functions are split among two
8528 * different sections of the source code (delimited by comments) in order to
8529 * make more clear what functionality is about the blocking VM and what about
8530 * the threaded (not blocking) VM.
8534 * Redis VM is a blocking VM (one that blocks reading swapped values from
8535 * disk into memory when a value swapped out is needed in memory) that is made
8536 * unblocking by trying to examine the command argument vector in order to
8537 * load in background values that will likely be needed in order to exec
8538 * the command. The command is executed only once all the relevant keys
8539 * are loaded into memory.
8541 * This basically is almost as simple as a blocking VM, but almost as parallel
8542 * as a fully non-blocking VM.
8545 /* =================== Virtual Memory - Blocking Side ====================== */
8547 /* substitute the first occurrence of '%p' with the process pid in the
8548 * swap file name. */
8549 static void expandVmSwapFilename(void) {
8550 char *p
= strstr(server
.vm_swap_file
,"%p");
8556 new = sdscat(new,server
.vm_swap_file
);
8557 new = sdscatprintf(new,"%ld",(long) getpid());
8558 new = sdscat(new,p
+2);
8559 zfree(server
.vm_swap_file
);
8560 server
.vm_swap_file
= new;
8563 static void vmInit(void) {
8568 if (server
.vm_max_threads
!= 0)
8569 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8571 expandVmSwapFilename();
8572 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8573 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8574 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8576 if (server
.vm_fp
== NULL
) {
8577 redisLog(REDIS_WARNING
,
8578 "Impossible to open the swap file: %s. Exiting.",
8582 server
.vm_fd
= fileno(server
.vm_fp
);
8583 server
.vm_next_page
= 0;
8584 server
.vm_near_pages
= 0;
8585 server
.vm_stats_used_pages
= 0;
8586 server
.vm_stats_swapped_objects
= 0;
8587 server
.vm_stats_swapouts
= 0;
8588 server
.vm_stats_swapins
= 0;
8589 totsize
= server
.vm_pages
*server
.vm_page_size
;
8590 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8591 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8592 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8596 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8598 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8599 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8600 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8601 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8603 /* Initialize threaded I/O (used by Virtual Memory) */
8604 server
.io_newjobs
= listCreate();
8605 server
.io_processing
= listCreate();
8606 server
.io_processed
= listCreate();
8607 server
.io_ready_clients
= listCreate();
8608 pthread_mutex_init(&server
.io_mutex
,NULL
);
8609 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8610 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8611 server
.io_active_threads
= 0;
8612 if (pipe(pipefds
) == -1) {
8613 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8617 server
.io_ready_pipe_read
= pipefds
[0];
8618 server
.io_ready_pipe_write
= pipefds
[1];
8619 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8620 /* LZF requires a lot of stack */
8621 pthread_attr_init(&server
.io_threads_attr
);
8622 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8623 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8624 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8625 /* Listen for events in the threaded I/O pipe */
8626 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8627 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8628 oom("creating file event");
8631 /* Mark the page as used */
8632 static void vmMarkPageUsed(off_t page
) {
8633 off_t byte
= page
/8;
8635 redisAssert(vmFreePage(page
) == 1);
8636 server
.vm_bitmap
[byte
] |= 1<<bit
;
8639 /* Mark N contiguous pages as used, with 'page' being the first. */
8640 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8643 for (j
= 0; j
< count
; j
++)
8644 vmMarkPageUsed(page
+j
);
8645 server
.vm_stats_used_pages
+= count
;
8646 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8647 (long long)count
, (long long)page
);
8650 /* Mark the page as free */
8651 static void vmMarkPageFree(off_t page
) {
8652 off_t byte
= page
/8;
8654 redisAssert(vmFreePage(page
) == 0);
8655 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8658 /* Mark N contiguous pages as free, with 'page' being the first. */
8659 static void vmMarkPagesFree(off_t page
, off_t count
) {
8662 for (j
= 0; j
< count
; j
++)
8663 vmMarkPageFree(page
+j
);
8664 server
.vm_stats_used_pages
-= count
;
8665 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8666 (long long)count
, (long long)page
);
8669 /* Test if the page is free */
8670 static int vmFreePage(off_t page
) {
8671 off_t byte
= page
/8;
8673 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8676 /* Find N contiguous free pages storing the first page of the cluster in *first.
8677 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8678 * REDIS_ERR is returned.
8680 * This function uses a simple algorithm: we try to allocate
8681 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8682 * again from the start of the swap file searching for free spaces.
8684 * If it looks pretty clear that there are no free pages near our offset
8685 * we try to find less populated places doing a forward jump of
8686 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8687 * without hurry, and then we jump again and so forth...
8689 * This function can be improved using a free list to avoid to guess
8690 * too much, since we could collect data about freed pages.
8692 * note: I implemented this function just after watching an episode of
8693 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8695 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8696 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8698 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8699 server
.vm_near_pages
= 0;
8700 server
.vm_next_page
= 0;
8702 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8703 base
= server
.vm_next_page
;
8705 while(offset
< server
.vm_pages
) {
8706 off_t
this = base
+offset
;
8708 /* If we overflow, restart from page zero */
8709 if (this >= server
.vm_pages
) {
8710 this -= server
.vm_pages
;
8712 /* Just overflowed, what we found on tail is no longer
8713 * interesting, as it's no longer contiguous. */
8717 if (vmFreePage(this)) {
8718 /* This is a free page */
8720 /* Already got N free pages? Return to the caller, with success */
8722 *first
= this-(n
-1);
8723 server
.vm_next_page
= this+1;
8724 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8728 /* The current one is not a free page */
8732 /* Fast-forward if the current page is not free and we already
8733 * searched enough near this place. */
8735 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8736 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8738 /* Note that even if we rewind after the jump, we don't need
8739 * to make sure numfree is set to zero as we only jump *if* it
8740 * is set to zero. */
8742 /* Otherwise just check the next page */
8749 /* Write the specified object at the specified page of the swap file */
8750 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8751 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8752 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8753 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8754 redisLog(REDIS_WARNING
,
8755 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8759 rdbSaveObject(server
.vm_fp
,o
);
8760 fflush(server
.vm_fp
);
8761 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8765 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8766 * needed to later retrieve the object into the key object.
8767 * If we can't find enough contiguous empty pages to swap the object on disk
8768 * REDIS_ERR is returned. */
8769 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8770 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8773 assert(key
->storage
== REDIS_VM_MEMORY
);
8774 assert(key
->refcount
== 1);
8775 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8776 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8777 key
->vm
.page
= page
;
8778 key
->vm
.usedpages
= pages
;
8779 key
->storage
= REDIS_VM_SWAPPED
;
8780 key
->vtype
= val
->type
;
8781 decrRefCount(val
); /* Deallocate the object from memory. */
8782 vmMarkPagesUsed(page
,pages
);
8783 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8784 (unsigned char*) key
->ptr
,
8785 (unsigned long long) page
, (unsigned long long) pages
);
8786 server
.vm_stats_swapped_objects
++;
8787 server
.vm_stats_swapouts
++;
8791 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8794 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8795 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8796 redisLog(REDIS_WARNING
,
8797 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8801 o
= rdbLoadObject(type
,server
.vm_fp
);
8803 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8806 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8810 /* Load the value object relative to the 'key' object from swap to memory.
8811 * The newly allocated object is returned.
8813 * If preview is true the unserialized object is returned to the caller but
8814 * no changes are made to the key object, nor the pages are marked as freed */
8815 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8818 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8819 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8821 key
->storage
= REDIS_VM_MEMORY
;
8822 key
->vm
.atime
= server
.unixtime
;
8823 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8824 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8825 (unsigned char*) key
->ptr
);
8826 server
.vm_stats_swapped_objects
--;
8828 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8829 (unsigned char*) key
->ptr
);
8831 server
.vm_stats_swapins
++;
8835 /* Plain object loading, from swap to memory */
8836 static robj
*vmLoadObject(robj
*key
) {
8837 /* If we are loading the object in background, stop it, we
8838 * need to load this object synchronously ASAP. */
8839 if (key
->storage
== REDIS_VM_LOADING
)
8840 vmCancelThreadedIOJob(key
);
8841 return vmGenericLoadObject(key
,0);
8844 /* Just load the value on disk, without to modify the key.
8845 * This is useful when we want to perform some operation on the value
8846 * without to really bring it from swap to memory, like while saving the
8847 * dataset or rewriting the append only log. */
8848 static robj
*vmPreviewObject(robj
*key
) {
8849 return vmGenericLoadObject(key
,1);
8852 /* How a good candidate is this object for swapping?
8853 * The better candidate it is, the greater the returned value.
8855 * Currently we try to perform a fast estimation of the object size in
8856 * memory, and combine it with aging informations.
8858 * Basically swappability = idle-time * log(estimated size)
8860 * Bigger objects are preferred over smaller objects, but not
8861 * proportionally, this is why we use the logarithm. This algorithm is
8862 * just a first try and will probably be tuned later. */
8863 static double computeObjectSwappability(robj
*o
) {
8864 time_t age
= server
.unixtime
- o
->vm
.atime
;
8868 struct dictEntry
*de
;
8871 if (age
<= 0) return 0;
8874 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8877 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8882 listNode
*ln
= listFirst(l
);
8884 asize
= sizeof(list
);
8886 robj
*ele
= ln
->value
;
8889 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8890 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8892 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8897 z
= (o
->type
== REDIS_ZSET
);
8898 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8900 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8901 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8906 de
= dictGetRandomKey(d
);
8907 ele
= dictGetEntryKey(de
);
8908 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8909 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8911 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8912 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8916 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8917 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8918 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8919 unsigned int klen
, vlen
;
8920 unsigned char *key
, *val
;
8922 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8926 asize
= len
*(klen
+vlen
+3);
8927 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8929 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8934 de
= dictGetRandomKey(d
);
8935 ele
= dictGetEntryKey(de
);
8936 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8937 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8939 ele
= dictGetEntryVal(de
);
8940 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8941 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8943 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8948 return (double)age
*log(1+asize
);
8951 /* Try to swap an object that's a good candidate for swapping.
8952 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8953 * to swap any object at all.
8955 * If 'usethreaded' is true, Redis will try to swap the object in background
8956 * using I/O threads. */
8957 static int vmSwapOneObject(int usethreads
) {
8959 struct dictEntry
*best
= NULL
;
8960 double best_swappability
= 0;
8961 redisDb
*best_db
= NULL
;
8964 for (j
= 0; j
< server
.dbnum
; j
++) {
8965 redisDb
*db
= server
.db
+j
;
8966 /* Why maxtries is set to 100?
8967 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8968 * are swappable objects */
8971 if (dictSize(db
->dict
) == 0) continue;
8972 for (i
= 0; i
< 5; i
++) {
8974 double swappability
;
8976 if (maxtries
) maxtries
--;
8977 de
= dictGetRandomKey(db
->dict
);
8978 key
= dictGetEntryKey(de
);
8979 val
= dictGetEntryVal(de
);
8980 /* Only swap objects that are currently in memory.
8982 * Also don't swap shared objects if threaded VM is on, as we
8983 * try to ensure that the main thread does not touch the
8984 * object while the I/O thread is using it, but we can't
8985 * control other keys without adding additional mutex. */
8986 if (key
->storage
!= REDIS_VM_MEMORY
||
8987 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8988 if (maxtries
) i
--; /* don't count this try */
8991 swappability
= computeObjectSwappability(val
);
8992 if (!best
|| swappability
> best_swappability
) {
8994 best_swappability
= swappability
;
8999 if (best
== NULL
) return REDIS_ERR
;
9000 key
= dictGetEntryKey(best
);
9001 val
= dictGetEntryVal(best
);
9003 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9004 key
->ptr
, best_swappability
);
9006 /* Unshare the key if needed */
9007 if (key
->refcount
> 1) {
9008 robj
*newkey
= dupStringObject(key
);
9010 key
= dictGetEntryKey(best
) = newkey
;
9014 vmSwapObjectThreaded(key
,val
,best_db
);
9017 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9018 dictGetEntryVal(best
) = NULL
;
/* Swap one object out synchronously, in the calling thread.
 * Returns whatever vmSwapOneObject() returns. The definition now uses a
 * real (void) prototype instead of an empty parameter list. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Queue the swap out of one object using the I/O threads.
 * Returns whatever vmSwapOneObject() returns. The definition now uses a
 * real (void) prototype instead of an empty parameter list. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
9034 /* Return true if it's safe to swap out objects in a given moment.
9035 * Basically we don't want to swap objects out while there is a BGSAVE
9036 * or a BGAEOREWRITE running in backgroud. */
9037 static int vmCanSwapOut(void) {
9038 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9041 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
9042 * and was deleted. Otherwise 0 is returned. */
9043 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
9047 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
9048 foundkey
= dictGetEntryKey(de
);
9049 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
9054 /* =================== Virtual Memory - Threaded I/O ======================= */
9056 static void freeIOJob(iojob
*j
) {
9057 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9058 j
->type
== REDIS_IOJOB_DO_SWAP
||
9059 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9060 decrRefCount(j
->val
);
9061 /* We don't decrRefCount the j->key field as we did't incremented
9062 * the count creating IO Jobs. This is because the key field here is
9063 * just used as an indentifier and if a key is removed the Job should
9064 * never be touched again. */
9068 /* Every time a thread finished a Job, it writes a byte into the write side
9069 * of an unix pipe in order to "awake" the main thread, and this function
9071 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9075 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9077 REDIS_NOTUSED(mask
);
9078 REDIS_NOTUSED(privdata
);
9080 /* For every byte we read in the read side of the pipe, there is one
9081 * I/O job completed to process. */
9082 while((retval
= read(fd
,buf
,1)) == 1) {
9086 struct dictEntry
*de
;
9088 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9090 /* Get the processed element (the oldest one) */
9092 assert(listLength(server
.io_processed
) != 0);
9093 if (toprocess
== -1) {
9094 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9095 if (toprocess
<= 0) toprocess
= 1;
9097 ln
= listFirst(server
.io_processed
);
9099 listDelNode(server
.io_processed
,ln
);
9101 /* If this job is marked as canceled, just ignore it */
9106 /* Post process it in the main thread, as there are things we
9107 * can do just here to avoid race conditions and/or invasive locks */
9108 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9109 de
= dictFind(j
->db
->dict
,j
->key
);
9111 key
= dictGetEntryKey(de
);
9112 if (j
->type
== REDIS_IOJOB_LOAD
) {
9115 /* Key loaded, bring it at home */
9116 key
->storage
= REDIS_VM_MEMORY
;
9117 key
->vm
.atime
= server
.unixtime
;
9118 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9119 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9120 (unsigned char*) key
->ptr
);
9121 server
.vm_stats_swapped_objects
--;
9122 server
.vm_stats_swapins
++;
9123 dictGetEntryVal(de
) = j
->val
;
9124 incrRefCount(j
->val
);
9127 /* Handle clients waiting for this key to be loaded. */
9128 handleClientsBlockedOnSwappedKey(db
,key
);
9129 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9130 /* Now we know the amount of pages required to swap this object.
9131 * Let's find some space for it, and queue this task again
9132 * rebranded as REDIS_IOJOB_DO_SWAP. */
9133 if (!vmCanSwapOut() ||
9134 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9136 /* Ooops... no space or we can't swap as there is
9137 * a fork()ed Redis trying to save stuff on disk. */
9139 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9141 /* Note that we need to mark this pages as used now,
9142 * if the job will be canceled, we'll mark them as freed
9144 vmMarkPagesUsed(j
->page
,j
->pages
);
9145 j
->type
= REDIS_IOJOB_DO_SWAP
;
9150 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9153 /* Key swapped. We can finally free some memory. */
9154 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9155 printf("key->storage: %d\n",key
->storage
);
9156 printf("key->name: %s\n",(char*)key
->ptr
);
9157 printf("key->refcount: %d\n",key
->refcount
);
9158 printf("val: %p\n",(void*)j
->val
);
9159 printf("val->type: %d\n",j
->val
->type
);
9160 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9162 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9163 val
= dictGetEntryVal(de
);
9164 key
->vm
.page
= j
->page
;
9165 key
->vm
.usedpages
= j
->pages
;
9166 key
->storage
= REDIS_VM_SWAPPED
;
9167 key
->vtype
= j
->val
->type
;
9168 decrRefCount(val
); /* Deallocate the object from memory. */
9169 dictGetEntryVal(de
) = NULL
;
9170 redisLog(REDIS_DEBUG
,
9171 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9172 (unsigned char*) key
->ptr
,
9173 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9174 server
.vm_stats_swapped_objects
++;
9175 server
.vm_stats_swapouts
++;
9177 /* Put a few more swap requests in queue if we are still
9179 if (trytoswap
&& vmCanSwapOut() &&
9180 zmalloc_used_memory() > server
.vm_max_memory
)
9185 more
= listLength(server
.io_newjobs
) <
9186 (unsigned) server
.vm_max_threads
;
9188 /* Don't waste CPU time if swappable objects are rare. */
9189 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9197 if (processed
== toprocess
) return;
9199 if (retval
< 0 && errno
!= EAGAIN
) {
9200 redisLog(REDIS_WARNING
,
9201 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9206 static void lockThreadedIO(void) {
9207 pthread_mutex_lock(&server
.io_mutex
);
9210 static void unlockThreadedIO(void) {
9211 pthread_mutex_unlock(&server
.io_mutex
);
9214 /* Remove the specified object from the threaded I/O queue if still not
9215 * processed, otherwise make sure to flag it as canceled. */
9216 static void vmCancelThreadedIOJob(robj
*o
) {
9218 server
.io_newjobs
, /* 0 */
9219 server
.io_processing
, /* 1 */
9220 server
.io_processed
/* 2 */
9224 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9227 /* Search for a matching key in one of the queues */
9228 for (i
= 0; i
< 3; i
++) {
9232 listRewind(lists
[i
],&li
);
9233 while ((ln
= listNext(&li
)) != NULL
) {
9234 iojob
*job
= ln
->value
;
9236 if (job
->canceled
) continue; /* Skip this, already canceled. */
9237 if (job
->key
== o
) {
9238 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9239 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9240 /* Mark the pages as free since the swap didn't happened
9241 * or happened but is now discarded. */
9242 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9243 vmMarkPagesFree(job
->page
,job
->pages
);
9244 /* Cancel the job. It depends on the list the job is
9247 case 0: /* io_newjobs */
9248 /* If the job was yet not processed the best thing to do
9249 * is to remove it from the queue at all */
9251 listDelNode(lists
[i
],ln
);
9253 case 1: /* io_processing */
9254 /* Oh Shi- the thread is messing with the Job:
9256 * Probably it's accessing the object if this is a
9257 * PREPARE_SWAP or DO_SWAP job.
9258 * If it's a LOAD job it may be reading from disk and
9259 * if we don't wait for the job to terminate before to
9260 * cancel it, maybe in a few microseconds data can be
9261 * corrupted in this pages. So the short story is:
9263 * Better to wait for the job to move into the
9264 * next queue (processed)... */
9266 /* We try again and again until the job is completed. */
9268 /* But let's wait some time for the I/O thread
9269 * to finish with this job. After all this condition
9270 * should be very rare. */
9273 case 2: /* io_processed */
9274 /* The job was already processed, that's easy...
9275 * just mark it as canceled so that we'll ignore it
9276 * when processing completed jobs. */
9280 /* Finally we have to adjust the storage type of the object
9281 * in order to "UNDO" the operaiton. */
9282 if (o
->storage
== REDIS_VM_LOADING
)
9283 o
->storage
= REDIS_VM_SWAPPED
;
9284 else if (o
->storage
== REDIS_VM_SWAPPING
)
9285 o
->storage
= REDIS_VM_MEMORY
;
9292 assert(1 != 1); /* We should never reach this */
9295 static void *IOThreadEntryPoint(void *arg
) {
9300 pthread_detach(pthread_self());
9302 /* Get a new job to process */
9304 if (listLength(server
.io_newjobs
) == 0) {
9305 /* No new jobs in queue, exit. */
9306 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9307 (long) pthread_self());
9308 server
.io_active_threads
--;
9312 ln
= listFirst(server
.io_newjobs
);
9314 listDelNode(server
.io_newjobs
,ln
);
9315 /* Add the job in the processing queue */
9316 j
->thread
= pthread_self();
9317 listAddNodeTail(server
.io_processing
,j
);
9318 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9320 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9321 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9323 /* Process the Job */
9324 if (j
->type
== REDIS_IOJOB_LOAD
) {
9325 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9326 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9327 FILE *fp
= fopen("/dev/null","w+");
9328 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9330 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9331 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9335 /* Done: insert the job into the processed queue */
9336 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9337 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9339 listDelNode(server
.io_processing
,ln
);
9340 listAddNodeTail(server
.io_processed
,j
);
9343 /* Signal the main thread there is new stuff to process */
9344 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9346 return NULL
; /* never reached */
9349 static void spawnIOThread(void) {
9351 sigset_t mask
, omask
;
9355 sigaddset(&mask
,SIGCHLD
);
9356 sigaddset(&mask
,SIGHUP
);
9357 sigaddset(&mask
,SIGPIPE
);
9358 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9359 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9360 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9364 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9365 server
.io_active_threads
++;
9368 /* We need to wait for the last thread to exit before we are able to
9369 * fork() in order to BGSAVE or BGREWRITEAOF. */
9370 static void waitEmptyIOJobsQueue(void) {
9372 int io_processed_len
;
9375 if (listLength(server
.io_newjobs
) == 0 &&
9376 listLength(server
.io_processing
) == 0 &&
9377 server
.io_active_threads
== 0)
9382 /* While waiting for empty jobs queue condition we post-process some
9383 * finshed job, as I/O threads may be hanging trying to write against
9384 * the io_ready_pipe_write FD but there are so much pending jobs that
9386 io_processed_len
= listLength(server
.io_processed
);
9388 if (io_processed_len
) {
9389 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9390 usleep(1000); /* 1 millisecond */
9392 usleep(10000); /* 10 milliseconds */
9397 static void vmReopenSwapFile(void) {
9398 /* Note: we don't close the old one as we are in the child process
9399 * and don't want to mess at all with the original file object. */
9400 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9401 if (server
.vm_fp
== NULL
) {
9402 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9403 server
.vm_swap_file
);
9406 server
.vm_fd
= fileno(server
.vm_fp
);
9409 /* This function must be called while with threaded IO locked */
9410 static void queueIOJob(iojob
*j
) {
9411 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9412 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9413 listAddNodeTail(server
.io_newjobs
,j
);
9414 if (server
.io_active_threads
< server
.vm_max_threads
)
9418 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9421 assert(key
->storage
== REDIS_VM_MEMORY
);
9422 assert(key
->refcount
== 1);
9424 j
= zmalloc(sizeof(*j
));
9425 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9431 j
->thread
= (pthread_t
) -1;
9432 key
->storage
= REDIS_VM_SWAPPING
;
9440 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9442 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9443 * If there is not already a job loading the key, it is craeted.
9444 * The key is added to the io_keys list in the client structure, and also
9445 * in the hash table mapping swapped keys to waiting clients, that is,
9446 * server.io_waited_keys. */
9447 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9448 struct dictEntry
*de
;
9452 /* If the key does not exist or is already in RAM we don't need to
9453 * block the client at all. */
9454 de
= dictFind(c
->db
->dict
,key
);
9455 if (de
== NULL
) return 0;
9456 o
= dictGetEntryKey(de
);
9457 if (o
->storage
== REDIS_VM_MEMORY
) {
9459 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9460 /* We were swapping the key, undo it! */
9461 vmCancelThreadedIOJob(o
);
9465 /* OK: the key is either swapped, or being loaded just now. */
9467 /* Add the key to the list of keys this client is waiting for.
9468 * This maps clients to keys they are waiting for. */
9469 listAddNodeTail(c
->io_keys
,key
);
9472 /* Add the client to the swapped keys => clients waiting map. */
9473 de
= dictFind(c
->db
->io_keys
,key
);
9477 /* For every key we take a list of clients blocked for it */
9479 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9481 assert(retval
== DICT_OK
);
9483 l
= dictGetEntryVal(de
);
9485 listAddNodeTail(l
,c
);
9487 /* Are we already loading the key from disk? If not create a job */
9488 if (o
->storage
== REDIS_VM_SWAPPED
) {
9491 o
->storage
= REDIS_VM_LOADING
;
9492 j
= zmalloc(sizeof(*j
));
9493 j
->type
= REDIS_IOJOB_LOAD
;
9496 j
->key
->vtype
= o
->vtype
;
9497 j
->page
= o
->vm
.page
;
9500 j
->thread
= (pthread_t
) -1;
9508 /* Preload keys needed for the ZUNION and ZINTER commands. */
9509 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9511 num
= atoi(c
->argv
[2]->ptr
);
9512 for (i
= 0; i
< num
; i
++) {
9513 waitForSwappedKey(c
,c
->argv
[3+i
]);
9517 /* Is this client attempting to run a command against swapped keys?
9518 * If so, block it ASAP, load the keys in background, then resume it.
9520 * The important idea about this function is that it can fail! If keys will
9521 * still be swapped when the client is resumed, this key lookups will
9522 * just block loading keys from disk. In practical terms this should only
9523 * happen with SORT BY command or if there is a bug in this function.
9525 * Return 1 if the client is marked as blocked, 0 if the client can
9526 * continue as the keys it is going to access appear to be in memory. */
9527 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9530 if (cmd
->vm_preload_proc
!= NULL
) {
9531 cmd
->vm_preload_proc(c
);
9533 if (cmd
->vm_firstkey
== 0) return 0;
9534 last
= cmd
->vm_lastkey
;
9535 if (last
< 0) last
= c
->argc
+last
;
9536 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9537 waitForSwappedKey(c
,c
->argv
[j
]);
9540 /* If the client was blocked for at least one key, mark it as blocked. */
9541 if (listLength(c
->io_keys
)) {
9542 c
->flags
|= REDIS_IO_WAIT
;
9543 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9544 server
.vm_blocked_clients
++;
9551 /* Remove the 'key' from the list of blocked keys for a given client.
9553 * The function returns 1 when there are no longer blocking keys after
9554 * the current one was removed (and the client can be unblocked). */
9555 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9559 struct dictEntry
*de
;
9561 /* Remove the key from the list of keys this client is waiting for. */
9562 listRewind(c
->io_keys
,&li
);
9563 while ((ln
= listNext(&li
)) != NULL
) {
9564 if (compareStringObjects(ln
->value
,key
) == 0) {
9565 listDelNode(c
->io_keys
,ln
);
9571 /* Remove the client form the key => waiting clients map. */
9572 de
= dictFind(c
->db
->io_keys
,key
);
9574 l
= dictGetEntryVal(de
);
9575 ln
= listSearchKey(l
,c
);
9578 if (listLength(l
) == 0)
9579 dictDelete(c
->db
->io_keys
,key
);
9581 return listLength(c
->io_keys
) == 0;
9584 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9585 struct dictEntry
*de
;
9590 de
= dictFind(db
->io_keys
,key
);
9593 l
= dictGetEntryVal(de
);
9594 len
= listLength(l
);
9595 /* Note: we can't use something like while(listLength(l)) as the list
9596 * can be freed by the calling function when we remove the last element. */
9599 redisClient
*c
= ln
->value
;
9601 if (dontWaitForSwappedKey(c
,key
)) {
9602 /* Put the client in the list of clients ready to go as we
9603 * loaded all the keys about it. */
9604 listAddNodeTail(server
.io_ready_clients
,c
);
9609 /* =========================== Remote Configuration ========================= */
9611 static void configSetCommand(redisClient
*c
) {
9612 robj
*o
= getDecodedObject(c
->argv
[3]);
9613 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9614 zfree(server
.dbfilename
);
9615 server
.dbfilename
= zstrdup(o
->ptr
);
9616 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9617 zfree(server
.requirepass
);
9618 server
.requirepass
= zstrdup(o
->ptr
);
9619 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9620 zfree(server
.masterauth
);
9621 server
.masterauth
= zstrdup(o
->ptr
);
9622 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9623 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9625 addReplySds(c
,sdscatprintf(sdsempty(),
9626 "-ERR not supported CONFIG parameter %s\r\n",
9627 (char*)c
->argv
[2]->ptr
));
9632 addReply(c
,shared
.ok
);
9635 static void configGetCommand(redisClient
*c
) {
9636 robj
*o
= getDecodedObject(c
->argv
[2]);
9637 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9638 char *pattern
= o
->ptr
;
9642 decrRefCount(lenobj
);
9644 if (stringmatch(pattern
,"dbfilename",0)) {
9645 addReplyBulkCString(c
,"dbfilename");
9646 addReplyBulkCString(c
,server
.dbfilename
);
9649 if (stringmatch(pattern
,"requirepass",0)) {
9650 addReplyBulkCString(c
,"requirepass");
9651 addReplyBulkCString(c
,server
.requirepass
);
9654 if (stringmatch(pattern
,"masterauth",0)) {
9655 addReplyBulkCString(c
,"masterauth");
9656 addReplyBulkCString(c
,server
.masterauth
);
9659 if (stringmatch(pattern
,"maxmemory",0)) {
9662 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9663 addReplyBulkCString(c
,"maxmemory");
9664 addReplyBulkCString(c
,buf
);
9668 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9671 static void configCommand(redisClient
*c
) {
9672 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9673 if (c
->argc
!= 4) goto badarity
;
9674 configSetCommand(c
);
9675 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9676 if (c
->argc
!= 3) goto badarity
;
9677 configGetCommand(c
);
9678 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9679 if (c
->argc
!= 2) goto badarity
;
9680 server
.stat_numcommands
= 0;
9681 server
.stat_numconnections
= 0;
9682 server
.stat_expiredkeys
= 0;
9683 server
.stat_starttime
= time(NULL
);
9684 addReply(c
,shared
.ok
);
9686 addReplySds(c
,sdscatprintf(sdsempty(),
9687 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9692 addReplySds(c
,sdscatprintf(sdsempty(),
9693 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9694 (char*) c
->argv
[1]->ptr
));
9697 /* =========================== Pubsub implementation ======================== */
9699 static void freePubsubPattern(void *p
) {
9700 pubsubPattern
*pat
= p
;
9702 decrRefCount(pat
->pattern
);
9706 static int listMatchPubsubPattern(void *a
, void *b
) {
9707 pubsubPattern
*pa
= a
, *pb
= b
;
9709 return (pa
->client
== pb
->client
) &&
9710 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9713 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9714 * 0 if the client was already subscribed to that channel. */
9715 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9716 struct dictEntry
*de
;
9717 list
*clients
= NULL
;
9720 /* Add the channel to the client -> channels hash table */
9721 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9723 incrRefCount(channel
);
9724 /* Add the client to the channel -> list of clients hash table */
9725 de
= dictFind(server
.pubsub_channels
,channel
);
9727 clients
= listCreate();
9728 dictAdd(server
.pubsub_channels
,channel
,clients
);
9729 incrRefCount(channel
);
9731 clients
= dictGetEntryVal(de
);
9733 listAddNodeTail(clients
,c
);
9735 /* Notify the client */
9736 addReply(c
,shared
.mbulk3
);
9737 addReply(c
,shared
.subscribebulk
);
9738 addReplyBulk(c
,channel
);
9739 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9743 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9744 * 0 if the client was not subscribed to the specified channel. */
9745 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9746 struct dictEntry
*de
;
9751 /* Remove the channel from the client -> channels hash table */
9752 incrRefCount(channel
); /* channel may be just a pointer to the same object
9753 we have in the hash tables. Protect it... */
9754 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9756 /* Remove the client from the channel -> clients list hash table */
9757 de
= dictFind(server
.pubsub_channels
,channel
);
9759 clients
= dictGetEntryVal(de
);
9760 ln
= listSearchKey(clients
,c
);
9762 listDelNode(clients
,ln
);
9763 if (listLength(clients
) == 0) {
9764 /* Free the list and associated hash entry at all if this was
9765 * the latest client, so that it will be possible to abuse
9766 * Redis PUBSUB creating millions of channels. */
9767 dictDelete(server
.pubsub_channels
,channel
);
9770 /* Notify the client */
9772 addReply(c
,shared
.mbulk3
);
9773 addReply(c
,shared
.unsubscribebulk
);
9774 addReplyBulk(c
,channel
);
9775 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9776 listLength(c
->pubsub_patterns
));
9779 decrRefCount(channel
); /* it is finally safe to release it */
9783 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9784 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9787 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9790 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9791 incrRefCount(pattern
);
9792 pat
= zmalloc(sizeof(*pat
));
9793 pat
->pattern
= getDecodedObject(pattern
);
9795 listAddNodeTail(server
.pubsub_patterns
,pat
);
9797 /* Notify the client */
9798 addReply(c
,shared
.mbulk3
);
9799 addReply(c
,shared
.psubscribebulk
);
9800 addReplyBulk(c
,pattern
);
9801 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9805 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9806 * 0 if the client was not subscribed to the specified channel. */
9807 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9812 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9813 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9815 listDelNode(c
->pubsub_patterns
,ln
);
9817 pat
.pattern
= pattern
;
9818 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9819 listDelNode(server
.pubsub_patterns
,ln
);
9821 /* Notify the client */
9823 addReply(c
,shared
.mbulk3
);
9824 addReply(c
,shared
.punsubscribebulk
);
9825 addReplyBulk(c
,pattern
);
9826 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9827 listLength(c
->pubsub_patterns
));
9829 decrRefCount(pattern
);
9833 /* Unsubscribe from all the channels. Return the number of channels the
9834 * client was subscribed from. */
9835 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9836 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9840 while((de
= dictNext(di
)) != NULL
) {
9841 robj
*channel
= dictGetEntryKey(de
);
9843 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9845 dictReleaseIterator(di
);
9849 /* Unsubscribe from all the patterns. Return the number of patterns the
9850 * client was subscribed to. */
/* Iterates over c->pubsub_patterns and calls pubsubUnsubscribePattern() on
 * each node's value, adding its result to `count`. `notify` is forwarded
 * unchanged to every call. */
9851 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9856 listRewind(c
->pubsub_patterns
,&li
);
9857 while ((ln
= listNext(&li
)) != NULL
) {
9858 robj
*pattern
= ln
->value
;
9860 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9865 /* Publish a message */
/* Delivery happens in two passes:
 *  1. the list stored under `channel` in server.pubsub_channels is walked
 *     and each client in it receives shared.messagebulk, the channel and
 *     the message;
 *  2. every entry of server.pubsub_patterns whose pattern matches the
 *     channel name (stringmatchlen) receives shared.pmessagebulk, the
 *     pattern, the channel and the message.
 * publishCommand() reports this function's return value to the caller as
 * the number of receivers. */
9866 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9868 struct dictEntry
*de
;
9872 /* Send to clients listening for that channel */
9873 de
= dictFind(server
.pubsub_channels
,channel
);
9875 list
*list
= dictGetEntryVal(de
);
9879 listRewind(list
,&li
);
9880 while ((ln
= listNext(&li
)) != NULL
) {
9881 redisClient
*c
= ln
->value
;
9883 addReply(c
,shared
.mbulk3
);
9884 addReply(c
,shared
.messagebulk
);
9885 addReplyBulk(c
,channel
);
9886 addReplyBulk(c
,message
);
9890 /* Send to clients listening to matching channels */
9891 if (listLength(server
.pubsub_patterns
)) {
9892 listRewind(server
.pubsub_patterns
,&li
);
/* The object returned by getDecodedObject() is released by the
 * decrRefCount(channel) call below. */
9893 channel
= getDecodedObject(channel
);
9894 while ((ln
= listNext(&li
)) != NULL
) {
9895 pubsubPattern
*pat
= ln
->value
;
/* Glob-match the pattern against the channel name; the final 0 is the
 * last stringmatchlen() argument (presumably "nocase" off - confirm). */
9897 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9898 sdslen(pat
->pattern
->ptr
),
9899 (char*)channel
->ptr
,
9900 sdslen(channel
->ptr
),0)) {
9901 addReply(pat
->client
,shared
.mbulk4
);
9902 addReply(pat
->client
,shared
.pmessagebulk
);
9903 addReplyBulk(pat
->client
,pat
->pattern
);
9904 addReplyBulk(pat
->client
,channel
);
9905 addReplyBulk(pat
->client
,message
);
9909 decrRefCount(channel
);
/* Command handler: subscribes the client to every channel given in
 * argv[1] .. argv[argc-1] by calling pubsubSubscribeChannel() on each. */
9914 static void subscribeCommand(redisClient
*c
) {
9917 for (j
= 1; j
< c
->argc
; j
++)
9918 pubsubSubscribeChannel(c
,c
->argv
[j
]);
/* Command handler: either unsubscribes the client from all its channels
 * (pubsubUnsubscribeAllChannels) or from each channel given in
 * argv[1] .. argv[argc-1]. Both paths pass notify = 1.
 * NOTE(review): the condition selecting between the two paths is not
 * visible in this listing; presumably it is "no channel arguments given". */
9921 static void unsubscribeCommand(redisClient
*c
) {
9923 pubsubUnsubscribeAllChannels(c
,1);
9928 for (j
= 1; j
< c
->argc
; j
++)
9929 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
/* Command handler: subscribes the client to every pattern given in
 * argv[1] .. argv[argc-1] by calling pubsubSubscribePattern() on each. */
9933 static void psubscribeCommand(redisClient
*c
) {
9936 for (j
= 1; j
< c
->argc
; j
++)
9937 pubsubSubscribePattern(c
,c
->argv
[j
]);
/* Command handler: either unsubscribes the client from all its patterns
 * (pubsubUnsubscribeAllPatterns) or from each pattern given in
 * argv[1] .. argv[argc-1]. Both paths pass notify = 1.
 * NOTE(review): the condition selecting between the two paths is not
 * visible in this listing; presumably it is "no pattern arguments given". */
9940 static void punsubscribeCommand(redisClient
*c
) {
9942 pubsubUnsubscribeAllPatterns(c
,1);
9947 for (j
= 1; j
< c
->argc
; j
++)
9948 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
/* Command handler: publishes argv[2] on the channel argv[1] and replies
 * with the integer returned by pubsubPublishMessage(). */
9952 static void publishCommand(redisClient
*c
) {
9953 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9954 addReplyLong(c
,receivers
);
9957 /* ================================= Debugging ============================== */
/* Command handler for DEBUG. The subcommand is argv[1], compared
 * case-insensitively:
 *   segfault       - (body not visible in this listing)
 *   reload         - rdbSave() then rdbLoad() of server.dbfilename
 *   loadaof        - loadAppendOnlyFile(server.appendfilename)
 *   object <key>   - reports addresses/refcounts of key and value, plus the
 *                    value encoding and serialized length, or the swap
 *                    page information when the value is not in memory
 *   swapin <key>   - performs lookupKeyRead() on the key
 *   swapout <key>  - swaps the value out with vmSwapObjectBlocking()
 * The object/swapin/swapout forms additionally require argc == 3.
 * NOTE(review): the syntax-error text at the end does not mention LOADAOF
 * even though that subcommand is handled here. */
9959 static void debugCommand(redisClient
*c
) {
9960 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9962 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
/* Save the dataset to disk, then load it back from the same file. */
9963 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9964 addReply(c
,shared
.err
);
9968 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9969 addReply(c
,shared
.err
);
9972 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9973 addReply(c
,shared
.ok
);
9974 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9976 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9977 addReply(c
,shared
.err
);
9980 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9981 addReply(c
,shared
.ok
);
9982 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9983 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9987 addReply(c
,shared
.nokeyerr
);
9990 key
= dictGetEntryKey(de
);
9991 val
= dictGetEntryVal(de
);
/* First reply form: VM disabled, or the key's storage is MEMORY or
 * SWAPPING. */
9992 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9993 key
->storage
== REDIS_VM_SWAPPING
)) {
/* Map the encoding to its name when it indexes inside strencoding[];
 * otherwise format an "unknown encoding" string into buf. */
9997 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9998 strenc
= strencoding
[val
->encoding
];
10000 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
10003 addReplySds(c
,sdscatprintf(sdsempty(),
10004 "+Key at:%p refcount:%d, value at:%p refcount:%d "
10005 "encoding:%s serializedlength:%lld\r\n",
10006 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
10007 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
/* Second reply form: swap page and page count taken from key->vm. */
10009 addReplySds(c
,sdscatprintf(sdsempty(),
10010 "+Key at:%p refcount:%d, value swapped at: page %llu "
10011 "using %llu pages\r\n",
10012 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
10013 (unsigned long long) key
->vm
.usedpages
));
10015 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
10016 lookupKeyRead(c
->db
,c
->argv
[2]);
10017 addReply(c
,shared
.ok
);
10018 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
10019 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
10022 if (!server
.vm_enabled
) {
10023 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10027 addReply(c
,shared
.nokeyerr
);
10030 key
= dictGetEntryKey(de
);
10031 val
= dictGetEntryVal(de
);
10032 /* If the key is shared we want to create a copy */
10033 if (key
->refcount
> 1) {
10034 robj
*newkey
= dupStringObject(key
);
/* Store the private copy back into the dict entry as its key. */
10036 key
= dictGetEntryKey(de
) = newkey
;
10039 if (key
->storage
!= REDIS_VM_MEMORY
) {
10040 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
10041 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
/* After a successful swap the entry's value pointer is cleared. */
10042 dictGetEntryVal(de
) = NULL
;
10043 addReply(c
,shared
.ok
);
10045 addReply(c
,shared
.err
);
10048 addReplySds(c
,sdsnew(
10049 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
/* Logs a failed assertion at REDIS_WARNING level: `estr` is the expression
 * text, `file` and `line` the location. When HAVE_BACKTRACE is defined it
 * then deliberately writes through the invalid address (char*)-1 so that a
 * SIGSEGV is raised and the stack trace gets printed. */
10053 static void _redisAssert(char *estr
, char *file
, int line
) {
10054 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
10055 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
10056 #ifdef HAVE_BACKTRACE
10057 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10058 *((char*)-1) = 'x';
/* Logs a panic message at REDIS_WARNING level: `msg` is the description,
 * `file` and `line` the location. When HAVE_BACKTRACE is defined it then
 * deliberately writes through the invalid address (char*)-1 so that a
 * SIGSEGV is raised and the stack trace gets printed. */
10062 static void _redisPanic(char *msg
, char *file
, int line
) {
10063 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
10064 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
10065 #ifdef HAVE_BACKTRACE
10066 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
10067 *((char*)-1) = 'x';
10071 /* =================================== Main! ================================ */
/* Reads /proc/sys/vm/overcommit_memory. Returns -1 when the file cannot be
 * opened; up to 63 characters of its first line are read into `buf`.
 * NOTE(review): the conversion of `buf` to the returned value is not
 * visible in this listing. */
10074 int linuxOvercommitMemoryValue(void) {
10075 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
10078 if (!fp
) return -1;
10079 if (fgets(buf
,64,fp
) == NULL
) {
/* Logs a REDIS_WARNING explaining how to change vm.overcommit_memory when
 * linuxOvercommitMemoryValue() reports the value 0. */
10088 void linuxOvercommitMemoryWarning(void) {
10089 if (linuxOvercommitMemoryValue() == 0) {
10090 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10093 #endif /* __linux__ */
/* Turns the process into a daemon: forks (the parent exits), starts a new
 * session, redirects stdin/stdout/stderr to /dev/null and writes the pid
 * to server.pidfile. */
10095 static void daemonize(void) {
/* NOTE(review): the test is `!= 0`, so a failed fork() (-1) also takes the
 * exit(0) path without reporting any error. */
10099 if (fork() != 0) exit(0); /* parent exits */
10100 setsid(); /* create a new session */
10102 /* Every output goes to /dev/null. If Redis is daemonized but
10103 * the 'logfile' is set to 'stdout' in the configuration file
10104 * it will not log at all. */
10105 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10106 dup2(fd
, STDIN_FILENO
);
10107 dup2(fd
, STDOUT_FILENO
);
10108 dup2(fd
, STDERR_FILENO
);
/* Close the extra descriptor unless it is itself one of the standard three. */
10109 if (fd
> STDERR_FILENO
) close(fd
);
10111 /* Try to write the pid file */
10112 fp
= fopen(server
.pidfile
,"w");
10114 fprintf(fp
,"%d\n",getpid());
/* Prints "Redis server version <REDIS_VERSION>" to stdout. */
10119 static void version() {
10120 printf("Redis server version %s\n", REDIS_VERSION
);
/* Prints the command line usage summary to stderr. */
10124 static void usage() {
10125 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
10126 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
/* Program entry point. Visible steps, in order:
 *  - initServerConfig();
 *  - argv[1] is checked for "-v" / "--version" (version()) and "--help"
 *    (usage()); otherwise the save params are reset and argv[1] is loaded
 *    as the configuration file. More than two arguments and the
 *    no-argument case are handled by separate branches;
 *  - daemonize() when server.daemonize is set;
 *  - the dataset is loaded with loadAppendOnlyFile() when
 *    server.appendonly is set, and with rdbLoad(); the elapsed seconds are
 *    logged on success;
 *  - beforeSleep is registered on the event loop and the event loop is
 *    deleted at the end.
 * NOTE(review): several lines of this function (including the branch
 * structure around the two load calls) are not visible in this listing. */
10130 int main(int argc
, char **argv
) {
10133 initServerConfig();
10135 if (strcmp(argv
[1], "-v") == 0 ||
10136 strcmp(argv
[1], "--version") == 0) version();
10137 if (strcmp(argv
[1], "--help") == 0) usage();
10138 resetServerSaveParams();
10139 loadServerConfig(argv
[1]);
10140 } else if ((argc
> 2)) {
10143 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10145 if (server
.daemonize
) daemonize();
10147 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10149 linuxOvercommitMemoryWarning();
/* `start` is the reference time for the load duration logged below.
 * NOTE(review): time(NULL)-start is printed with %ld, which assumes the
 * time_t difference has type long - confirm on the target platforms. */
10151 start
= time(NULL
);
10152 if (server
.appendonly
) {
10153 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10154 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10156 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10157 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10159 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10160 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10162 aeDeleteEventLoop(server
.el
);
10166 /* ============================= Backtrace support ========================= */
10168 #ifdef HAVE_BACKTRACE
/* Forward declaration: segvHandler() calls findFuncName() before its
 * definition, which comes after the inclusion of "staticsymbols.h". */
10169 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Returns, as a void pointer, the instruction-pointer field stored in the
 * machine context of `uc`. The field that is read is selected at compile
 * time per platform: FreeBSD, dietlibc, Mac OS X (before / from 10.6, with
 * __rip or __eip variants), Linux i386/x86_64, Linux IA64.
 * NOTE(review): some nested #if/#else/#endif lines and the fallback branch
 * are not visible in this listing. */
10171 static void *getMcontextEip(ucontext_t
*uc
) {
10172 #if defined(__FreeBSD__)
10173 return (void*) uc
->uc_mcontext
.mc_eip
;
10174 #elif defined(__dietlibc__)
10175 return (void*) uc
->uc_mcontext
.eip
;
10176 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10178 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10180 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10182 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10183 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10184 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10186 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10188 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10189 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10190 #elif defined(__ia64__) /* Linux IA64 */
10191 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Signal handler installed by setupSigSegvAction() (SA_SIGINFO style:
 * `secret` is the ucontext_t of the interrupted code, `info` is unused).
 * It logs the signal number and the string from genRedisInfoString(), then
 * a backtrace of up to 100 frames. For every frame starting at index 1 the
 * symbol is resolved with findFuncName(); when that fails, or when the
 * "+offset" found in the backtrace_symbols() text is smaller than the
 * offset findFuncName() computed, the backtrace_symbols() text is logged
 * as is. The other log form prints the frame index, address, function name
 * and offset. */
10197 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10199 char **messages
= NULL
;
10200 int i
, trace_size
= 0;
10201 unsigned long offset
=0;
10202 ucontext_t
*uc
= (ucontext_t
*) secret
;
10204 REDIS_NOTUSED(info
);
10206 redisLog(REDIS_WARNING
,
10207 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10208 infostring
= genRedisInfoString();
10209 redisLog(REDIS_WARNING
, "%s",infostring
);
10210 /* It's not safe to sdsfree() the returned string under memory
10211 * corruption conditions. Let it leak as we are going to abort */
10213 trace_size
= backtrace(trace
, 100);
10214 /* overwrite sigaction with caller's address */
10215 if (getMcontextEip(uc
) != NULL
) {
10216 trace
[1] = getMcontextEip(uc
);
10218 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is skipped: the loop starts at index 1. */
10220 for (i
=1; i
<trace_size
; ++i
) {
10221 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* `p` points at the '+' preceding the offset in the symbol text, if any. */
10223 p
= strchr(messages
[i
],'+');
10224 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10225 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
10227 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10230 /* free(messages); Don't call free() with possibly corrupted memory. */
/* Installs segvHandler() as the SA_SIGINFO-style handler for SIGSEGV,
 * SIGBUS, SIGFPE and SIGILL, with an empty signal mask and the flags
 * SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO.
 * NOTE(review): SIGBUS is registered twice below; the second call repeats
 * the first with the same arguments. */
10234 static void setupSigSegvAction(void) {
10235 struct sigaction act
;
10237 sigemptyset (&act
.sa_mask
);
10238 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10239 * is used. Otherwise, sa_handler is used */
10240 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10241 act
.sa_sigaction
= segvHandler
;
10242 sigaction (SIGSEGV
, &act
, NULL
);
10243 sigaction (SIGBUS
, &act
, NULL
);
10244 sigaction (SIGFPE
, &act
, NULL
);
10245 sigaction (SIGILL
, &act
, NULL
);
10246 sigaction (SIGBUS
, &act
, NULL
);
10250 #include "staticsymbols.h"
10251 /* This function tries to convert a pointer into a function name. It's used in
10252 * order to provide a backtrace under segmentation fault that's able to
10253 * display functions declared as static (otherwise the backtrace is useless). */
/* Scans symsTable[] (terminated by an entry whose `pointer` is zero) for
 * entries whose address is not greater than `pointer`, tracking the
 * candidate with the smallest distance. Returns the name of the selected
 * entry, or NULL when no entry was selected (ret == -1).
 * NOTE(review): the statements that update `ret`, `minoff` and `*offset`
 * are not visible in this listing. */
10254 static char *findFuncName(void *pointer
, unsigned long *offset
){
10256 unsigned long off
, minoff
= 0;
10258 /* Try to match against the Symbol with the smallest offset */
10259 for (i
=0; symsTable
[i
].pointer
; i
++) {
10260 unsigned long lp
= (unsigned long) pointer
;
/* The all-ones address is never matched; only symbols located at or
 * before `lp` are candidates. */
10262 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10263 off
=lp
-symsTable
[i
].pointer
;
10264 if (ret
< 0 || off
< minoff
) {
10270 if (ret
== -1) return NULL
;
10272 return symsTable
[ret
].name
;
10274 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined (this is the #else branch of that conditional). */
10275 static void setupSigSegvAction(void) {
10277 #endif /* HAVE_BACKTRACE */