2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.10"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short, these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
/* Human readable encoding names, listed in the same order as the
 * REDIS_ENCODING_* values above. */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specifies the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte, most DB keys, and many
152 * values, will fit inside. */
153 #define REDIS_RDB_6BITLEN 0
154 #define REDIS_RDB_14BITLEN 1
155 #define REDIS_RDB_32BITLEN 2
156 #define REDIS_RDB_ENCVAL 3
157 #define REDIS_RDB_LENERR UINT_MAX
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
223 /* Anti-warning macro... */
224 #define REDIS_NOTUSED(V) ((void) V)
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): when the expression _e is false, report its source text
 *   together with the file and line through _redisAssert(), then _exit(1).
 * redisPanic(_e):  unconditionally report _e through _redisPanic(), then
 *   _exit(1). The argument is stringified with '#', so it is reported
 *   exactly as written at the call site (quotes included for a literal).
 *
 * Both macros expand to ONE fully parenthesized void expression. This matters
 * for redisPanic(): without the outer parentheses, a use such as
 *   cond ? (void)0 : redisPanic(msg)
 * would parse as (cond ? (void)0 : _redisPanic(...)), _exit(1) and terminate
 * the process even when cond is true. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
244 /*================================= Data types ============================== */
246 /* A redis object, that is a type able to hold a string / list / set */
248 /* The VM object structure: on-disk location of an object and the time
 * it was last accessed. */
249 struct redisObjectVM
{
250 off_t page
; /* the page at which the object is stored on disk */
251 off_t usedpages
; /* number of pages used on disk */
252 time_t atime
; /* Last access time */
255 /* The actual Redis Object */
256 typedef struct redisObject
{
259 unsigned char encoding
; /* One of the REDIS_ENCODING_* values */
260 unsigned char storage
; /* If this object is a key, where is the value?
261 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
262 unsigned char vtype
; /* If this object is a key, and value is swapped out,
263 * this is the type of the swapped out object. */
265 /* VM fields, these are only allocated if VM is active, otherwise the
266 * object allocation function will just allocate
267 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
268 * Redis without VM active will not have any overhead. */
269 struct redisObjectVM vm
;
272 /* Macro used to initialize a Redis object allocated on the stack.
273 * Note that this macro is kept near the structure definition to make sure
274 * we'll update it when the structure is changed, to avoid bugs like
275 * bug #85 introduced exactly in this way. */
276 #define initStaticStringObject(_var,_ptr) do { \
278 _var.type = REDIS_STRING; \
279 _var.encoding = REDIS_ENCODING_RAW; \
281 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
284 typedef struct redisDb
{
285 dict
*dict
; /* The keyspace for this DB */
286 dict
*expires
; /* Timeout of keys with a timeout set */
287 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
288 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
292 /* Client MULTI/EXEC state */
293 typedef struct multiCmd
{
296 struct redisCommand
*cmd
;
299 typedef struct multiState
{
300 multiCmd
*commands
; /* Array of MULTI commands */
301 int count
; /* Total number of MULTI commands */
304 /* With multiplexing we need to keep per-client state.
305 * Clients are kept in a linked list. */
306 typedef struct redisClient
{
311 robj
**argv
, **mbargv
;
313 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
314 int multibulk
; /* multi bulk command format active */
317 time_t lastinteraction
; /* time of the last interaction, used for timeout */
318 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
319 int slaveseldb
; /* slave selected db, if this client is a slave */
320 int authenticated
; /* when requirepass is non-NULL */
321 int replstate
; /* replication state if this is a slave */
322 int repldbfd
; /* replication DB file descriptor */
323 long repldboff
; /* replication DB file offset */
324 off_t repldbsize
; /* replication DB file size */
325 multiState mstate
; /* MULTI/EXEC state */
326 robj
**blockingkeys
; /* The keys we are waiting on to terminate a blocking
327 * operation such as BLPOP. Otherwise NULL. */
328 int blockingkeysnum
; /* Number of blocking keys */
329 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
330 * is >= blockingto then the operation timed out. */
331 list
*io_keys
; /* Keys this client is waiting to be loaded from the
332 * swap file in order to continue. */
333 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
334 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
342 /* Global server state structure */
347 long long dirty
; /* changes to DB from the last save */
349 list
*slaves
, *monitors
;
350 char neterr
[ANET_ERR_LEN
];
352 int cronloops
; /* number of times the cron function ran */
353 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
354 time_t lastsave
; /* Unix time of the last successful save */
355 /* Fields used only for stats */
356 time_t stat_starttime
; /* server start time */
357 long long stat_numcommands
; /* number of processed commands */
358 long long stat_numconnections
; /* number of connections received */
359 long long stat_expiredkeys
; /* number of expired keys */
372 pid_t bgsavechildpid
;
373 pid_t bgrewritechildpid
;
374 sds bgrewritebuf
; /* buffer kept by the parent during the append only rewrite */
375 struct saveparam
*saveparams
;
380 char *appendfilename
;
384 /* Replication related */
389 redisClient
*master
; /* client that is master for this slave */
391 unsigned int maxclients
;
392 unsigned long long maxmemory
;
393 unsigned int blpop_blocked_clients
;
394 unsigned int vm_blocked_clients
;
395 /* Sort parameters - qsort_r() is only available under BSD so we
396 * have to take this state global, in order to pass it to sortCompare() */
400 /* Virtual memory configuration */
405 unsigned long long vm_max_memory
;
407 size_t hash_max_zipmap_entries
;
408 size_t hash_max_zipmap_value
;
409 /* Virtual memory state */
412 off_t vm_next_page
; /* Next probably empty page */
413 off_t vm_near_pages
; /* Number of pages allocated sequentially */
414 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
415 time_t unixtime
; /* Unix time sampled every second. */
416 /* Virtual memory I/O threads stuff */
417 /* An I/O thread processes an element taken from the io_newjobs queue and
418 * puts the result of the operation in the io_processed list. While the
419 * job is being processed, it's put on the io_processing queue. */
420 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
421 list
*io_processing
; /* List of VM I/O jobs being processed */
422 list
*io_processed
; /* List of VM I/O jobs already processed */
423 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
424 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
425 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
426 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
427 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
428 int io_active_threads
; /* Number of running I/O threads */
429 int vm_max_threads
; /* Max number of I/O threads running at the same time */
430 /* Our main thread is blocked on the event loop, looking for sockets ready
431 * to be read or written, so when a threaded I/O operation is ready to be
432 * processed by the main thread, the I/O thread will use a unix pipe to
433 * awake the main thread. The following are the two pipe FDs. */
434 int io_ready_pipe_read
;
435 int io_ready_pipe_write
;
436 /* Virtual memory stats */
437 unsigned long long vm_stats_used_pages
;
438 unsigned long long vm_stats_swapped_objects
;
439 unsigned long long vm_stats_swapouts
;
440 unsigned long long vm_stats_swapins
;
442 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
443 list
*pubsub_patterns
; /* A list of pubsub_patterns */
448 typedef struct pubsubPattern
{
/* Function type used for the 'proc' and 'vm_preload_proc' members below:
 * takes the client and returns nothing. */
453 typedef void redisCommandProc(redisClient
*c
);
454 struct redisCommand
{
456 redisCommandProc
*proc
;
459 /* Use a function to determine which keys need to be loaded
460 * in the background prior to executing this command. Takes precedence
461 * over vm_firstkey and others, ignored when NULL */
462 redisCommandProc
*vm_preload_proc
;
463 /* What keys should be loaded in background when calling this command? */
464 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
465 int vm_lastkey
; /* The last argument that's a key */
466 int vm_keystep
; /* The step between first and last key */
469 struct redisFunctionSym
{
471 unsigned long pointer
;
474 typedef struct _redisSortObject
{
482 typedef struct _redisSortOperation
{
485 } redisSortOperation
;
487 /* ZSETs use a specialized version of Skiplists */
489 typedef struct zskiplistNode
{
490 struct zskiplistNode
**forward
;
491 struct zskiplistNode
*backward
;
497 typedef struct zskiplist
{
498 struct zskiplistNode
*header
, *tail
;
499 unsigned long length
;
503 typedef struct zset
{
508 /* Our shared "common" objects */
510 #define REDIS_SHARED_INTEGERS 10000
511 struct sharedObjectsStruct
{
512 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
513 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
514 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
515 *outofrangeerr
, *plus
,
516 *select0
, *select1
, *select2
, *select3
, *select4
,
517 *select5
, *select6
, *select7
, *select8
, *select9
,
518 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
519 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
/* Global variables that are actually used as constants. These doubles are
 * needed by the on-disk serialization of double values; they are assigned
 * at runtime, rather than through constant initializers, to avoid strange
 * compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
528 /* VM threaded I/O request message */
529 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
530 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
531 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
532 typedef struct iojob
{
533 int type
; /* Request type, REDIS_IOJOB_* */
534 redisDb
*db
;/* Redis database */
535 robj
*key
; /* This I/O request is about swapping this key */
536 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
537 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
538 off_t page
; /* Swap page where to read/write the object */
539 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
540 int canceled
; /* True if this job was canceled by the blocking side of VM */
541 pthread_t thread
; /* ID of the thread processing this entry */
544 /*================================ Prototypes =============================== */
546 static void freeStringObject(robj
*o
);
547 static void freeListObject(robj
*o
);
548 static void freeSetObject(robj
*o
);
549 static void decrRefCount(void *o
);
550 static robj
*createObject(int type
, void *ptr
);
551 static void freeClient(redisClient
*c
);
552 static int rdbLoad(char *filename
);
553 static void addReply(redisClient
*c
, robj
*obj
);
554 static void addReplySds(redisClient
*c
, sds s
);
555 static void incrRefCount(robj
*o
);
556 static int rdbSaveBackground(char *filename
);
557 static robj
*createStringObject(char *ptr
, size_t len
);
558 static robj
*dupStringObject(robj
*o
);
559 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
560 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
561 static int syncWithMaster(void);
562 static robj
*tryObjectEncoding(robj
*o
);
563 static robj
*getDecodedObject(robj
*o
);
564 static int removeExpire(redisDb
*db
, robj
*key
);
565 static int expireIfNeeded(redisDb
*db
, robj
*key
);
566 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
567 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
568 static int deleteKey(redisDb
*db
, robj
*key
);
569 static time_t getExpire(redisDb
*db
, robj
*key
);
570 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
571 static void updateSlavesWaitingBgsave(int bgsaveerr
);
572 static void freeMemoryIfNeeded(void);
573 static int processCommand(redisClient
*c
);
574 static void setupSigSegvAction(void);
575 static void rdbRemoveTempFile(pid_t childpid
);
576 static void aofRemoveTempFile(pid_t childpid
);
577 static size_t stringObjectLen(robj
*o
);
578 static void processInputBuffer(redisClient
*c
);
579 static zskiplist
*zslCreate(void);
580 static void zslFree(zskiplist
*zsl
);
581 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
582 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
583 static void initClientMultiState(redisClient
*c
);
584 static void freeClientMultiState(redisClient
*c
);
585 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
586 static void unblockClientWaitingData(redisClient
*c
);
587 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
588 static void vmInit(void);
589 static void vmMarkPagesFree(off_t page
, off_t count
);
590 static robj
*vmLoadObject(robj
*key
);
591 static robj
*vmPreviewObject(robj
*key
);
592 static int vmSwapOneObjectBlocking(void);
593 static int vmSwapOneObjectThreaded(void);
594 static int vmCanSwapOut(void);
595 static int tryFreeOneObjectFromFreelist(void);
596 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
597 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
598 static void vmCancelThreadedIOJob(robj
*o
);
599 static void lockThreadedIO(void);
600 static void unlockThreadedIO(void);
601 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
602 static void freeIOJob(iojob
*j
);
603 static void queueIOJob(iojob
*j
);
604 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
605 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
606 static void waitEmptyIOJobsQueue(void);
607 static void vmReopenSwapFile(void);
608 static int vmFreePage(off_t page
);
609 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
610 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
611 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
612 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
613 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
614 static struct redisCommand
*lookupCommand(char *name
);
615 static void call(redisClient
*c
, struct redisCommand
*cmd
);
616 static void resetClient(redisClient
*c
);
617 static void convertToRealHash(robj
*o
);
618 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
619 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
620 static void freePubsubPattern(void *p
);
621 static int listMatchPubsubPattern(void *a
, void *b
);
622 static int compareStringObjects(robj
*a
, robj
*b
);
625 static void authCommand(redisClient
*c
);
626 static void pingCommand(redisClient
*c
);
627 static void echoCommand(redisClient
*c
);
628 static void setCommand(redisClient
*c
);
629 static void setnxCommand(redisClient
*c
);
630 static void getCommand(redisClient
*c
);
631 static void delCommand(redisClient
*c
);
632 static void existsCommand(redisClient
*c
);
633 static void incrCommand(redisClient
*c
);
634 static void decrCommand(redisClient
*c
);
635 static void incrbyCommand(redisClient
*c
);
636 static void decrbyCommand(redisClient
*c
);
637 static void selectCommand(redisClient
*c
);
638 static void randomkeyCommand(redisClient
*c
);
639 static void keysCommand(redisClient
*c
);
640 static void dbsizeCommand(redisClient
*c
);
641 static void lastsaveCommand(redisClient
*c
);
642 static void saveCommand(redisClient
*c
);
643 static void bgsaveCommand(redisClient
*c
);
644 static void bgrewriteaofCommand(redisClient
*c
);
645 static void shutdownCommand(redisClient
*c
);
646 static void moveCommand(redisClient
*c
);
647 static void renameCommand(redisClient
*c
);
648 static void renamenxCommand(redisClient
*c
);
649 static void lpushCommand(redisClient
*c
);
650 static void rpushCommand(redisClient
*c
);
651 static void lpopCommand(redisClient
*c
);
652 static void rpopCommand(redisClient
*c
);
653 static void llenCommand(redisClient
*c
);
654 static void lindexCommand(redisClient
*c
);
655 static void lrangeCommand(redisClient
*c
);
656 static void ltrimCommand(redisClient
*c
);
657 static void typeCommand(redisClient
*c
);
658 static void lsetCommand(redisClient
*c
);
659 static void saddCommand(redisClient
*c
);
660 static void sremCommand(redisClient
*c
);
661 static void smoveCommand(redisClient
*c
);
662 static void sismemberCommand(redisClient
*c
);
663 static void scardCommand(redisClient
*c
);
664 static void spopCommand(redisClient
*c
);
665 static void srandmemberCommand(redisClient
*c
);
666 static void sinterCommand(redisClient
*c
);
667 static void sinterstoreCommand(redisClient
*c
);
668 static void sunionCommand(redisClient
*c
);
669 static void sunionstoreCommand(redisClient
*c
);
670 static void sdiffCommand(redisClient
*c
);
671 static void sdiffstoreCommand(redisClient
*c
);
672 static void syncCommand(redisClient
*c
);
673 static void flushdbCommand(redisClient
*c
);
674 static void flushallCommand(redisClient
*c
);
675 static void sortCommand(redisClient
*c
);
676 static void lremCommand(redisClient
*c
);
677 static void rpoplpushcommand(redisClient
*c
);
678 static void infoCommand(redisClient
*c
);
679 static void mgetCommand(redisClient
*c
);
680 static void monitorCommand(redisClient
*c
);
681 static void expireCommand(redisClient
*c
);
682 static void expireatCommand(redisClient
*c
);
683 static void getsetCommand(redisClient
*c
);
684 static void ttlCommand(redisClient
*c
);
685 static void slaveofCommand(redisClient
*c
);
686 static void debugCommand(redisClient
*c
);
687 static void msetCommand(redisClient
*c
);
688 static void msetnxCommand(redisClient
*c
);
689 static void zaddCommand(redisClient
*c
);
690 static void zincrbyCommand(redisClient
*c
);
691 static void zrangeCommand(redisClient
*c
);
692 static void zrangebyscoreCommand(redisClient
*c
);
693 static void zcountCommand(redisClient
*c
);
694 static void zrevrangeCommand(redisClient
*c
);
695 static void zcardCommand(redisClient
*c
);
696 static void zremCommand(redisClient
*c
);
697 static void zscoreCommand(redisClient
*c
);
698 static void zremrangebyscoreCommand(redisClient
*c
);
699 static void multiCommand(redisClient
*c
);
700 static void execCommand(redisClient
*c
);
701 static void discardCommand(redisClient
*c
);
702 static void blpopCommand(redisClient
*c
);
703 static void brpopCommand(redisClient
*c
);
704 static void appendCommand(redisClient
*c
);
705 static void substrCommand(redisClient
*c
);
706 static void zrankCommand(redisClient
*c
);
707 static void zrevrankCommand(redisClient
*c
);
708 static void hsetCommand(redisClient
*c
);
709 static void hsetnxCommand(redisClient
*c
);
710 static void hgetCommand(redisClient
*c
);
711 static void hmsetCommand(redisClient
*c
);
712 static void hmgetCommand(redisClient
*c
);
713 static void hdelCommand(redisClient
*c
);
714 static void hlenCommand(redisClient
*c
);
715 static void zremrangebyrankCommand(redisClient
*c
);
716 static void zunionCommand(redisClient
*c
);
717 static void zinterCommand(redisClient
*c
);
718 static void hkeysCommand(redisClient
*c
);
719 static void hvalsCommand(redisClient
*c
);
720 static void hgetallCommand(redisClient
*c
);
721 static void hexistsCommand(redisClient
*c
);
722 static void configCommand(redisClient
*c
);
723 static void hincrbyCommand(redisClient
*c
);
724 static void subscribeCommand(redisClient
*c
);
725 static void unsubscribeCommand(redisClient
*c
);
726 static void psubscribeCommand(redisClient
*c
);
727 static void punsubscribeCommand(redisClient
*c
);
728 static void publishCommand(redisClient
*c
);
730 /*================================= Globals ================================= */
733 static struct redisServer server
; /* server global state */
734 static struct redisCommand cmdTable
[] = {
735 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
736 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
737 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
738 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
739 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
740 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
741 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
742 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
743 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
745 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
746 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
747 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
751 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
754 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
755 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
756 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
758 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
759 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
760 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
761 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
762 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
763 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
764 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
765 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
766 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
767 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
768 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
769 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
770 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
771 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
772 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
773 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
774 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
775 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
776 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
778 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
779 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
781 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
782 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
783 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
784 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
785 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
787 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
788 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
789 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
791 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
792 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
793 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
794 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
797 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
799 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
800 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
801 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
802 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
803 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
804 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
805 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
809 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
812 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
816 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
818 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
820 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
822 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
824 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
825 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
828 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
829 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
831 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
832 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
835 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
836 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
837 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
838 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
839 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
840 {NULL
,NULL
,0,0,NULL
,0,0,0}
843 /*============================ Utility functions ============================ */
845 /* Glob-style pattern matching. */
846 static int stringmatchlen(const char *pattern
, int patternLen
,
847 const char *string
, int stringLen
, int nocase
)
852 while (pattern
[1] == '*') {
857 return 1; /* match */
859 if (stringmatchlen(pattern
+1, patternLen
-1,
860 string
, stringLen
, nocase
))
861 return 1; /* match */
865 return 0; /* no match */
869 return 0; /* no match */
879 not = pattern
[0] == '^';
886 if (pattern
[0] == '\\') {
889 if (pattern
[0] == string
[0])
891 } else if (pattern
[0] == ']') {
893 } else if (patternLen
== 0) {
897 } else if (pattern
[1] == '-' && patternLen
>= 3) {
898 int start
= pattern
[0];
899 int end
= pattern
[2];
907 start
= tolower(start
);
913 if (c
>= start
&& c
<= end
)
917 if (pattern
[0] == string
[0])
920 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
930 return 0; /* no match */
936 if (patternLen
>= 2) {
943 if (pattern
[0] != string
[0])
944 return 0; /* no match */
946 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
947 return 0; /* no match */
955 if (stringLen
== 0) {
956 while(*pattern
== '*') {
963 if (patternLen
== 0 && stringLen
== 0)
/* Glob-style match for NUL-terminated strings: the lengths of 'pattern' and
 * 'string' are measured with strlen() and everything, including 'nocase',
 * is forwarded to stringmatchlen(), whose result is returned unchanged. */
968 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
969 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
972 /* Convert a string representing an amount of memory into the number of
973 * bytes, so for instance memtoll("1Gb") will return 1073741824 that is
976 * On parsing error, if err is not NULL, *err is set to 1, otherwise it's
978 static long long memtoll(const char *p
, int *err
) {
981 long mul
; /* unit multiplier */
986 /* Search the first non digit character. */
989 while(*u
&& isdigit(*u
)) u
++;
990 if (*u
== '\0' || !strcasecmp(u
,"b")) {
992 } else if (!strcasecmp(u
,"k")) {
994 } else if (!strcasecmp(u
,"kb")) {
996 } else if (!strcasecmp(u
,"m")) {
998 } else if (!strcasecmp(u
,"mb")) {
1000 } else if (!strcasecmp(u
,"g")) {
1001 mul
= 1000L*1000*1000;
1002 } else if (!strcasecmp(u
,"gb")) {
1003 mul
= 1024L*1024*1024;
1009 if (digits
>= sizeof(buf
)) {
1013 memcpy(buf
,p
,digits
);
1015 val
= strtoll(buf
,NULL
,10);
/* printf-style logging. Output goes to stdout when server.logfile is NULL,
 * otherwise to server.logfile opened in append mode. A message is written
 * only when 'level' is >= server.verbosity, and is prefixed with
 * "[pid] <day month HH:MM:SS> <level character> ". */
1019 static void redisLog(int level
, const char *fmt
, ...) {
/* NOTE(review): the log file is opened here, ahead of the verbosity test
 * below, so the open is paid even for messages that test then filters
 * out -- confirm whether the level check could come first. */
1023 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1027 if (level
>= server
.verbosity
) {
1033 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
/* 'level' indexes c to pick the single character printed by %c. */
1034 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1035 vfprintf(fp
, fmt
, ap
);
/* Only a stream we opened ourselves is closed; stdout is left alone. */
1041 if (server
.logfile
) fclose(fp
);
1044 /*====================== Hash table type implementation ==================== */
1046 /* This is a hash table type that uses the SDS dynamic strings library as
1047 * keys and redis objects as values (objects can hold SDS strings,
1050 static void dictVanillaFree(void *privdata
, void *val
)
1052 DICT_NOTUSED(privdata
);
1056 static void dictListDestructor(void *privdata
, void *val
)
1058 DICT_NOTUSED(privdata
);
1059 listRelease((list
*)val
);
1062 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1066 DICT_NOTUSED(privdata
);
1068 l1
= sdslen((sds
)key1
);
1069 l2
= sdslen((sds
)key2
);
1070 if (l1
!= l2
) return 0;
1071 return memcmp(key1
, key2
, l1
) == 0;
1074 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1076 DICT_NOTUSED(privdata
);
1078 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1082 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1085 const robj
*o1
= key1
, *o2
= key2
;
1086 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1089 static unsigned int dictObjHash(const void *key
) {
1090 const robj
*o
= key
;
1091 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1094 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1097 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1100 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1101 o2
->encoding
== REDIS_ENCODING_INT
&&
1102 o1
->ptr
== o2
->ptr
) return 1;
1104 o1
= getDecodedObject(o1
);
1105 o2
= getDecodedObject(o2
);
1106 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1112 static unsigned int dictEncObjHash(const void *key
) {
1113 robj
*o
= (robj
*) key
;
1115 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1116 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1118 if (o
->encoding
== REDIS_ENCODING_INT
) {
1122 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1123 return dictGenHashFunction((unsigned char*)buf
, len
);
1127 o
= getDecodedObject(o
);
1128 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1135 /* Sets type and expires */
1136 static dictType setDictType
= {
1137 dictEncObjHash
, /* hash function */
1140 dictEncObjKeyCompare
, /* key compare */
1141 dictRedisObjectDestructor
, /* key destructor */
1142 NULL
/* val destructor */
1145 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1146 static dictType zsetDictType
= {
1147 dictEncObjHash
, /* hash function */
1150 dictEncObjKeyCompare
, /* key compare */
1151 dictRedisObjectDestructor
, /* key destructor */
1152 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1156 static dictType dbDictType
= {
1157 dictObjHash
, /* hash function */
1160 dictObjKeyCompare
, /* key compare */
1161 dictRedisObjectDestructor
, /* key destructor */
1162 dictRedisObjectDestructor
/* val destructor */
1166 static dictType keyptrDictType
= {
1167 dictObjHash
, /* hash function */
1170 dictObjKeyCompare
, /* key compare */
1171 dictRedisObjectDestructor
, /* key destructor */
1172 NULL
/* val destructor */
1175 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1176 static dictType hashDictType
= {
1177 dictEncObjHash
, /* hash function */
1180 dictEncObjKeyCompare
, /* key compare */
1181 dictRedisObjectDestructor
, /* key destructor */
1182 dictRedisObjectDestructor
/* val destructor */
1185 /* Keylist hash table type has unencoded redis objects as keys and
1186 * lists as values. It's used for blocking operations (BLPOP) and to
1187 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1188 static dictType keylistDictType
= {
1189 dictObjHash
, /* hash function */
1192 dictObjKeyCompare
, /* key compare */
1193 dictRedisObjectDestructor
, /* key destructor */
1194 dictListDestructor
/* val destructor */
1197 static void version();
1199 /* ========================= Random utility functions ======================= */
1201 /* Redis generally does not try to recover from out of memory conditions
1202 * when allocating objects or strings, it is not clear if it will be possible
1203 * to report this condition to the client since the networking layer itself
1204 * is based on heap allocation for send buffers, so we simply abort.
1205 * At least the code will be simpler to read... */
1206 static void oom(const char *msg
) {
1207 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1212 /* ====================== Redis server networking stuff ===================== */
1213 static void closeTimedoutClients(void) {
1216 time_t now
= time(NULL
);
1219 listRewind(server
.clients
,&li
);
1220 while ((ln
= listNext(&li
)) != NULL
) {
1221 c
= listNodeValue(ln
);
1222 if (server
.maxidletime
&&
1223 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1224 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1225 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1226 listLength(c
->pubsub_patterns
) == 0 &&
1227 (now
- c
->lastinteraction
> server
.maxidletime
))
1229 redisLog(REDIS_VERBOSE
,"Closing idle client");
1231 } else if (c
->flags
& REDIS_BLOCKED
) {
1232 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1233 addReply(c
,shared
.nullmultibulk
);
1234 unblockClientWaitingData(c
);
/* Return non-zero when 'dict' should be resized: it has slots and entries,
 * it has more slots than DICT_HT_INITIAL_SIZE, and its fill percentage
 * (used*100/size) is below REDIS_HT_MINFILL. */
1240 static int htNeedsResize(dict
*dict
) {
1241 long long size
, used
;
1243 size
= dictSlots(dict
);
1244 used
= dictSize(dict
);
/* 'size' is tested first, so the short-circuit keeps the division below
 * from ever running with a zero divisor. */
1245 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1246 (used
*100/size
< REDIS_HT_MINFILL
));
1249 /* If the percentage of used slots in the HT falls below REDIS_HT_MINFILL
1250 * we resize the hash table to save memory */
1251 static void tryResizeHashTables(void) {
1254 for (j
= 0; j
< server
.dbnum
; j
++) {
1255 if (htNeedsResize(server
.db
[j
].dict
))
1256 dictResize(server
.db
[j
].dict
);
1257 if (htNeedsResize(server
.db
[j
].expires
))
1258 dictResize(server
.db
[j
].expires
);
1262 /* Our hash table implementation performs rehashing incrementally while
1263 * we write/read from the hash table. Still if the server is idle, the hash
1264 * table will use two tables for a long time. So we try to use 1 millisecond
1265 * of CPU time at every serverCron() loop in order to rehash some key. */
1266 static void incrementallyRehash(void) {
1269 for (j
= 0; j
< server
.dbnum
; j
++) {
1270 if (dictIsRehashing(server
.db
[j
].dict
)) {
1271 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1272 break; /* already used our millisecond for this loop... */
1277 /* A background saving child (BGSAVE) terminated its work. Handle this. */
/* 'statloc' is a wait()-style status word; it is decoded below with
 * WEXITSTATUS, WIFSIGNALED and WTERMSIG. Three outcomes are told apart:
 * exit with status 0, exit with a non-zero status, and death by signal.
 * In every case server.bgsavechildpid is reset to -1 and the slaves
 * waiting for the dump are notified. */
1278 void backgroundSaveDoneHandler(int statloc
) {
1279 int exitcode
= WEXITSTATUS(statloc
);
1280 int bysignal
= WIFSIGNALED(statloc
);
1282 if (!bysignal
&& exitcode
== 0) {
1283 redisLog(REDIS_NOTICE
,
1284 "Background saving terminated with success");
1286 server
.lastsave
= time(NULL
);
1287 } else if (!bysignal
&& exitcode
!= 0) {
1288 redisLog(REDIS_WARNING
, "Background saving error");
1290 redisLog(REDIS_WARNING
,
1291 "Background saving terminated by signal %d", WTERMSIG(statloc
));
/* The temp file to remove is identified by the child's pid. */
1292 rdbRemoveTempFile(server
.bgsavechildpid
);
1294 server
.bgsavechildpid
= -1;
1295 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1296 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
/* NOTE(review): the status handed to the slaves depends on exitcode alone
 * and never consults bysignal. POSIX defines WEXITSTATUS only for a child
 * that exited normally, so when the child was killed by a signal exitcode
 * can still be 0 and the slaves would be told REDIS_OK. Consider
 * (!bysignal && exitcode == 0) here -- confirm against
 * updateSlavesWaitingBgsave(). */
1297 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1300 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1302 void backgroundRewriteDoneHandler(int statloc
) {
1303 int exitcode
= WEXITSTATUS(statloc
);
1304 int bysignal
= WIFSIGNALED(statloc
);
1306 if (!bysignal
&& exitcode
== 0) {
1310 redisLog(REDIS_NOTICE
,
1311 "Background append only file rewriting terminated with success");
1312 /* Now it's time to flush the differences accumulated by the parent */
1313 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1314 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1316 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1319 /* Flush our data... */
1320 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1321 (signed) sdslen(server
.bgrewritebuf
)) {
1322 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1326 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1327 /* Now our work is to rename the temp file into the stable file. And
1328 * switch the file descriptor used by the server for append only. */
1329 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1330 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1334 /* Mission completed... almost */
1335 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1336 if (server
.appendfd
!= -1) {
1337 /* If append only is actually enabled... */
1338 close(server
.appendfd
);
1339 server
.appendfd
= fd
;
1341 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1342 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1344 /* If append only is disabled we just generate a dump in this
1345 * format. Why not? */
1348 } else if (!bysignal
&& exitcode
!= 0) {
1349 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1351 redisLog(REDIS_WARNING
,
1352 "Background append only file rewriting terminated by signal %d",
1356 sdsfree(server
.bgrewritebuf
);
1357 server
.bgrewritebuf
= sdsempty();
1358 aofRemoveTempFile(server
.bgrewritechildpid
);
1359 server
.bgrewritechildpid
= -1;
1362 /* This function is called once a background process of some kind terminates,
1363 * as we want to avoid resizing the hash tables when there is a child in order
1364 * to play well with copy-on-write (otherwise when a resize happens lots of
1365 * memory pages are copied). The goal of this function is to update the ability
1366 * for dict.c to resize the hash tables according to whether or not we have
1367 * running children. */
1368 static void updateDictResizePolicy(void) {
1369 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1372 dictDisableResize();
1375 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1376 int j
, loops
= server
.cronloops
++;
1377 REDIS_NOTUSED(eventLoop
);
1379 REDIS_NOTUSED(clientData
);
1381 /* We take a cached value of the unix time in the global state because
1382 * with virtual memory and aging there is the need to store the current time
1383 * in objects at every object access, and accuracy is not needed.
1384 * To access a global var is faster than calling time(NULL) */
1385 server
.unixtime
= time(NULL
);
1387 /* Show some info about non-empty databases */
1388 for (j
= 0; j
< server
.dbnum
; j
++) {
1389 long long size
, used
, vkeys
;
1391 size
= dictSlots(server
.db
[j
].dict
);
1392 used
= dictSize(server
.db
[j
].dict
);
1393 vkeys
= dictSize(server
.db
[j
].expires
);
1394 if (!(loops
% 50) && (used
|| vkeys
)) {
1395 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1396 /* dictPrintStats(server.dict); */
1400 /* We don't want to resize the hash tables while a background saving
1401 * is in progress: the saving child is created using fork() that is
1402 * implemented with a copy-on-write semantic in most modern systems, so
1403 * if we resize the HT while there is the saving child at work actually
1404 * a lot of memory movements in the parent will cause a lot of pages
1406 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1407 if (!(loops
% 10)) tryResizeHashTables();
1408 if (server
.activerehashing
) incrementallyRehash();
1411 /* Show information about connected clients */
1412 if (!(loops
% 50)) {
1413 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1414 listLength(server
.clients
)-listLength(server
.slaves
),
1415 listLength(server
.slaves
),
1416 zmalloc_used_memory());
1419 /* Close connections of timedout clients */
1420 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1421 closeTimedoutClients();
1423 /* Check if a background saving or AOF rewrite in progress terminated */
1424 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1428 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1429 if (pid
== server
.bgsavechildpid
) {
1430 backgroundSaveDoneHandler(statloc
);
1432 backgroundRewriteDoneHandler(statloc
);
1434 updateDictResizePolicy();
1437 /* If there is not a background saving in progress check if
1438 * we have to save now */
1439 time_t now
= time(NULL
);
1440 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1441 struct saveparam
*sp
= server
.saveparams
+j
;
1443 if (server
.dirty
>= sp
->changes
&&
1444 now
-server
.lastsave
> sp
->seconds
) {
1445 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1446 sp
->changes
, sp
->seconds
);
1447 rdbSaveBackground(server
.dbfilename
);
1453 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1454 * will use few CPU cycles if there are few expiring keys, otherwise
1455 * it will get more aggressive to avoid that too much memory is used by
1456 * keys that can be removed from the keyspace. */
1457 for (j
= 0; j
< server
.dbnum
; j
++) {
1459 redisDb
*db
= server
.db
+j
;
1461 /* Continue to expire if at the end of the cycle more than 25%
1462 * of the keys were expired. */
1464 long num
= dictSize(db
->expires
);
1465 time_t now
= time(NULL
);
1468 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1469 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1474 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1475 t
= (time_t) dictGetEntryVal(de
);
1477 deleteKey(db
,dictGetEntryKey(de
));
1479 server
.stat_expiredkeys
++;
1482 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1485 /* Swap a few keys on disk if we are over the memory limit and VM
1486 * is enabled. Try to free objects from the free list first. */
1487 if (vmCanSwapOut()) {
1488 while (server
.vm_enabled
&& zmalloc_used_memory() >
1489 server
.vm_max_memory
)
1493 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1494 retval
= (server
.vm_max_threads
== 0) ?
1495 vmSwapOneObjectBlocking() :
1496 vmSwapOneObjectThreaded();
1497 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1498 zmalloc_used_memory() >
1499 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1501 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1503 /* Note that when using threaded I/O we free just one object,
1504 * because anyway when the I/O thread in charge of swapping this
1505 * object out will finish, the handler of completed jobs
1506 * will try to swap more objects if we are still out of memory. */
1507 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1511 /* Check if we should connect to a MASTER */
1512 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1513 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1514 if (syncWithMaster() == REDIS_OK
) {
1515 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1521 /* This function gets called every time Redis is entering the
1522 * main loop of the event driven library, that is, before sleeping
1523 * while waiting for ready file descriptors. */
1524 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1525 REDIS_NOTUSED(eventLoop
);
1527 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1531 listRewind(server
.io_ready_clients
,&li
);
1532 while((ln
= listNext(&li
))) {
1533 redisClient
*c
= ln
->value
;
1534 struct redisCommand
*cmd
;
1536 /* Resume the client. */
1537 listDelNode(server
.io_ready_clients
,ln
);
1538 c
->flags
&= (~REDIS_IO_WAIT
);
1539 server
.vm_blocked_clients
--;
1540 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1541 readQueryFromClient
, c
);
1542 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1543 assert(cmd
!= NULL
);
1546 /* There may be more data to process in the input buffer. */
1547 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1548 processInputBuffer(c
);
1553 static void createSharedObjects(void) {
1556 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1557 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1558 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1559 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1560 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1561 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1562 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1563 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1564 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1565 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1566 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1567 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1568 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1569 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1570 "-ERR no such key\r\n"));
1571 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1572 "-ERR syntax error\r\n"));
1573 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1574 "-ERR source and destination objects are the same\r\n"));
1575 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1576 "-ERR index out of range\r\n"));
1577 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1578 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1579 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1580 shared
.select0
= createStringObject("select 0\r\n",10);
1581 shared
.select1
= createStringObject("select 1\r\n",10);
1582 shared
.select2
= createStringObject("select 2\r\n",10);
1583 shared
.select3
= createStringObject("select 3\r\n",10);
1584 shared
.select4
= createStringObject("select 4\r\n",10);
1585 shared
.select5
= createStringObject("select 5\r\n",10);
1586 shared
.select6
= createStringObject("select 6\r\n",10);
1587 shared
.select7
= createStringObject("select 7\r\n",10);
1588 shared
.select8
= createStringObject("select 8\r\n",10);
1589 shared
.select9
= createStringObject("select 9\r\n",10);
1590 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1591 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1592 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1593 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1594 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1595 shared
.mbulk3
= createStringObject("*3\r\n",4);
1596 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1597 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1598 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1602 static void appendServerSaveParams(time_t seconds
, int changes
) {
1603 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1604 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1605 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1606 server
.saveparamslen
++;
1609 static void resetServerSaveParams() {
1610 zfree(server
.saveparams
);
1611 server
.saveparams
= NULL
;
1612 server
.saveparamslen
= 0;
1615 static void initServerConfig() {
1616 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1617 server
.port
= REDIS_SERVERPORT
;
1618 server
.verbosity
= REDIS_VERBOSE
;
1619 server
.maxidletime
= REDIS_MAXIDLETIME
;
1620 server
.saveparams
= NULL
;
1621 server
.logfile
= NULL
; /* NULL = log on standard output */
1622 server
.bindaddr
= NULL
;
1623 server
.glueoutputbuf
= 1;
1624 server
.daemonize
= 0;
1625 server
.appendonly
= 0;
1626 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1627 server
.lastfsync
= time(NULL
);
1628 server
.appendfd
= -1;
1629 server
.appendseldb
= -1; /* Make sure the first time will not match */
1630 server
.pidfile
= zstrdup("/var/run/redis.pid");
1631 server
.dbfilename
= zstrdup("dump.rdb");
1632 server
.appendfilename
= zstrdup("appendonly.aof");
1633 server
.requirepass
= NULL
;
1634 server
.rdbcompression
= 1;
1635 server
.activerehashing
= 1;
1636 server
.maxclients
= 0;
1637 server
.blpop_blocked_clients
= 0;
1638 server
.maxmemory
= 0;
1639 server
.vm_enabled
= 0;
1640 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1641 server
.vm_page_size
= 256; /* 256 bytes per page */
1642 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1643 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1644 server
.vm_max_threads
= 4;
1645 server
.vm_blocked_clients
= 0;
1646 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1647 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1649 resetServerSaveParams();
1651 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1652 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1653 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1654 /* Replication related */
1656 server
.masterauth
= NULL
;
1657 server
.masterhost
= NULL
;
1658 server
.masterport
= 6379;
1659 server
.master
= NULL
;
1660 server
.replstate
= REDIS_REPL_NONE
;
1662 /* Double constants initialization */
1664 R_PosInf
= 1.0/R_Zero
;
1665 R_NegInf
= -1.0/R_Zero
;
1666 R_Nan
= R_Zero
/R_Zero
;
/* One-time runtime initialisation. Ignores SIGHUP and SIGPIPE, calls
 * setupSigSegvAction(), creates the client/slave/monitor/free-object lists,
 * the shared objects, the event loop, the listening TCP socket and the
 * per-database dictionaries, sets up the pub/sub containers, resets the
 * background-child state and the statistics, registers serverCron and the
 * accept handler with the event loop, then opens the append-only file when
 * server.appendonly is set and calls vmInit() when server.vm_enabled is set. */
1669 static void initServer() {
1672 signal(SIGHUP
, SIG_IGN
);
1673 signal(SIGPIPE
, SIG_IGN
);
1674 setupSigSegvAction();
1676 server
.devnull
= fopen("/dev/null","w");
1677 if (server
.devnull
== NULL
) {
/* NOTE(review): fopen() reports failure through errno, whereas
 * server.neterr is the buffer handed to anetTcpServer() further down and
 * nothing in this function has written to it yet at this point.
 * strerror(errno) looks like the intended argument -- confirm. */
1678 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1681 server
.clients
= listCreate();
1682 server
.slaves
= listCreate();
1683 server
.monitors
= listCreate();
1684 server
.objfreelist
= listCreate();
1685 createSharedObjects();
1686 server
.el
= aeCreateEventLoop();
/* One redisDb slot per configured database (server.dbnum). */
1687 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
/* On failure (-1) the message logged below is taken from server.neterr,
 * the buffer passed as first argument here. */
1688 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1689 if (server
.fd
== -1) {
1690 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* Every database gets its own key space, expires and blocking-keys
 * dictionaries; io_keys is created only when VM is enabled. */
1693 for (j
= 0; j
< server
.dbnum
; j
++) {
1694 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1695 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1696 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1697 if (server
.vm_enabled
)
1698 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1699 server
.db
[j
].id
= j
;
1701 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1702 server
.pubsub_patterns
= listCreate();
1703 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1704 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1705 server
.cronloops
= 0;
/* -1 means "no such child running". */
1706 server
.bgsavechildpid
= -1;
1707 server
.bgrewritechildpid
= -1;
1708 server
.bgrewritebuf
= sdsempty();
1709 server
.lastsave
= time(NULL
);
1711 server
.stat_numcommands
= 0;
1712 server
.stat_numconnections
= 0;
1713 server
.stat_expiredkeys
= 0;
1714 server
.stat_starttime
= time(NULL
);
1715 server
.unixtime
= time(NULL
);
/* Register serverCron as a time event; the second argument (1) is
 * presumably the initial delay in milliseconds -- verify against ae.h. */
1716 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
/* acceptHandler runs when the listening socket becomes readable; a
 * registration failure is routed to oom(). */
1717 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1718 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1720 if (server
.appendonly
) {
/* Append-only file: created if missing (mode 0644), opened write-only
 * in append mode. */
1721 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1722 if (server
.appendfd
== -1) {
1723 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1729 if (server
.vm_enabled
) vmInit();
1732 /* Empty the whole database */
1733 static long long emptyDb() {
1735 long long removed
= 0;
1737 for (j
= 0; j
< server
.dbnum
; j
++) {
1738 removed
+= dictSize(server
.db
[j
].dict
);
1739 dictEmpty(server
.db
[j
].dict
);
1740 dictEmpty(server
.db
[j
].expires
);
/* Translate a yes/no string into an integer: "yes" gives 1 and "no"
 * gives 0. The comparison is case-insensitive (strcasecmp). */
1745 static int yesnotoi(char *s
) {
1746 if (!strcasecmp(s
,"yes")) return 1;
1747 else if (!strcasecmp(s
,"no")) return 0;
1751 /* I agree, this is a very rudimentary way to load a configuration...
1752 will improve later if the config gets more complex */
1753 static void loadServerConfig(char *filename
) {
1755 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1759 if (filename
[0] == '-' && filename
[1] == '\0')
1762 if ((fp
= fopen(filename
,"r")) == NULL
) {
1763 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1768 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1774 line
= sdstrim(line
," \t\r\n");
1776 /* Skip comments and blank lines*/
1777 if (line
[0] == '#' || line
[0] == '\0') {
1782 /* Split into arguments */
1783 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1784 sdstolower(argv
[0]);
1786 /* Execute config directives */
1787 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1788 server
.maxidletime
= atoi(argv
[1]);
1789 if (server
.maxidletime
< 0) {
1790 err
= "Invalid timeout value"; goto loaderr
;
1792 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1793 server
.port
= atoi(argv
[1]);
1794 if (server
.port
< 1 || server
.port
> 65535) {
1795 err
= "Invalid port"; goto loaderr
;
1797 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1798 server
.bindaddr
= zstrdup(argv
[1]);
1799 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1800 int seconds
= atoi(argv
[1]);
1801 int changes
= atoi(argv
[2]);
1802 if (seconds
< 1 || changes
< 0) {
1803 err
= "Invalid save parameters"; goto loaderr
;
1805 appendServerSaveParams(seconds
,changes
);
1806 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1807 if (chdir(argv
[1]) == -1) {
1808 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1809 argv
[1], strerror(errno
));
1812 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1813 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1814 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1815 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1816 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1818 err
= "Invalid log level. Must be one of debug, notice, warning";
1821 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1824 server
.logfile
= zstrdup(argv
[1]);
1825 if (!strcasecmp(server
.logfile
,"stdout")) {
1826 zfree(server
.logfile
);
1827 server
.logfile
= NULL
;
1829 if (server
.logfile
) {
1830 /* Test if we are able to open the file. The server will not
1831 * be able to abort just for this problem later... */
1832 logfp
= fopen(server
.logfile
,"a");
1833 if (logfp
== NULL
) {
1834 err
= sdscatprintf(sdsempty(),
1835 "Can't open the log file: %s", strerror(errno
));
1840 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1841 server
.dbnum
= atoi(argv
[1]);
1842 if (server
.dbnum
< 1) {
1843 err
= "Invalid number of databases"; goto loaderr
;
1845 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1846 loadServerConfig(argv
[1]);
1847 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1848 server
.maxclients
= atoi(argv
[1]);
1849 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1850 server
.maxmemory
= memtoll(argv
[1],NULL
);
1851 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1852 server
.masterhost
= sdsnew(argv
[1]);
1853 server
.masterport
= atoi(argv
[2]);
1854 server
.replstate
= REDIS_REPL_CONNECT
;
1855 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1856 server
.masterauth
= zstrdup(argv
[1]);
1857 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1858 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1859 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1861 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1862 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1863 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1865 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1866 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1867 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1869 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1870 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1871 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1873 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1874 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1875 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1877 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1878 if (!strcasecmp(argv
[1],"no")) {
1879 server
.appendfsync
= APPENDFSYNC_NO
;
1880 } else if (!strcasecmp(argv
[1],"always")) {
1881 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1882 } else if (!strcasecmp(argv
[1],"everysec")) {
1883 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1885 err
= "argument must be 'no', 'always' or 'everysec'";
1888 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1889 server
.requirepass
= zstrdup(argv
[1]);
1890 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1891 zfree(server
.pidfile
);
1892 server
.pidfile
= zstrdup(argv
[1]);
1893 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1894 zfree(server
.dbfilename
);
1895 server
.dbfilename
= zstrdup(argv
[1]);
1896 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1897 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1898 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1900 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1901 zfree(server
.vm_swap_file
);
1902 server
.vm_swap_file
= zstrdup(argv
[1]);
1903 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1904 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
1905 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1906 server
.vm_page_size
= memtoll(argv
[1], NULL
);
1907 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1908 server
.vm_pages
= memtoll(argv
[1], NULL
);
1909 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1910 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1911 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1912 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
1913 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1914 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
1916 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1918 for (j
= 0; j
< argc
; j
++)
1923 if (fp
!= stdin
) fclose(fp
);
1927 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1928 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1929 fprintf(stderr
, ">>> '%s'\n", line
);
1930 fprintf(stderr
, "%s\n", err
);
1934 static void freeClientArgv(redisClient
*c
) {
1937 for (j
= 0; j
< c
->argc
; j
++)
1938 decrRefCount(c
->argv
[j
]);
1939 for (j
= 0; j
< c
->mbargc
; j
++)
1940 decrRefCount(c
->mbargv
[j
]);
1945 static void freeClient(redisClient
*c
) {
1948 /* Note that if the client we are freeing is blocked into a blocking
1949 * call, we have to set querybuf to NULL *before* to call
1950 * unblockClientWaitingData() to avoid processInputBuffer() will get
1951 * called. Also it is important to remove the file events after
1952 * this, because this call adds the READABLE event. */
1953 sdsfree(c
->querybuf
);
1955 if (c
->flags
& REDIS_BLOCKED
)
1956 unblockClientWaitingData(c
);
1958 /* Unsubscribe from all the pubsub channels */
1959 pubsubUnsubscribeAllChannels(c
,0);
1960 pubsubUnsubscribeAllPatterns(c
,0);
1961 dictRelease(c
->pubsub_channels
);
1962 listRelease(c
->pubsub_patterns
);
1963 /* Obvious cleanup */
1964 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1965 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1966 listRelease(c
->reply
);
1969 /* Remove from the list of clients */
1970 ln
= listSearchKey(server
.clients
,c
);
1971 redisAssert(ln
!= NULL
);
1972 listDelNode(server
.clients
,ln
);
1973 /* Remove from the list of clients waiting for swapped keys */
1974 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1975 ln
= listSearchKey(server
.io_ready_clients
,c
);
1977 listDelNode(server
.io_ready_clients
,ln
);
1978 server
.vm_blocked_clients
--;
1981 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1982 ln
= listFirst(c
->io_keys
);
1983 dontWaitForSwappedKey(c
,ln
->value
);
1985 listRelease(c
->io_keys
);
1986 /* Master/slave cleanup */
1987 if (c
->flags
& REDIS_SLAVE
) {
1988 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1990 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1991 ln
= listSearchKey(l
,c
);
1992 redisAssert(ln
!= NULL
);
1995 if (c
->flags
& REDIS_MASTER
) {
1996 server
.master
= NULL
;
1997 server
.replstate
= REDIS_REPL_CONNECT
;
1999 /* Release memory */
2002 freeClientMultiState(c
);
2006 #define GLUEREPLY_UP_TO (1024)
2007 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2009 char buf
[GLUEREPLY_UP_TO
];
2014 listRewind(c
->reply
,&li
);
2015 while((ln
= listNext(&li
))) {
2019 objlen
= sdslen(o
->ptr
);
2020 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2021 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2023 listDelNode(c
->reply
,ln
);
2025 if (copylen
== 0) return;
2029 /* Now the output buffer is empty, add the new single element */
2030 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2031 listAddNodeHead(c
->reply
,o
);
2034 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2035 redisClient
*c
= privdata
;
2036 int nwritten
= 0, totwritten
= 0, objlen
;
2039 REDIS_NOTUSED(mask
);
2041 /* Use writev() if we have enough buffers to send */
2042 if (!server
.glueoutputbuf
&&
2043 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2044 !(c
->flags
& REDIS_MASTER
))
2046 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2050 while(listLength(c
->reply
)) {
2051 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2052 glueReplyBuffersIfNeeded(c
);
2054 o
= listNodeValue(listFirst(c
->reply
));
2055 objlen
= sdslen(o
->ptr
);
2058 listDelNode(c
->reply
,listFirst(c
->reply
));
2062 if (c
->flags
& REDIS_MASTER
) {
2063 /* Don't reply to a master */
2064 nwritten
= objlen
- c
->sentlen
;
2066 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2067 if (nwritten
<= 0) break;
2069 c
->sentlen
+= nwritten
;
2070 totwritten
+= nwritten
;
2071 /* If we fully sent the object on head go to the next one */
2072 if (c
->sentlen
== objlen
) {
2073 listDelNode(c
->reply
,listFirst(c
->reply
));
2076 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2077 * bytes, in a single threaded server it's a good idea to serve
2078 * other clients as well, even if a very large request comes from
2079 * super fast link that is always able to accept data (in real world
2080 * scenario think about 'KEYS *' against the loopback interface) */
2081 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2083 if (nwritten
== -1) {
2084 if (errno
== EAGAIN
) {
2087 redisLog(REDIS_VERBOSE
,
2088 "Error writing to client: %s", strerror(errno
));
2093 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2094 if (listLength(c
->reply
) == 0) {
2096 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2100 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2102 redisClient
*c
= privdata
;
2103 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2105 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2106 int offset
, ion
= 0;
2108 REDIS_NOTUSED(mask
);
2111 while (listLength(c
->reply
)) {
2112 offset
= c
->sentlen
;
2116 /* fill-in the iov[] array */
2117 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2118 o
= listNodeValue(node
);
2119 objlen
= sdslen(o
->ptr
);
2121 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2124 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2125 break; /* no more iovecs */
2127 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2128 iov
[ion
].iov_len
= objlen
- offset
;
2129 willwrite
+= objlen
- offset
;
2130 offset
= 0; /* just for the first item */
2137 /* write all collected blocks at once */
2138 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2139 if (errno
!= EAGAIN
) {
2140 redisLog(REDIS_VERBOSE
,
2141 "Error writing to client: %s", strerror(errno
));
2148 totwritten
+= nwritten
;
2149 offset
= c
->sentlen
;
2151 /* remove written robjs from c->reply */
2152 while (nwritten
&& listLength(c
->reply
)) {
2153 o
= listNodeValue(listFirst(c
->reply
));
2154 objlen
= sdslen(o
->ptr
);
2156 if(nwritten
>= objlen
- offset
) {
2157 listDelNode(c
->reply
, listFirst(c
->reply
));
2158 nwritten
-= objlen
- offset
;
2162 c
->sentlen
+= nwritten
;
2170 c
->lastinteraction
= time(NULL
);
2172 if (listLength(c
->reply
) == 0) {
2174 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2178 static struct redisCommand
*lookupCommand(char *name
) {
2180 while(cmdTable
[j
].name
!= NULL
) {
2181 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2187 /* resetClient prepares the client to process the next command */
2188 static void resetClient(redisClient
*c
) {
2194 /* Call() is the core of Redis execution of a command */
2195 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2198 dirty
= server
.dirty
;
2200 dirty
= server
.dirty
-dirty
;
2202 if (server
.appendonly
&& dirty
)
2203 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2204 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2205 listLength(server
.slaves
))
2206 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2207 if (listLength(server
.monitors
))
2208 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2209 server
.stat_numcommands
++;
2212 /* If this function gets called we already read a whole
2213 * command, arguments are in the client argv/argc fields.
2214 * processCommand() executes the command or prepares the
2215 * server for a bulk read from the client.
2217 * If 1 is returned the client is still alive and valid and
2218 * other operations can be performed by the caller. Otherwise
2219 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2220 static int processCommand(redisClient
*c
) {
2221 struct redisCommand
*cmd
;
2223 /* Free some memory if needed (maxmemory setting) */
2224 if (server
.maxmemory
) freeMemoryIfNeeded();
2226 /* Handle the multi bulk command type. This is an alternative protocol
2227 * supported by Redis in order to receive commands that are composed of
2228 * multiple binary-safe "bulk" arguments. The latency of processing is
2229 * a bit higher but this allows things like multi-sets, so if this
2230 * protocol is used only for MSET and similar commands this is a big win. */
2231 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2232 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2233 if (c
->multibulk
<= 0) {
2237 decrRefCount(c
->argv
[c
->argc
-1]);
2241 } else if (c
->multibulk
) {
2242 if (c
->bulklen
== -1) {
2243 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2244 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2248 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2249 decrRefCount(c
->argv
[0]);
2250 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2252 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2257 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2261 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2262 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2266 if (c
->multibulk
== 0) {
2270 /* Here we need to swap the multi-bulk argc/argv with the
2271 * normal argc/argv of the client structure. */
2273 c
->argv
= c
->mbargv
;
2274 c
->mbargv
= auxargv
;
2277 c
->argc
= c
->mbargc
;
2278 c
->mbargc
= auxargc
;
2280 /* We need to set bulklen to something different than -1
2281 * in order for the code below to process the command without
2282 * to try to read the last argument of a bulk command as
2283 * a special argument. */
2285 /* continue below and process the command */
2292 /* -- end of multi bulk commands processing -- */
2294 /* The QUIT command is handled as a special case. Normal command
2295 * procs are unable to close the client connection safely */
2296 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2301 /* Now lookup the command and check ASAP about trivial error conditions
2302 * such wrong arity, bad command name and so forth. */
2303 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2306 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2307 (char*)c
->argv
[0]->ptr
));
2310 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2311 (c
->argc
< -cmd
->arity
)) {
2313 sdscatprintf(sdsempty(),
2314 "-ERR wrong number of arguments for '%s' command\r\n",
2318 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2319 /* This is a bulk command, we have to read the last argument yet. */
2320 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2322 decrRefCount(c
->argv
[c
->argc
-1]);
2323 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2325 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2330 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2331 /* It is possible that the bulk read is already in the
2332 * buffer. Check this condition and handle it accordingly.
2333 * This is just a fast path, alternative to call processInputBuffer().
2334 * It's a good idea since the code is small and this condition
2335 * happens most of the times. */
2336 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2337 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2339 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2341 /* Otherwise return... there is to read the last argument
2342 * from the socket. */
2346 /* Let's try to encode the bulk object to save space. */
2347 if (cmd
->flags
& REDIS_CMD_BULK
)
2348 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2350 /* Check if the user is authenticated */
2351 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2352 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2357 /* Handle the maxmemory directive */
2358 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2359 zmalloc_used_memory() > server
.maxmemory
)
2361 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2366 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2367 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2369 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2370 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2371 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2376 /* Exec the command */
2377 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2378 queueMultiCommand(c
,cmd
);
2379 addReply(c
,shared
.queued
);
2381 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2382 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2386 /* Prepare the client for the next command */
2391 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2396 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2397 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2398 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2399 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2402 if (argc
<= REDIS_STATIC_ARGS
) {
2405 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2408 lenobj
= createObject(REDIS_STRING
,
2409 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2410 lenobj
->refcount
= 0;
2411 outv
[outc
++] = lenobj
;
2412 for (j
= 0; j
< argc
; j
++) {
2413 lenobj
= createObject(REDIS_STRING
,
2414 sdscatprintf(sdsempty(),"$%lu\r\n",
2415 (unsigned long) stringObjectLen(argv
[j
])));
2416 lenobj
->refcount
= 0;
2417 outv
[outc
++] = lenobj
;
2418 outv
[outc
++] = argv
[j
];
2419 outv
[outc
++] = shared
.crlf
;
2422 /* Increment all the refcounts at start and decrement at end in order to
2423 * be sure to free objects if there is no slave in a replication state
2424 * able to be feed with commands */
2425 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2426 listRewind(slaves
,&li
);
2427 while((ln
= listNext(&li
))) {
2428 redisClient
*slave
= ln
->value
;
2430 /* Don't feed slaves that are still waiting for BGSAVE to start */
2431 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2433 /* Feed all the other slaves, MONITORs and so on */
2434 if (slave
->slaveseldb
!= dictid
) {
2438 case 0: selectcmd
= shared
.select0
; break;
2439 case 1: selectcmd
= shared
.select1
; break;
2440 case 2: selectcmd
= shared
.select2
; break;
2441 case 3: selectcmd
= shared
.select3
; break;
2442 case 4: selectcmd
= shared
.select4
; break;
2443 case 5: selectcmd
= shared
.select5
; break;
2444 case 6: selectcmd
= shared
.select6
; break;
2445 case 7: selectcmd
= shared
.select7
; break;
2446 case 8: selectcmd
= shared
.select8
; break;
2447 case 9: selectcmd
= shared
.select9
; break;
2449 selectcmd
= createObject(REDIS_STRING
,
2450 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2451 selectcmd
->refcount
= 0;
2454 addReply(slave
,selectcmd
);
2455 slave
->slaveseldb
= dictid
;
2457 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2459 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2460 if (outv
!= static_outv
) zfree(outv
);
2463 static void processInputBuffer(redisClient
*c
) {
2465 /* Before processing the input buffer, make sure the client is not
2466 * waiting for a blocking operation such as BLPOP. Note that the first
2467 * iteration the client is never blocked, otherwise the processInputBuffer
2468 * would not be called at all, but after the execution of the first commands
2469 * in the input buffer the client may be blocked, and the "goto again"
2470 * will try to reiterate. The following line will make it return asap. */
2471 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2472 if (c
->bulklen
== -1) {
2473 /* Read the first line of the query */
2474 char *p
= strchr(c
->querybuf
,'\n');
2481 query
= c
->querybuf
;
2482 c
->querybuf
= sdsempty();
2483 querylen
= 1+(p
-(query
));
2484 if (sdslen(query
) > querylen
) {
2485 /* leave data after the first line of the query in the buffer */
2486 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2488 *p
= '\0'; /* remove "\n" */
2489 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2490 sdsupdatelen(query
);
2492 /* Now we can split the query in arguments */
2493 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2496 if (c
->argv
) zfree(c
->argv
);
2497 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2499 for (j
= 0; j
< argc
; j
++) {
2500 if (sdslen(argv
[j
])) {
2501 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2509 /* Execute the command. If the client is still valid
2510 * after processCommand() return and there is something
2511 * on the query buffer try to process the next command. */
2512 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2514 /* Nothing to process, argc == 0. Just process the query
2515 * buffer if it's not empty or return to the caller */
2516 if (sdslen(c
->querybuf
)) goto again
;
2519 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2520 redisLog(REDIS_VERBOSE
, "Client protocol error");
2525 /* Bulk read handling. Note that if we are at this point
2526 the client already sent a command terminated with a newline,
2527 we are reading the bulk data that is actually the last
2528 argument of the command. */
2529 int qbl
= sdslen(c
->querybuf
);
2531 if (c
->bulklen
<= qbl
) {
2532 /* Copy everything but the final CRLF as final argument */
2533 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2535 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2536 /* Process the command. If the client is still valid after
2537 * the processing and there is more data in the buffer
2538 * try to parse it. */
2539 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2545 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2546 redisClient
*c
= (redisClient
*) privdata
;
2547 char buf
[REDIS_IOBUF_LEN
];
2550 REDIS_NOTUSED(mask
);
2552 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2554 if (errno
== EAGAIN
) {
2557 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2561 } else if (nread
== 0) {
2562 redisLog(REDIS_VERBOSE
, "Client closed connection");
2567 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2568 c
->lastinteraction
= time(NULL
);
2572 processInputBuffer(c
);
2575 static int selectDb(redisClient
*c
, int id
) {
2576 if (id
< 0 || id
>= server
.dbnum
)
2578 c
->db
= &server
.db
[id
];
2582 static void *dupClientReplyValue(void *o
) {
2583 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: the result is non zero when
 * compareStringObjects() reports the two objects as equal, zero otherwise. */
static int listMatchObjects(void *a, void *b) {
    int cmp = compareStringObjects(a,b);

    return cmp == 0;
}
2591 static redisClient
*createClient(int fd
) {
2592 redisClient
*c
= zmalloc(sizeof(*c
));
2594 anetNonBlock(NULL
,fd
);
2595 anetTcpNoDelay(NULL
,fd
);
2596 if (!c
) return NULL
;
2599 c
->querybuf
= sdsempty();
2608 c
->lastinteraction
= time(NULL
);
2609 c
->authenticated
= 0;
2610 c
->replstate
= REDIS_REPL_NONE
;
2611 c
->reply
= listCreate();
2612 listSetFreeMethod(c
->reply
,decrRefCount
);
2613 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2614 c
->blockingkeys
= NULL
;
2615 c
->blockingkeysnum
= 0;
2616 c
->io_keys
= listCreate();
2617 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2618 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2619 c
->pubsub_patterns
= listCreate();
2620 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2621 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2622 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2623 readQueryFromClient
, c
) == AE_ERR
) {
2627 listAddNodeTail(server
.clients
,c
);
2628 initClientMultiState(c
);
2632 static void addReply(redisClient
*c
, robj
*obj
) {
2633 if (listLength(c
->reply
) == 0 &&
2634 (c
->replstate
== REDIS_REPL_NONE
||
2635 c
->replstate
== REDIS_REPL_ONLINE
) &&
2636 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2637 sendReplyToClient
, c
) == AE_ERR
) return;
2639 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2640 obj
= dupStringObject(obj
);
2641 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2643 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2646 static void addReplySds(redisClient
*c
, sds s
) {
2647 robj
*o
= createObject(REDIS_STRING
,s
);
2652 static void addReplyDouble(redisClient
*c
, double d
) {
2655 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2656 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2657 (unsigned long) strlen(buf
),buf
));
2660 static void addReplyLong(redisClient
*c
, long l
) {
2665 addReply(c
,shared
.czero
);
2667 } else if (l
== 1) {
2668 addReply(c
,shared
.cone
);
2671 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2672 addReplySds(c
,sdsnewlen(buf
,len
));
2675 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2680 addReply(c
,shared
.czero
);
2682 } else if (ll
== 1) {
2683 addReply(c
,shared
.cone
);
2686 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2687 addReplySds(c
,sdsnewlen(buf
,len
));
2690 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2695 addReply(c
,shared
.czero
);
2697 } else if (ul
== 1) {
2698 addReply(c
,shared
.cone
);
2701 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2702 addReplySds(c
,sdsnewlen(buf
,len
));
2705 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2708 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2709 len
= sdslen(obj
->ptr
);
2711 long n
= (long)obj
->ptr
;
2713 /* Compute how many bytes will take this integer as a radix 10 string */
2719 while((n
= n
/10) != 0) {
2723 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2726 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2727 addReplyBulkLen(c
,obj
);
2729 addReply(c
,shared
.crlf
);
2732 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2733 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2735 addReply(c
,shared
.nullbulk
);
2737 robj
*o
= createStringObject(s
,strlen(s
));
2743 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2748 REDIS_NOTUSED(mask
);
2749 REDIS_NOTUSED(privdata
);
2751 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2752 if (cfd
== AE_ERR
) {
2753 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2756 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2757 if ((c
= createClient(cfd
)) == NULL
) {
2758 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2759 close(cfd
); /* May be already closed, just ingore errors */
2762 /* If maxclient directive is set and this is one client more... close the
2763 * connection. Note that we create the client instead to check before
2764 * for this condition, since now the socket is already set in nonblocking
2765 * mode and we can send an error for free using the Kernel I/O */
2766 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2767 char *err
= "-ERR max number of clients reached\r\n";
2769 /* That's a best effort error message, don't check write errors */
2770 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2771 /* Nothing to do, Just to avoid the warning... */
2776 server
.stat_numconnections
++;
2779 /* ======================= Redis objects implementation ===================== */
2781 static robj
*createObject(int type
, void *ptr
) {
2784 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2785 if (listLength(server
.objfreelist
)) {
2786 listNode
*head
= listFirst(server
.objfreelist
);
2787 o
= listNodeValue(head
);
2788 listDelNode(server
.objfreelist
,head
);
2789 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2791 if (server
.vm_enabled
) {
2792 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2793 o
= zmalloc(sizeof(*o
));
2795 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2799 o
->encoding
= REDIS_ENCODING_RAW
;
2802 if (server
.vm_enabled
) {
2803 /* Note that this code may run in the context of an I/O thread
2804 * and accessing to server.unixtime in theory is an error
2805 * (no locks). But in practice this is safe, and even if we read
2806 * garbage Redis will not fail, as it's just a statistical info */
2807 o
->vm
.atime
= server
.unixtime
;
2808 o
->storage
= REDIS_VM_MEMORY
;
2813 static robj
*createStringObject(char *ptr
, size_t len
) {
2814 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2817 static robj
*createStringObjectFromLongLong(long long value
) {
2819 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2820 incrRefCount(shared
.integers
[value
]);
2821 o
= shared
.integers
[value
];
2823 o
= createObject(REDIS_STRING
, NULL
);
2824 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
2825 o
->encoding
= REDIS_ENCODING_INT
;
2826 o
->ptr
= (void*)((long)value
);
2828 o
->ptr
= sdscatprintf(sdsempty(),"%lld",value
);
2834 static robj
*dupStringObject(robj
*o
) {
2835 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2836 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2839 static robj
*createListObject(void) {
2840 list
*l
= listCreate();
2842 listSetFreeMethod(l
,decrRefCount
);
2843 return createObject(REDIS_LIST
,l
);
2846 static robj
*createSetObject(void) {
2847 dict
*d
= dictCreate(&setDictType
,NULL
);
2848 return createObject(REDIS_SET
,d
);
2851 static robj
*createHashObject(void) {
2852 /* All the Hashes start as zipmaps. Will be automatically converted
2853 * into hash tables if there are enough elements or big elements
2855 unsigned char *zm
= zipmapNew();
2856 robj
*o
= createObject(REDIS_HASH
,zm
);
2857 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2861 static robj
*createZsetObject(void) {
2862 zset
*zs
= zmalloc(sizeof(*zs
));
2864 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2865 zs
->zsl
= zslCreate();
2866 return createObject(REDIS_ZSET
,zs
);
2869 static void freeStringObject(robj
*o
) {
2870 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2875 static void freeListObject(robj
*o
) {
2876 listRelease((list
*) o
->ptr
);
2879 static void freeSetObject(robj
*o
) {
2880 dictRelease((dict
*) o
->ptr
);
2883 static void freeZsetObject(robj
*o
) {
2886 dictRelease(zs
->dict
);
2891 static void freeHashObject(robj
*o
) {
2892 switch (o
->encoding
) {
2893 case REDIS_ENCODING_HT
:
2894 dictRelease((dict
*) o
->ptr
);
2896 case REDIS_ENCODING_ZIPMAP
:
2900 redisPanic("Unknown hash encoding type");
2905 static void incrRefCount(robj
*o
) {
/* Drop one reference to the object passed as 'obj'.
 *
 * What the surviving lines show:
 *  - a reference count <= 0 on entry is a fatal error (redisPanic);
 *  - with VM enabled, an object whose storage is REDIS_VM_SWAPPED or
 *    REDIS_VM_LOADING is asserted to be a string: a pending threaded I/O
 *    job is cancelled for the LOADING case, the string payload is freed,
 *    its swap pages are marked free, the object is offered to
 *    server.objfreelist while holding obj_freelist_mutex, and the
 *    swapped-objects counter is decremented;
 *  - otherwise the count is decremented and, when it reaches zero, a
 *    pending swap-out job is cancelled, the payload is released by the
 *    per-type free function, and the object is offered to the free list
 *    (the mutex is taken only when VM is enabled).
 *
 * NOTE(review): several lines (local declaration, returns, the statement
 * executed when the free list refuses the object) are missing from this
 * extract - confirm against the full file. */
2909 static void decrRefCount(void *obj
) {
2912 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
2913 /* Object is a key of a swapped out value, or in the process of being
2915 if (server
.vm_enabled
&&
2916 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2918 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2919 redisAssert(o
->type
== REDIS_STRING
);
2920 freeStringObject(o
);
2921 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2922 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2923 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2924 !listAddNodeHead(server
.objfreelist
,o
))
2926 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2927 server
.vm_stats_swapped_objects
--;
2930 /* Object is in memory, or in the process of being swapped out. */
2931 if (--(o
->refcount
) == 0) {
2932 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2933 vmCancelThreadedIOJob(obj
);
2935 case REDIS_STRING
: freeStringObject(o
); break;
2936 case REDIS_LIST
: freeListObject(o
); break;
2937 case REDIS_SET
: freeSetObject(o
); break;
2938 case REDIS_ZSET
: freeZsetObject(o
); break;
2939 case REDIS_HASH
: freeHashObject(o
); break;
2940 default: redisPanic("Unknown object type"); break;
2942 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2943 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2944 !listAddNodeHead(server
.objfreelist
,o
))
2946 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
/* Look up 'key' in the main dictionary of 'db'.
 *
 * What the surviving lines show: the entry is located with dictFind() and
 * the key/value stored in the entry are fetched. When VM is enabled and the
 * stored key is in memory or being swapped out, a pending swap-out job is
 * cancelled and the key's access time is set to server.unixtime. Otherwise
 * the value (asserted to be NULL in the entry) is brought back with
 * vmLoadObject(), written into the entry, and clients blocked on this key
 * are notified when the key was in the LOADING state.
 *
 * NOTE(review): the return statements and the "entry not found" path are
 * missing from this extract - confirm against the full file. */
2950 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2951 dictEntry
*de
= dictFind(db
->dict
,key
);
2953 robj
*key
= dictGetEntryKey(de
);
2954 robj
*val
= dictGetEntryVal(de
);
2956 if (server
.vm_enabled
) {
2957 if (key
->storage
== REDIS_VM_MEMORY
||
2958 key
->storage
== REDIS_VM_SWAPPING
)
2960 /* If we were swapping the object out, stop it, this key
2962 if (key
->storage
== REDIS_VM_SWAPPING
)
2963 vmCancelThreadedIOJob(key
);
2964 /* Update the access time of the key for the aging algorithm. */
2965 key
->vm
.atime
= server
.unixtime
;
2967 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2969 /* Our value was swapped on disk. Bring it at home. */
2970 redisAssert(val
== NULL
);
2971 val
= vmLoadObject(key
);
2972 dictGetEntryVal(de
) = val
;
2974 /* Clients blocked by the VM subsystem may be waiting for
2976 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2985 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2986 expireIfNeeded(db
,key
);
2987 return lookupKey(db
,key
);
2990 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2991 deleteIfVolatile(db
,key
);
2992 return lookupKey(db
,key
);
2995 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2996 robj
*o
= lookupKeyRead(c
->db
, key
);
2997 if (!o
) addReply(c
,reply
);
3001 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3002 robj
*o
= lookupKeyWrite(c
->db
, key
);
3003 if (!o
) addReply(c
,reply
);
3007 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3008 if (o
->type
!= type
) {
3009 addReply(c
,shared
.wrongtypeerr
);
3015 static int deleteKey(redisDb
*db
, robj
*key
) {
3018 /* We need to protect key from destruction: after the first dictDelete()
3019 * it may happen that 'key' is no longer valid if we don't increment
3020 * it's count. This may happen when we get the object reference directly
3021 * from the hash table with dictRandomKey() or dict iterators */
3023 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
3024 retval
= dictDelete(db
->dict
,key
);
3027 return retval
== DICT_OK
;
3030 /* Check if the nul-terminated string 's' can be represented by a long
3031 * (that is, is a number that fits into long without any other space or
3032 * character before or after the digits).
3034 * If so, the function returns REDIS_OK and *longval is set to the value
3035 * of the number. Otherwise REDIS_ERR is returned */
3036 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3037 char buf
[32], *endptr
;
3041 value
= strtol(s
, &endptr
, 10);
3042 if (endptr
[0] != '\0') return REDIS_ERR
;
3043 slen
= snprintf(buf
,32,"%ld",value
);
3045 /* If the number converted back into a string is not identical
3046 * then it's not possible to encode the string as integer */
3047 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3048 if (longval
) *longval
= value
;
3052 /* Try to encode a string object in order to save space */
3053 static robj
*tryObjectEncoding(robj
*o
) {
3057 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3058 return o
; /* Already encoded */
3060 /* It's not safe to encode shared objects: shared objects can be shared
3061 * everywhere in the "object space" of Redis. Encoded objects can only
3062 * appear as "values" (and not, for instance, as keys) */
3063 if (o
->refcount
> 1) return o
;
3065 /* Currently we try to encode only strings */
3066 redisAssert(o
->type
== REDIS_STRING
);
3068 /* Check if we can represent this string as a long integer */
3069 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3071 /* Ok, this object can be encoded */
3072 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3074 incrRefCount(shared
.integers
[value
]);
3075 return shared
.integers
[value
];
3077 o
->encoding
= REDIS_ENCODING_INT
;
3079 o
->ptr
= (void*) value
;
3084 /* Get a decoded version of an encoded object (returned as a new object).
3085 * If the object is already raw-encoded just increment the ref count. */
3086 static robj
*getDecodedObject(robj
*o
) {
3089 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3093 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3096 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3097 dec
= createStringObject(buf
,strlen(buf
));
3100 redisPanic("Unknown encoding type");
3104 /* Compare two string objects via strcmp() or alike.
3105 * Note that the objects may be integer-encoded. In such a case we
3106 * use snprintf() to get a string representation of the numbers on the stack
3107 * and compare the strings, it's much faster than calling getDecodedObject().
3109 * Important note: if objects are not integer encoded, but binary-safe strings,
3110 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3112 static int compareStringObjects(robj
*a
, robj
*b
) {
3113 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3114 char bufa
[128], bufb
[128], *astr
, *bstr
;
3117 if (a
== b
) return 0;
3118 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3119 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3125 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3126 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3132 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3135 static size_t stringObjectLen(robj
*o
) {
3136 redisAssert(o
->type
== REDIS_STRING
);
3137 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3138 return sdslen(o
->ptr
);
3142 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3146 static int getDoubleFromObject(robj
*o
, double *target
) {
3153 redisAssert(o
->type
== REDIS_STRING
);
3154 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3155 value
= strtod(o
->ptr
, &eptr
);
3156 if (eptr
[0] != '\0') return REDIS_ERR
;
3157 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3158 value
= (long)o
->ptr
;
3160 redisAssert(1 != 1);
3168 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3170 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3172 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3174 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3183 static int getLongLongFromObject(robj
*o
, long long *target
) {
3190 redisAssert(o
->type
== REDIS_STRING
);
3191 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3192 value
= strtoll(o
->ptr
, &eptr
, 10);
3193 if (eptr
[0] != '\0') return REDIS_ERR
;
3194 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3195 value
= (long)o
->ptr
;
3197 redisAssert(1 != 1);
3205 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3207 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3209 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3211 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3220 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3223 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3224 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3226 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3228 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3237 /*============================ RDB saving/loading =========================== */
/* Write the one-byte 'type' to 'fp'.
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write time 't' to 'fp' as a 4 byte value: the time is first converted to
 * int32_t and the bytes of that integer are written as they are in memory.
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp;

    stamp = (int32_t) t;
    return (fwrite(&stamp,4,1,fp) == 0) ? -1 : 0;
}
3250 /* check rdbLoadLen() comments for more info */
3251 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3252 unsigned char buf
[2];
3255 /* Save a 6 bit len */
3256 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3257 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3258 } else if (len
< (1<<14)) {
3259 /* Save a 14 bit len */
3260 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3262 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3264 /* Save a 32 bit len */
3265 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3266 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3268 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3273 /* String objects in the form "2391" "-100" without any space and with a
3274 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3275 * encoded as integers to save space */
3276 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3278 char *endptr
, buf
[32];
3280 /* Check if it's possible to encode this value as a number */
3281 value
= strtoll(s
, &endptr
, 10);
3282 if (endptr
[0] != '\0') return 0;
3283 snprintf(buf
,32,"%lld",value
);
3285 /* If the number converted back into a string is not identical
3286 * then it's not possible to encode the string as integer */
3287 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3289 /* Finally check if it fits in our ranges */
3290 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3291 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3292 enc
[1] = value
&0xFF;
3294 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3295 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3296 enc
[1] = value
&0xFF;
3297 enc
[2] = (value
>>8)&0xFF;
3299 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3300 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3301 enc
[1] = value
&0xFF;
3302 enc
[2] = (value
>>8)&0xFF;
3303 enc
[3] = (value
>>16)&0xFF;
3304 enc
[4] = (value
>>24)&0xFF;
3311 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3312 size_t comprlen
, outlen
;
3316 /* We require at least four bytes compression for this to be worth it */
3317 if (len
<= 4) return 0;
3319 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3320 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3321 if (comprlen
== 0) {
3325 /* Data compressed! Let's save it on disk */
3326 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3327 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3328 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3329 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3330 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3339 /* Save a string objet as [len][data] on disk. If the object is a string
3340 * representation of an integer value we try to safe it in a special form */
3341 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3344 /* Try integer encoding */
3346 unsigned char buf
[5];
3347 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3348 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3353 /* Try LZF compression - under 20 bytes it's unable to compress even
3354 * aaaaaaaaaaaaaaaaaa so skip it */
3355 if (server
.rdbcompression
&& len
> 20) {
3358 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3359 if (retval
== -1) return -1;
3360 if (retval
> 0) return 0;
3361 /* retval == 0 means data can't be compressed, save the old way */
3364 /* Store verbatim */
3365 if (rdbSaveLen(fp
,len
) == -1) return -1;
3366 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3370 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3371 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3374 /* Avoid incr/decr ref count business when possible.
3375 * This plays well with copy-on-write given that we are probably
3376 * in a child process (BGSAVE). Also this makes sure key objects
3377 * of swapped objects are not incRefCount-ed (an assert does not allow
3378 * this in order to avoid bugs) */
3379 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3380 obj
= getDecodedObject(obj
);
3381 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3384 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    char *text = (char*)buf+1;
    int len = 1;

    if (isnan(val)) {
        buf[0] = 253;
    } else if (isfinite(val)) {
        /* Length byte followed by the "%.17g" rendering of the number */
        snprintf(text,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen(text);
        len += buf[0];
    } else {
        buf[0] = (val < 0) ? 255 : 254;
    }
    return (fwrite(buf,len,1,fp) == 0) ? -1 : 0;
}
3416 /* Save a Redis object. */
3417 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3418 if (o
->type
== REDIS_STRING
) {
3419 /* Save a string value */
3420 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3421 } else if (o
->type
== REDIS_LIST
) {
3422 /* Save a list value */
3423 list
*list
= o
->ptr
;
3427 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3428 listRewind(list
,&li
);
3429 while((ln
= listNext(&li
))) {
3430 robj
*eleobj
= listNodeValue(ln
);
3432 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3434 } else if (o
->type
== REDIS_SET
) {
3435 /* Save a set value */
3437 dictIterator
*di
= dictGetIterator(set
);
3440 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3441 while((de
= dictNext(di
)) != NULL
) {
3442 robj
*eleobj
= dictGetEntryKey(de
);
3444 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3446 dictReleaseIterator(di
);
3447 } else if (o
->type
== REDIS_ZSET
) {
3448 /* Save a set value */
3450 dictIterator
*di
= dictGetIterator(zs
->dict
);
3453 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3454 while((de
= dictNext(di
)) != NULL
) {
3455 robj
*eleobj
= dictGetEntryKey(de
);
3456 double *score
= dictGetEntryVal(de
);
3458 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3459 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3461 dictReleaseIterator(di
);
3462 } else if (o
->type
== REDIS_HASH
) {
3463 /* Save a hash value */
3464 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3465 unsigned char *p
= zipmapRewind(o
->ptr
);
3466 unsigned int count
= zipmapLen(o
->ptr
);
3467 unsigned char *key
, *val
;
3468 unsigned int klen
, vlen
;
3470 if (rdbSaveLen(fp
,count
) == -1) return -1;
3471 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3472 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3473 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3476 dictIterator
*di
= dictGetIterator(o
->ptr
);
3479 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3480 while((de
= dictNext(di
)) != NULL
) {
3481 robj
*key
= dictGetEntryKey(de
);
3482 robj
*val
= dictGetEntryVal(de
);
3484 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3485 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3487 dictReleaseIterator(di
);
3490 redisPanic("Unknown object type");
3495 /* Return the length the object will have on disk if saved with
3496 * the rdbSaveObject() function. Currently we use a trick to get
3497 * this length with very little changes to the code. In the future
3498 * we could switch to a faster solution. */
3499 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3500 if (fp
== NULL
) fp
= server
.devnull
;
3502 assert(rdbSaveObject(fp
,o
) != 1);
3506 /* Return the number of pages required to save this object in the swap file */
3507 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3508 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3510 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3513 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3514 static int rdbSave(char *filename
) {
3515 dictIterator
*di
= NULL
;
3520 time_t now
= time(NULL
);
3522 /* Wait for I/O therads to terminate, just in case this is a
3523 * foreground-saving, to avoid seeking the swap file descriptor at the
3525 if (server
.vm_enabled
)
3526 waitEmptyIOJobsQueue();
3528 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3529 fp
= fopen(tmpfile
,"w");
3531 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3534 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3535 for (j
= 0; j
< server
.dbnum
; j
++) {
3536 redisDb
*db
= server
.db
+j
;
3538 if (dictSize(d
) == 0) continue;
3539 di
= dictGetIterator(d
);
3545 /* Write the SELECT DB opcode */
3546 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3547 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3549 /* Iterate this DB writing every entry */
3550 while((de
= dictNext(di
)) != NULL
) {
3551 robj
*key
= dictGetEntryKey(de
);
3552 robj
*o
= dictGetEntryVal(de
);
3553 time_t expiretime
= getExpire(db
,key
);
3555 /* Save the expire time */
3556 if (expiretime
!= -1) {
3557 /* If this key is already expired skip it */
3558 if (expiretime
< now
) continue;
3559 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3560 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3562 /* Save the key and associated value. This requires special
3563 * handling if the value is swapped out. */
3564 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3565 key
->storage
== REDIS_VM_SWAPPING
) {
3566 /* Save type, key, value */
3567 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3568 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3569 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3571 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3573 /* Get a preview of the object in memory */
3574 po
= vmPreviewObject(key
);
3575 /* Save type, key, value */
3576 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3577 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3578 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3579 /* Remove the loaded object from memory */
3583 dictReleaseIterator(di
);
3586 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3588 /* Make sure data will not remain on the OS's output buffers */
3593 /* Use RENAME to make sure the DB file is changed atomically only
3594 * if the generate DB file is ok. */
3595 if (rename(tmpfile
,filename
) == -1) {
3596 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3600 redisLog(REDIS_NOTICE
,"DB saved on disk");
3602 server
.lastsave
= time(NULL
);
3608 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3609 if (di
) dictReleaseIterator(di
);
/* Start saving the DB to 'filename' from a forked child process.
 *
 * What the surviving lines show: REDIS_ERR is returned right away when a
 * background save child is already recorded in server.bgsavechildpid. With
 * VM enabled the I/O jobs queue is drained before fork(). The child reopens
 * the swap file (VM only) and runs rdbSave(). In the parent a failed fork
 * is logged as a warning; otherwise the child pid is logged, stored in
 * server.bgsavechildpid and updateDictResizePolicy() is called.
 *
 * NOTE(review): the child's exit calls and the parent's return statements
 * are missing from this extract - confirm against the full file. */
3613 static int rdbSaveBackground(char *filename
) {
3616 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3617 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3618 if ((childpid
= fork()) == 0) {
3620 if (server
.vm_enabled
) vmReopenSwapFile();
3622 if (rdbSave(filename
) == REDIS_OK
) {
3629 if (childpid
== -1) {
3630 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3634 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3635 server
.bgsavechildpid
= childpid
;
3636 updateDictResizePolicy();
3639 return REDIS_OK
; /* unreached */
/* Remove the temporary file "temp-<childpid>.rdb" from the current
 * directory. The result of the removal is not checked. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one-byte type from 'fp'.
 * Returns the byte as a non-negative int, or -1 if it could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a 4 byte time value from 'fp' (the in-memory bytes of an int32_t)
 * and return it as a time_t. Returns -1 if the value could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) == 0) return -1;
    return (time_t) stamp;
}
3661 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3662 * of this file for a description of how this are stored on disk.
3664 * isencoded is set to 1 if the readed length is not actually a length but
3665 * an "encoding type", check the above comments for more info */
3666 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3667 unsigned char buf
[2];
3671 if (isencoded
) *isencoded
= 0;
3672 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3673 type
= (buf
[0]&0xC0)>>6;
3674 if (type
== REDIS_RDB_6BITLEN
) {
3675 /* Read a 6 bit len */
3677 } else if (type
== REDIS_RDB_ENCVAL
) {
3678 /* Read a 6 bit len encoding type */
3679 if (isencoded
) *isencoded
= 1;
3681 } else if (type
== REDIS_RDB_14BITLEN
) {
3682 /* Read a 14 bit len */
3683 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3684 return ((buf
[0]&0x3F)<<8)|buf
[1];
3686 /* Read a 32 bit len */
3687 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3692 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3693 unsigned char enc
[4];
3696 if (enctype
== REDIS_RDB_ENC_INT8
) {
3697 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3698 val
= (signed char)enc
[0];
3699 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3701 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3702 v
= enc
[0]|(enc
[1]<<8);
3704 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3706 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3707 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3710 val
= 0; /* anti-warning */
3711 redisPanic("Unknown RDB integer encoding type");
3713 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3716 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3717 unsigned int len
, clen
;
3718 unsigned char *c
= NULL
;
3721 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3722 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3723 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3724 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3725 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3726 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3728 return createObject(REDIS_STRING
,val
);
3735 static robj
*rdbLoadStringObject(FILE*fp
) {
3740 len
= rdbLoadLen(fp
,&isencoded
);
3743 case REDIS_RDB_ENC_INT8
:
3744 case REDIS_RDB_ENC_INT16
:
3745 case REDIS_RDB_ENC_INT32
:
3746 return rdbLoadIntegerObject(fp
,len
);
3747 case REDIS_RDB_ENC_LZF
:
3748 return rdbLoadLzfStringObject(fp
);
3750 redisPanic("Unknown RDB encoding type");
3754 if (len
== REDIS_RDB_LENERR
) return NULL
;
3755 val
= sdsnewlen(NULL
,len
);
3756 if (len
&& fread(val
,len
,1,fp
) == 0) {
3760 return createObject(REDIS_STRING
,val
);
3763 /* For information about double serialization check rdbSaveDoubleValue() */
3764 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3768 if (fread(&len
,1,1,fp
) == 0) return -1;
3770 case 255: *val
= R_NegInf
; return 0;
3771 case 254: *val
= R_PosInf
; return 0;
3772 case 253: *val
= R_Nan
; return 0;
3774 if (fread(buf
,len
,1,fp
) == 0) return -1;
3776 sscanf(buf
, "%lg", val
);
3781 /* Load a Redis object of the specified type from the specified file.
3782 * On success a newly allocated object is returned, otherwise NULL. */
3783 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3786 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3787 if (type
== REDIS_STRING
) {
3788 /* Read string value */
3789 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3790 o
= tryObjectEncoding(o
);
3791 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3792 /* Read list/set value */
3795 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3796 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3797 /* It's faster to expand the dict to the right size asap in order
3798 * to avoid rehashing */
3799 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3800 dictExpand(o
->ptr
,listlen
);
3801 /* Load every single element of the list/set */
3805 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3806 ele
= tryObjectEncoding(ele
);
3807 if (type
== REDIS_LIST
) {
3808 listAddNodeTail((list
*)o
->ptr
,ele
);
3810 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3813 } else if (type
== REDIS_ZSET
) {
3814 /* Read list/set value */
3818 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3819 o
= createZsetObject();
3821 /* Load every single element of the list/set */
3824 double *score
= zmalloc(sizeof(double));
3826 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3827 ele
= tryObjectEncoding(ele
);
3828 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3829 dictAdd(zs
->dict
,ele
,score
);
3830 zslInsert(zs
->zsl
,*score
,ele
);
3831 incrRefCount(ele
); /* added to skiplist */
3833 } else if (type
== REDIS_HASH
) {
3836 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3837 o
= createHashObject();
3838 /* Too many entries? Use an hash table. */
3839 if (hashlen
> server
.hash_max_zipmap_entries
)
3840 convertToRealHash(o
);
3841 /* Load every key/value, then set it into the zipmap or hash
3842 * table, as needed. */
3846 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3847 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3848 /* If we are using a zipmap and there are too big values
3849 * the object is converted to real hash table encoding. */
3850 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3851 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3852 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3854 convertToRealHash(o
);
3857 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3858 unsigned char *zm
= o
->ptr
;
3860 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3861 val
->ptr
,sdslen(val
->ptr
),NULL
);
3866 key
= tryObjectEncoding(key
);
3867 val
= tryObjectEncoding(val
);
3868 dictAdd((dict
*)o
->ptr
,key
,val
);
3872 redisPanic("Unknown object type");
3877 static int rdbLoad(char *filename
) {
3879 robj
*keyobj
= NULL
;
3881 int type
, retval
, rdbver
;
3882 dict
*d
= server
.db
[0].dict
;
3883 redisDb
*db
= server
.db
+0;
3885 time_t expiretime
= -1, now
= time(NULL
);
3886 long long loadedkeys
= 0;
3888 fp
= fopen(filename
,"r");
3889 if (!fp
) return REDIS_ERR
;
3890 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3892 if (memcmp(buf
,"REDIS",5) != 0) {
3894 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3897 rdbver
= atoi(buf
+5);
3900 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3907 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3908 if (type
== REDIS_EXPIRETIME
) {
3909 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3910 /* We read the time so we need to read the object type again */
3911 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3913 if (type
== REDIS_EOF
) break;
3914 /* Handle SELECT DB opcode as a special case */
3915 if (type
== REDIS_SELECTDB
) {
3916 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3918 if (dbid
>= (unsigned)server
.dbnum
) {
3919 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3922 db
= server
.db
+dbid
;
3927 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3929 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3930 /* Add the new object in the hash table */
3931 retval
= dictAdd(d
,keyobj
,o
);
3932 if (retval
== DICT_ERR
) {
3933 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3936 /* Set the expire time if needed */
3937 if (expiretime
!= -1) {
3938 setExpire(db
,keyobj
,expiretime
);
3939 /* Delete this key if already expired */
3940 if (expiretime
< now
) deleteKey(db
,keyobj
);
3944 /* Handle swapping while loading big datasets when VM is on */
3946 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3947 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3948 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3955 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3956 if (keyobj
) decrRefCount(keyobj
);
3957 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3959 return REDIS_ERR
; /* Just to avoid warning */
3962 /*================================== Commands =============================== */
3964 static void authCommand(redisClient
*c
) {
3965 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3966 c
->authenticated
= 1;
3967 addReply(c
,shared
.ok
);
3969 c
->authenticated
= 0;
3970 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3974 static void pingCommand(redisClient
*c
) {
3975 addReply(c
,shared
.pong
);
3978 static void echoCommand(redisClient
*c
) {
3979 addReplyBulk(c
,c
->argv
[1]);
3982 /*=================================== Strings =============================== */
3984 static void setGenericCommand(redisClient
*c
, int nx
) {
3987 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3988 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3989 if (retval
== DICT_ERR
) {
3991 /* If the key is about a swapped value, we want a new key object
3992 * to overwrite the old. So we delete the old key in the database.
3993 * This will also make sure that swap pages about the old object
3994 * will be marked as free. */
3995 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3996 incrRefCount(c
->argv
[1]);
3997 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3998 incrRefCount(c
->argv
[2]);
4000 addReply(c
,shared
.czero
);
4004 incrRefCount(c
->argv
[1]);
4005 incrRefCount(c
->argv
[2]);
4008 removeExpire(c
->db
,c
->argv
[1]);
4009 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4012 static void setCommand(redisClient
*c
) {
4013 setGenericCommand(c
,0);
4016 static void setnxCommand(redisClient
*c
) {
4017 setGenericCommand(c
,1);
4020 static int getGenericCommand(redisClient
*c
) {
4023 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4026 if (o
->type
!= REDIS_STRING
) {
4027 addReply(c
,shared
.wrongtypeerr
);
4035 static void getCommand(redisClient
*c
) {
4036 getGenericCommand(c
);
4039 static void getsetCommand(redisClient
*c
) {
4040 if (getGenericCommand(c
) == REDIS_ERR
) return;
4041 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
4042 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4044 incrRefCount(c
->argv
[1]);
4046 incrRefCount(c
->argv
[2]);
4048 removeExpire(c
->db
,c
->argv
[1]);
4051 static void mgetCommand(redisClient
*c
) {
4054 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4055 for (j
= 1; j
< c
->argc
; j
++) {
4056 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4058 addReply(c
,shared
.nullbulk
);
4060 if (o
->type
!= REDIS_STRING
) {
4061 addReply(c
,shared
.nullbulk
);
4069 static void msetGenericCommand(redisClient
*c
, int nx
) {
4070 int j
, busykeys
= 0;
4072 if ((c
->argc
% 2) == 0) {
4073 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4076 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4077 * set nothing at all if at least one already key exists. */
4079 for (j
= 1; j
< c
->argc
; j
+= 2) {
4080 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4086 addReply(c
, shared
.czero
);
4090 for (j
= 1; j
< c
->argc
; j
+= 2) {
4093 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4094 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4095 if (retval
== DICT_ERR
) {
4096 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4097 incrRefCount(c
->argv
[j
+1]);
4099 incrRefCount(c
->argv
[j
]);
4100 incrRefCount(c
->argv
[j
+1]);
4102 removeExpire(c
->db
,c
->argv
[j
]);
4104 server
.dirty
+= (c
->argc
-1)/2;
4105 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4108 static void msetCommand(redisClient
*c
) {
4109 msetGenericCommand(c
,0);
4112 static void msetnxCommand(redisClient
*c
) {
4113 msetGenericCommand(c
,1);
4116 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4121 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4123 if (getLongLongFromObjectOrReply(c
, o
, &value
, NULL
) != REDIS_OK
) return;
4126 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4127 o
= tryObjectEncoding(o
);
4128 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4129 if (retval
== DICT_ERR
) {
4130 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4131 removeExpire(c
->db
,c
->argv
[1]);
4133 incrRefCount(c
->argv
[1]);
4136 addReply(c
,shared
.colon
);
4138 addReply(c
,shared
.crlf
);
4141 static void incrCommand(redisClient
*c
) {
4142 incrDecrCommand(c
,1);
4145 static void decrCommand(redisClient
*c
) {
4146 incrDecrCommand(c
,-1);
4149 static void incrbyCommand(redisClient
*c
) {
4152 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4153 incrDecrCommand(c
,incr
);
4156 static void decrbyCommand(redisClient
*c
) {
4159 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4160 incrDecrCommand(c
,-incr
);
4163 static void appendCommand(redisClient
*c
) {
4168 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4170 /* Create the key */
4171 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4172 incrRefCount(c
->argv
[1]);
4173 incrRefCount(c
->argv
[2]);
4174 totlen
= stringObjectLen(c
->argv
[2]);
4178 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4181 o
= dictGetEntryVal(de
);
4182 if (o
->type
!= REDIS_STRING
) {
4183 addReply(c
,shared
.wrongtypeerr
);
4186 /* If the object is specially encoded or shared we have to make
4188 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4189 robj
*decoded
= getDecodedObject(o
);
4191 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4192 decrRefCount(decoded
);
4193 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4196 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4197 o
->ptr
= sdscatlen(o
->ptr
,
4198 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4200 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4201 (unsigned long) c
->argv
[2]->ptr
);
4203 totlen
= sdslen(o
->ptr
);
4206 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4209 static void substrCommand(redisClient
*c
) {
4211 long start
= atoi(c
->argv
[2]->ptr
);
4212 long end
= atoi(c
->argv
[3]->ptr
);
4213 size_t rangelen
, strlen
;
4216 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4217 checkType(c
,o
,REDIS_STRING
)) return;
4219 o
= getDecodedObject(o
);
4220 strlen
= sdslen(o
->ptr
);
4222 /* convert negative indexes */
4223 if (start
< 0) start
= strlen
+start
;
4224 if (end
< 0) end
= strlen
+end
;
4225 if (start
< 0) start
= 0;
4226 if (end
< 0) end
= 0;
4228 /* indexes sanity checks */
4229 if (start
> end
|| (size_t)start
>= strlen
) {
4230 /* Out of range start or start > end result in null reply */
4231 addReply(c
,shared
.nullbulk
);
4235 if ((size_t)end
>= strlen
) end
= strlen
-1;
4236 rangelen
= (end
-start
)+1;
4238 /* Return the result */
4239 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4240 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4241 addReplySds(c
,range
);
4242 addReply(c
,shared
.crlf
);
4246 /* ========================= Type agnostic commands ========================= */
4248 static void delCommand(redisClient
*c
) {
4251 for (j
= 1; j
< c
->argc
; j
++) {
4252 if (deleteKey(c
->db
,c
->argv
[j
])) {
4257 addReplyLong(c
,deleted
);
4260 static void existsCommand(redisClient
*c
) {
4261 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4264 static void selectCommand(redisClient
*c
) {
4265 int id
= atoi(c
->argv
[1]->ptr
);
4267 if (selectDb(c
,id
) == REDIS_ERR
) {
4268 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4270 addReply(c
,shared
.ok
);
4274 static void randomkeyCommand(redisClient
*c
) {
4279 de
= dictGetRandomKey(c
->db
->dict
);
4280 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4284 addReply(c
,shared
.nullbulk
);
4288 key
= dictGetEntryKey(de
);
4289 if (server
.vm_enabled
) {
4290 key
= dupStringObject(key
);
4291 addReplyBulk(c
,key
);
4294 addReplyBulk(c
,key
);
4298 static void keysCommand(redisClient
*c
) {
4301 sds pattern
= c
->argv
[1]->ptr
;
4302 int plen
= sdslen(pattern
);
4303 unsigned long numkeys
= 0;
4304 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4306 di
= dictGetIterator(c
->db
->dict
);
4308 decrRefCount(lenobj
);
4309 while((de
= dictNext(di
)) != NULL
) {
4310 robj
*keyobj
= dictGetEntryKey(de
);
4312 sds key
= keyobj
->ptr
;
4313 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4314 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4315 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4316 addReplyBulk(c
,keyobj
);
4321 dictReleaseIterator(di
);
4322 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4325 static void dbsizeCommand(redisClient
*c
) {
4327 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4330 static void lastsaveCommand(redisClient
*c
) {
4332 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4335 static void typeCommand(redisClient
*c
) {
4339 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4344 case REDIS_STRING
: type
= "+string"; break;
4345 case REDIS_LIST
: type
= "+list"; break;
4346 case REDIS_SET
: type
= "+set"; break;
4347 case REDIS_ZSET
: type
= "+zset"; break;
4348 case REDIS_HASH
: type
= "+hash"; break;
4349 default: type
= "+unknown"; break;
4352 addReplySds(c
,sdsnew(type
));
4353 addReply(c
,shared
.crlf
);
4356 static void saveCommand(redisClient
*c
) {
4357 if (server
.bgsavechildpid
!= -1) {
4358 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4361 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4362 addReply(c
,shared
.ok
);
4364 addReply(c
,shared
.err
);
4368 static void bgsaveCommand(redisClient
*c
) {
4369 if (server
.bgsavechildpid
!= -1) {
4370 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4373 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4374 char *status
= "+Background saving started\r\n";
4375 addReplySds(c
,sdsnew(status
));
4377 addReply(c
,shared
.err
);
4381 static void shutdownCommand(redisClient
*c
) {
4382 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4383 /* Kill the saving child if there is a background saving in progress.
4384 We want to avoid race conditions, for instance our saving child may
4385 overwrite the synchronous saving done by SHUTDOWN. */
4386 if (server
.bgsavechildpid
!= -1) {
4387 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4388 kill(server
.bgsavechildpid
,SIGKILL
);
4389 rdbRemoveTempFile(server
.bgsavechildpid
);
4391 if (server
.appendonly
) {
4392 /* Append only file: fsync() the AOF and exit */
4393 fsync(server
.appendfd
);
4394 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4397 /* Snapshotting. Perform a SYNC SAVE and exit */
4398 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4399 if (server
.daemonize
)
4400 unlink(server
.pidfile
);
4401 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4402 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4403 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4406 /* Ooops.. error saving! The best we can do is to continue
4407 * operating. Note that if there was a background saving process,
4408 * in the next cron() Redis will be notified that the background
4409 * saving aborted, handling special stuff like slaves pending for
4410 * synchronization... */
4411 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4413 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4418 static void renameGenericCommand(redisClient
*c
, int nx
) {
4421 /* To use the same key as src and dst is probably an error */
4422 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4423 addReply(c
,shared
.sameobjecterr
);
4427 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4431 deleteIfVolatile(c
->db
,c
->argv
[2]);
4432 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4435 addReply(c
,shared
.czero
);
4438 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4440 incrRefCount(c
->argv
[2]);
4442 deleteKey(c
->db
,c
->argv
[1]);
4444 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4447 static void renameCommand(redisClient
*c
) {
4448 renameGenericCommand(c
,0);
4451 static void renamenxCommand(redisClient
*c
) {
4452 renameGenericCommand(c
,1);
4455 static void moveCommand(redisClient
*c
) {
4460 /* Obtain source and target DB pointers */
4463 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4464 addReply(c
,shared
.outofrangeerr
);
4468 selectDb(c
,srcid
); /* Back to the source DB */
4470 /* If the user is moving using as target the same
4471 * DB as the source DB it is probably an error. */
4473 addReply(c
,shared
.sameobjecterr
);
4477 /* Check if the element exists and get a reference */
4478 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4480 addReply(c
,shared
.czero
);
4484 /* Try to add the element to the target DB */
4485 deleteIfVolatile(dst
,c
->argv
[1]);
4486 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4487 addReply(c
,shared
.czero
);
4490 incrRefCount(c
->argv
[1]);
4493 /* OK! key moved, free the entry in the source DB */
4494 deleteKey(src
,c
->argv
[1]);
4496 addReply(c
,shared
.cone
);
4499 /* =================================== Lists ================================ */
4500 static void pushGenericCommand(redisClient
*c
, int where
) {
4504 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4506 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4507 addReply(c
,shared
.cone
);
4510 lobj
= createListObject();
4512 if (where
== REDIS_HEAD
) {
4513 listAddNodeHead(list
,c
->argv
[2]);
4515 listAddNodeTail(list
,c
->argv
[2]);
4517 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4518 incrRefCount(c
->argv
[1]);
4519 incrRefCount(c
->argv
[2]);
4521 if (lobj
->type
!= REDIS_LIST
) {
4522 addReply(c
,shared
.wrongtypeerr
);
4525 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4526 addReply(c
,shared
.cone
);
4530 if (where
== REDIS_HEAD
) {
4531 listAddNodeHead(list
,c
->argv
[2]);
4533 listAddNodeTail(list
,c
->argv
[2]);
4535 incrRefCount(c
->argv
[2]);
4538 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4541 static void lpushCommand(redisClient
*c
) {
4542 pushGenericCommand(c
,REDIS_HEAD
);
4545 static void rpushCommand(redisClient
*c
) {
4546 pushGenericCommand(c
,REDIS_TAIL
);
4549 static void llenCommand(redisClient
*c
) {
4553 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4554 checkType(c
,o
,REDIS_LIST
)) return;
4557 addReplyUlong(c
,listLength(l
));
4560 static void lindexCommand(redisClient
*c
) {
4562 int index
= atoi(c
->argv
[2]->ptr
);
4566 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4567 checkType(c
,o
,REDIS_LIST
)) return;
4570 ln
= listIndex(list
, index
);
4572 addReply(c
,shared
.nullbulk
);
4574 robj
*ele
= listNodeValue(ln
);
4575 addReplyBulk(c
,ele
);
4579 static void lsetCommand(redisClient
*c
) {
4581 int index
= atoi(c
->argv
[2]->ptr
);
4585 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4586 checkType(c
,o
,REDIS_LIST
)) return;
4589 ln
= listIndex(list
, index
);
4591 addReply(c
,shared
.outofrangeerr
);
4593 robj
*ele
= listNodeValue(ln
);
4596 listNodeValue(ln
) = c
->argv
[3];
4597 incrRefCount(c
->argv
[3]);
4598 addReply(c
,shared
.ok
);
4603 static void popGenericCommand(redisClient
*c
, int where
) {
4608 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4609 checkType(c
,o
,REDIS_LIST
)) return;
4612 if (where
== REDIS_HEAD
)
4613 ln
= listFirst(list
);
4615 ln
= listLast(list
);
4618 addReply(c
,shared
.nullbulk
);
4620 robj
*ele
= listNodeValue(ln
);
4621 addReplyBulk(c
,ele
);
4622 listDelNode(list
,ln
);
4623 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4628 static void lpopCommand(redisClient
*c
) {
4629 popGenericCommand(c
,REDIS_HEAD
);
4632 static void rpopCommand(redisClient
*c
) {
4633 popGenericCommand(c
,REDIS_TAIL
);
4636 static void lrangeCommand(redisClient
*c
) {
4638 int start
= atoi(c
->argv
[2]->ptr
);
4639 int end
= atoi(c
->argv
[3]->ptr
);
4646 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4647 || checkType(c
,o
,REDIS_LIST
)) return;
4649 llen
= listLength(list
);
4651 /* convert negative indexes */
4652 if (start
< 0) start
= llen
+start
;
4653 if (end
< 0) end
= llen
+end
;
4654 if (start
< 0) start
= 0;
4655 if (end
< 0) end
= 0;
4657 /* indexes sanity checks */
4658 if (start
> end
|| start
>= llen
) {
4659 /* Out of range start or start > end result in empty list */
4660 addReply(c
,shared
.emptymultibulk
);
4663 if (end
>= llen
) end
= llen
-1;
4664 rangelen
= (end
-start
)+1;
4666 /* Return the result in form of a multi-bulk reply */
4667 ln
= listIndex(list
, start
);
4668 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4669 for (j
= 0; j
< rangelen
; j
++) {
4670 ele
= listNodeValue(ln
);
4671 addReplyBulk(c
,ele
);
4676 static void ltrimCommand(redisClient
*c
) {
4678 int start
= atoi(c
->argv
[2]->ptr
);
4679 int end
= atoi(c
->argv
[3]->ptr
);
4681 int j
, ltrim
, rtrim
;
4685 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4686 checkType(c
,o
,REDIS_LIST
)) return;
4688 llen
= listLength(list
);
4690 /* convert negative indexes */
4691 if (start
< 0) start
= llen
+start
;
4692 if (end
< 0) end
= llen
+end
;
4693 if (start
< 0) start
= 0;
4694 if (end
< 0) end
= 0;
4696 /* indexes sanity checks */
4697 if (start
> end
|| start
>= llen
) {
4698 /* Out of range start or start > end result in empty list */
4702 if (end
>= llen
) end
= llen
-1;
4707 /* Remove list elements to perform the trim */
4708 for (j
= 0; j
< ltrim
; j
++) {
4709 ln
= listFirst(list
);
4710 listDelNode(list
,ln
);
4712 for (j
= 0; j
< rtrim
; j
++) {
4713 ln
= listLast(list
);
4714 listDelNode(list
,ln
);
4716 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4718 addReply(c
,shared
.ok
);
4721 static void lremCommand(redisClient
*c
) {
4724 listNode
*ln
, *next
;
4725 int toremove
= atoi(c
->argv
[2]->ptr
);
4729 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4730 checkType(c
,o
,REDIS_LIST
)) return;
4734 toremove
= -toremove
;
4737 ln
= fromtail
? list
->tail
: list
->head
;
4739 robj
*ele
= listNodeValue(ln
);
4741 next
= fromtail
? ln
->prev
: ln
->next
;
4742 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4743 listDelNode(list
,ln
);
4746 if (toremove
&& removed
== toremove
) break;
4750 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4751 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4754 /* This is the semantic of this command:
4755 * RPOPLPUSH srclist dstlist:
4756 * IF LLEN(srclist) > 0
4757 * element = RPOP srclist
4758 * LPUSH dstlist element
4765 * The idea is to be able to get an element from a list in a reliable way
4766 * since the element is not just returned but pushed against another list
4767 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4769 static void rpoplpushcommand(redisClient
*c
) {
4774 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4775 checkType(c
,sobj
,REDIS_LIST
)) return;
4776 srclist
= sobj
->ptr
;
4777 ln
= listLast(srclist
);
4780 addReply(c
,shared
.nullbulk
);
4782 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4783 robj
*ele
= listNodeValue(ln
);
4786 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4787 addReply(c
,shared
.wrongtypeerr
);
4791 /* Add the element to the target list (unless it's directly
4792 * passed to some BLPOP-ing client) */
4793 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4795 /* Create the list if the key does not exist */
4796 dobj
= createListObject();
4797 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4798 incrRefCount(c
->argv
[2]);
4800 dstlist
= dobj
->ptr
;
4801 listAddNodeHead(dstlist
,ele
);
4805 /* Send the element to the client as reply as well */
4806 addReplyBulk(c
,ele
);
4808 /* Finally remove the element from the source list */
4809 listDelNode(srclist
,ln
);
4810 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4815 /* ==================================== Sets ================================ */
4817 static void saddCommand(redisClient
*c
) {
4820 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4822 set
= createSetObject();
4823 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4824 incrRefCount(c
->argv
[1]);
4826 if (set
->type
!= REDIS_SET
) {
4827 addReply(c
,shared
.wrongtypeerr
);
4831 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4832 incrRefCount(c
->argv
[2]);
4834 addReply(c
,shared
.cone
);
4836 addReply(c
,shared
.czero
);
4840 static void sremCommand(redisClient
*c
) {
4843 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4844 checkType(c
,set
,REDIS_SET
)) return;
4846 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4848 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4849 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4850 addReply(c
,shared
.cone
);
4852 addReply(c
,shared
.czero
);
4856 static void smoveCommand(redisClient
*c
) {
4857 robj
*srcset
, *dstset
;
4859 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4860 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4862 /* If the source key does not exist return 0, if it's of the wrong type
4864 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4865 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4868 /* Error if the destination key is not a set as well */
4869 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4870 addReply(c
,shared
.wrongtypeerr
);
4873 /* Remove the element from the source set */
4874 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4875 /* Key not found in the src set! return zero */
4876 addReply(c
,shared
.czero
);
4879 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4880 deleteKey(c
->db
,c
->argv
[1]);
4882 /* Add the element to the destination set */
4884 dstset
= createSetObject();
4885 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4886 incrRefCount(c
->argv
[2]);
4888 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4889 incrRefCount(c
->argv
[3]);
4890 addReply(c
,shared
.cone
);
4893 static void sismemberCommand(redisClient
*c
) {
4896 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4897 checkType(c
,set
,REDIS_SET
)) return;
4899 if (dictFind(set
->ptr
,c
->argv
[2]))
4900 addReply(c
,shared
.cone
);
4902 addReply(c
,shared
.czero
);
4905 static void scardCommand(redisClient
*c
) {
4909 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4910 checkType(c
,o
,REDIS_SET
)) return;
4913 addReplyUlong(c
,dictSize(s
));
4916 static void spopCommand(redisClient
*c
) {
4920 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4921 checkType(c
,set
,REDIS_SET
)) return;
4923 de
= dictGetRandomKey(set
->ptr
);
4925 addReply(c
,shared
.nullbulk
);
4927 robj
*ele
= dictGetEntryKey(de
);
4929 addReplyBulk(c
,ele
);
4930 dictDelete(set
->ptr
,ele
);
4931 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4932 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4937 static void srandmemberCommand(redisClient
*c
) {
4941 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4942 checkType(c
,set
,REDIS_SET
)) return;
4944 de
= dictGetRandomKey(set
->ptr
);
4946 addReply(c
,shared
.nullbulk
);
4948 robj
*ele
= dictGetEntryKey(de
);
4950 addReplyBulk(c
,ele
);
4954 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4955 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4957 return dictSize(*d1
)-dictSize(*d2
);
4960 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4961 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4964 robj
*lenobj
= NULL
, *dstset
= NULL
;
4965 unsigned long j
, cardinality
= 0;
4967 for (j
= 0; j
< setsnum
; j
++) {
4971 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4972 lookupKeyRead(c
->db
,setskeys
[j
]);
4976 if (deleteKey(c
->db
,dstkey
))
4978 addReply(c
,shared
.czero
);
4980 addReply(c
,shared
.emptymultibulk
);
4984 if (setobj
->type
!= REDIS_SET
) {
4986 addReply(c
,shared
.wrongtypeerr
);
4989 dv
[j
] = setobj
->ptr
;
4991 /* Sort sets from the smallest to largest, this will improve our
4992 * algorithm's performance */
4993 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4995 /* The first thing we should output is the total number of elements...
4996 * since this is a multi-bulk write, but at this stage we don't know
4997 * the intersection set size, so we use a trick, append an empty object
4998 * to the output list and save the pointer to later modify it with the
5001 lenobj
= createObject(REDIS_STRING
,NULL
);
5003 decrRefCount(lenobj
);
5005 /* If we have a target key where to store the resulting set
5006 * create this key with an empty set inside */
5007 dstset
= createSetObject();
5010 /* Iterate all the elements of the first (smallest) set, and test
5011 * the element against all the other sets, if at least one set does
5012 * not include the element it is discarded */
5013 di
= dictGetIterator(dv
[0]);
5015 while((de
= dictNext(di
)) != NULL
) {
5018 for (j
= 1; j
< setsnum
; j
++)
5019 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5021 continue; /* at least one set does not contain the member */
5022 ele
= dictGetEntryKey(de
);
5024 addReplyBulk(c
,ele
);
5027 dictAdd(dstset
->ptr
,ele
,NULL
);
5031 dictReleaseIterator(di
);
5034 /* Store the resulting set into the target, if the intersection
5035 * is not an empty set. */
5036 deleteKey(c
->db
,dstkey
);
5037 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5038 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5039 incrRefCount(dstkey
);
5040 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5042 decrRefCount(dstset
);
5043 addReply(c
,shared
.czero
);
5047 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5052 static void sinterCommand(redisClient
*c
) {
5053 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5056 static void sinterstoreCommand(redisClient
*c
) {
5057 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
5060 #define REDIS_OP_UNION 0
5061 #define REDIS_OP_DIFF 1
5062 #define REDIS_OP_INTER 2
5064 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5065 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5068 robj
*dstset
= NULL
;
5069 int j
, cardinality
= 0;
5071 for (j
= 0; j
< setsnum
; j
++) {
5075 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5076 lookupKeyRead(c
->db
,setskeys
[j
]);
5081 if (setobj
->type
!= REDIS_SET
) {
5083 addReply(c
,shared
.wrongtypeerr
);
5086 dv
[j
] = setobj
->ptr
;
5089 /* We need a temp set object to store our union. If the dstkey
5090 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5091 * this set object will be the resulting object to set into the target key*/
5092 dstset
= createSetObject();
5094 /* Iterate all the elements of all the sets, add every element a single
5095 * time to the result set */
5096 for (j
= 0; j
< setsnum
; j
++) {
5097 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5098 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5100 di
= dictGetIterator(dv
[j
]);
5102 while((de
= dictNext(di
)) != NULL
) {
5105 /* dictAdd will not add the same element multiple times */
5106 ele
= dictGetEntryKey(de
);
5107 if (op
== REDIS_OP_UNION
|| j
== 0) {
5108 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5112 } else if (op
== REDIS_OP_DIFF
) {
5113 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5118 dictReleaseIterator(di
);
5120 /* result set is empty? Exit asap. */
5121 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5124 /* Output the content of the resulting set, if not in STORE mode */
5126 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5127 di
= dictGetIterator(dstset
->ptr
);
5128 while((de
= dictNext(di
)) != NULL
) {
5131 ele
= dictGetEntryKey(de
);
5132 addReplyBulk(c
,ele
);
5134 dictReleaseIterator(di
);
5135 decrRefCount(dstset
);
5137 /* If we have a target key where to store the resulting set
5138 * create this key with the result set inside */
5139 deleteKey(c
->db
,dstkey
);
5140 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5141 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5142 incrRefCount(dstkey
);
5143 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5145 decrRefCount(dstset
);
5146 addReply(c
,shared
.czero
);
5153 static void sunionCommand(redisClient
*c
) {
5154 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5157 static void sunionstoreCommand(redisClient
*c
) {
5158 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5161 static void sdiffCommand(redisClient
*c
) {
5162 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5165 static void sdiffstoreCommand(redisClient
*c
) {
5166 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5169 /* ==================================== ZSets =============================== */
5171 /* ZSETs are ordered sets using two data structures to hold the same elements
5172 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5175 * The elements are added to a hash table mapping Redis objects to scores.
5176 * At the same time the elements are added to a skip list mapping scores
5177 * to Redis objects (so objects are sorted by scores in this "view"). */
5179 /* This skiplist implementation is almost a C translation of the original
5180 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5181 * Alternative to Balanced Trees", modified in three ways:
5182 * a) this implementation allows for repeated values.
5183 * b) the comparison is not just by key (our 'score') but by satellite data.
5184 * c) there is a back pointer, so it's a doubly linked list with the back
5185 * pointers being only at "level 1". This allows to traverse the list
5186 * from tail to head, useful for ZREVRANGE. */
5188 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5189 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5191 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5193 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5199 static zskiplist
*zslCreate(void) {
5203 zsl
= zmalloc(sizeof(*zsl
));
5206 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5207 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5208 zsl
->header
->forward
[j
] = NULL
;
5210 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5211 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5212 zsl
->header
->span
[j
] = 0;
5214 zsl
->header
->backward
= NULL
;
5219 static void zslFreeNode(zskiplistNode
*node
) {
5220 decrRefCount(node
->obj
);
5221 zfree(node
->forward
);
5226 static void zslFree(zskiplist
*zsl
) {
5227 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5229 zfree(zsl
->header
->forward
);
5230 zfree(zsl
->header
->span
);
5233 next
= node
->forward
[0];
5240 static int zslRandomLevel(void) {
5242 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5244 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5247 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5248 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5249 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5253 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5254 /* store rank that is crossed to reach the insert position */
5255 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5257 while (x
->forward
[i
] &&
5258 (x
->forward
[i
]->score
< score
||
5259 (x
->forward
[i
]->score
== score
&&
5260 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5261 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5266 /* we assume the key is not already inside, since we allow duplicated
5267 * scores, and the re-insertion of score and redis object should never
5268 * happen since the caller of zslInsert() should test in the hash table
5269 * if the element is already inside or not. */
5270 level
= zslRandomLevel();
5271 if (level
> zsl
->level
) {
5272 for (i
= zsl
->level
; i
< level
; i
++) {
5274 update
[i
] = zsl
->header
;
5275 update
[i
]->span
[i
-1] = zsl
->length
;
5279 x
= zslCreateNode(level
,score
,obj
);
5280 for (i
= 0; i
< level
; i
++) {
5281 x
->forward
[i
] = update
[i
]->forward
[i
];
5282 update
[i
]->forward
[i
] = x
;
5284 /* update span covered by update[i] as x is inserted here */
5286 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5287 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5291 /* increment span for untouched levels */
5292 for (i
= level
; i
< zsl
->level
; i
++) {
5293 update
[i
]->span
[i
-1]++;
5296 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5298 x
->forward
[0]->backward
= x
;
5304 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5305 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5307 for (i
= 0; i
< zsl
->level
; i
++) {
5308 if (update
[i
]->forward
[i
] == x
) {
5310 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5312 update
[i
]->forward
[i
] = x
->forward
[i
];
5314 /* invariant: i > 0, because update[0]->forward[0]
5315 * is always equal to x */
5316 update
[i
]->span
[i
-1] -= 1;
5319 if (x
->forward
[0]) {
5320 x
->forward
[0]->backward
= x
->backward
;
5322 zsl
->tail
= x
->backward
;
5324 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5329 /* Delete an element with matching score/object from the skiplist. */
5330 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5331 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5335 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5336 while (x
->forward
[i
] &&
5337 (x
->forward
[i
]->score
< score
||
5338 (x
->forward
[i
]->score
== score
&&
5339 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5343 /* We may have multiple elements with the same score, what we need
5344 * is to find the element with both the right score and object. */
5346 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5347 zslDeleteNode(zsl
, x
, update
);
5351 return 0; /* not found */
5353 return 0; /* not found */
5356 /* Delete all the elements with score between min and max from the skiplist.
5357 * Min and max are inclusive, so any element with min <= score <= max is deleted.
5358 * Note that this function takes the reference to the hash table view of the
5359 * sorted set, in order to remove the elements from the hash table too. */
5360 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5361 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5362 unsigned long removed
= 0;
5366 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5367 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5371 /* We may have multiple elements with the same score, what we need
5372 * is to find the element with both the right score and object. */
5374 while (x
&& x
->score
<= max
) {
5375 zskiplistNode
*next
= x
->forward
[0];
5376 zslDeleteNode(zsl
, x
, update
);
5377 dictDelete(dict
,x
->obj
);
5382 return removed
; /* not found */
5385 /* Delete all the elements with rank between start and end from the skiplist.
5386 * Start and end are inclusive. Note that start and end need to be 1-based */
5387 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5388 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5389 unsigned long traversed
= 0, removed
= 0;
5393 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5394 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5395 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5403 while (x
&& traversed
<= end
) {
5404 zskiplistNode
*next
= x
->forward
[0];
5405 zslDeleteNode(zsl
, x
, update
);
5406 dictDelete(dict
,x
->obj
);
5415 /* Find the first node having a score equal or greater than the specified one.
5416 * Returns NULL if there is no match. */
5417 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5422 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5423 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5426 /* The loop above stops on the last node whose score is lower than
5427 * 'score': its successor at level 0, if any, is the first match. */
5428 return x
->forward
[0];
5431 /* Find the rank for an element by both score and key.
5432 * Returns 0 when the element cannot be found, rank otherwise.
5433 * Note that the rank is 1-based due to the span of zsl->header to the
5435 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5437 unsigned long rank
= 0;
5441 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5442 while (x
->forward
[i
] &&
5443 (x
->forward
[i
]->score
< score
||
5444 (x
->forward
[i
]->score
== score
&&
5445 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5446 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5450 /* x might be equal to zsl->header, so test if obj is non-NULL */
5451 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5458 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5459 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5461 unsigned long traversed
= 0;
5465 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5466 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5468 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5471 if (traversed
== rank
) {
5478 /* The actual Z-commands implementations */
5480 /* This generic command implements both ZADD and ZINCRBY.
5481 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5482 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5483 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5488 zsetobj
= lookupKeyWrite(c
->db
,key
);
5489 if (zsetobj
== NULL
) {
5490 zsetobj
= createZsetObject();
5491 dictAdd(c
->db
->dict
,key
,zsetobj
);
5494 if (zsetobj
->type
!= REDIS_ZSET
) {
5495 addReply(c
,shared
.wrongtypeerr
);
5501 /* Ok now since we implement both ZADD and ZINCRBY here the code
5502 * needs to handle the two different conditions. It's all about setting
5503 * '*score', that is, the new score to set, to the right value. */
5504 score
= zmalloc(sizeof(double));
5508 /* Read the old score. If the element was not present starts from 0 */
5509 de
= dictFind(zs
->dict
,ele
);
5511 double *oldscore
= dictGetEntryVal(de
);
5512 *score
= *oldscore
+ scoreval
;
5520 /* What follows is a simple remove and re-insert operation that is common
5521 * to both ZADD and ZINCRBY... */
5522 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5523 /* case 1: New element */
5524 incrRefCount(ele
); /* added to hash */
5525 zslInsert(zs
->zsl
,*score
,ele
);
5526 incrRefCount(ele
); /* added to skiplist */
5529 addReplyDouble(c
,*score
);
5531 addReply(c
,shared
.cone
);
5536 /* case 2: Score update operation */
5537 de
= dictFind(zs
->dict
,ele
);
5538 redisAssert(de
!= NULL
);
5539 oldscore
= dictGetEntryVal(de
);
5540 if (*score
!= *oldscore
) {
5543 /* Remove and insert the element in the skip list with new score */
5544 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5545 redisAssert(deleted
!= 0);
5546 zslInsert(zs
->zsl
,*score
,ele
);
5548 /* Update the score in the hash table */
5549 dictReplace(zs
->dict
,ele
,score
);
5555 addReplyDouble(c
,*score
);
5557 addReply(c
,shared
.czero
);
5561 static void zaddCommand(redisClient
*c
) {
5564 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5565 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5568 static void zincrbyCommand(redisClient
*c
) {
5571 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
5572 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5575 static void zremCommand(redisClient
*c
) {
5582 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5583 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5586 de
= dictFind(zs
->dict
,c
->argv
[2]);
5588 addReply(c
,shared
.czero
);
5591 /* Delete from the skiplist */
5592 oldscore
= dictGetEntryVal(de
);
5593 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5594 redisAssert(deleted
!= 0);
5596 /* Delete from the hash table */
5597 dictDelete(zs
->dict
,c
->argv
[2]);
5598 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5599 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5601 addReply(c
,shared
.cone
);
5604 static void zremrangebyscoreCommand(redisClient
*c
) {
5611 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
5612 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
5614 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5615 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5618 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5619 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5620 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5621 server
.dirty
+= deleted
;
5622 addReplyLong(c
,deleted
);
5625 static void zremrangebyrankCommand(redisClient
*c
) {
5633 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5634 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5636 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5637 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5639 llen
= zs
->zsl
->length
;
5641 /* convert negative indexes */
5642 if (start
< 0) start
= llen
+start
;
5643 if (end
< 0) end
= llen
+end
;
5644 if (start
< 0) start
= 0;
5645 if (end
< 0) end
= 0;
5647 /* indexes sanity checks */
5648 if (start
> end
|| start
>= llen
) {
5649 addReply(c
,shared
.czero
);
5652 if (end
>= llen
) end
= llen
-1;
5654 /* increment start and end because zsl*Rank functions
5655 * use 1-based rank */
5656 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5657 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5658 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5659 server
.dirty
+= deleted
;
5660 addReplyLong(c
, deleted
);
5668 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5669 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5670 unsigned long size1
, size2
;
5671 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5672 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5673 return size1
- size2
;
5676 #define REDIS_AGGR_SUM 1
5677 #define REDIS_AGGR_MIN 2
5678 #define REDIS_AGGR_MAX 3
5680 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5681 if (aggregate
== REDIS_AGGR_SUM
) {
5682 *target
= *target
+ val
;
5683 } else if (aggregate
== REDIS_AGGR_MIN
) {
5684 *target
= val
< *target
? val
: *target
;
5685 } else if (aggregate
== REDIS_AGGR_MAX
) {
5686 *target
= val
> *target
? val
: *target
;
5689 redisPanic("Unknown ZUNION/INTER aggregate type");
5693 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5695 int aggregate
= REDIS_AGGR_SUM
;
5702 /* expect zsetnum input keys to be given */
5703 zsetnum
= atoi(c
->argv
[2]->ptr
);
5705 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5709 /* test if the expected number of keys would overflow */
5710 if (3+zsetnum
> c
->argc
) {
5711 addReply(c
,shared
.syntaxerr
);
5715 /* read keys to be used for input */
5716 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5717 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5718 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5722 if (zsetobj
->type
!= REDIS_ZSET
) {
5724 addReply(c
,shared
.wrongtypeerr
);
5727 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5730 /* default all weights to 1 */
5731 src
[i
].weight
= 1.0;
5734 /* parse optional extra arguments */
5736 int remaining
= c
->argc
- j
;
5739 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5741 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5742 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
5745 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5747 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5748 aggregate
= REDIS_AGGR_SUM
;
5749 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5750 aggregate
= REDIS_AGGR_MIN
;
5751 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5752 aggregate
= REDIS_AGGR_MAX
;
5755 addReply(c
,shared
.syntaxerr
);
5761 addReply(c
,shared
.syntaxerr
);
5767 /* sort sets from the smallest to largest, this will improve our
5768 * algorithm's performance */
5769 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5771 dstobj
= createZsetObject();
5772 dstzset
= dstobj
->ptr
;
5774 if (op
== REDIS_OP_INTER
) {
5775 /* skip going over all entries if the smallest zset is NULL or empty */
5776 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5777 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5778 * from small to large, all src[i > 0].dict are non-empty too */
5779 di
= dictGetIterator(src
[0].dict
);
5780 while((de
= dictNext(di
)) != NULL
) {
5781 double *score
= zmalloc(sizeof(double)), value
;
5782 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5784 for (j
= 1; j
< zsetnum
; j
++) {
5785 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5787 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5788 zunionInterAggregate(score
, value
, aggregate
);
5794 /* skip entry when not present in every source dict */
5798 robj
*o
= dictGetEntryKey(de
);
5799 dictAdd(dstzset
->dict
,o
,score
);
5800 incrRefCount(o
); /* added to dictionary */
5801 zslInsert(dstzset
->zsl
,*score
,o
);
5802 incrRefCount(o
); /* added to skiplist */
5805 dictReleaseIterator(di
);
5807 } else if (op
== REDIS_OP_UNION
) {
5808 for (i
= 0; i
< zsetnum
; i
++) {
5809 if (!src
[i
].dict
) continue;
5811 di
= dictGetIterator(src
[i
].dict
);
5812 while((de
= dictNext(di
)) != NULL
) {
5813 /* skip key when already processed */
5814 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5816 double *score
= zmalloc(sizeof(double)), value
;
5817 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5819 /* because the zsets are sorted by size, it is only possible
5820 * for sets at larger indices to hold this entry */
5821 for (j
= (i
+1); j
< zsetnum
; j
++) {
5822 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5824 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5825 zunionInterAggregate(score
, value
, aggregate
);
5829 robj
*o
= dictGetEntryKey(de
);
5830 dictAdd(dstzset
->dict
,o
,score
);
5831 incrRefCount(o
); /* added to dictionary */
5832 zslInsert(dstzset
->zsl
,*score
,o
);
5833 incrRefCount(o
); /* added to skiplist */
5835 dictReleaseIterator(di
);
5838 /* unknown operator */
5839 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5842 deleteKey(c
->db
,dstkey
);
5843 if (dstzset
->zsl
->length
) {
5844 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5845 incrRefCount(dstkey
);
5846 addReplyLong(c
, dstzset
->zsl
->length
);
5849 decrRefCount(dstobj
);
5850 addReply(c
, shared
.czero
);
5855 static void zunionCommand(redisClient
*c
) {
5856 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5859 static void zinterCommand(redisClient
*c
) {
5860 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5863 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5875 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
5876 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
5878 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5880 } else if (c
->argc
>= 5) {
5881 addReply(c
,shared
.syntaxerr
);
5885 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5886 || checkType(c
,o
,REDIS_ZSET
)) return;
5891 /* convert negative indexes */
5892 if (start
< 0) start
= llen
+start
;
5893 if (end
< 0) end
= llen
+end
;
5894 if (start
< 0) start
= 0;
5895 if (end
< 0) end
= 0;
5897 /* indexes sanity checks */
5898 if (start
> end
|| start
>= llen
) {
5899 /* Out of range start or start > end result in empty list */
5900 addReply(c
,shared
.emptymultibulk
);
5903 if (end
>= llen
) end
= llen
-1;
5904 rangelen
= (end
-start
)+1;
5906 /* check if starting point is trivial, before searching
5907 * the element in log(N) time */
5909 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5912 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5915 /* Return the result in form of a multi-bulk reply */
5916 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5917 withscores
? (rangelen
*2) : rangelen
));
5918 for (j
= 0; j
< rangelen
; j
++) {
5920 addReplyBulk(c
,ele
);
5922 addReplyDouble(c
,ln
->score
);
5923 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5927 static void zrangeCommand(redisClient
*c
) {
5928 zrangeGenericCommand(c
,0);
5931 static void zrevrangeCommand(redisClient
*c
) {
5932 zrangeGenericCommand(c
,1);
5935 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5936 * If justcount is non-zero, just the count is returned. */
5937 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5940 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5941 int offset
= 0, limit
= -1;
5945 /* Parse the min-max interval. If one of the values is prefixed
5946 * by the "(" character, it's considered "open". For instance
5947 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5948 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5949 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5950 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5953 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5955 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5956 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5959 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5962 /* Parse "WITHSCORES": note that if the command was called with
5963 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5964 * enter the following paths to parse WITHSCORES and LIMIT. */
5965 if (c
->argc
== 5 || c
->argc
== 8) {
5966 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5971 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5975 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5980 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5981 addReply(c
,shared
.syntaxerr
);
5983 } else if (c
->argc
== (7 + withscores
)) {
5984 offset
= atoi(c
->argv
[5]->ptr
);
5985 limit
= atoi(c
->argv
[6]->ptr
);
5986 if (offset
< 0) offset
= 0;
5989 /* Ok, lookup the key and get the range */
5990 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5992 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5994 if (o
->type
!= REDIS_ZSET
) {
5995 addReply(c
,shared
.wrongtypeerr
);
5997 zset
*zsetobj
= o
->ptr
;
5998 zskiplist
*zsl
= zsetobj
->zsl
;
6000 robj
*ele
, *lenobj
= NULL
;
6001 unsigned long rangelen
= 0;
6003 /* Get the first node with the score >= min, or with
6004 * score > min if 'minex' is true. */
6005 ln
= zslFirstWithScore(zsl
,min
);
6006 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6009 /* No element matching the specified interval */
6010 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6014 /* We don't know in advance how many matching elements there
6015 * are in the list, so we push this object that will represent
6016 * the multi-bulk length in the output buffer, and will "fix"
6019 lenobj
= createObject(REDIS_STRING
,NULL
);
6021 decrRefCount(lenobj
);
6024 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6027 ln
= ln
->forward
[0];
6030 if (limit
== 0) break;
6033 addReplyBulk(c
,ele
);
6035 addReplyDouble(c
,ln
->score
);
6037 ln
= ln
->forward
[0];
6039 if (limit
> 0) limit
--;
6042 addReplyLong(c
,(long)rangelen
);
6044 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6045 withscores
? (rangelen
*2) : rangelen
);
6051 static void zrangebyscoreCommand(redisClient
*c
) {
6052 genericZrangebyscoreCommand(c
,0);
6055 static void zcountCommand(redisClient
*c
) {
6056 genericZrangebyscoreCommand(c
,1);
6059 static void zcardCommand(redisClient
*c
) {
6063 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6064 checkType(c
,o
,REDIS_ZSET
)) return;
6067 addReplyUlong(c
,zs
->zsl
->length
);
6070 static void zscoreCommand(redisClient
*c
) {
6075 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6076 checkType(c
,o
,REDIS_ZSET
)) return;
6079 de
= dictFind(zs
->dict
,c
->argv
[2]);
6081 addReply(c
,shared
.nullbulk
);
6083 double *score
= dictGetEntryVal(de
);
6085 addReplyDouble(c
,*score
);
6089 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6097 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6098 checkType(c
,o
,REDIS_ZSET
)) return;
6102 de
= dictFind(zs
->dict
,c
->argv
[2]);
6104 addReply(c
,shared
.nullbulk
);
6108 score
= dictGetEntryVal(de
);
6109 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6112 addReplyLong(c
, zsl
->length
- rank
);
6114 addReplyLong(c
, rank
-1);
6117 addReply(c
,shared
.nullbulk
);
6121 static void zrankCommand(redisClient
*c
) {
6122 zrankGenericCommand(c
, 0);
6125 static void zrevrankCommand(redisClient
*c
) {
6126 zrankGenericCommand(c
, 1);
6129 /* ========================= Hashes utility functions ======================= */
6130 #define REDIS_HASH_KEY 1
6131 #define REDIS_HASH_VALUE 2
6133 /* Check the length of a number of objects to see if we need to convert a
6134 * zipmap to a real hash. Note that we only check string encoded objects
6135 * as their string length can be queried in constant time. */
6136 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6138 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6140 for (i
= start
; i
<= end
; i
++) {
6141 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6142 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6144 convertToRealHash(subject
);
6150 /* Encode given objects in-place when the hash uses a dict. */
6151 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6152 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6153 if (o1
) *o1
= tryObjectEncoding(*o1
);
6154 if (o2
) *o2
= tryObjectEncoding(*o2
);
6158 /* Get the value from a hash identified by key. Returns either a string
6159 * object or NULL if the value cannot be found. The refcount of the object
6160 * is always increased by 1 when the value was found. */
6161 static robj
*hashGet(robj
*o
, robj
*key
) {
6163 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6166 key
= getDecodedObject(key
);
6167 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6168 value
= createStringObject((char*)v
,vlen
);
6172 dictEntry
*de
= dictFind(o
->ptr
,key
);
6174 value
= dictGetEntryVal(de
);
6175 incrRefCount(value
);
6181 /* Test if the key exists in the given hash. Returns 1 if the key
6182 * exists and 0 when it doesn't. */
6183 static int hashExists(robj
*o
, robj
*key
) {
6184 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6185 key
= getDecodedObject(key
);
6186 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6192 if (dictFind(o
->ptr
,key
) != NULL
) {
6199 /* Add an element, discard the old if the key already exists.
6200 * Return 0 on insert and 1 on update. */
6201 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6203 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6204 key
= getDecodedObject(key
);
6205 value
= getDecodedObject(value
);
6206 o
->ptr
= zipmapSet(o
->ptr
,
6207 key
->ptr
,sdslen(key
->ptr
),
6208 value
->ptr
,sdslen(value
->ptr
), &update
);
6210 decrRefCount(value
);
6212 /* Check if the zipmap needs to be upgraded to a real hash table */
6213 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6214 convertToRealHash(o
);
6216 if (dictReplace(o
->ptr
,key
,value
)) {
6223 incrRefCount(value
);
6228 /* Delete an element from a hash.
6229 * Return 1 on deleted and 0 on not found. */
6230 static int hashDelete(robj
*o
, robj
*key
) {
6232 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6233 key
= getDecodedObject(key
);
6234 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6237 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6238 /* Always check if the dictionary needs a resize after a delete. */
6239 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6244 /* Return the number of elements in a hash. */
6245 static unsigned long hashLength(robj
*o
) {
6246 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6247 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6250 /* Structure to hold hash iteration abstraction. Note that iteration over
6251 * hashes involves both fields and values. Because it is possible that
6252 * not both are required, store pointers in the iterator to avoid
6253 * unnecessary memory allocation for fields/values. */
6257 unsigned char *zk
, *zv
;
6258 unsigned int zklen
, zvlen
;
6264 static hashIterator
*hashInitIterator(robj
*subject
) {
6265 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6266 hi
->encoding
= subject
->encoding
;
6267 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6268 hi
->zi
= zipmapRewind(subject
->ptr
);
6269 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6270 hi
->di
= dictGetIterator(subject
->ptr
);
6277 static void hashReleaseIterator(hashIterator
*hi
) {
6278 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6279 dictReleaseIterator(hi
->di
);
6284 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6285 * could be found and REDIS_ERR when the iterator reaches the end. */
6286 static int hashNext(hashIterator
*hi
) {
6287 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6288 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6289 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6291 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6296 /* Get key or value object at current iteration position.
6297 * This increases the refcount of the field object by 1. */
6298 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6300 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6301 if (what
& REDIS_HASH_KEY
) {
6302 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6304 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6307 if (what
& REDIS_HASH_KEY
) {
6308 o
= dictGetEntryKey(hi
->de
);
6310 o
= dictGetEntryVal(hi
->de
);
6317 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6318 robj
*o
= lookupKeyWrite(c
->db
,key
);
6320 o
= createHashObject();
6321 dictAdd(c
->db
->dict
,key
,o
);
6324 if (o
->type
!= REDIS_HASH
) {
6325 addReply(c
,shared
.wrongtypeerr
);
6332 /* ============================= Hash commands ============================== */
6333 static void hsetCommand(redisClient
*c
) {
6337 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6338 hashTryConversion(o
,c
->argv
,2,3);
6339 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6340 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6341 addReply(c
, update
? shared
.czero
: shared
.cone
);
6345 static void hsetnxCommand(redisClient
*c
) {
6347 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6348 hashTryConversion(o
,c
->argv
,2,3);
6350 if (hashExists(o
, c
->argv
[2])) {
6351 addReply(c
, shared
.czero
);
6353 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6354 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6355 addReply(c
, shared
.cone
);
6360 static void hmsetCommand(redisClient
*c
) {
6364 if ((c
->argc
% 2) == 1) {
6365 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6369 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6370 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6371 for (i
= 2; i
< c
->argc
; i
+= 2) {
6372 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6373 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6375 addReply(c
, shared
.ok
);
6379 static void hincrbyCommand(redisClient
*c
) {
6380 long long value
, incr
;
6381 robj
*o
, *current
, *new;
6383 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6384 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6385 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6386 if (current
->encoding
== REDIS_ENCODING_RAW
)
6387 value
= strtoll(current
->ptr
,NULL
,10);
6388 else if (current
->encoding
== REDIS_ENCODING_INT
)
6389 value
= (long)current
->ptr
;
6391 redisAssert(1 != 1);
6392 decrRefCount(current
);
6398 new = createStringObjectFromLongLong(value
);
6399 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6400 hashSet(o
,c
->argv
[2],new);
6402 addReplyLongLong(c
,value
);
6406 static void hgetCommand(redisClient
*c
) {
6408 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6409 checkType(c
,o
,REDIS_HASH
)) return;
6411 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
6412 addReplyBulk(c
,value
);
6413 decrRefCount(value
);
6415 addReply(c
,shared
.nullbulk
);
6419 static void hmgetCommand(redisClient
*c
) {
6422 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6423 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
6424 addReply(c
,shared
.wrongtypeerr
);
6427 /* Note the check for o != NULL happens inside the loop. This is
6428 * done because objects that cannot be found are considered to be
6429 * an empty hash. The reply should then be a series of NULLs. */
6430 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6431 for (i
= 2; i
< c
->argc
; i
++) {
6432 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
6433 addReplyBulk(c
,value
);
6434 decrRefCount(value
);
6436 addReply(c
,shared
.nullbulk
);
6441 static void hdelCommand(redisClient
*c
) {
6443 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6444 checkType(c
,o
,REDIS_HASH
)) return;
6446 if (hashDelete(o
,c
->argv
[2])) {
6447 if (hashLength(o
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6448 addReply(c
,shared
.cone
);
6451 addReply(c
,shared
.czero
);
6455 static void hlenCommand(redisClient
*c
) {
6457 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6458 checkType(c
,o
,REDIS_HASH
)) return;
6460 addReplyUlong(c
,hashLength(o
));
6463 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6464 robj
*o
, *lenobj
, *obj
;
6465 unsigned long count
= 0;
6468 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6469 || checkType(c
,o
,REDIS_HASH
)) return;
6471 lenobj
= createObject(REDIS_STRING
,NULL
);
6473 decrRefCount(lenobj
);
6475 hi
= hashInitIterator(o
);
6476 while (hashNext(hi
) != REDIS_ERR
) {
6477 if (flags
& REDIS_HASH_KEY
) {
6478 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
6479 addReplyBulk(c
,obj
);
6483 if (flags
& REDIS_HASH_VALUE
) {
6484 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
6485 addReplyBulk(c
,obj
);
6490 hashReleaseIterator(hi
);
6492 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6495 static void hkeysCommand(redisClient
*c
) {
6496 genericHgetallCommand(c
,REDIS_HASH_KEY
);
6499 static void hvalsCommand(redisClient
*c
) {
6500 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
6503 static void hgetallCommand(redisClient
*c
) {
6504 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
6507 static void hexistsCommand(redisClient
*c
) {
6509 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6510 checkType(c
,o
,REDIS_HASH
)) return;
6512 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
6515 static void convertToRealHash(robj
*o
) {
6516 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6517 unsigned int klen
, vlen
;
6518 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6520 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6521 p
= zipmapRewind(zm
);
6522 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6523 robj
*keyobj
, *valobj
;
6525 keyobj
= createStringObject((char*)key
,klen
);
6526 valobj
= createStringObject((char*)val
,vlen
);
6527 keyobj
= tryObjectEncoding(keyobj
);
6528 valobj
= tryObjectEncoding(valobj
);
6529 dictAdd(dict
,keyobj
,valobj
);
6531 o
->encoding
= REDIS_ENCODING_HT
;
6536 /* ========================= Non type-specific commands ==================== */
6538 static void flushdbCommand(redisClient
*c
) {
6539 server
.dirty
+= dictSize(c
->db
->dict
);
6540 dictEmpty(c
->db
->dict
);
6541 dictEmpty(c
->db
->expires
);
6542 addReply(c
,shared
.ok
);
6545 static void flushallCommand(redisClient
*c
) {
6546 server
.dirty
+= emptyDb();
6547 addReply(c
,shared
.ok
);
6548 if (server
.bgsavechildpid
!= -1) {
6549 kill(server
.bgsavechildpid
,SIGKILL
);
6550 rdbRemoveTempFile(server
.bgsavechildpid
);
6552 rdbSave(server
.dbfilename
);
6556 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6557 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6559 so
->pattern
= pattern
;
6563 /* Return the value associated to the key with a name obtained
6564 * substituting the first occurrence of '*' in 'pattern' with 'subst'.
6565 * The returned object will always have its refcount increased by 1
6566 * when it is non-NULL. */
6567 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6570 robj keyobj
, fieldobj
, *o
;
6571 int prefixlen
, sublen
, postfixlen
, fieldlen
;
6572 /* Exploit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6576 char buf
[REDIS_SORTKEY_MAX
+1];
6577 } keyname
, fieldname
;
6579 /* If the pattern is "#" return the substitution object itself in order
6580 * to implement the "SORT ... GET #" feature. */
6581 spat
= pattern
->ptr
;
6582 if (spat
[0] == '#' && spat
[1] == '\0') {
6583 incrRefCount(subst
);
6587 /* The substitution object may be specially encoded. If so we create
6588 * a decoded object on the fly. Otherwise getDecodedObject will just
6589 * increment the ref count, that we'll decrement later. */
6590 subst
= getDecodedObject(subst
);
6593 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6594 p
= strchr(spat
,'*');
6596 decrRefCount(subst
);
6600 /* Find out if we're dealing with a hash dereference. */
6601 if ((f
= strstr(p
+1, "->")) != NULL
) {
6602 fieldlen
= sdslen(spat
)-(f
-spat
);
6603 /* this also copies \0 character */
6604 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
6605 fieldname
.len
= fieldlen
-2;
6611 sublen
= sdslen(ssub
);
6612 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
6613 memcpy(keyname
.buf
,spat
,prefixlen
);
6614 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6615 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6616 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6617 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6618 decrRefCount(subst
);
6620 /* Lookup substituted key */
6621 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
6622 o
= lookupKeyRead(db
,&keyobj
);
6623 if (o
== NULL
) return NULL
;
6626 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
6628 /* Retrieve value from hash by the field name. This operation
6629 * already increases the refcount of the returned object. */
6630 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
6631 o
= hashGet(o
, &fieldobj
);
6633 if (o
->type
!= REDIS_STRING
) return NULL
;
6635 /* Every object that this function returns needs to have its refcount
6636 * increased. sortCommand decreases it again. */
6643 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6644 * the additional parameter is not standard but a BSD-specific we have to
6645 * pass sorting parameters via the global 'server' structure */
6646 static int sortCompare(const void *s1
, const void *s2
) {
6647 const redisSortObject
*so1
= s1
, *so2
= s2
;
6650 if (!server
.sort_alpha
) {
6651 /* Numeric sorting. Here it's trivial as we precomputed scores */
6652 if (so1
->u
.score
> so2
->u
.score
) {
6654 } else if (so1
->u
.score
< so2
->u
.score
) {
6660 /* Alphanumeric sorting */
6661 if (server
.sort_bypattern
) {
6662 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6663 /* At least one compare object is NULL */
6664 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6666 else if (so1
->u
.cmpobj
== NULL
)
6671 /* We have both the objects, use strcoll */
6672 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6675 /* Compare elements directly. */
6676 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
6679 return server
.sort_desc
? -cmp
: cmp
;
6682 /* The SORT command is the most complex command in Redis. Warning: this code
6683 * is optimized for speed and a bit less for readability.
 *
 * Overview of what the code below does:
 * - Looks up the key named by the first argument with lookupKeyRead(): a
 *   missing key gets an empty multi bulk reply, and a value that is not a
 *   list, a set or a sorted set gets a wrong-type error.
 * - Parses the options case-insensitively: ASC, DESC, ALPHA, LIMIT (two
 *   arguments), STORE (one), BY (one) and GET (one). The syntax-error path
 *   releases 'sortval' and the operations list before replying. A BY
 *   pattern that contains no '*' sets 'dontsort'.
 * - Loads every element of the value into 'vector' and, unless 'dontsort'
 *   is set, fills in the score or the comparison object of each entry.
 * - Clamps the LIMIT range to the vector and sorts: pqsort() is used when a
 *   BY pattern is given and only part of the range is requested, qsort()
 *   is the other sorting call.
 * - Without STORE the selected range is sent to the client as a multi bulk
 *   reply; with STORE it is saved under 'storekey' as a list object,
 *   server.dirty grows by 1+outputlen and the element count is replied.
 * - Finally the reference taken on 'sortval' and those held on comparison
 *   objects are dropped, and the operations list is released.
 *
 * NOTE(review): LIMIT arguments are converted with atoi(), so non numeric
 * input is not reported as an error here. */
6684 static void sortCommand(redisClient
*c
) {
6687 int desc
= 0, alpha
= 0;
6688 int limit_start
= 0, limit_count
= -1, start
, end
;
6689 int j
, dontsort
= 0, vectorlen
;
6690 int getop
= 0; /* GET operation counter */
6691 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6692 redisSortObject
*vector
; /* Resulting vector to sort */
6694 /* Lookup the key to sort. It must be of the right types */
6695 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6696 if (sortval
== NULL
) {
6697 addReply(c
,shared
.emptymultibulk
);
6700 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6701 sortval
->type
!= REDIS_ZSET
)
6703 addReply(c
,shared
.wrongtypeerr
);
6707 /* Create a list of operations to perform for every sorted element.
6708 * Operations can be GET/DEL/INCR/DECR */
6709 operations
= listCreate();
6710 listSetFreeMethod(operations
,zfree
);
6713 /* Now we need to protect sortval incrementing its count, in the future
6714 * SORT may have options able to overwrite/delete keys during the sorting
6715 * and the sorted key itself may get destroyed */
6716 incrRefCount(sortval
);
6718 /* The SORT command has an SQL-alike syntax, parse it */
6719 while(j
< c
->argc
) {
6720 int leftargs
= c
->argc
-j
-1;
6721 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6723 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6725 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6727 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6728 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6729 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6731 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6732 storekey
= c
->argv
[j
+1];
6734 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6735 sortby
= c
->argv
[j
+1];
6736 /* If the BY pattern does not contain '*', i.e. it is constant,
6737 * we don't need to sort nor to lookup the weight keys. */
6738 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6740 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6741 listAddNodeTail(operations
,createSortOperation(
6742 REDIS_SORT_GET
,c
->argv
[j
+1]));
6746 decrRefCount(sortval
);
6747 listRelease(operations
);
6748 addReply(c
,shared
.syntaxerr
);
6754 /* Load the sorting vector with all the objects to sort */
6755 switch(sortval
->type
) {
6756 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6757 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6758 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6759 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
6761 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6764 if (sortval
->type
== REDIS_LIST
) {
6765 list
*list
= sortval
->ptr
;
6769 listRewind(list
,&li
);
6770 while((ln
= listNext(&li
))) {
6771 robj
*ele
= ln
->value
;
6772 vector
[j
].obj
= ele
;
6773 vector
[j
].u
.score
= 0;
6774 vector
[j
].u
.cmpobj
= NULL
;
6782 if (sortval
->type
== REDIS_SET
) {
6785 zset
*zs
= sortval
->ptr
;
6789 di
= dictGetIterator(set
);
6790 while((setele
= dictNext(di
)) != NULL
) {
6791 vector
[j
].obj
= dictGetEntryKey(setele
);
6792 vector
[j
].u
.score
= 0;
6793 vector
[j
].u
.cmpobj
= NULL
;
6796 dictReleaseIterator(di
);
6798 redisAssert(j
== vectorlen
);
6800 /* Now it's time to load the right scores in the sorting vector */
6801 if (dontsort
== 0) {
6802 for (j
= 0; j
< vectorlen
; j
++) {
6805 /* lookup value to sort by */
6806 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6807 if (!byval
) continue;
6809 /* use object itself to sort by */
6810 byval
= vector
[j
].obj
;
6814 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6816 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6817 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6818 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
6819 /* Don't need to decode the object if it's
6820 * integer-encoded (the only encoding supported) so
6821 * far. We can just cast it */
6822 vector
[j
].u
.score
= (long)byval
->ptr
;
6824 redisAssert(1 != 1);
6828 /* when the object was retrieved using lookupKeyByPattern,
6829 * its refcount needs to be decreased. */
6831 decrRefCount(byval
);
6836 /* We are ready to sort the vector... perform a bit of sanity check
6837 * on the LIMIT option too. We'll use a partial version of quicksort. */
6838 start
= (limit_start
< 0) ? 0 : limit_start
;
6839 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6840 if (start
>= vectorlen
) {
6841 start
= vectorlen
-1;
6844 if (end
>= vectorlen
) end
= vectorlen
-1;
6846 if (dontsort
== 0) {
6847 server
.sort_desc
= desc
;
6848 server
.sort_alpha
= alpha
;
6849 server
.sort_bypattern
= sortby
? 1 : 0;
6850 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6851 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6853 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6856 /* Send command output to the output buffer, performing the specified
6857 * GET/DEL/INCR/DECR operations if any. */
6858 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6859 if (storekey
== NULL
) {
6860 /* STORE option not specified, send the sorting result to client */
6861 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6862 for (j
= start
; j
<= end
; j
++) {
6866 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6867 listRewind(operations
,&li
);
6868 while((ln
= listNext(&li
))) {
6869 redisSortOperation
*sop
= ln
->value
;
6870 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6873 if (sop
->type
== REDIS_SORT_GET
) {
6875 addReply(c
,shared
.nullbulk
);
6877 addReplyBulk(c
,val
);
6881 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6886 robj
*listObject
= createListObject();
6887 list
*listPtr
= (list
*) listObject
->ptr
;
6889 /* STORE option specified, set the sorting result as a List object */
6890 for (j
= start
; j
<= end
; j
++) {
6895 listAddNodeTail(listPtr
,vector
[j
].obj
);
6896 incrRefCount(vector
[j
].obj
);
6898 listRewind(operations
,&li
);
6899 while((ln
= listNext(&li
))) {
6900 redisSortOperation
*sop
= ln
->value
;
6901 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6904 if (sop
->type
== REDIS_SORT_GET
) {
6906 listAddNodeTail(listPtr
,createStringObject("",0));
6908 /* We should do a incrRefCount on val because it is
6909 * added to the list, but also a decrRefCount because
6910 * it is returned by lookupKeyByPattern. This results
6911 * in doing nothing at all. */
6912 listAddNodeTail(listPtr
,val
);
6915 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6919 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6920 incrRefCount(storekey
);
6922 /* Note: we add 1 because the DB is dirty anyway since even if the
6923 * SORT result is empty a new key is set and maybe the old content
6925 server
.dirty
+= 1+outputlen
;
6926 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6930 decrRefCount(sortval
);
6931 listRelease(operations
);
6932 for (j
= 0; j
< vectorlen
; j
++) {
6933 if (alpha
&& vector
[j
].u
.cmpobj
)
6934 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer and must be large enough for the result
 * (the longest output is of the form "16384.00P" plus the terminator).
 * Values below 1024 are printed as an integer followed by 'B'; larger
 * values are printed with two decimals using the largest unit among
 * K, M, G, T and P that does not exceed the value. The buffer is always
 * written, whatever the magnitude of 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    unsigned long long unit = 1024;
    int i;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    /* Move to the next unit while there is one and 'n' reaches it. 'P' is
     * the catch-all for everything an unsigned long long can hold. */
    for (i = 0; units[i+1] != '\0' && n >= unit*1024; i++)
        unit *= 1024;
    sprintf(s,"%.2f%c",(double)n/unit,units[i]);
}
6960 /* Create the string returned by the INFO command. This is decoupled
6961 * from the INFO command itself as we need to report the same information
6962 * on memory corruption problems.
 *
 * The result is an sds string made of "name:value" lines, each terminated
 * by CRLF, built with sdscatprintf(). The fields describing the link with
 * the master are appended only when server.masterhost is set, the vm_*
 * fields only when server.vm_enabled is set, and one
 * "db<N>:keys=...,expires=..." line is appended for every database whose
 * main dictionary or expires dictionary is not empty.
 *
 * NOTE(review): the format string and its argument list must stay aligned
 * one to one; check both when adding or removing a field. */
6963 static sds
genRedisInfoString(void) {
6965 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6969 bytesToHuman(hmem
,zmalloc_used_memory());
6970 info
= sdscatprintf(sdsempty(),
6971 "redis_version:%s\r\n"
6973 "multiplexing_api:%s\r\n"
6974 "process_id:%ld\r\n"
6975 "uptime_in_seconds:%ld\r\n"
6976 "uptime_in_days:%ld\r\n"
6977 "connected_clients:%d\r\n"
6978 "connected_slaves:%d\r\n"
6979 "blocked_clients:%d\r\n"
6980 "used_memory:%zu\r\n"
6981 "used_memory_human:%s\r\n"
6982 "changes_since_last_save:%lld\r\n"
6983 "bgsave_in_progress:%d\r\n"
6984 "last_save_time:%ld\r\n"
6985 "bgrewriteaof_in_progress:%d\r\n"
6986 "total_connections_received:%lld\r\n"
6987 "total_commands_processed:%lld\r\n"
6988 "expired_keys:%lld\r\n"
6989 "hash_max_zipmap_entries:%ld\r\n"
6990 "hash_max_zipmap_value:%ld\r\n"
6991 "pubsub_channels:%ld\r\n"
6992 "pubsub_patterns:%u\r\n"
6996 (sizeof(long) == 8) ? "64" : "32",
7001 listLength(server
.clients
)-listLength(server
.slaves
),
7002 listLength(server
.slaves
),
7003 server
.blpop_blocked_clients
,
7004 zmalloc_used_memory(),
7007 server
.bgsavechildpid
!= -1,
7009 server
.bgrewritechildpid
!= -1,
7010 server
.stat_numconnections
,
7011 server
.stat_numcommands
,
7012 server
.stat_expiredkeys
,
7013 server
.hash_max_zipmap_entries
,
7014 server
.hash_max_zipmap_value
,
7015 dictSize(server
.pubsub_channels
),
7016 listLength(server
.pubsub_patterns
),
7017 server
.vm_enabled
!= 0,
7018 server
.masterhost
== NULL
? "master" : "slave"
7020 if (server
.masterhost
) {
7021 info
= sdscatprintf(info
,
7022 "master_host:%s\r\n"
7023 "master_port:%d\r\n"
7024 "master_link_status:%s\r\n"
7025 "master_last_io_seconds_ago:%d\r\n"
7028 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7030 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7033 if (server
.vm_enabled
) {
7035 info
= sdscatprintf(info
,
7036 "vm_conf_max_memory:%llu\r\n"
7037 "vm_conf_page_size:%llu\r\n"
7038 "vm_conf_pages:%llu\r\n"
7039 "vm_stats_used_pages:%llu\r\n"
7040 "vm_stats_swapped_objects:%llu\r\n"
7041 "vm_stats_swappin_count:%llu\r\n"
7042 "vm_stats_swappout_count:%llu\r\n"
7043 "vm_stats_io_newjobs_len:%lu\r\n"
7044 "vm_stats_io_processing_len:%lu\r\n"
7045 "vm_stats_io_processed_len:%lu\r\n"
7046 "vm_stats_io_active_threads:%lu\r\n"
7047 "vm_stats_blocked_clients:%lu\r\n"
7048 ,(unsigned long long) server
.vm_max_memory
,
7049 (unsigned long long) server
.vm_page_size
,
7050 (unsigned long long) server
.vm_pages
,
7051 (unsigned long long) server
.vm_stats_used_pages
,
7052 (unsigned long long) server
.vm_stats_swapped_objects
,
7053 (unsigned long long) server
.vm_stats_swapins
,
7054 (unsigned long long) server
.vm_stats_swapouts
,
7055 (unsigned long) listLength(server
.io_newjobs
),
7056 (unsigned long) listLength(server
.io_processing
),
7057 (unsigned long) listLength(server
.io_processed
),
7058 (unsigned long) server
.io_active_threads
,
7059 (unsigned long) server
.vm_blocked_clients
7063 for (j
= 0; j
< server
.dbnum
; j
++) {
7064 long long keys
, vkeys
;
7066 keys
= dictSize(server
.db
[j
].dict
);
7067 vkeys
= dictSize(server
.db
[j
].expires
);
7068 if (keys
|| vkeys
) {
7069 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7076 static void infoCommand(redisClient
*c
) {
7077 sds info
= genRedisInfoString();
7078 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7079 (unsigned long)sdslen(info
)));
7080 addReplySds(c
,info
);
7081 addReply(c
,shared
.crlf
);
7084 static void monitorCommand(redisClient
*c
) {
7085 /* ignore MONITOR if aleady slave or in monitor mode */
7086 if (c
->flags
& REDIS_SLAVE
) return;
7088 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7090 listAddNodeTail(server
.monitors
,c
);
7091 addReply(c
,shared
.ok
);
7094 /* ================================= Expire ================================= */
7095 static int removeExpire(redisDb
*db
, robj
*key
) {
7096 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
7103 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7104 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7112 /* Return the expire time of the specified key, or -1 if no expire
7113 * is associated with this key (i.e. the key is non volatile) */
7114 static time_t getExpire(redisDb
*db
, robj
*key
) {
7117 /* No expire? return ASAP */
7118 if (dictSize(db
->expires
) == 0 ||
7119 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7121 return (time_t) dictGetEntryVal(de
);
7124 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7128 /* No expire? return ASAP */
7129 if (dictSize(db
->expires
) == 0 ||
7130 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7132 /* Lookup the expire */
7133 when
= (time_t) dictGetEntryVal(de
);
7134 if (time(NULL
) <= when
) return 0;
7136 /* Delete the key */
7137 dictDelete(db
->expires
,key
);
7138 server
.stat_expiredkeys
++;
7139 return dictDelete(db
->dict
,key
) == DICT_OK
;
7142 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7145 /* No expire? return ASAP */
7146 if (dictSize(db
->expires
) == 0 ||
7147 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7149 /* Delete the key */
7151 server
.stat_expiredkeys
++;
7152 dictDelete(db
->expires
,key
);
7153 return dictDelete(db
->dict
,key
) == DICT_OK
;
7156 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7160 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7164 de
= dictFind(c
->db
->dict
,key
);
7166 addReply(c
,shared
.czero
);
7170 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7171 addReply(c
, shared
.cone
);
7174 time_t when
= time(NULL
)+seconds
;
7175 if (setExpire(c
->db
,key
,when
)) {
7176 addReply(c
,shared
.cone
);
7179 addReply(c
,shared
.czero
);
7185 static void expireCommand(redisClient
*c
) {
7186 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7189 static void expireatCommand(redisClient
*c
) {
7190 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7193 static void ttlCommand(redisClient
*c
) {
7197 expire
= getExpire(c
->db
,c
->argv
[1]);
7199 ttl
= (int) (expire
-time(NULL
));
7200 if (ttl
< 0) ttl
= -1;
7202 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7205 /* ================================ MULTI/EXEC ============================== */
7207 /* Client state initialization for MULTI/EXEC */
7208 static void initClientMultiState(redisClient
*c
) {
7209 c
->mstate
.commands
= NULL
;
7210 c
->mstate
.count
= 0;
7213 /* Release all the resources associated with MULTI/EXEC state */
7214 static void freeClientMultiState(redisClient
*c
) {
7217 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7219 multiCmd
*mc
= c
->mstate
.commands
+j
;
7221 for (i
= 0; i
< mc
->argc
; i
++)
7222 decrRefCount(mc
->argv
[i
]);
7225 zfree(c
->mstate
.commands
);
7228 /* Add a new command into the MULTI commands queue */
7229 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7233 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7234 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7235 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7238 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7239 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7240 for (j
= 0; j
< c
->argc
; j
++)
7241 incrRefCount(mc
->argv
[j
]);
7245 static void multiCommand(redisClient
*c
) {
7246 c
->flags
|= REDIS_MULTI
;
7247 addReply(c
,shared
.ok
);
7250 static void discardCommand(redisClient
*c
) {
7251 if (!(c
->flags
& REDIS_MULTI
)) {
7252 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7256 freeClientMultiState(c
);
7257 initClientMultiState(c
);
7258 c
->flags
&= (~REDIS_MULTI
);
7259 addReply(c
,shared
.ok
);
7262 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7263 * implememntation for more information. */
7264 static void execCommandReplicateMulti(redisClient
*c
) {
7265 struct redisCommand
*cmd
;
7266 robj
*multistring
= createStringObject("MULTI",5);
7268 cmd
= lookupCommand("multi");
7269 if (server
.appendonly
)
7270 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7271 if (listLength(server
.slaves
))
7272 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7273 decrRefCount(multistring
);
7276 static void execCommand(redisClient
*c
) {
7281 if (!(c
->flags
& REDIS_MULTI
)) {
7282 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7286 /* Replicate a MULTI request now that we are sure the block is executed.
7287 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7288 * both the AOF and the replication link will have the same consistency
7289 * and atomicity guarantees. */
7290 execCommandReplicateMulti(c
);
7292 /* Exec all the queued commands */
7293 orig_argv
= c
->argv
;
7294 orig_argc
= c
->argc
;
7295 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7296 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7297 c
->argc
= c
->mstate
.commands
[j
].argc
;
7298 c
->argv
= c
->mstate
.commands
[j
].argv
;
7299 call(c
,c
->mstate
.commands
[j
].cmd
);
7301 c
->argv
= orig_argv
;
7302 c
->argc
= orig_argc
;
7303 freeClientMultiState(c
);
7304 initClientMultiState(c
);
7305 c
->flags
&= (~REDIS_MULTI
);
7306 /* Make sure the EXEC command is always replicated / AOF, since we
7307 * always send the MULTI command (we can't know beforehand if the
7308 * next operations will contain at least a modification to the DB). */
7312 /* =========================== Blocking Operations ========================= */
7314 /* Currently Redis blocking operations support is limited to list POP ops,
7315 * so the current implementation is not fully generic, but it is also not
7316 * completely specific so it will not require a rewrite to support new
7317 * kind of blocking operations in the future.
7319 * Still it's important to note that list blocking operations can be already
7320 * used as a notification mechanism in order to implement other blocking
7321 * operations at application level, so there must be a very strong evidence
7322 * of usefulness and generality before new blocking operations are implemented.
7324 * This is how the current blocking POP works, we use BLPOP as example:
7325 * - If the user calls BLPOP and the key exists and contains a non empty list
7326 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7327 * if there is no need to block.
7328 * - If instead BLPOP is called and the key does not exist or the list is
7329 * empty we need to block. In order to do so we remove the notification for
7330 * new data to read in the client socket (so that we'll not serve new
7331 * requests if the blocking request is not served). Also we put the client
7332 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7333 * blocking for these keys.
7334 * - If a PUSH operation against a key with blocked clients waiting is
7335 * performed, we serve the first in the list: basically instead of pushing
7336 * the new element inside the list we return it to the (first / oldest)
7337 * blocking client, unblock the client, and remove it from the list.
7339 * The above comment and the source code should be enough in order to understand
7340 * the implementation and modify / fix it later.
 */
7343 /* Set a client in blocking mode for the specified key, with the specified
7345 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7350 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7351 c
->blockingkeysnum
= numkeys
;
7352 c
->blockingto
= timeout
;
7353 for (j
= 0; j
< numkeys
; j
++) {
7354 /* Add the key in the client structure, to map clients -> keys */
7355 c
->blockingkeys
[j
] = keys
[j
];
7356 incrRefCount(keys
[j
]);
7358 /* And in the other "side", to map keys -> clients */
7359 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7363 /* For every key we take a list of clients blocked for it */
7365 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7366 incrRefCount(keys
[j
]);
7367 assert(retval
== DICT_OK
);
7369 l
= dictGetEntryVal(de
);
7371 listAddNodeTail(l
,c
);
7373 /* Mark the client as a blocked client */
7374 c
->flags
|= REDIS_BLOCKED
;
7375 server
.blpop_blocked_clients
++;
7378 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7379 static void unblockClientWaitingData(redisClient
*c
) {
7384 assert(c
->blockingkeys
!= NULL
);
7385 /* The client may wait for multiple keys, so unblock it for every key. */
7386 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7387 /* Remove this client from the list of clients waiting for this key. */
7388 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7390 l
= dictGetEntryVal(de
);
7391 listDelNode(l
,listSearchKey(l
,c
));
7392 /* If the list is empty we need to remove it to avoid wasting memory */
7393 if (listLength(l
) == 0)
7394 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7395 decrRefCount(c
->blockingkeys
[j
]);
7397 /* Cleanup the client structure */
7398 zfree(c
->blockingkeys
);
7399 c
->blockingkeys
= NULL
;
7400 c
->flags
&= (~REDIS_BLOCKED
);
7401 server
.blpop_blocked_clients
--;
7402 /* We want to process data if there is some command waiting
7403 * in the input buffer. Note that this is safe even if
7404 * unblockClientWaitingData() gets called from freeClient() because
7405 * freeClient() will be smart enough to call this function
7406 * *after* c->querybuf was set to NULL. */
7407 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7410 /* This should be called from any function PUSHing into lists.
7411 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7412 * 'ele' is the element pushed.
7414 * If the function returns 0 there was no client waiting for a list push
7417 * If the function returns 1 there was a client waiting for a list push
7418 * against this key, the element was passed to this client thus it's not
7419 * needed to actually add it to the list and the caller should return asap. */
7420 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7421 struct dictEntry
*de
;
7422 redisClient
*receiver
;
7426 de
= dictFind(c
->db
->blockingkeys
,key
);
7427 if (de
== NULL
) return 0;
7428 l
= dictGetEntryVal(de
);
7431 receiver
= ln
->value
;
7433 addReplySds(receiver
,sdsnew("*2\r\n"));
7434 addReplyBulk(receiver
,key
);
7435 addReplyBulk(receiver
,ele
);
7436 unblockClientWaitingData(receiver
);
7440 /* Blocking RPOP/LPOP */
7441 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7446 for (j
= 1; j
< c
->argc
-1; j
++) {
7447 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7449 if (o
->type
!= REDIS_LIST
) {
7450 addReply(c
,shared
.wrongtypeerr
);
7453 list
*list
= o
->ptr
;
7454 if (listLength(list
) != 0) {
7455 /* If the list contains elements fall back to the usual
7456 * non-blocking POP operation */
7457 robj
*argv
[2], **orig_argv
;
7460 /* We need to alter the command arguments before to call
7461 * popGenericCommand() as the command takes a single key. */
7462 orig_argv
= c
->argv
;
7463 orig_argc
= c
->argc
;
7464 argv
[1] = c
->argv
[j
];
7468 /* Also the return value is different, we need to output
7469 * the multi bulk reply header and the key name. The
7470 * "real" command will add the last element (the value)
7471 * for us. If this souds like an hack to you it's just
7472 * because it is... */
7473 addReplySds(c
,sdsnew("*2\r\n"));
7474 addReplyBulk(c
,argv
[1]);
7475 popGenericCommand(c
,where
);
7477 /* Fix the client structure with the original stuff */
7478 c
->argv
= orig_argv
;
7479 c
->argc
= orig_argc
;
7485 /* If the list is empty or the key does not exists we must block */
7486 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7487 if (timeout
> 0) timeout
+= time(NULL
);
7488 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7491 static void blpopCommand(redisClient
*c
) {
7492 blockingPopGenericCommand(c
,REDIS_HEAD
);
7495 static void brpopCommand(redisClient
*c
) {
7496 blockingPopGenericCommand(c
,REDIS_TAIL
);
7499 /* =============================== Replication ============================= */
7501 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7502 ssize_t nwritten
, ret
= size
;
7503 time_t start
= time(NULL
);
7507 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7508 nwritten
= write(fd
,ptr
,size
);
7509 if (nwritten
== -1) return -1;
7513 if ((time(NULL
)-start
) > timeout
) {
7521 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7522 ssize_t nread
, totread
= 0;
7523 time_t start
= time(NULL
);
7527 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7528 nread
= read(fd
,ptr
,size
);
7529 if (nread
== -1) return -1;
7534 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' into the buffer 'ptr', one character at a time by
 * means of syncRead(), so 'timeout' applies to every single character.
 * Returns -1 as soon as syncRead() fails. When at least one character was
 * already stored and the last one is a carriage return, that character is
 * overwritten with a terminator, which strips the "\r" of a CRLF pair.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing; confirm how 'size' bounds the number of characters stored and
 * what is returned on success before relying on either. */
7542 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7549 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7552 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7563 static void syncCommand(redisClient
*c
) {
7564 /* ignore SYNC if aleady slave or in monitor mode */
7565 if (c
->flags
& REDIS_SLAVE
) return;
7567 /* SYNC can't be issued when the server has pending data to send to
7568 * the client about already issued commands. We need a fresh reply
7569 * buffer registering the differences between the BGSAVE and the current
7570 * dataset, so that we can copy to other slaves if needed. */
7571 if (listLength(c
->reply
) != 0) {
7572 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7576 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7577 /* Here we need to check if there is a background saving operation
7578 * in progress, or if it is required to start one */
7579 if (server
.bgsavechildpid
!= -1) {
7580 /* Ok a background save is in progress. Let's check if it is a good
7581 * one for replication, i.e. if there is another slave that is
7582 * registering differences since the server forked to save */
7587 listRewind(server
.slaves
,&li
);
7588 while((ln
= listNext(&li
))) {
7590 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7593 /* Perfect, the server is already registering differences for
7594 * another slave. Set the right state, and copy the buffer. */
7595 listRelease(c
->reply
);
7596 c
->reply
= listDup(slave
->reply
);
7597 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7598 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7600 /* No way, we need to wait for the next BGSAVE in order to
7601 * register differences */
7602 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7603 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7606 /* Ok we don't have a BGSAVE in progress, let's start one */
7607 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7608 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7609 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7610 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7613 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7616 c
->flags
|= REDIS_SLAVE
;
7618 listAddNodeTail(server
.slaves
,c
);
7622 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7623 redisClient
*slave
= privdata
;
7625 REDIS_NOTUSED(mask
);
7626 char buf
[REDIS_IOBUF_LEN
];
7627 ssize_t nwritten
, buflen
;
7629 if (slave
->repldboff
== 0) {
7630 /* Write the bulk write count before to transfer the DB. In theory here
7631 * we don't know how much room there is in the output buffer of the
7632 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7633 * operations) will never be smaller than the few bytes we need. */
7636 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7638 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7646 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7647 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7649 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7650 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7654 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7655 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7660 slave
->repldboff
+= nwritten
;
7661 if (slave
->repldboff
== slave
->repldbsize
) {
7662 close(slave
->repldbfd
);
7663 slave
->repldbfd
= -1;
7664 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7665 slave
->replstate
= REDIS_REPL_ONLINE
;
7666 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7667 sendReplyToClient
, slave
) == AE_ERR
) {
7671 addReplySds(slave
,sdsempty());
7672 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7676 /* This function is called at the end of every backgrond saving.
7677 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7678 * otherwise REDIS_ERR is passed to the function.
7680 * The goal of this function is to handle slaves waiting for a successful
7681 * background saving in order to perform non-blocking synchronization. */
7682 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7684 int startbgsave
= 0;
7687 listRewind(server
.slaves
,&li
);
7688 while((ln
= listNext(&li
))) {
7689 redisClient
*slave
= ln
->value
;
7691 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7693 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7694 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7695 struct redis_stat buf
;
7697 if (bgsaveerr
!= REDIS_OK
) {
7699 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7702 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7703 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7705 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7708 slave
->repldboff
= 0;
7709 slave
->repldbsize
= buf
.st_size
;
7710 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7711 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7712 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7719 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7722 listRewind(server
.slaves
,&li
);
7723 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7724 while((ln
= listNext(&li
))) {
7725 redisClient
*slave
= ln
->value
;
7727 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7734 static int syncWithMaster(void) {
7735 char buf
[1024], tmpfile
[256], authcmd
[1024];
7737 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7738 int dfd
, maxtries
= 5;
7741 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7746 /* AUTH with the master if required. */
7747 if(server
.masterauth
) {
7748 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7749 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7751 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7755 /* Read the AUTH result. */
7756 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7758 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7762 if (buf
[0] != '+') {
7764 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7769 /* Issue the SYNC command */
7770 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7772 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7776 /* Read the bulk write count */
7777 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7779 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7783 if (buf
[0] != '$') {
7785 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7788 dumpsize
= strtol(buf
+1,NULL
,10);
7789 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7790 /* Read the bulk write data on a temp file */
7792 snprintf(tmpfile
,256,
7793 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7794 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7795 if (dfd
!= -1) break;
7800 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7804 int nread
, nwritten
;
7806 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7808 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7814 nwritten
= write(dfd
,buf
,nread
);
7815 if (nwritten
== -1) {
7816 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7824 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7825 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7831 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7832 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7836 server
.master
= createClient(fd
);
7837 server
.master
->flags
|= REDIS_MASTER
;
7838 server
.master
->authenticated
= 1;
7839 server
.replstate
= REDIS_REPL_CONNECTED
;
7843 static void slaveofCommand(redisClient
*c
) {
7844 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7845 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7846 if (server
.masterhost
) {
7847 sdsfree(server
.masterhost
);
7848 server
.masterhost
= NULL
;
7849 if (server
.master
) freeClient(server
.master
);
7850 server
.replstate
= REDIS_REPL_NONE
;
7851 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7854 sdsfree(server
.masterhost
);
7855 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7856 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7857 if (server
.master
) freeClient(server
.master
);
7858 server
.replstate
= REDIS_REPL_CONNECT
;
7859 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7860 server
.masterhost
, server
.masterport
);
7862 addReply(c
,shared
.ok
);
7865 /* ============================ Maxmemory directive ======================== */
7867 /* Try to free one object form the pre-allocated objects free list.
7868 * This is useful under low mem conditions as by default we take 1 million
7869 * free objects allocated. On success REDIS_OK is returned, otherwise
7871 static int tryFreeOneObjectFromFreelist(void) {
7874 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7875 if (listLength(server
.objfreelist
)) {
7876 listNode
*head
= listFirst(server
.objfreelist
);
7877 o
= listNodeValue(head
);
7878 listDelNode(server
.objfreelist
,head
);
7879 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7883 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7888 /* This function gets called when 'maxmemory' is set on the config file to limit
7889 * the max memory used by the server, and we are out of memory.
7890 * This function will try to, in order:
7892 * - Free objects from the free list
7893 * - Try to remove keys with an EXPIRE set
7895 * It is not possible to free enough memory to reach used-memory < maxmemory
7896 * the server will start refusing commands that will enlarge even more the
7899 static void freeMemoryIfNeeded(void) {
7900 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7901 int j
, k
, freed
= 0;
7903 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7904 for (j
= 0; j
< server
.dbnum
; j
++) {
7906 robj
*minkey
= NULL
;
7907 struct dictEntry
*de
;
7909 if (dictSize(server
.db
[j
].expires
)) {
7911 /* From a sample of three keys drop the one nearest to
7912 * the natural expire */
7913 for (k
= 0; k
< 3; k
++) {
7916 de
= dictGetRandomKey(server
.db
[j
].expires
);
7917 t
= (time_t) dictGetEntryVal(de
);
7918 if (minttl
== -1 || t
< minttl
) {
7919 minkey
= dictGetEntryKey(de
);
7923 deleteKey(server
.db
+j
,minkey
);
7926 if (!freed
) return; /* nothing to free... */
7930 /* ============================== Append Only file ========================== */
7932 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7933 sds buf
= sdsempty();
7939 /* The DB this command was targetting is not the same as the last command
7940 * we appendend. To issue a SELECT command is needed. */
7941 if (dictid
!= server
.appendseldb
) {
7944 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7945 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7946 (unsigned long)strlen(seldb
),seldb
);
7947 server
.appendseldb
= dictid
;
7950 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7951 * EXPIREs into EXPIREATs calls */
7952 if (cmd
->proc
== expireCommand
) {
7955 tmpargv
[0] = createStringObject("EXPIREAT",8);
7956 tmpargv
[1] = argv
[1];
7957 incrRefCount(argv
[1]);
7958 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7959 tmpargv
[2] = createObject(REDIS_STRING
,
7960 sdscatprintf(sdsempty(),"%ld",when
));
7964 /* Append the actual command */
7965 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7966 for (j
= 0; j
< argc
; j
++) {
7969 o
= getDecodedObject(o
);
7970 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7971 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7972 buf
= sdscatlen(buf
,"\r\n",2);
7976 /* Free the objects from the modified argv for EXPIREAT */
7977 if (cmd
->proc
== expireCommand
) {
7978 for (j
= 0; j
< 3; j
++)
7979 decrRefCount(argv
[j
]);
7982 /* We want to perform a single write. This should be guaranteed atomic
7983 * at least if the filesystem we are writing is a real physical one.
7984 * While this will save us against the server being killed I don't think
7985 * there is much to do about the whole server stopping for power problems
7987 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7988 if (nwritten
!= (signed)sdslen(buf
)) {
7989 /* Ooops, we are in troubles. The best thing to do for now is
7990 * to simply exit instead to give the illusion that everything is
7991 * working as expected. */
7992 if (nwritten
== -1) {
7993 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7995 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7999 /* If a background append only file rewriting is in progress we want to
8000 * accumulate the differences between the child DB and the current one
8001 * in a buffer, so that when the child process will do its work we
8002 * can append the differences to the new append only file. */
8003 if (server
.bgrewritechildpid
!= -1)
8004 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8008 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8009 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8010 now
-server
.lastfsync
> 1))
8012 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8013 server
.lastfsync
= now
;
8017 /* In Redis commands are always executed in the context of a client, so in
8018 * order to load the append only file we need to create a fake client. */
8019 static struct redisClient
*createFakeClient(void) {
8020 struct redisClient
*c
= zmalloc(sizeof(*c
));
8024 c
->querybuf
= sdsempty();
8028 /* We set the fake client as a slave waiting for the synchronization
8029 * so that Redis will not try to send replies to this client. */
8030 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8031 c
->reply
= listCreate();
8032 listSetFreeMethod(c
->reply
,decrRefCount
);
8033 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8037 static void freeFakeClient(struct redisClient
*c
) {
8038 sdsfree(c
->querybuf
);
8039 listRelease(c
->reply
);
8043 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
8044 * error (the append only file is zero-length) REDIS_ERR is returned. On
8045 * fatal error an error message is logged and the program exists. */
8046 int loadAppendOnlyFile(char *filename
) {
8047 struct redisClient
*fakeClient
;
8048 FILE *fp
= fopen(filename
,"r");
8049 struct redis_stat sb
;
8050 unsigned long long loadedkeys
= 0;
8052 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8056 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8060 fakeClient
= createFakeClient();
8067 struct redisCommand
*cmd
;
8069 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
8075 if (buf
[0] != '*') goto fmterr
;
8077 argv
= zmalloc(sizeof(robj
*)*argc
);
8078 for (j
= 0; j
< argc
; j
++) {
8079 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8080 if (buf
[0] != '$') goto fmterr
;
8081 len
= strtol(buf
+1,NULL
,10);
8082 argsds
= sdsnewlen(NULL
,len
);
8083 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8084 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8085 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8088 /* Command lookup */
8089 cmd
= lookupCommand(argv
[0]->ptr
);
8091 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8094 /* Try object encoding */
8095 if (cmd
->flags
& REDIS_CMD_BULK
)
8096 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8097 /* Run the command in the context of a fake client */
8098 fakeClient
->argc
= argc
;
8099 fakeClient
->argv
= argv
;
8100 cmd
->proc(fakeClient
);
8101 /* Discard the reply objects list from the fake client */
8102 while(listLength(fakeClient
->reply
))
8103 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8104 /* Clean up, ready for the next command */
8105 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8107 /* Handle swapping while loading big datasets when VM is on */
8109 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
8110 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8111 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8116 freeFakeClient(fakeClient
);
8121 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8123 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8127 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8131 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
8132 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8136 /* Avoid the incr/decr ref count business if possible to help
8137 * copy-on-write (we are often in a child process when this function
8139 * Also makes sure that key objects don't get incrRefCount-ed when VM
8141 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8142 obj
= getDecodedObject(obj
);
8145 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8146 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8147 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8149 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8150 if (decrrc
) decrRefCount(obj
);
8153 if (decrrc
) decrRefCount(obj
);
/* Write binary-safe string into a file in the bulkformat
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes that may contain any value, including zero
 * bytes. Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be formatted with %lu. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The value is printed with 17 significant digits ("%.17g").
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    unsigned long digits;

    /* The payload is formatted together with its trailing CRLF, which is
     * not part of the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    digits = (unsigned long)strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",digits);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,strlen(payload),1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    unsigned long digits;

    /* The payload is formatted together with its trailing CRLF, which is
     * not part of the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    digits = (unsigned long)strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",digits);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,strlen(payload),1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
8191 /* Write a sequence of commands able to fully rebuild the dataset into
8192 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8193 static int rewriteAppendOnlyFile(char *filename
) {
8194 dictIterator
*di
= NULL
;
8199 time_t now
= time(NULL
);
8201 /* Note that we have to use a different temp name here compared to the
8202 * one used by rewriteAppendOnlyFileBackground() function. */
8203 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8204 fp
= fopen(tmpfile
,"w");
8206 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8209 for (j
= 0; j
< server
.dbnum
; j
++) {
8210 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8211 redisDb
*db
= server
.db
+j
;
8213 if (dictSize(d
) == 0) continue;
8214 di
= dictGetIterator(d
);
8220 /* SELECT the new DB */
8221 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8222 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8224 /* Iterate this DB writing every entry */
8225 while((de
= dictNext(di
)) != NULL
) {
8230 key
= dictGetEntryKey(de
);
8231 /* If the value for this key is swapped, load a preview in memory.
8232 * We use a "swapped" flag to remember if we need to free the
8233 * value object instead to just increment the ref count anyway
8234 * in order to avoid copy-on-write of pages if we are forked() */
8235 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8236 key
->storage
== REDIS_VM_SWAPPING
) {
8237 o
= dictGetEntryVal(de
);
8240 o
= vmPreviewObject(key
);
8243 expiretime
= getExpire(db
,key
);
8245 /* Save the key and associated value */
8246 if (o
->type
== REDIS_STRING
) {
8247 /* Emit a SET command */
8248 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8249 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8251 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8252 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8253 } else if (o
->type
== REDIS_LIST
) {
8254 /* Emit the RPUSHes needed to rebuild the list */
8255 list
*list
= o
->ptr
;
8259 listRewind(list
,&li
);
8260 while((ln
= listNext(&li
))) {
8261 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8262 robj
*eleobj
= listNodeValue(ln
);
8264 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8265 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8266 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8268 } else if (o
->type
== REDIS_SET
) {
8269 /* Emit the SADDs needed to rebuild the set */
8271 dictIterator
*di
= dictGetIterator(set
);
8274 while((de
= dictNext(di
)) != NULL
) {
8275 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8276 robj
*eleobj
= dictGetEntryKey(de
);
8278 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8279 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8280 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8282 dictReleaseIterator(di
);
8283 } else if (o
->type
== REDIS_ZSET
) {
8284 /* Emit the ZADDs needed to rebuild the sorted set */
8286 dictIterator
*di
= dictGetIterator(zs
->dict
);
8289 while((de
= dictNext(di
)) != NULL
) {
8290 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8291 robj
*eleobj
= dictGetEntryKey(de
);
8292 double *score
= dictGetEntryVal(de
);
8294 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8295 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8296 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8297 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8299 dictReleaseIterator(di
);
8300 } else if (o
->type
== REDIS_HASH
) {
8301 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8303 /* Emit the HSETs needed to rebuild the hash */
8304 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8305 unsigned char *p
= zipmapRewind(o
->ptr
);
8306 unsigned char *field
, *val
;
8307 unsigned int flen
, vlen
;
8309 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8310 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8311 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8312 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8314 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8318 dictIterator
*di
= dictGetIterator(o
->ptr
);
8321 while((de
= dictNext(di
)) != NULL
) {
8322 robj
*field
= dictGetEntryKey(de
);
8323 robj
*val
= dictGetEntryVal(de
);
8325 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8326 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8327 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8328 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8330 dictReleaseIterator(di
);
8333 redisPanic("Unknown object type");
8335 /* Save the expire time */
8336 if (expiretime
!= -1) {
8337 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8338 /* If this key is already expired skip it */
8339 if (expiretime
< now
) continue;
8340 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8341 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8342 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8344 if (swapped
) decrRefCount(o
);
8346 dictReleaseIterator(di
);
8349 /* Make sure data will not remain on the OS's output buffers */
8354 /* Use RENAME to make sure the DB file is changed atomically only
8355 * if the generate DB file is ok. */
8356 if (rename(tmpfile
,filename
) == -1) {
8357 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8361 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8367 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8368 if (di
) dictReleaseIterator(di
);
8372 /* This is how rewriting of the append only file in background works:
8374 * 1) The user calls BGREWRITEAOF
8375 * 2) Redis calls this function, that forks():
8376 * 2a) the child rewrite the append only file in a temp file.
8377 * 2b) the parent accumulates differences in server.bgrewritebuf.
8378 * 3) When the child finishes '2a' it exits.
8379 * 4) The parent will trap the exit code, if it's OK, will append the
8380 * data accumulated into server.bgrewritebuf into the temp file, and
8381 * finally will rename(2) the temp file in the actual file name.
8382 * Then the new file is reopened as the new append only file. Profit!
8384 static int rewriteAppendOnlyFileBackground(void) {
8387 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8388 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8389 if ((childpid
= fork()) == 0) {
8393 if (server
.vm_enabled
) vmReopenSwapFile();
8395 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8396 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8403 if (childpid
== -1) {
8404 redisLog(REDIS_WARNING
,
8405 "Can't rewrite append only file in background: fork: %s",
8409 redisLog(REDIS_NOTICE
,
8410 "Background append only file rewriting started by pid %d",childpid
);
8411 server
.bgrewritechildpid
= childpid
;
8412 updateDictResizePolicy();
8413 /* We set appendseldb to -1 in order to force the next call to the
8414 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8415 * accumulated by the parent into server.bgrewritebuf will start
8416 * with a SELECT statement and it will be safe to merge. */
8417 server
.appendseldb
= -1;
8420 return REDIS_OK
; /* unreached */
8423 static void bgrewriteaofCommand(redisClient
*c
) {
8424 if (server
.bgrewritechildpid
!= -1) {
8425 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8428 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8429 char *status
= "+Background append only file rewriting started\r\n";
8430 addReplySds(c
,sdsnew(status
));
8432 addReply(c
,shared
.err
);
/* Remove the temp file "temp-rewriteaof-bg-<pid>.aof" used by the
 * background rewriting child having the specified pid. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8443 /* Virtual Memory is composed mainly of two subsystems:
8444 * - Blocking Virtual Memory
8445 * - Threaded Virtual Memory I/O
8446 * The two parts are not fully decoupled, but functions are split among two
8447 * different sections of the source code (delimited by comments) in order to
8448 * make more clear what functionality is about the blocking VM and what about
8449 * the threaded (not blocking) VM.
8453 * Redis VM is a blocking VM (one that blocks reading swapped values from
8454 * disk into memory when a value swapped out is needed in memory) that is made
8455 * unblocking by trying to examine the command argument vector in order to
8456 * load in background values that will likely be needed in order to exec
8457 * the command. The command is executed only once all the relevant keys
8458 * are loaded into memory.
8460 * This basically is almost as simple of a blocking VM, but almost as parallel
8461 * as a fully non-blocking VM.
8464 /* =================== Virtual Memory - Blocking Side ====================== */
8466 /* substitute the first occurrence of '%p' with the process pid in the
8467 * swap file name. */
8468 static void expandVmSwapFilename(void) {
8469 char *p
= strstr(server
.vm_swap_file
,"%p");
8475 new = sdscat(new,server
.vm_swap_file
);
8476 new = sdscatprintf(new,"%ld",(long) getpid());
8477 new = sdscat(new,p
+2);
8478 zfree(server
.vm_swap_file
);
8479 server
.vm_swap_file
= new;
8482 static void vmInit(void) {
8487 if (server
.vm_max_threads
!= 0)
8488 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8490 expandVmSwapFilename();
8491 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8492 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8493 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8495 if (server
.vm_fp
== NULL
) {
8496 redisLog(REDIS_WARNING
,
8497 "Impossible to open the swap file: %s. Exiting.",
8501 server
.vm_fd
= fileno(server
.vm_fp
);
8502 server
.vm_next_page
= 0;
8503 server
.vm_near_pages
= 0;
8504 server
.vm_stats_used_pages
= 0;
8505 server
.vm_stats_swapped_objects
= 0;
8506 server
.vm_stats_swapouts
= 0;
8507 server
.vm_stats_swapins
= 0;
8508 totsize
= server
.vm_pages
*server
.vm_page_size
;
8509 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8510 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8511 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8515 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8517 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8518 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8519 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8520 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8522 /* Initialize threaded I/O (used by Virtual Memory) */
8523 server
.io_newjobs
= listCreate();
8524 server
.io_processing
= listCreate();
8525 server
.io_processed
= listCreate();
8526 server
.io_ready_clients
= listCreate();
8527 pthread_mutex_init(&server
.io_mutex
,NULL
);
8528 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8529 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8530 server
.io_active_threads
= 0;
8531 if (pipe(pipefds
) == -1) {
8532 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8536 server
.io_ready_pipe_read
= pipefds
[0];
8537 server
.io_ready_pipe_write
= pipefds
[1];
8538 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8539 /* LZF requires a lot of stack */
8540 pthread_attr_init(&server
.io_threads_attr
);
8541 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8542 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8543 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8544 /* Listen for events in the threaded I/O pipe */
8545 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8546 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8547 oom("creating file event");
8550 /* Mark the page as used */
8551 static void vmMarkPageUsed(off_t page
) {
8552 off_t byte
= page
/8;
8554 redisAssert(vmFreePage(page
) == 1);
8555 server
.vm_bitmap
[byte
] |= 1<<bit
;
8558 /* Mark N contiguous pages as used, with 'page' being the first. */
8559 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8562 for (j
= 0; j
< count
; j
++)
8563 vmMarkPageUsed(page
+j
);
8564 server
.vm_stats_used_pages
+= count
;
8565 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8566 (long long)count
, (long long)page
);
8569 /* Mark the page as free */
8570 static void vmMarkPageFree(off_t page
) {
8571 off_t byte
= page
/8;
8573 redisAssert(vmFreePage(page
) == 0);
8574 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8577 /* Mark N contiguous pages as free, with 'page' being the first. */
8578 static void vmMarkPagesFree(off_t page
, off_t count
) {
8581 for (j
= 0; j
< count
; j
++)
8582 vmMarkPageFree(page
+j
);
8583 server
.vm_stats_used_pages
-= count
;
8584 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8585 (long long)count
, (long long)page
);
8588 /* Test if the page is free */
8589 static int vmFreePage(off_t page
) {
8590 off_t byte
= page
/8;
8592 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8595 /* Find N contiguous free pages storing the first page of the cluster in *first.
8596 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8597 * REDIS_ERR is returned.
8599 * This function uses a simple algorithm: we try to allocate
8600 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8601 * again from the start of the swap file searching for free spaces.
8603 * If it looks pretty clear that there are no free pages near our offset
8604 * we try to find less populated places doing a forward jump of
8605 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8606 * without hurry, and then we jump again and so forth...
8608 * This function can be improved using a free list to avoid to guess
8609 * too much, since we could collect data about freed pages.
8611 * note: I implemented this function just after watching an episode of
8612 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8614 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8615 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8617 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8618 server
.vm_near_pages
= 0;
8619 server
.vm_next_page
= 0;
8621 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8622 base
= server
.vm_next_page
;
8624 while(offset
< server
.vm_pages
) {
8625 off_t
this = base
+offset
;
8627 /* If we overflow, restart from page zero */
8628 if (this >= server
.vm_pages
) {
8629 this -= server
.vm_pages
;
8631 /* Just overflowed, what we found on tail is no longer
8632 * interesting, as it's no longer contiguous. */
8636 if (vmFreePage(this)) {
8637 /* This is a free page */
8639 /* Already got N free pages? Return to the caller, with success */
8641 *first
= this-(n
-1);
8642 server
.vm_next_page
= this+1;
8643 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8647 /* The current one is not a free page */
8651 /* Fast-forward if the current page is not free and we already
8652 * searched enough near this place. */
8654 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8655 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8657 /* Note that even if we rewind after the jump, we are don't need
8658 * to make sure numfree is set to zero as we only jump *if* it
8659 * is set to zero. */
8661 /* Otherwise just check the next page */
8668 /* Write the specified object at the specified page of the swap file */
8669 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8670 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8671 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8672 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8673 redisLog(REDIS_WARNING
,
8674 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8678 rdbSaveObject(server
.vm_fp
,o
);
8679 fflush(server
.vm_fp
);
8680 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8684 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8685 * needed to later retrieve the object into the key object.
8686 * If we can't find enough contiguous empty pages to swap the object on disk
8687 * REDIS_ERR is returned. */
8688 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8689 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8692 assert(key
->storage
== REDIS_VM_MEMORY
);
8693 assert(key
->refcount
== 1);
8694 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8695 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8696 key
->vm
.page
= page
;
8697 key
->vm
.usedpages
= pages
;
8698 key
->storage
= REDIS_VM_SWAPPED
;
8699 key
->vtype
= val
->type
;
8700 decrRefCount(val
); /* Deallocate the object from memory. */
8701 vmMarkPagesUsed(page
,pages
);
8702 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8703 (unsigned char*) key
->ptr
,
8704 (unsigned long long) page
, (unsigned long long) pages
);
8705 server
.vm_stats_swapped_objects
++;
8706 server
.vm_stats_swapouts
++;
8710 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8713 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8714 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8715 redisLog(REDIS_WARNING
,
8716 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8720 o
= rdbLoadObject(type
,server
.vm_fp
);
8722 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8725 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8729 /* Load the value object relative to the 'key' object from swap to memory.
8730 * The newly allocated object is returned.
8732 * If preview is true the unserialized object is returned to the caller but
8733 * no changes are made to the key object, nor the pages are marked as freed */
8734 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8737 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8738 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8740 key
->storage
= REDIS_VM_MEMORY
;
8741 key
->vm
.atime
= server
.unixtime
;
8742 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8743 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8744 (unsigned char*) key
->ptr
);
8745 server
.vm_stats_swapped_objects
--;
8747 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8748 (unsigned char*) key
->ptr
);
8750 server
.vm_stats_swapins
++;
8754 /* Plain object loading, from swap to memory */
8755 static robj
*vmLoadObject(robj
*key
) {
8756 /* If we are loading the object in background, stop it, we
8757 * need to load this object synchronously ASAP. */
8758 if (key
->storage
== REDIS_VM_LOADING
)
8759 vmCancelThreadedIOJob(key
);
8760 return vmGenericLoadObject(key
,0);
8763 /* Just load the value on disk, without to modify the key.
8764 * This is useful when we want to perform some operation on the value
8765 * without to really bring it from swap to memory, like while saving the
8766 * dataset or rewriting the append only log. */
8767 static robj
*vmPreviewObject(robj
*key
) {
8768 return vmGenericLoadObject(key
,1);
8771 /* How a good candidate is this object for swapping?
8772 * The better candidate it is, the greater the returned value.
8774 * Currently we try to perform a fast estimation of the object size in
8775 * memory, and combine it with aging informations.
8777 * Basically swappability = idle-time * log(estimated size)
8779 * Bigger objects are preferred over smaller objects, but not
8780 * proportionally, this is why we use the logarithm. This algorithm is
8781 * just a first try and will probably be tuned later. */
8782 static double computeObjectSwappability(robj
*o
) {
8783 time_t age
= server
.unixtime
- o
->vm
.atime
;
8787 struct dictEntry
*de
;
8790 if (age
<= 0) return 0;
8793 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8796 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8801 listNode
*ln
= listFirst(l
);
8803 asize
= sizeof(list
);
8805 robj
*ele
= ln
->value
;
8808 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8809 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8811 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8816 z
= (o
->type
== REDIS_ZSET
);
8817 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8819 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8820 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8825 de
= dictGetRandomKey(d
);
8826 ele
= dictGetEntryKey(de
);
8827 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8828 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8830 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8831 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8835 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8836 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8837 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8838 unsigned int klen
, vlen
;
8839 unsigned char *key
, *val
;
8841 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8845 asize
= len
*(klen
+vlen
+3);
8846 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8848 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8853 de
= dictGetRandomKey(d
);
8854 ele
= dictGetEntryKey(de
);
8855 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8856 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8858 ele
= dictGetEntryVal(de
);
8859 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8860 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8862 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8867 return (double)age
*log(1+asize
);
8870 /* Try to swap an object that's a good candidate for swapping.
8871 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8872 * to swap any object at all.
8874 * If 'usethreaded' is true, Redis will try to swap the object in background
8875 * using I/O threads. */
8876 static int vmSwapOneObject(int usethreads
) {
8878 struct dictEntry
*best
= NULL
;
8879 double best_swappability
= 0;
8880 redisDb
*best_db
= NULL
;
8883 for (j
= 0; j
< server
.dbnum
; j
++) {
8884 redisDb
*db
= server
.db
+j
;
8885 /* Why maxtries is set to 100?
8886 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8887 * are swappable objects */
8890 if (dictSize(db
->dict
) == 0) continue;
8891 for (i
= 0; i
< 5; i
++) {
8893 double swappability
;
8895 if (maxtries
) maxtries
--;
8896 de
= dictGetRandomKey(db
->dict
);
8897 key
= dictGetEntryKey(de
);
8898 val
= dictGetEntryVal(de
);
8899 /* Only swap objects that are currently in memory.
8901 * Also don't swap shared objects if threaded VM is on, as we
8902 * try to ensure that the main thread does not touch the
8903 * object while the I/O thread is using it, but we can't
8904 * control other keys without adding additional mutex. */
8905 if (key
->storage
!= REDIS_VM_MEMORY
||
8906 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8907 if (maxtries
) i
--; /* don't count this try */
8910 swappability
= computeObjectSwappability(val
);
8911 if (!best
|| swappability
> best_swappability
) {
8913 best_swappability
= swappability
;
8918 if (best
== NULL
) return REDIS_ERR
;
8919 key
= dictGetEntryKey(best
);
8920 val
= dictGetEntryVal(best
);
8922 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8923 key
->ptr
, best_swappability
);
8925 /* Unshare the key if needed */
8926 if (key
->refcount
> 1) {
8927 robj
*newkey
= dupStringObject(key
);
8929 key
= dictGetEntryKey(best
) = newkey
;
8933 vmSwapObjectThreaded(key
,val
,best_db
);
8936 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8937 dictGetEntryVal(best
) = NULL
;
/* Swap out one object synchronously, in the calling thread.
 * Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object in background using the I/O threads.
 * Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8953 /* Return true if it's safe to swap out objects in a given moment.
8954 * Basically we don't want to swap objects out while there is a BGSAVE
8955 * or a BGAEOREWRITE running in backgroud. */
8956 static int vmCanSwapOut(void) {
8957 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8960 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8961 * and was deleted. Otherwise 0 is returned. */
8962 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8966 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8967 foundkey
= dictGetEntryKey(de
);
8968 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8973 /* =================== Virtual Memory - Threaded I/O ======================= */
8975 static void freeIOJob(iojob
*j
) {
8976 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8977 j
->type
== REDIS_IOJOB_DO_SWAP
||
8978 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8979 decrRefCount(j
->val
);
8980 /* We don't decrRefCount the j->key field as we did't incremented
8981 * the count creating IO Jobs. This is because the key field here is
8982 * just used as an indentifier and if a key is removed the Job should
8983 * never be touched again. */
8987 /* Every time a thread finished a Job, it writes a byte into the write side
8988 * of an unix pipe in order to "awake" the main thread, and this function
8990 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8994 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8996 REDIS_NOTUSED(mask
);
8997 REDIS_NOTUSED(privdata
);
8999 /* For every byte we read in the read side of the pipe, there is one
9000 * I/O job completed to process. */
9001 while((retval
= read(fd
,buf
,1)) == 1) {
9005 struct dictEntry
*de
;
9007 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9009 /* Get the processed element (the oldest one) */
9011 assert(listLength(server
.io_processed
) != 0);
9012 if (toprocess
== -1) {
9013 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9014 if (toprocess
<= 0) toprocess
= 1;
9016 ln
= listFirst(server
.io_processed
);
9018 listDelNode(server
.io_processed
,ln
);
9020 /* If this job is marked as canceled, just ignore it */
9025 /* Post process it in the main thread, as there are things we
9026 * can do just here to avoid race conditions and/or invasive locks */
9027 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
9028 de
= dictFind(j
->db
->dict
,j
->key
);
9030 key
= dictGetEntryKey(de
);
9031 if (j
->type
== REDIS_IOJOB_LOAD
) {
9034 /* Key loaded, bring it at home */
9035 key
->storage
= REDIS_VM_MEMORY
;
9036 key
->vm
.atime
= server
.unixtime
;
9037 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
9038 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9039 (unsigned char*) key
->ptr
);
9040 server
.vm_stats_swapped_objects
--;
9041 server
.vm_stats_swapins
++;
9042 dictGetEntryVal(de
) = j
->val
;
9043 incrRefCount(j
->val
);
9046 /* Handle clients waiting for this key to be loaded. */
9047 handleClientsBlockedOnSwappedKey(db
,key
);
9048 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9049 /* Now we know the amount of pages required to swap this object.
9050 * Let's find some space for it, and queue this task again
9051 * rebranded as REDIS_IOJOB_DO_SWAP. */
9052 if (!vmCanSwapOut() ||
9053 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9055 /* Ooops... no space or we can't swap as there is
9056 * a fork()ed Redis trying to save stuff on disk. */
9058 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9060 /* Note that we need to mark this pages as used now,
9061 * if the job will be canceled, we'll mark them as freed
9063 vmMarkPagesUsed(j
->page
,j
->pages
);
9064 j
->type
= REDIS_IOJOB_DO_SWAP
;
9069 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9072 /* Key swapped. We can finally free some memory. */
9073 if (key
->storage
!= REDIS_VM_SWAPPING
) {
9074 printf("key->storage: %d\n",key
->storage
);
9075 printf("key->name: %s\n",(char*)key
->ptr
);
9076 printf("key->refcount: %d\n",key
->refcount
);
9077 printf("val: %p\n",(void*)j
->val
);
9078 printf("val->type: %d\n",j
->val
->type
);
9079 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9081 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
9082 val
= dictGetEntryVal(de
);
9083 key
->vm
.page
= j
->page
;
9084 key
->vm
.usedpages
= j
->pages
;
9085 key
->storage
= REDIS_VM_SWAPPED
;
9086 key
->vtype
= j
->val
->type
;
9087 decrRefCount(val
); /* Deallocate the object from memory. */
9088 dictGetEntryVal(de
) = NULL
;
9089 redisLog(REDIS_DEBUG
,
9090 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9091 (unsigned char*) key
->ptr
,
9092 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9093 server
.vm_stats_swapped_objects
++;
9094 server
.vm_stats_swapouts
++;
9096 /* Put a few more swap requests in queue if we are still
9098 if (trytoswap
&& vmCanSwapOut() &&
9099 zmalloc_used_memory() > server
.vm_max_memory
)
9104 more
= listLength(server
.io_newjobs
) <
9105 (unsigned) server
.vm_max_threads
;
9107 /* Don't waste CPU time if swappable objects are rare. */
9108 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9116 if (processed
== toprocess
) return;
9118 if (retval
< 0 && errno
!= EAGAIN
) {
9119 redisLog(REDIS_WARNING
,
9120 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9125 static void lockThreadedIO(void) {
9126 pthread_mutex_lock(&server
.io_mutex
);
9129 static void unlockThreadedIO(void) {
9130 pthread_mutex_unlock(&server
.io_mutex
);
9133 /* Remove the specified object from the threaded I/O queue if still not
9134 * processed, otherwise make sure to flag it as canceled. */
9135 static void vmCancelThreadedIOJob(robj
*o
) {
9137 server
.io_newjobs
, /* 0 */
9138 server
.io_processing
, /* 1 */
9139 server
.io_processed
/* 2 */
9143 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9146 /* Search for a matching key in one of the queues */
9147 for (i
= 0; i
< 3; i
++) {
9151 listRewind(lists
[i
],&li
);
9152 while ((ln
= listNext(&li
)) != NULL
) {
9153 iojob
*job
= ln
->value
;
9155 if (job
->canceled
) continue; /* Skip this, already canceled. */
9156 if (job
->key
== o
) {
9157 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9158 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9159 /* Mark the pages as free since the swap didn't happened
9160 * or happened but is now discarded. */
9161 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9162 vmMarkPagesFree(job
->page
,job
->pages
);
9163 /* Cancel the job. It depends on the list the job is
9166 case 0: /* io_newjobs */
9167 /* If the job was yet not processed the best thing to do
9168 * is to remove it from the queue at all */
9170 listDelNode(lists
[i
],ln
);
9172 case 1: /* io_processing */
9173 /* Oh Shi- the thread is messing with the Job:
9175 * Probably it's accessing the object if this is a
9176 * PREPARE_SWAP or DO_SWAP job.
9177 * If it's a LOAD job it may be reading from disk and
9178 * if we don't wait for the job to terminate before to
9179 * cancel it, maybe in a few microseconds data can be
9180 * corrupted in this pages. So the short story is:
9182 * Better to wait for the job to move into the
9183 * next queue (processed)... */
9185 /* We try again and again until the job is completed. */
9187 /* But let's wait some time for the I/O thread
9188 * to finish with this job. After all this condition
9189 * should be very rare. */
9192 case 2: /* io_processed */
9193 /* The job was already processed, that's easy...
9194 * just mark it as canceled so that we'll ignore it
9195 * when processing completed jobs. */
9199 /* Finally we have to adjust the storage type of the object
9200 * in order to "UNDO" the operaiton. */
9201 if (o
->storage
== REDIS_VM_LOADING
)
9202 o
->storage
= REDIS_VM_SWAPPED
;
9203 else if (o
->storage
== REDIS_VM_SWAPPING
)
9204 o
->storage
= REDIS_VM_MEMORY
;
9211 assert(1 != 1); /* We should never reach this */
9214 static void *IOThreadEntryPoint(void *arg
) {
9219 pthread_detach(pthread_self());
9221 /* Get a new job to process */
9223 if (listLength(server
.io_newjobs
) == 0) {
9224 /* No new jobs in queue, exit. */
9225 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9226 (long) pthread_self());
9227 server
.io_active_threads
--;
9231 ln
= listFirst(server
.io_newjobs
);
9233 listDelNode(server
.io_newjobs
,ln
);
9234 /* Add the job in the processing queue */
9235 j
->thread
= pthread_self();
9236 listAddNodeTail(server
.io_processing
,j
);
9237 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9239 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9240 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9242 /* Process the Job */
9243 if (j
->type
== REDIS_IOJOB_LOAD
) {
9244 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9245 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9246 FILE *fp
= fopen("/dev/null","w+");
9247 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9249 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9250 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9254 /* Done: insert the job into the processed queue */
9255 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9256 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9258 listDelNode(server
.io_processing
,ln
);
9259 listAddNodeTail(server
.io_processed
,j
);
9262 /* Signal the main thread there is new stuff to process */
9263 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9265 return NULL
; /* never reached */
9268 static void spawnIOThread(void) {
9270 sigset_t mask
, omask
;
9274 sigaddset(&mask
,SIGCHLD
);
9275 sigaddset(&mask
,SIGHUP
);
9276 sigaddset(&mask
,SIGPIPE
);
9277 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9278 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9279 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9283 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9284 server
.io_active_threads
++;
9287 /* We need to wait for the last thread to exit before we are able to
9288 * fork() in order to BGSAVE or BGREWRITEAOF. */
9289 static void waitEmptyIOJobsQueue(void) {
9291 int io_processed_len
;
9294 if (listLength(server
.io_newjobs
) == 0 &&
9295 listLength(server
.io_processing
) == 0 &&
9296 server
.io_active_threads
== 0)
9301 /* While waiting for empty jobs queue condition we post-process some
9302 * finshed job, as I/O threads may be hanging trying to write against
9303 * the io_ready_pipe_write FD but there are so much pending jobs that
9305 io_processed_len
= listLength(server
.io_processed
);
9307 if (io_processed_len
) {
9308 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9309 usleep(1000); /* 1 millisecond */
9311 usleep(10000); /* 10 milliseconds */
9316 static void vmReopenSwapFile(void) {
9317 /* Note: we don't close the old one as we are in the child process
9318 * and don't want to mess at all with the original file object. */
9319 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9320 if (server
.vm_fp
== NULL
) {
9321 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9322 server
.vm_swap_file
);
9325 server
.vm_fd
= fileno(server
.vm_fp
);
9328 /* This function must be called while with threaded IO locked */
9329 static void queueIOJob(iojob
*j
) {
9330 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9331 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9332 listAddNodeTail(server
.io_newjobs
,j
);
9333 if (server
.io_active_threads
< server
.vm_max_threads
)
9337 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9340 assert(key
->storage
== REDIS_VM_MEMORY
);
9341 assert(key
->refcount
== 1);
9343 j
= zmalloc(sizeof(*j
));
9344 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9350 j
->thread
= (pthread_t
) -1;
9351 key
->storage
= REDIS_VM_SWAPPING
;
9359 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9361 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9362 * If there is not already a job loading the key, it is craeted.
9363 * The key is added to the io_keys list in the client structure, and also
9364 * in the hash table mapping swapped keys to waiting clients, that is,
9365 * server.io_waited_keys. */
9366 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9367 struct dictEntry
*de
;
9371 /* If the key does not exist or is already in RAM we don't need to
9372 * block the client at all. */
9373 de
= dictFind(c
->db
->dict
,key
);
9374 if (de
== NULL
) return 0;
9375 o
= dictGetEntryKey(de
);
9376 if (o
->storage
== REDIS_VM_MEMORY
) {
9378 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9379 /* We were swapping the key, undo it! */
9380 vmCancelThreadedIOJob(o
);
9384 /* OK: the key is either swapped, or being loaded just now. */
9386 /* Add the key to the list of keys this client is waiting for.
9387 * This maps clients to keys they are waiting for. */
9388 listAddNodeTail(c
->io_keys
,key
);
9391 /* Add the client to the swapped keys => clients waiting map. */
9392 de
= dictFind(c
->db
->io_keys
,key
);
9396 /* For every key we take a list of clients blocked for it */
9398 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9400 assert(retval
== DICT_OK
);
9402 l
= dictGetEntryVal(de
);
9404 listAddNodeTail(l
,c
);
9406 /* Are we already loading the key from disk? If not create a job */
9407 if (o
->storage
== REDIS_VM_SWAPPED
) {
9410 o
->storage
= REDIS_VM_LOADING
;
9411 j
= zmalloc(sizeof(*j
));
9412 j
->type
= REDIS_IOJOB_LOAD
;
9415 j
->key
->vtype
= o
->vtype
;
9416 j
->page
= o
->vm
.page
;
9419 j
->thread
= (pthread_t
) -1;
9427 /* Preload keys needed for the ZUNION and ZINTER commands. */
9428 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9430 num
= atoi(c
->argv
[2]->ptr
);
9431 for (i
= 0; i
< num
; i
++) {
9432 waitForSwappedKey(c
,c
->argv
[3+i
]);
9436 /* Is this client attempting to run a command against swapped keys?
9437 * If so, block it ASAP, load the keys in background, then resume it.
9439 * The important idea about this function is that it can fail! If keys will
9440 * still be swapped when the client is resumed, this key lookups will
9441 * just block loading keys from disk. In practical terms this should only
9442 * happen with SORT BY command or if there is a bug in this function.
9444 * Return 1 if the client is marked as blocked, 0 if the client can
9445 * continue as the keys it is going to access appear to be in memory. */
9446 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9449 if (cmd
->vm_preload_proc
!= NULL
) {
9450 cmd
->vm_preload_proc(c
);
9452 if (cmd
->vm_firstkey
== 0) return 0;
9453 last
= cmd
->vm_lastkey
;
9454 if (last
< 0) last
= c
->argc
+last
;
9455 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9456 waitForSwappedKey(c
,c
->argv
[j
]);
9459 /* If the client was blocked for at least one key, mark it as blocked. */
9460 if (listLength(c
->io_keys
)) {
9461 c
->flags
|= REDIS_IO_WAIT
;
9462 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9463 server
.vm_blocked_clients
++;
9470 /* Remove the 'key' from the list of blocked keys for a given client.
9472 * The function returns 1 when there are no longer blocking keys after
9473 * the current one was removed (and the client can be unblocked). */
9474 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9478 struct dictEntry
*de
;
9480 /* Remove the key from the list of keys this client is waiting for. */
9481 listRewind(c
->io_keys
,&li
);
9482 while ((ln
= listNext(&li
)) != NULL
) {
9483 if (compareStringObjects(ln
->value
,key
) == 0) {
9484 listDelNode(c
->io_keys
,ln
);
9490 /* Remove the client form the key => waiting clients map. */
9491 de
= dictFind(c
->db
->io_keys
,key
);
9493 l
= dictGetEntryVal(de
);
9494 ln
= listSearchKey(l
,c
);
9497 if (listLength(l
) == 0)
9498 dictDelete(c
->db
->io_keys
,key
);
9500 return listLength(c
->io_keys
) == 0;
9503 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9504 struct dictEntry
*de
;
9509 de
= dictFind(db
->io_keys
,key
);
9512 l
= dictGetEntryVal(de
);
9513 len
= listLength(l
);
9514 /* Note: we can't use something like while(listLength(l)) as the list
9515 * can be freed by the calling function when we remove the last element. */
9518 redisClient
*c
= ln
->value
;
9520 if (dontWaitForSwappedKey(c
,key
)) {
9521 /* Put the client in the list of clients ready to go as we
9522 * loaded all the keys about it. */
9523 listAddNodeTail(server
.io_ready_clients
,c
);
9528 /* =========================== Remote Configuration ========================= */
9530 static void configSetCommand(redisClient
*c
) {
9531 robj
*o
= getDecodedObject(c
->argv
[3]);
9532 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9533 zfree(server
.dbfilename
);
9534 server
.dbfilename
= zstrdup(o
->ptr
);
9535 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9536 zfree(server
.requirepass
);
9537 server
.requirepass
= zstrdup(o
->ptr
);
9538 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9539 zfree(server
.masterauth
);
9540 server
.masterauth
= zstrdup(o
->ptr
);
9541 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9542 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9544 addReplySds(c
,sdscatprintf(sdsempty(),
9545 "-ERR not supported CONFIG parameter %s\r\n",
9546 (char*)c
->argv
[2]->ptr
));
9551 addReply(c
,shared
.ok
);
9554 static void configGetCommand(redisClient
*c
) {
9555 robj
*o
= getDecodedObject(c
->argv
[2]);
9556 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9557 char *pattern
= o
->ptr
;
9561 decrRefCount(lenobj
);
9563 if (stringmatch(pattern
,"dbfilename",0)) {
9564 addReplyBulkCString(c
,"dbfilename");
9565 addReplyBulkCString(c
,server
.dbfilename
);
9568 if (stringmatch(pattern
,"requirepass",0)) {
9569 addReplyBulkCString(c
,"requirepass");
9570 addReplyBulkCString(c
,server
.requirepass
);
9573 if (stringmatch(pattern
,"masterauth",0)) {
9574 addReplyBulkCString(c
,"masterauth");
9575 addReplyBulkCString(c
,server
.masterauth
);
9578 if (stringmatch(pattern
,"maxmemory",0)) {
9581 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9582 addReplyBulkCString(c
,"maxmemory");
9583 addReplyBulkCString(c
,buf
);
9587 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9590 static void configCommand(redisClient
*c
) {
9591 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9592 if (c
->argc
!= 4) goto badarity
;
9593 configSetCommand(c
);
9594 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9595 if (c
->argc
!= 3) goto badarity
;
9596 configGetCommand(c
);
9597 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9598 if (c
->argc
!= 2) goto badarity
;
9599 server
.stat_numcommands
= 0;
9600 server
.stat_numconnections
= 0;
9601 server
.stat_expiredkeys
= 0;
9602 server
.stat_starttime
= time(NULL
);
9603 addReply(c
,shared
.ok
);
9605 addReplySds(c
,sdscatprintf(sdsempty(),
9606 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9611 addReplySds(c
,sdscatprintf(sdsempty(),
9612 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9613 (char*) c
->argv
[1]->ptr
));
9616 /* =========================== Pubsub implementation ======================== */
9618 static void freePubsubPattern(void *p
) {
9619 pubsubPattern
*pat
= p
;
9621 decrRefCount(pat
->pattern
);
9625 static int listMatchPubsubPattern(void *a
, void *b
) {
9626 pubsubPattern
*pa
= a
, *pb
= b
;
9628 return (pa
->client
== pb
->client
) &&
9629 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9632 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9633 * 0 if the client was already subscribed to that channel. */
9634 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9635 struct dictEntry
*de
;
9636 list
*clients
= NULL
;
9639 /* Add the channel to the client -> channels hash table */
9640 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9642 incrRefCount(channel
);
9643 /* Add the client to the channel -> list of clients hash table */
9644 de
= dictFind(server
.pubsub_channels
,channel
);
9646 clients
= listCreate();
9647 dictAdd(server
.pubsub_channels
,channel
,clients
);
9648 incrRefCount(channel
);
9650 clients
= dictGetEntryVal(de
);
9652 listAddNodeTail(clients
,c
);
9654 /* Notify the client */
9655 addReply(c
,shared
.mbulk3
);
9656 addReply(c
,shared
.subscribebulk
);
9657 addReplyBulk(c
,channel
);
9658 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9662 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9663 * 0 if the client was not subscribed to the specified channel. */
9664 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9665 struct dictEntry
*de
;
9670 /* Remove the channel from the client -> channels hash table */
9671 incrRefCount(channel
); /* channel may be just a pointer to the same object
9672 we have in the hash tables. Protect it... */
9673 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9675 /* Remove the client from the channel -> clients list hash table */
9676 de
= dictFind(server
.pubsub_channels
,channel
);
9678 clients
= dictGetEntryVal(de
);
9679 ln
= listSearchKey(clients
,c
);
9681 listDelNode(clients
,ln
);
9682 if (listLength(clients
) == 0) {
9683 /* Free the list and associated hash entry at all if this was
9684 * the latest client, so that it will be possible to abuse
9685 * Redis PUBSUB creating millions of channels. */
9686 dictDelete(server
.pubsub_channels
,channel
);
9689 /* Notify the client */
9691 addReply(c
,shared
.mbulk3
);
9692 addReply(c
,shared
.unsubscribebulk
);
9693 addReplyBulk(c
,channel
);
9694 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9695 listLength(c
->pubsub_patterns
));
9698 decrRefCount(channel
); /* it is finally safe to release it */
9702 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9703 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9706 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9709 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9710 incrRefCount(pattern
);
9711 pat
= zmalloc(sizeof(*pat
));
9712 pat
->pattern
= getDecodedObject(pattern
);
9714 listAddNodeTail(server
.pubsub_patterns
,pat
);
9716 /* Notify the client */
9717 addReply(c
,shared
.mbulk3
);
9718 addReply(c
,shared
.psubscribebulk
);
9719 addReplyBulk(c
,pattern
);
9720 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9724 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9725 * 0 if the client was not subscribed to the specified channel. */
9726 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9731 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9732 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9734 listDelNode(c
->pubsub_patterns
,ln
);
9736 pat
.pattern
= pattern
;
9737 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9738 listDelNode(server
.pubsub_patterns
,ln
);
9740 /* Notify the client */
9742 addReply(c
,shared
.mbulk3
);
9743 addReply(c
,shared
.punsubscribebulk
);
9744 addReplyBulk(c
,pattern
);
9745 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9746 listLength(c
->pubsub_patterns
));
9748 decrRefCount(pattern
);
9752 /* Unsubscribe from all the channels. Return the number of channels the
9753 * client was subscribed from. */
9754 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9755 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9759 while((de
= dictNext(di
)) != NULL
) {
9760 robj
*channel
= dictGetEntryKey(de
);
9762 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9764 dictReleaseIterator(di
);
9768 /* Unsubscribe from all the patterns. Return the number of patterns the
9769 * client was subscribed from. */
9770 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9775 listRewind(c
->pubsub_patterns
,&li
);
9776 while ((ln
= listNext(&li
)) != NULL
) {
9777 robj
*pattern
= ln
->value
;
9779 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9784 /* Publish a message */
9785 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9787 struct dictEntry
*de
;
9791 /* Send to clients listening for that channel */
9792 de
= dictFind(server
.pubsub_channels
,channel
);
9794 list
*list
= dictGetEntryVal(de
);
9798 listRewind(list
,&li
);
9799 while ((ln
= listNext(&li
)) != NULL
) {
9800 redisClient
*c
= ln
->value
;
9802 addReply(c
,shared
.mbulk3
);
9803 addReply(c
,shared
.messagebulk
);
9804 addReplyBulk(c
,channel
);
9805 addReplyBulk(c
,message
);
9809 /* Send to clients listening to matching channels */
9810 if (listLength(server
.pubsub_patterns
)) {
9811 listRewind(server
.pubsub_patterns
,&li
);
9812 channel
= getDecodedObject(channel
);
9813 while ((ln
= listNext(&li
)) != NULL
) {
9814 pubsubPattern
*pat
= ln
->value
;
9816 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9817 sdslen(pat
->pattern
->ptr
),
9818 (char*)channel
->ptr
,
9819 sdslen(channel
->ptr
),0)) {
9820 addReply(pat
->client
,shared
.mbulk3
);
9821 addReply(pat
->client
,shared
.messagebulk
);
9822 addReplyBulk(pat
->client
,channel
);
9823 addReplyBulk(pat
->client
,message
);
9827 decrRefCount(channel
);
9832 static void subscribeCommand(redisClient
*c
) {
9835 for (j
= 1; j
< c
->argc
; j
++)
9836 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9839 static void unsubscribeCommand(redisClient
*c
) {
9841 pubsubUnsubscribeAllChannels(c
,1);
9846 for (j
= 1; j
< c
->argc
; j
++)
9847 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9851 static void psubscribeCommand(redisClient
*c
) {
9854 for (j
= 1; j
< c
->argc
; j
++)
9855 pubsubSubscribePattern(c
,c
->argv
[j
]);
9858 static void punsubscribeCommand(redisClient
*c
) {
9860 pubsubUnsubscribeAllPatterns(c
,1);
9865 for (j
= 1; j
< c
->argc
; j
++)
9866 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9870 static void publishCommand(redisClient
*c
) {
9871 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9872 addReplyLong(c
,receivers
);
9875 /* ================================= Debugging ============================== */
/* DEBUG command handler. The subcommand is c->argv[1], compared
 * case-insensitively:
 *   segfault      - body not visible in this excerpt
 *   reload        - rdbSave() then rdbLoad() of server.dbfilename; replies
 *                   shared.err if either does not return REDIS_OK
 *   loadaof       - loadAppendOnlyFile(server.appendfilename)
 *   object <key>  - replies with key/value addresses and refcounts plus
 *                   either the value encoding and serialized length, or the
 *                   swap page and page count taken from key->vm
 *   swapin <key>  - lookupKeyRead() on the key, then shared.ok
 *   swapout <key> - vmSwapObjectBlocking() on the key/value pair
 * The three key-taking subcommands are only selected when c->argc == 3.
 * Anything else receives the syntax-error reply at the bottom.
 *
 * NOTE(review): the syntax-error text does not list LOADAOF even though the
 * subcommand is handled above.
 * NOTE(review): several guards, early returns and local declarations (de
 * checks, key/val/buf/strenc) are not visible in this excerpt. */
9877 static void debugCommand(redisClient
*c
) {
9878 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9880 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9881 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9882 addReply(c
,shared
.err
);
9886 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9887 addReply(c
,shared
.err
);
9890 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9891 addReply(c
,shared
.ok
);
9892 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9894 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9895 addReply(c
,shared
.err
);
9898 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9899 addReply(c
,shared
.ok
);
9900 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9901 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
/* Reply for a failed lookup (the guarding test on `de` is not visible in
 * this excerpt). */
9905 addReply(c
,shared
.nokeyerr
);
9908 key
= dictGetEntryKey(de
);
9909 val
= dictGetEntryVal(de
);
/* The value report is produced when VM is disabled, or when the key's
 * storage is REDIS_VM_MEMORY or REDIS_VM_SWAPPING. */
9910 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9911 key
->storage
== REDIS_VM_SWAPPING
)) {
/* Bounds-check the encoding before indexing strencoding[]; the snprintf
 * below formats an "unknown encoding" text into buf for the other case. */
9915 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9916 strenc
= strencoding
[val
->encoding
];
9918 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9921 addReplySds(c
,sdscatprintf(sdsempty(),
9922 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9923 "encoding:%s serializedlength:%lld\r\n",
9924 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9925 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
/* Alternative report: swap page and number of pages read from key->vm. */
9927 addReplySds(c
,sdscatprintf(sdsempty(),
9928 "+Key at:%p refcount:%d, value swapped at: page %llu "
9929 "using %llu pages\r\n",
9930 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9931 (unsigned long long) key
->vm
.usedpages
));
9933 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9934 lookupKeyRead(c
->db
,c
->argv
[2]);
9935 addReply(c
,shared
.ok
);
9936 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9937 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9940 if (!server
.vm_enabled
) {
9941 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
/* Reply for a failed lookup (the guarding test on `de` is not visible in
 * this excerpt). */
9945 addReply(c
,shared
.nokeyerr
);
9948 key
= dictGetEntryKey(de
);
9949 val
= dictGetEntryVal(de
);
9950 /* If the key is shared we want to create a copy */
9951 if (key
->refcount
> 1) {
9952 robj
*newkey
= dupStringObject(key
);
/* The dict entry is repointed at the private copy, which also becomes the
 * `key` used below. */
9954 key
= dictGetEntryKey(de
) = newkey
;
9957 if (key
->storage
!= REDIS_VM_MEMORY
) {
9958 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9959 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
/* After a successful blocking swap the entry's value pointer is cleared. */
9960 dictGetEntryVal(de
) = NULL
;
9961 addReply(c
,shared
.ok
);
9963 addReply(c
,shared
.err
);
9966 addReplySds(c
,sdsnew(
9967 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
/* Report a failed assertion.
 *
 * estr: text of the expression that was expected to be true.
 * file, line: source location logged alongside it.
 *
 * Logs two REDIS_WARNING lines; when HAVE_BACKTRACE is defined it also logs
 * that a SIGSEGV is about to be forced so the stack trace gets printed.
 * NOTE(review): the statement that forces the signal and the closing
 * #endif are not visible in this excerpt. */
9971 static void _redisAssert(char *estr
, char *file
, int line
) {
9972 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9973 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9974 #ifdef HAVE_BACKTRACE
9975 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
/* Report an unrecoverable condition.
 *
 * msg: description of the failure.
 * file, line: source location logged alongside it.
 *
 * Logs two REDIS_WARNING lines; when HAVE_BACKTRACE is defined it also logs
 * that a SIGSEGV is about to be forced so the stack trace gets printed.
 * NOTE(review): the statement that forces the signal and the closing
 * #endif are not visible in this excerpt. */
9980 static void _redisPanic(char *msg
, char *file
, int line
) {
9981 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
9982 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
9983 #ifdef HAVE_BACKTRACE
9984 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9989 /* =================================== Main! ================================ */
/* Read the kernel overcommit setting from /proc/sys/vm/overcommit_memory.
 *
 * The file is opened for reading and one line (at most 64 bytes including
 * the terminator) is read into buf with fgets().
 * Returns an int; linuxOvercommitMemoryWarning() compares it against 0.
 * NOTE(review): the check for a failed fopen(), the handling of a failed
 * fgets(), the conversion of buf and the fclose() are not visible in this
 * excerpt -- confirm fp is tested before it reaches fgets(). */
9992 int linuxOvercommitMemoryValue(void) {
9993 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9997 if (fgets(buf
,64,fp
) == NULL
) {
/* Log a REDIS_WARNING explaining how to set vm.overcommit_memory to 1 when
 * linuxOvercommitMemoryValue() reports 0. Any other value logs nothing. */
10006 void linuxOvercommitMemoryWarning(void) {
10007 if (linuxOvercommitMemoryValue() == 0) {
10008 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
10011 #endif /* __linux__ */
/* Detach the server from the launching terminal.
 *
 * Steps visible below: fork and let the parent exit, start a new session,
 * point stdin/stdout/stderr at /dev/null, then write the process id to
 * server.pidfile.
 * NOTE(review): fork() returning -1 also satisfies `!= 0`, so a failed fork
 * exits with status 0 instead of reporting an error.
 * NOTE(review): the declarations of fd/fp, the test on the fopen() result
 * and the fclose() are not visible in this excerpt -- confirm fp is checked
 * before fprintf(). */
10013 static void daemonize(void) {
10017 if (fork() != 0) exit(0); /* parent exits */
10018 setsid(); /* create a new session */
10020 /* Every output goes to /dev/null. If Redis is daemonized but
10021 * the 'logfile' is set to 'stdout' in the configuration file
10022 * it will not log at all. */
10023 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
10024 dup2(fd
, STDIN_FILENO
);
10025 dup2(fd
, STDOUT_FILENO
);
10026 dup2(fd
, STDERR_FILENO
);
/* Only close fd when it is not itself one of the three standard
 * descriptors just redirected. */
10027 if (fd
> STDERR_FILENO
) close(fd
);
10029 /* Try to write the pid file */
10030 fp
= fopen(server
.pidfile
,"w");
10032 fprintf(fp
,"%d\n",getpid());
/* Print "Redis server version <REDIS_VERSION>" to stdout. Called from
 * main() for the -v / --version arguments.
 * NOTE(review): whatever follows the printf (presumably a process exit) is
 * not visible in this excerpt. */
10037 static void version() {
10038 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the two-line command-line usage text to stderr. Called from main()
 * for the --help argument.
 * NOTE(review): whatever follows the two fprintf calls (presumably a
 * process exit) is not visible in this excerpt. */
10042 static void usage() {
10043 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
10044 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
/* Program entry point.
 *
 * Visible steps, in order:
 *  - initServerConfig() installs the default configuration;
 *  - argv[1] is interpreted: "-v"/"--version" calls version(), "--help"
 *    calls usage(), otherwise the save parameters are reset and argv[1] is
 *    passed to loadServerConfig(); a separate branch handles argc > 2 and a
 *    warning is logged when no config file was given;
 *  - daemonize() runs when server.daemonize is set;
 *  - the start-up banner is logged and the overcommit warning check runs;
 *  - the dataset is loaded with loadAppendOnlyFile() under
 *    server.appendonly, or with rdbLoad() (NOTE(review): the `else`
 *    joining the two loads is not visible), logging the elapsed seconds;
 *  - beforeSleep is installed on server.el, and aeDeleteEventLoop() is the
 *    last visible call.
 *
 * NOTE(review): the test that guards the argv[1] accesses, the body of the
 * argc > 2 branch, the server initialisation and the call that actually
 * runs the event loop are not visible in this excerpt.
 * NOTE(review): both "%ld seconds" messages are passed time(NULL)-start;
 * if `start` is a time_t this relies on time_t being passed like a long --
 * confirm or cast explicitly. */
10048 int main(int argc
, char **argv
) {
10051 initServerConfig();
10053 if (strcmp(argv
[1], "-v") == 0 ||
10054 strcmp(argv
[1], "--version") == 0) version();
10055 if (strcmp(argv
[1], "--help") == 0) usage();
10056 resetServerSaveParams();
10057 loadServerConfig(argv
[1]);
10058 } else if ((argc
> 2)) {
10061 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
10063 if (server
.daemonize
) daemonize();
10065 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
10067 linuxOvercommitMemoryWarning();
/* Timestamp taken before loading so the load duration can be logged. */
10069 start
= time(NULL
);
10070 if (server
.appendonly
) {
10071 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
10072 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
10074 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
10075 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
10077 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
10078 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
10080 aeDeleteEventLoop(server
.el
);
10084 /* ============================= Backtrace support ========================= */
10086 #ifdef HAVE_BACKTRACE
/* Forward declaration: segvHandler() calls findFuncName() before its
 * definition, which appears later in this HAVE_BACKTRACE section. */
10087 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the saved program-counter register stored in
 * the signal context `uc`. The field that holds it differs per platform, so
 * the accessor is selected at compile time:
 *   FreeBSD             uc_mcontext.mc_eip
 *   dietlibc            uc_mcontext.eip
 *   Apple (both cases)  uc_mcontext->__ss.__rip or uc_mcontext->__ss.__eip
 *   i386 / x86_64       uc_mcontext.gregs[REG_EIP]
 *   ia64                uc_mcontext.sc_ip
 * segvHandler() treats a NULL result as "not available".
 *
 * NOTE(review): several #if/#else/#endif lines of this ladder (including
 * the final fallback) are not visible in this excerpt.
 * NOTE(review): the generic x86 branch uses REG_EIP for 64-bit builds too;
 * confirm that constant exists on x86_64 targets (glibc names the 64-bit
 * register REG_RIP). */
10089 static void *getMcontextEip(ucontext_t
*uc
) {
10090 #if defined(__FreeBSD__)
10091 return (void*) uc
->uc_mcontext
.mc_eip
;
10092 #elif defined(__dietlibc__)
10093 return (void*) uc
->uc_mcontext
.eip
;
10094 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
10096 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10098 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10100 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
10101 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
10102 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
10104 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
10106 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
10107 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
10108 #elif defined(__ia64__) /* Linux IA64 */
10109 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Signal handler installed by setupSigSegvAction() through sa_sigaction.
 *
 * sig:    number of the delivered signal (logged).
 * info:   unused (see REDIS_NOTUSED).
 * secret: the third sa_sigaction argument, used here as a ucontext_t*.
 *
 * Logs a banner with the Redis version and signal number, the text returned
 * by genRedisInfoString(), and then one line per captured stack frame. For
 * each frame the name from findFuncName() is preferred; the raw
 * backtrace_symbols() text is logged instead when no name was found, or
 * when the number following '+' in that text is smaller than the offset
 * findFuncName() reported.
 *
 * NOTE(review): the declarations of `trace` and `infostring` and the end of
 * the handler are not visible in this excerpt.
 * NOTE(review): the last log call prints (unsigned int)offset with "%d";
 * "%u" would match the argument type. */
10115 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
10117 char **messages
= NULL
;
10118 int i
, trace_size
= 0;
10119 unsigned long offset
=0;
10120 ucontext_t
*uc
= (ucontext_t
*) secret
;
10122 REDIS_NOTUSED(info
);
10124 redisLog(REDIS_WARNING
,
10125 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
10126 infostring
= genRedisInfoString();
10127 redisLog(REDIS_WARNING
, "%s",infostring
);
10128 /* It's not safe to sdsfree() the returned string under memory
10129 * corruption conditions. Let it leak as we are going to abort */
/* Capture at most 100 frames. */
10131 trace_size
= backtrace(trace
, 100);
10132 /* overwrite sigaction with caller's address */
10133 if (getMcontextEip(uc
) != NULL
) {
10134 trace
[1] = getMcontextEip(uc
);
10136 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is skipped: the loop starts at index 1. */
10138 for (i
=1; i
<trace_size
; ++i
) {
10139 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
10141 p
= strchr(messages
[i
],'+');
10142 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10143 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
10145 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10148 /* free(messages); Don't call free() with possibly corrupted memory. */
10152 static void setupSigSegvAction(void) {
10153 struct sigaction act
;
10155 sigemptyset (&act
.sa_mask
);
10156 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10157 * is used. Otherwise, sa_handler is used */
10158 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10159 act
.sa_sigaction
= segvHandler
;
10160 sigaction (SIGSEGV
, &act
, NULL
);
10161 sigaction (SIGBUS
, &act
, NULL
);
10162 sigaction (SIGFPE
, &act
, NULL
);
10163 sigaction (SIGILL
, &act
, NULL
);
10164 sigaction (SIGBUS
, &act
, NULL
);
10168 #include "staticsymbols.h"
10169 /* This function tries to convert a pointer into a function name. It's used in
10170 * order to provide a backtrace under segmentation fault that's able to
10171 * display functions declared as static (otherwise the backtrace is useless).
 *
 * pointer: address to resolve.
 * offset:  out parameter; segvHandler() reads it after the call as the
 *          distance from the start of the matched symbol.
 *
 * symsTable is scanned until an entry whose `pointer` field is zero. Only
 * entries whose address is not above `pointer` are considered, and an
 * entry is taken when nothing was selected yet (ret < 0) or its distance
 * `off` is smaller than `minoff`. Returns the selected entry's name, or NULL
 * when ret is still -1.
 * NOTE(review): the declarations of i/ret, the statements that record the
 * best match and the store through `offset` are not visible in this
 * excerpt. */
10172 static char *findFuncName(void *pointer
, unsigned long *offset
){
10174 unsigned long off
, minoff
= 0;
10176 /* Try to match against the Symbol with the smallest offset */
10177 for (i
=0; symsTable
[i
].pointer
; i
++) {
10178 unsigned long lp
= (unsigned long) pointer
;
/* An address of (unsigned long)-1 is never matched. */
10180 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10181 off
=lp
-symsTable
[i
].pointer
;
10182 if (ret
< 0 || off
< minoff
) {
10188 if (ret
== -1) return NULL
;
10190 return symsTable
[ret
].name
;
10192 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined (this is the #else branch of that conditional).
 * NOTE(review): its body is not visible in this excerpt. */
10193 static void setupSigSegvAction(void) {
10195 #endif /* HAVE_BACKTRACE */