2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as an hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specify the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte, most DB keys, and many
152 * values, will fit inside. */
153 #define REDIS_RDB_6BITLEN 0
154 #define REDIS_RDB_14BITLEN 1
155 #define REDIS_RDB_32BITLEN 2
156 #define REDIS_RDB_ENCVAL 3
157 #define REDIS_RDB_LENERR UINT_MAX
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so that an
 * otherwise unused variable or parameter does not trigger a compiler
 * warning. The argument is fully parenthesized so that a compound
 * expression such as REDIS_NOTUSED(a + b) is cast to void as a whole
 * instead of expanding to the ill-formed (void)a + b. */
#define REDIS_NOTUSED(V) ((void)(V))
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
238 /* We can print the stacktrace, so our assert is defined this way: */
/* redisAssert(_e) is an expression: it yields (void)0 when _e is true.
 * When _e is false it calls _redisAssert() with the stringified expression
 * (#_e), __FILE__ and __LINE__, and then terminates through _exit(1). */
239 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Failure handler invoked by redisAssert(). It receives the text of the
 * failed expression plus the source file and line of the assertion. Being
 * static, its definition must live in this translation unit (not visible
 * in this part of the file). */
240 static void _redisAssert(char *estr
, char *file
, int line
);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at witch the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last save succeeede */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during oppend only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to keep this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
/* Function type for command procedures: takes the client as its only
 * argument and returns nothing. struct redisCommand stores pointers of
 * this type in its 'proc' and 'vm_preload_proc' members. */
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* THe last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 #define REDIS_SHARED_INTEGERS 10000
509 struct sharedObjectsStruct
{
510 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
511 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
512 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
513 *outofrangeerr
, *plus
,
514 *select0
, *select1
, *select2
, *select3
, *select4
,
515 *select5
, *select6
, *select7
, *select8
, *select9
,
516 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
517 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
520 /* Global vars that are actually used as constants. The following double
521 * values are used for double on-disk serialization, and are initialized
522 * at runtime to avoid strange compiler optimizations. */
524 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
526 /* VM threaded I/O request message */
527 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
528 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
529 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
530 typedef struct iojob
{
531 int type
; /* Request type, REDIS_IOJOB_* */
532 redisDb
*db
;/* Redis database */
533 robj
*key
; /* This I/O request is about swapping this key */
534 robj
*val
; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
535 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
536 off_t page
; /* Swap page where to read/write the object */
537 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
538 int canceled
; /* True if this command was canceled by blocking side of VM */
539 pthread_t thread
; /* ID of the thread processing this entry */
542 /*================================ Prototypes =============================== */
544 static void freeStringObject(robj
*o
);
545 static void freeListObject(robj
*o
);
546 static void freeSetObject(robj
*o
);
547 static void decrRefCount(void *o
);
548 static robj
*createObject(int type
, void *ptr
);
549 static void freeClient(redisClient
*c
);
550 static int rdbLoad(char *filename
);
551 static void addReply(redisClient
*c
, robj
*obj
);
552 static void addReplySds(redisClient
*c
, sds s
);
553 static void incrRefCount(robj
*o
);
554 static int rdbSaveBackground(char *filename
);
555 static robj
*createStringObject(char *ptr
, size_t len
);
556 static robj
*dupStringObject(robj
*o
);
557 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
558 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
559 static int syncWithMaster(void);
560 static robj
*tryObjectEncoding(robj
*o
);
561 static robj
*getDecodedObject(robj
*o
);
562 static int removeExpire(redisDb
*db
, robj
*key
);
563 static int expireIfNeeded(redisDb
*db
, robj
*key
);
564 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
565 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
566 static int deleteKey(redisDb
*db
, robj
*key
);
567 static time_t getExpire(redisDb
*db
, robj
*key
);
568 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
569 static void updateSlavesWaitingBgsave(int bgsaveerr
);
570 static void freeMemoryIfNeeded(void);
571 static int processCommand(redisClient
*c
);
572 static void setupSigSegvAction(void);
573 static void rdbRemoveTempFile(pid_t childpid
);
574 static void aofRemoveTempFile(pid_t childpid
);
575 static size_t stringObjectLen(robj
*o
);
576 static void processInputBuffer(redisClient
*c
);
577 static zskiplist
*zslCreate(void);
578 static void zslFree(zskiplist
*zsl
);
579 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
580 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
581 static void initClientMultiState(redisClient
*c
);
582 static void freeClientMultiState(redisClient
*c
);
583 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
584 static void unblockClientWaitingData(redisClient
*c
);
585 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
586 static void vmInit(void);
587 static void vmMarkPagesFree(off_t page
, off_t count
);
588 static robj
*vmLoadObject(robj
*key
);
589 static robj
*vmPreviewObject(robj
*key
);
590 static int vmSwapOneObjectBlocking(void);
591 static int vmSwapOneObjectThreaded(void);
592 static int vmCanSwapOut(void);
593 static int tryFreeOneObjectFromFreelist(void);
594 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
596 static void vmCancelThreadedIOJob(robj
*o
);
597 static void lockThreadedIO(void);
598 static void unlockThreadedIO(void);
599 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
600 static void freeIOJob(iojob
*j
);
601 static void queueIOJob(iojob
*j
);
602 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
603 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
604 static void waitEmptyIOJobsQueue(void);
605 static void vmReopenSwapFile(void);
606 static int vmFreePage(off_t page
);
607 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
608 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
609 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
610 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
611 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
612 static struct redisCommand
*lookupCommand(char *name
);
613 static void call(redisClient
*c
, struct redisCommand
*cmd
);
614 static void resetClient(redisClient
*c
);
615 static void convertToRealHash(robj
*o
);
616 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
617 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
618 static void freePubsubPattern(void *p
);
619 static int listMatchPubsubPattern(void *a
, void *b
);
620 static int compareStringObjects(robj
*a
, robj
*b
);
623 static void authCommand(redisClient
*c
);
624 static void pingCommand(redisClient
*c
);
625 static void echoCommand(redisClient
*c
);
626 static void setCommand(redisClient
*c
);
627 static void setnxCommand(redisClient
*c
);
628 static void getCommand(redisClient
*c
);
629 static void delCommand(redisClient
*c
);
630 static void existsCommand(redisClient
*c
);
631 static void incrCommand(redisClient
*c
);
632 static void decrCommand(redisClient
*c
);
633 static void incrbyCommand(redisClient
*c
);
634 static void decrbyCommand(redisClient
*c
);
635 static void selectCommand(redisClient
*c
);
636 static void randomkeyCommand(redisClient
*c
);
637 static void keysCommand(redisClient
*c
);
638 static void dbsizeCommand(redisClient
*c
);
639 static void lastsaveCommand(redisClient
*c
);
640 static void saveCommand(redisClient
*c
);
641 static void bgsaveCommand(redisClient
*c
);
642 static void bgrewriteaofCommand(redisClient
*c
);
643 static void shutdownCommand(redisClient
*c
);
644 static void moveCommand(redisClient
*c
);
645 static void renameCommand(redisClient
*c
);
646 static void renamenxCommand(redisClient
*c
);
647 static void lpushCommand(redisClient
*c
);
648 static void rpushCommand(redisClient
*c
);
649 static void lpopCommand(redisClient
*c
);
650 static void rpopCommand(redisClient
*c
);
651 static void llenCommand(redisClient
*c
);
652 static void lindexCommand(redisClient
*c
);
653 static void lrangeCommand(redisClient
*c
);
654 static void ltrimCommand(redisClient
*c
);
655 static void typeCommand(redisClient
*c
);
656 static void lsetCommand(redisClient
*c
);
657 static void saddCommand(redisClient
*c
);
658 static void sremCommand(redisClient
*c
);
659 static void smoveCommand(redisClient
*c
);
660 static void sismemberCommand(redisClient
*c
);
661 static void scardCommand(redisClient
*c
);
662 static void spopCommand(redisClient
*c
);
663 static void srandmemberCommand(redisClient
*c
);
664 static void sinterCommand(redisClient
*c
);
665 static void sinterstoreCommand(redisClient
*c
);
666 static void sunionCommand(redisClient
*c
);
667 static void sunionstoreCommand(redisClient
*c
);
668 static void sdiffCommand(redisClient
*c
);
669 static void sdiffstoreCommand(redisClient
*c
);
670 static void syncCommand(redisClient
*c
);
671 static void flushdbCommand(redisClient
*c
);
672 static void flushallCommand(redisClient
*c
);
673 static void sortCommand(redisClient
*c
);
674 static void lremCommand(redisClient
*c
);
675 static void rpoplpushcommand(redisClient
*c
);
676 static void infoCommand(redisClient
*c
);
677 static void mgetCommand(redisClient
*c
);
678 static void monitorCommand(redisClient
*c
);
679 static void expireCommand(redisClient
*c
);
680 static void expireatCommand(redisClient
*c
);
681 static void getsetCommand(redisClient
*c
);
682 static void ttlCommand(redisClient
*c
);
683 static void slaveofCommand(redisClient
*c
);
684 static void debugCommand(redisClient
*c
);
685 static void msetCommand(redisClient
*c
);
686 static void msetnxCommand(redisClient
*c
);
687 static void zaddCommand(redisClient
*c
);
688 static void zincrbyCommand(redisClient
*c
);
689 static void zrangeCommand(redisClient
*c
);
690 static void zrangebyscoreCommand(redisClient
*c
);
691 static void zcountCommand(redisClient
*c
);
692 static void zrevrangeCommand(redisClient
*c
);
693 static void zcardCommand(redisClient
*c
);
694 static void zremCommand(redisClient
*c
);
695 static void zscoreCommand(redisClient
*c
);
696 static void zremrangebyscoreCommand(redisClient
*c
);
697 static void multiCommand(redisClient
*c
);
698 static void execCommand(redisClient
*c
);
699 static void discardCommand(redisClient
*c
);
700 static void blpopCommand(redisClient
*c
);
701 static void brpopCommand(redisClient
*c
);
702 static void appendCommand(redisClient
*c
);
703 static void substrCommand(redisClient
*c
);
704 static void zrankCommand(redisClient
*c
);
705 static void zrevrankCommand(redisClient
*c
);
706 static void hsetCommand(redisClient
*c
);
707 static void hgetCommand(redisClient
*c
);
708 static void hmsetCommand(redisClient
*c
);
709 static void hmgetCommand(redisClient
*c
);
710 static void hdelCommand(redisClient
*c
);
711 static void hlenCommand(redisClient
*c
);
712 static void zremrangebyrankCommand(redisClient
*c
);
713 static void zunionCommand(redisClient
*c
);
714 static void zinterCommand(redisClient
*c
);
715 static void hkeysCommand(redisClient
*c
);
716 static void hvalsCommand(redisClient
*c
);
717 static void hgetallCommand(redisClient
*c
);
718 static void hexistsCommand(redisClient
*c
);
719 static void configCommand(redisClient
*c
);
720 static void hincrbyCommand(redisClient
*c
);
721 static void subscribeCommand(redisClient
*c
);
722 static void unsubscribeCommand(redisClient
*c
);
723 static void psubscribeCommand(redisClient
*c
);
724 static void punsubscribeCommand(redisClient
*c
);
725 static void publishCommand(redisClient
*c
);
727 /*================================= Globals ================================= */
730 static struct redisServer server
; /* server global state */
731 static struct redisCommand cmdTable
[] = {
732 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
733 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
734 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
735 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
736 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
738 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
739 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
740 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
742 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
743 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
751 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
754 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
755 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
758 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
759 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
761 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
763 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
764 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
765 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
766 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
767 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
768 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
769 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
772 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
775 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
776 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
781 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
785 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
788 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
789 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
792 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
794 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
795 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
799 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
800 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
812 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
820 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
828 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
831 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
836 {NULL
,NULL
,0,0,NULL
,0,0,0}
839 /*============================ Utility functions ============================ */
841 /* Glob-style pattern matching. */
842 static int stringmatchlen(const char *pattern
, int patternLen
,
843 const char *string
, int stringLen
, int nocase
)
848 while (pattern
[1] == '*') {
853 return 1; /* match */
855 if (stringmatchlen(pattern
+1, patternLen
-1,
856 string
, stringLen
, nocase
))
857 return 1; /* match */
861 return 0; /* no match */
865 return 0; /* no match */
875 not = pattern
[0] == '^';
882 if (pattern
[0] == '\\') {
885 if (pattern
[0] == string
[0])
887 } else if (pattern
[0] == ']') {
889 } else if (patternLen
== 0) {
893 } else if (pattern
[1] == '-' && patternLen
>= 3) {
894 int start
= pattern
[0];
895 int end
= pattern
[2];
903 start
= tolower(start
);
909 if (c
>= start
&& c
<= end
)
913 if (pattern
[0] == string
[0])
916 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
926 return 0; /* no match */
932 if (patternLen
>= 2) {
939 if (pattern
[0] != string
[0])
940 return 0; /* no match */
942 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
943 return 0; /* no match */
951 if (stringLen
== 0) {
952 while(*pattern
== '*') {
959 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs:
 * the lengths of 'pattern' and 'string' are computed with strlen() and the
 * 'nocase' flag is forwarded unchanged. Returns whatever stringmatchlen()
 * returns. */
964 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
965 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
968 static void redisLog(int level
, const char *fmt
, ...) {
972 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
976 if (level
>= server
.verbosity
) {
982 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
983 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
984 vfprintf(fp
, fmt
, ap
);
990 if (server
.logfile
) fclose(fp
);
993 /*====================== Hash table type implementation ==================== */
995 /* This is a hash table type that uses the SDS dynamic strings library as
996 * keys and redis objects as values (objects can hold SDS strings,
999 static void dictVanillaFree(void *privdata
, void *val
)
1001 DICT_NOTUSED(privdata
);
/* Value destructor for dictionaries whose values are lists: 'val' is cast
 * to a list pointer and handed to listRelease(). 'privdata' is not used. */
1005 static void dictListDestructor(void *privdata
, void *val
)
1007 DICT_NOTUSED(privdata
);
1008 listRelease((list
*)val
);
/* Key comparison callback for keys that are SDS strings.
 * Returns 1 when the two keys have the same sdslen() and identical bytes
 * (memcmp over that length), 0 otherwise. 'privdata' is not used. */
1011 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1015 DICT_NOTUSED(privdata
);
1017 l1
= sdslen((sds
)key1
);
1018 l2
= sdslen((sds
)key2
);
/* Different lengths can never be equal: skip the byte comparison. */
1019 if (l1
!= l2
) return 0;
1020 return memcmp(key1
, key2
, l1
) == 0;
1023 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1025 DICT_NOTUSED(privdata
);
1027 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
/* Key comparison callback for keys that are robj pointers: the comparison
 * is delegated to sdsDictKeyCompare() on the ->ptr field of both objects,
 * so both ->ptr values are treated as SDS strings here. */
1031 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1034 const robj
*o1
= key1
, *o2
= key2
;
1035 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for keys that are robj pointers: hashes the bytes of the
 * object's ->ptr (read as an SDS string, length taken with sdslen()) using
 * dictGenHashFunction(). */
1038 static unsigned int dictObjHash(const void *key
) {
1039 const robj
*o
= key
;
1040 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1043 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1046 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1049 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1050 o2
->encoding
== REDIS_ENCODING_INT
&&
1051 o1
->ptr
== o2
->ptr
) return 1;
1053 o1
= getDecodedObject(o1
);
1054 o2
= getDecodedObject(o2
);
1055 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1061 static unsigned int dictEncObjHash(const void *key
) {
1062 robj
*o
= (robj
*) key
;
1064 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1065 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1067 if (o
->encoding
== REDIS_ENCODING_INT
) {
1071 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1072 return dictGenHashFunction((unsigned char*)buf
, len
);
1076 o
= getDecodedObject(o
);
1077 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1084 /* Sets type and expires */
1085 static dictType setDictType
= {
1086 dictEncObjHash
, /* hash function */
1089 dictEncObjKeyCompare
, /* key compare */
1090 dictRedisObjectDestructor
, /* key destructor */
1091 NULL
/* val destructor */
1094 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1095 static dictType zsetDictType
= {
1096 dictEncObjHash
, /* hash function */
1099 dictEncObjKeyCompare
, /* key compare */
1100 dictRedisObjectDestructor
, /* key destructor */
1101 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1105 static dictType dbDictType
= {
1106 dictObjHash
, /* hash function */
1109 dictObjKeyCompare
, /* key compare */
1110 dictRedisObjectDestructor
, /* key destructor */
1111 dictRedisObjectDestructor
/* val destructor */
1115 static dictType keyptrDictType
= {
1116 dictObjHash
, /* hash function */
1119 dictObjKeyCompare
, /* key compare */
1120 dictRedisObjectDestructor
, /* key destructor */
1121 NULL
/* val destructor */
1124 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1125 static dictType hashDictType
= {
1126 dictEncObjHash
, /* hash function */
1129 dictEncObjKeyCompare
, /* key compare */
1130 dictRedisObjectDestructor
, /* key destructor */
1131 dictRedisObjectDestructor
/* val destructor */
1134 /* Keylist hash table type has unencoded redis objects as keys and
1135 * lists as values. It's used for blocking operations (BLPOP) and to
1136 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1137 static dictType keylistDictType
= {
1138 dictObjHash
, /* hash function */
1141 dictObjKeyCompare
, /* key compare */
1142 dictRedisObjectDestructor
, /* key destructor */
1143 dictListDestructor
/* val destructor */
1146 static void version();
1148 /* ========================= Random utility functions ======================= */
1150 /* Redis generally does not try to recover from out of memory conditions
1151 * when allocating objects or strings, it is not clear if it will be possible
1152 * to report this condition to the client since the networking layer itself
1153 * is based on heap allocation for send buffers, so we simply abort.
1154 * At least the code will be simpler to read... */
1155 static void oom(const char *msg
) {
1156 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1161 /* ====================== Redis server networking stuff ===================== */
1162 static void closeTimedoutClients(void) {
1165 time_t now
= time(NULL
);
1168 listRewind(server
.clients
,&li
);
1169 while ((ln
= listNext(&li
)) != NULL
) {
1170 c
= listNodeValue(ln
);
1171 if (server
.maxidletime
&&
1172 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1173 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1174 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1175 listLength(c
->pubsub_patterns
) == 0 &&
1176 (now
- c
->lastinteraction
> server
.maxidletime
))
1178 redisLog(REDIS_VERBOSE
,"Closing idle client");
1180 } else if (c
->flags
& REDIS_BLOCKED
) {
1181 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1182 addReply(c
,shared
.nullmultibulk
);
1183 unblockClientWaitingData(c
);
/* Returns non-zero when 'dict' is sparse enough to be worth shrinking:
 * it must have at least one slot and one entry, more slots than
 * DICT_HT_INITIAL_SIZE, and a fill percentage (used*100/size) strictly
 * below REDIS_HT_MINFILL. Returns 0 otherwise. */
1189 static int htNeedsResize(dict
*dict
) {
1190 long long size
, used
;
1192 size
= dictSlots(dict
);
1193 used
= dictSize(dict
);
/* 'size' is tested for non-zero first, so the division below is only
 * evaluated when size != 0. */
1194 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1195 (used
*100/size
< REDIS_HT_MINFILL
));
1198 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1199 * we resize the hash table to save memory */
1200 static void tryResizeHashTables(void) {
1203 for (j
= 0; j
< server
.dbnum
; j
++) {
1204 if (htNeedsResize(server
.db
[j
].dict
)) {
1205 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1206 dictResize(server
.db
[j
].dict
);
1207 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1209 if (htNeedsResize(server
.db
[j
].expires
))
1210 dictResize(server
.db
[j
].expires
);
1214 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1215 void backgroundSaveDoneHandler(int statloc
) {
1216 int exitcode
= WEXITSTATUS(statloc
);
1217 int bysignal
= WIFSIGNALED(statloc
);
1219 if (!bysignal
&& exitcode
== 0) {
1220 redisLog(REDIS_NOTICE
,
1221 "Background saving terminated with success");
1223 server
.lastsave
= time(NULL
);
1224 } else if (!bysignal
&& exitcode
!= 0) {
1225 redisLog(REDIS_WARNING
, "Background saving error");
1227 redisLog(REDIS_WARNING
,
1228 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1229 rdbRemoveTempFile(server
.bgsavechildpid
);
1231 server
.bgsavechildpid
= -1;
1232 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1233 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1234 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1237 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1239 void backgroundRewriteDoneHandler(int statloc
) {
1240 int exitcode
= WEXITSTATUS(statloc
);
1241 int bysignal
= WIFSIGNALED(statloc
);
1243 if (!bysignal
&& exitcode
== 0) {
1247 redisLog(REDIS_NOTICE
,
1248 "Background append only file rewriting terminated with success");
1249 /* Now it's time to flush the differences accumulated by the parent */
1250 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1251 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1253 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1256 /* Flush our data... */
1257 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1258 (signed) sdslen(server
.bgrewritebuf
)) {
1259 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1263 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1264 /* Now our work is to rename the temp file into the stable file. And
1265 * switch the file descriptor used by the server for append only. */
1266 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1267 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1271 /* Mission completed... almost */
1272 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1273 if (server
.appendfd
!= -1) {
1274 /* If append only is actually enabled... */
1275 close(server
.appendfd
);
1276 server
.appendfd
= fd
;
1278 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1279 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1281 /* If append only is disabled we just generate a dump in this
1282 * format. Why not? */
1285 } else if (!bysignal
&& exitcode
!= 0) {
1286 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1288 redisLog(REDIS_WARNING
,
1289 "Background append only file rewriting terminated by signal %d",
1293 sdsfree(server
.bgrewritebuf
);
1294 server
.bgrewritebuf
= sdsempty();
1295 aofRemoveTempFile(server
.bgrewritechildpid
);
1296 server
.bgrewritechildpid
= -1;
1299 /* This function is called once a background process of some kind terminates,
1300 * as we want to avoid resizing the hash tables when there is a child in order
1301 * to play well with copy-on-write (otherwise when a resize happens lots of
1302 * memory pages are copied). The goal of this function is to update the ability
1303 * for dict.c to resize the hash tables according to whether or not we have
1304 * running children. */
1305 static void updateDictResizePolicy(void) {
1306 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1309 dictDisableResize();
1312 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1313 int j
, loops
= server
.cronloops
++;
1314 REDIS_NOTUSED(eventLoop
);
1316 REDIS_NOTUSED(clientData
);
1318 /* We take a cached value of the unix time in the global state because
1319 * with virtual memory and aging there is the need to store the current time
1320 * in objects at every object access, and accuracy is not needed.
1321 * To access a global var is faster than calling time(NULL) */
1322 server
.unixtime
= time(NULL
);
1324 /* Show some info about non-empty databases */
1325 for (j
= 0; j
< server
.dbnum
; j
++) {
1326 long long size
, used
, vkeys
;
1328 size
= dictSlots(server
.db
[j
].dict
);
1329 used
= dictSize(server
.db
[j
].dict
);
1330 vkeys
= dictSize(server
.db
[j
].expires
);
1331 if (!(loops
% 50) && (used
|| vkeys
)) {
1332 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1333 /* dictPrintStats(server.dict); */
1337 /* We don't want to resize the hash tables while a background saving
1338 * is in progress: the saving child is created using fork() that is
1339 * implemented with a copy-on-write semantic in most modern systems, so
1340 * if we resize the HT while there is the saving child at work actually
1341 * a lot of memory movements in the parent will cause a lot of pages
1343 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1346 tryResizeHashTables();
1349 /* Show information about connected clients */
1350 if (!(loops
% 50)) {
1351 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1352 listLength(server
.clients
)-listLength(server
.slaves
),
1353 listLength(server
.slaves
),
1354 zmalloc_used_memory());
1357 /* Close connections of timedout clients */
1358 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1359 closeTimedoutClients();
1361 /* Check if a background saving or AOF rewrite in progress terminated */
1362 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1366 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1367 if (pid
== server
.bgsavechildpid
) {
1368 backgroundSaveDoneHandler(statloc
);
1370 backgroundRewriteDoneHandler(statloc
);
1372 updateDictResizePolicy();
1375 /* If there is not a background saving in progress check if
1376 * we have to save now */
1377 time_t now
= time(NULL
);
1378 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1379 struct saveparam
*sp
= server
.saveparams
+j
;
1381 if (server
.dirty
>= sp
->changes
&&
1382 now
-server
.lastsave
> sp
->seconds
) {
1383 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1384 sp
->changes
, sp
->seconds
);
1385 rdbSaveBackground(server
.dbfilename
);
1391 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1392 * will use few CPU cycles if there are few expiring keys, otherwise
1393 * it will get more aggressive to avoid that too much memory is used by
1394 * keys that can be removed from the keyspace. */
1395 for (j
= 0; j
< server
.dbnum
; j
++) {
1397 redisDb
*db
= server
.db
+j
;
1399 /* Continue to expire if at the end of the cycle more than 25%
1400 * of the keys were expired. */
1402 long num
= dictSize(db
->expires
);
1403 time_t now
= time(NULL
);
1406 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1407 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1412 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1413 t
= (time_t) dictGetEntryVal(de
);
1415 deleteKey(db
,dictGetEntryKey(de
));
1417 server
.stat_expiredkeys
++;
1420 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1423 /* Swap a few keys on disk if we are over the memory limit and VM
1424 * is enabled. Try to free objects from the free list first. */
1425 if (vmCanSwapOut()) {
1426 while (server
.vm_enabled
&& zmalloc_used_memory() >
1427 server
.vm_max_memory
)
1431 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1432 retval
= (server
.vm_max_threads
== 0) ?
1433 vmSwapOneObjectBlocking() :
1434 vmSwapOneObjectThreaded();
1435 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1436 zmalloc_used_memory() >
1437 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1439 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1441 /* Note that when using threaded I/O we free just one object,
1442 * because anyway when the I/O thread in charge of swapping this
1443 * object out finishes, the handler of completed jobs
1444 * will try to swap more objects if we are still out of memory. */
1445 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1449 /* Check if we should connect to a MASTER */
1450 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1451 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1452 if (syncWithMaster() == REDIS_OK
) {
1453 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1459 /* This function gets called every time Redis is entering the
1460 * main loop of the event driven library, that is, before sleeping
1461 * while waiting for ready file descriptors. */
1462 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1463 REDIS_NOTUSED(eventLoop
);
1465 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1469 listRewind(server
.io_ready_clients
,&li
);
1470 while((ln
= listNext(&li
))) {
1471 redisClient
*c
= ln
->value
;
1472 struct redisCommand
*cmd
;
1474 /* Resume the client. */
1475 listDelNode(server
.io_ready_clients
,ln
);
1476 c
->flags
&= (~REDIS_IO_WAIT
);
1477 server
.vm_blocked_clients
--;
1478 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1479 readQueryFromClient
, c
);
1480 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1481 assert(cmd
!= NULL
);
1484 /* There may be more data to process in the input buffer. */
1485 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1486 processInputBuffer(c
);
1491 static void createSharedObjects(void) {
1494 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1495 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1496 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1497 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1498 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1499 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1500 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1501 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1502 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1503 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1504 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1505 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1506 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1507 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1508 "-ERR no such key\r\n"));
1509 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1510 "-ERR syntax error\r\n"));
1511 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1512 "-ERR source and destination objects are the same\r\n"));
1513 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1514 "-ERR index out of range\r\n"));
1515 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1516 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1517 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1518 shared
.select0
= createStringObject("select 0\r\n",10);
1519 shared
.select1
= createStringObject("select 1\r\n",10);
1520 shared
.select2
= createStringObject("select 2\r\n",10);
1521 shared
.select3
= createStringObject("select 3\r\n",10);
1522 shared
.select4
= createStringObject("select 4\r\n",10);
1523 shared
.select5
= createStringObject("select 5\r\n",10);
1524 shared
.select6
= createStringObject("select 6\r\n",10);
1525 shared
.select7
= createStringObject("select 7\r\n",10);
1526 shared
.select8
= createStringObject("select 8\r\n",10);
1527 shared
.select9
= createStringObject("select 9\r\n",10);
1528 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1529 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1530 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1531 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1532 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1533 shared
.mbulk3
= createStringObject("*3\r\n",4);
1534 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1535 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1536 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Appends one (seconds, changes) entry to the server.saveparams array.
 * The array is grown by one element with zrealloc(), the new last slot is
 * filled with the given values, and server.saveparamslen is incremented. */
1540 static void appendServerSaveParams(time_t seconds
, int changes
) {
1541 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1542 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1543 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1544 server
.saveparamslen
++;
/* Discards every configured save point: passes server.saveparams to
 * zfree(), then resets the pointer to NULL and the element count to 0 so
 * that appendServerSaveParams() starts again from an empty array. */
1547 static void resetServerSaveParams() {
1548 zfree(server
.saveparams
);
1549 server
.saveparams
= NULL
;
1550 server
.saveparamslen
= 0;
1553 static void initServerConfig() {
1554 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1555 server
.port
= REDIS_SERVERPORT
;
1556 server
.verbosity
= REDIS_VERBOSE
;
1557 server
.maxidletime
= REDIS_MAXIDLETIME
;
1558 server
.saveparams
= NULL
;
1559 server
.logfile
= NULL
; /* NULL = log on standard output */
1560 server
.bindaddr
= NULL
;
1561 server
.glueoutputbuf
= 1;
1562 server
.daemonize
= 0;
1563 server
.appendonly
= 0;
1564 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1565 server
.lastfsync
= time(NULL
);
1566 server
.appendfd
= -1;
1567 server
.appendseldb
= -1; /* Make sure the first time will not match */
1568 server
.pidfile
= zstrdup("/var/run/redis.pid");
1569 server
.dbfilename
= zstrdup("dump.rdb");
1570 server
.appendfilename
= zstrdup("appendonly.aof");
1571 server
.requirepass
= NULL
;
1572 server
.shareobjects
= 0;
1573 server
.rdbcompression
= 1;
1574 server
.maxclients
= 0;
1575 server
.blpop_blocked_clients
= 0;
1576 server
.maxmemory
= 0;
1577 server
.vm_enabled
= 0;
1578 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1579 server
.vm_page_size
= 256; /* 256 bytes per page */
1580 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1581 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1582 server
.vm_max_threads
= 4;
1583 server
.vm_blocked_clients
= 0;
1584 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1585 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1587 resetServerSaveParams();
1589 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1590 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1591 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1592 /* Replication related */
1594 server
.masterauth
= NULL
;
1595 server
.masterhost
= NULL
;
1596 server
.masterport
= 6379;
1597 server
.master
= NULL
;
1598 server
.replstate
= REDIS_REPL_NONE
;
1600 /* Double constants initialization */
1602 R_PosInf
= 1.0/R_Zero
;
1603 R_NegInf
= -1.0/R_Zero
;
1604 R_Nan
= R_Zero
/R_Zero
;
1607 static void initServer() {
1610 signal(SIGHUP
, SIG_IGN
);
1611 signal(SIGPIPE
, SIG_IGN
);
1612 setupSigSegvAction();
1614 server
.devnull
= fopen("/dev/null","w");
1615 if (server
.devnull
== NULL
) {
1616 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1619 server
.clients
= listCreate();
1620 server
.slaves
= listCreate();
1621 server
.monitors
= listCreate();
1622 server
.objfreelist
= listCreate();
1623 createSharedObjects();
1624 server
.el
= aeCreateEventLoop();
1625 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1626 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1627 if (server
.fd
== -1) {
1628 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1631 for (j
= 0; j
< server
.dbnum
; j
++) {
1632 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1633 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1634 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1635 if (server
.vm_enabled
)
1636 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1637 server
.db
[j
].id
= j
;
1639 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1640 server
.pubsub_patterns
= listCreate();
1641 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1642 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1643 server
.cronloops
= 0;
1644 server
.bgsavechildpid
= -1;
1645 server
.bgrewritechildpid
= -1;
1646 server
.bgrewritebuf
= sdsempty();
1647 server
.lastsave
= time(NULL
);
1649 server
.stat_numcommands
= 0;
1650 server
.stat_numconnections
= 0;
1651 server
.stat_expiredkeys
= 0;
1652 server
.stat_starttime
= time(NULL
);
1653 server
.unixtime
= time(NULL
);
1654 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1655 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1656 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1658 if (server
.appendonly
) {
1659 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1660 if (server
.appendfd
== -1) {
1661 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1667 if (server
.vm_enabled
) vmInit();
1670 /* Empty the whole database */
1671 static long long emptyDb() {
1673 long long removed
= 0;
1675 for (j
= 0; j
< server
.dbnum
; j
++) {
1676 removed
+= dictSize(server
.db
[j
].dict
);
1677 dictEmpty(server
.db
[j
].dict
);
1678 dictEmpty(server
.db
[j
].expires
);
1683 static int yesnotoi(char *s
) {
1684 if (!strcasecmp(s
,"yes")) return 1;
1685 else if (!strcasecmp(s
,"no")) return 0;
1689 /* I agree, this is a very rudimentary way to load a configuration...
1690 will improve later if the config gets more complex */
1691 static void loadServerConfig(char *filename
) {
1693 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1697 if (filename
[0] == '-' && filename
[1] == '\0')
1700 if ((fp
= fopen(filename
,"r")) == NULL
) {
1701 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1706 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1712 line
= sdstrim(line
," \t\r\n");
1714 /* Skip comments and blank lines*/
1715 if (line
[0] == '#' || line
[0] == '\0') {
1720 /* Split into arguments */
1721 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1722 sdstolower(argv
[0]);
1724 /* Execute config directives */
1725 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1726 server
.maxidletime
= atoi(argv
[1]);
1727 if (server
.maxidletime
< 0) {
1728 err
= "Invalid timeout value"; goto loaderr
;
1730 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1731 server
.port
= atoi(argv
[1]);
1732 if (server
.port
< 1 || server
.port
> 65535) {
1733 err
= "Invalid port"; goto loaderr
;
1735 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1736 server
.bindaddr
= zstrdup(argv
[1]);
1737 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1738 int seconds
= atoi(argv
[1]);
1739 int changes
= atoi(argv
[2]);
1740 if (seconds
< 1 || changes
< 0) {
1741 err
= "Invalid save parameters"; goto loaderr
;
1743 appendServerSaveParams(seconds
,changes
);
1744 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1745 if (chdir(argv
[1]) == -1) {
1746 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1747 argv
[1], strerror(errno
));
1750 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1751 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1752 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1753 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1754 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1756 err
= "Invalid log level. Must be one of debug, notice, warning";
1759 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1762 server
.logfile
= zstrdup(argv
[1]);
1763 if (!strcasecmp(server
.logfile
,"stdout")) {
1764 zfree(server
.logfile
);
1765 server
.logfile
= NULL
;
1767 if (server
.logfile
) {
1768 /* Test if we are able to open the file. The server will not
1769 * be able to abort just for this problem later... */
1770 logfp
= fopen(server
.logfile
,"a");
1771 if (logfp
== NULL
) {
1772 err
= sdscatprintf(sdsempty(),
1773 "Can't open the log file: %s", strerror(errno
));
1778 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1779 server
.dbnum
= atoi(argv
[1]);
1780 if (server
.dbnum
< 1) {
1781 err
= "Invalid number of databases"; goto loaderr
;
1783 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1784 loadServerConfig(argv
[1]);
1785 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1786 server
.maxclients
= atoi(argv
[1]);
1787 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1788 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1789 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1790 server
.masterhost
= sdsnew(argv
[1]);
1791 server
.masterport
= atoi(argv
[2]);
1792 server
.replstate
= REDIS_REPL_CONNECT
;
1793 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1794 server
.masterauth
= zstrdup(argv
[1]);
1795 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1796 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1797 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1799 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1800 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1801 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1803 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1804 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1805 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1807 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1808 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1809 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1811 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1812 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1813 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1815 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1816 if (!strcasecmp(argv
[1],"no")) {
1817 server
.appendfsync
= APPENDFSYNC_NO
;
1818 } else if (!strcasecmp(argv
[1],"always")) {
1819 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1820 } else if (!strcasecmp(argv
[1],"everysec")) {
1821 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1823 err
= "argument must be 'no', 'always' or 'everysec'";
1826 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1827 server
.requirepass
= zstrdup(argv
[1]);
1828 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1829 zfree(server
.pidfile
);
1830 server
.pidfile
= zstrdup(argv
[1]);
1831 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1832 zfree(server
.dbfilename
);
1833 server
.dbfilename
= zstrdup(argv
[1]);
1834 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1835 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1836 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1838 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1839 zfree(server
.vm_swap_file
);
1840 server
.vm_swap_file
= zstrdup(argv
[1]);
1841 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1842 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1843 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1844 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1845 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1846 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1847 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1848 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1849 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1850 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1851 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1852 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1853 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1854 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1856 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1858 for (j
= 0; j
< argc
; j
++)
1863 if (fp
!= stdin
) fclose(fp
);
1867 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1868 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1869 fprintf(stderr
, ">>> '%s'\n", line
);
1870 fprintf(stderr
, "%s\n", err
);
1874 static void freeClientArgv(redisClient
*c
) {
1877 for (j
= 0; j
< c
->argc
; j
++)
1878 decrRefCount(c
->argv
[j
]);
1879 for (j
= 0; j
< c
->mbargc
; j
++)
1880 decrRefCount(c
->mbargv
[j
]);
1885 static void freeClient(redisClient
*c
) {
1888 /* Note that if the client we are freeing is blocked into a blocking
1889 * call, we have to set querybuf to NULL *before* to call
1890 * unblockClientWaitingData() to avoid processInputBuffer() will get
1891 * called. Also it is important to remove the file events after
1892 * this, because this call adds the READABLE event. */
1893 sdsfree(c
->querybuf
);
1895 if (c
->flags
& REDIS_BLOCKED
)
1896 unblockClientWaitingData(c
);
1898 /* Unsubscribe from all the pubsub channels */
1899 pubsubUnsubscribeAllChannels(c
,0);
1900 pubsubUnsubscribeAllPatterns(c
,0);
1901 dictRelease(c
->pubsub_channels
);
1902 listRelease(c
->pubsub_patterns
);
1903 /* Obvious cleanup */
1904 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1905 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1906 listRelease(c
->reply
);
1909 /* Remove from the list of clients */
1910 ln
= listSearchKey(server
.clients
,c
);
1911 redisAssert(ln
!= NULL
);
1912 listDelNode(server
.clients
,ln
);
1913 /* Remove from the list of clients waiting for swapped keys */
1914 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1915 ln
= listSearchKey(server
.io_ready_clients
,c
);
1917 listDelNode(server
.io_ready_clients
,ln
);
1918 server
.vm_blocked_clients
--;
1921 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1922 ln
= listFirst(c
->io_keys
);
1923 dontWaitForSwappedKey(c
,ln
->value
);
1925 listRelease(c
->io_keys
);
1926 /* Master/slave cleanup */
1927 if (c
->flags
& REDIS_SLAVE
) {
1928 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1930 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1931 ln
= listSearchKey(l
,c
);
1932 redisAssert(ln
!= NULL
);
1935 if (c
->flags
& REDIS_MASTER
) {
1936 server
.master
= NULL
;
1937 server
.replstate
= REDIS_REPL_CONNECT
;
1939 /* Release memory */
1942 freeClientMultiState(c
);
1946 #define GLUEREPLY_UP_TO (1024)
1947 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1949 char buf
[GLUEREPLY_UP_TO
];
1954 listRewind(c
->reply
,&li
);
1955 while((ln
= listNext(&li
))) {
1959 objlen
= sdslen(o
->ptr
);
1960 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1961 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1963 listDelNode(c
->reply
,ln
);
1965 if (copylen
== 0) return;
1969 /* Now the output buffer is empty, add the new single element */
1970 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1971 listAddNodeHead(c
->reply
,o
);
1974 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1975 redisClient
*c
= privdata
;
1976 int nwritten
= 0, totwritten
= 0, objlen
;
1979 REDIS_NOTUSED(mask
);
1981 /* Use writev() if we have enough buffers to send */
1982 if (!server
.glueoutputbuf
&&
1983 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1984 !(c
->flags
& REDIS_MASTER
))
1986 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1990 while(listLength(c
->reply
)) {
1991 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1992 glueReplyBuffersIfNeeded(c
);
1994 o
= listNodeValue(listFirst(c
->reply
));
1995 objlen
= sdslen(o
->ptr
);
1998 listDelNode(c
->reply
,listFirst(c
->reply
));
2002 if (c
->flags
& REDIS_MASTER
) {
2003 /* Don't reply to a master */
2004 nwritten
= objlen
- c
->sentlen
;
2006 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2007 if (nwritten
<= 0) break;
2009 c
->sentlen
+= nwritten
;
2010 totwritten
+= nwritten
;
2011 /* If we fully sent the object on head go to the next one */
2012 if (c
->sentlen
== objlen
) {
2013 listDelNode(c
->reply
,listFirst(c
->reply
));
2016 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
2017 * bytes, in a single threaded server it's a good idea to serve
2018 * other clients as well, even if a very large request comes from
2019 * super fast link that is always able to accept data (in real world
2020 * scenario think about 'KEYS *' against the loopback interfae) */
2021 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2023 if (nwritten
== -1) {
2024 if (errno
== EAGAIN
) {
2027 redisLog(REDIS_VERBOSE
,
2028 "Error writing to client: %s", strerror(errno
));
2033 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2034 if (listLength(c
->reply
) == 0) {
2036 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2040 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2042 redisClient
*c
= privdata
;
2043 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2045 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2046 int offset
, ion
= 0;
2048 REDIS_NOTUSED(mask
);
2051 while (listLength(c
->reply
)) {
2052 offset
= c
->sentlen
;
2056 /* fill-in the iov[] array */
2057 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2058 o
= listNodeValue(node
);
2059 objlen
= sdslen(o
->ptr
);
2061 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2064 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2065 break; /* no more iovecs */
2067 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2068 iov
[ion
].iov_len
= objlen
- offset
;
2069 willwrite
+= objlen
- offset
;
2070 offset
= 0; /* just for the first item */
2077 /* write all collected blocks at once */
2078 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2079 if (errno
!= EAGAIN
) {
2080 redisLog(REDIS_VERBOSE
,
2081 "Error writing to client: %s", strerror(errno
));
2088 totwritten
+= nwritten
;
2089 offset
= c
->sentlen
;
2091 /* remove written robjs from c->reply */
2092 while (nwritten
&& listLength(c
->reply
)) {
2093 o
= listNodeValue(listFirst(c
->reply
));
2094 objlen
= sdslen(o
->ptr
);
2096 if(nwritten
>= objlen
- offset
) {
2097 listDelNode(c
->reply
, listFirst(c
->reply
));
2098 nwritten
-= objlen
- offset
;
2102 c
->sentlen
+= nwritten
;
2110 c
->lastinteraction
= time(NULL
);
2112 if (listLength(c
->reply
) == 0) {
2114 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2118 static struct redisCommand
*lookupCommand(char *name
) {
2120 while(cmdTable
[j
].name
!= NULL
) {
2121 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2127 /* resetClient prepare the client to process the next command */
2128 static void resetClient(redisClient
*c
) {
2134 /* Call() is the core of Redis execution of a command */
2135 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2138 dirty
= server
.dirty
;
2140 dirty
= server
.dirty
-dirty
;
2142 if (server
.appendonly
&& dirty
)
2143 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2144 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2145 listLength(server
.slaves
))
2146 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2147 if (listLength(server
.monitors
))
2148 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2149 server
.stat_numcommands
++;
2152 /* If this function gets called we already read a whole
2153 * command, argments are in the client argv/argc fields.
2154 * processCommand() execute the command or prepare the
2155 * server for a bulk read from the client.
2157 * If 1 is returned the client is still alive and valid and
2158 * and other operations can be performed by the caller. Otherwise
2159 * if 0 is returned the client was destroied (i.e. after QUIT). */
2160 static int processCommand(redisClient
*c
) {
2161 struct redisCommand
*cmd
;
2163 /* Free some memory if needed (maxmemory setting) */
2164 if (server
.maxmemory
) freeMemoryIfNeeded();
2166 /* Handle the multi bulk command type. This is an alternative protocol
2167 * supported by Redis in order to receive commands that are composed of
2168 * multiple binary-safe "bulk" arguments. The latency of processing is
2169 * a bit higher but this allows things like multi-sets, so if this
2170 * protocol is used only for MSET and similar commands this is a big win. */
2171 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2172 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2173 if (c
->multibulk
<= 0) {
2177 decrRefCount(c
->argv
[c
->argc
-1]);
2181 } else if (c
->multibulk
) {
2182 if (c
->bulklen
== -1) {
2183 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2184 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2188 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2189 decrRefCount(c
->argv
[0]);
2190 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2192 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2197 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2201 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2202 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2206 if (c
->multibulk
== 0) {
2210 /* Here we need to swap the multi-bulk argc/argv with the
2211 * normal argc/argv of the client structure. */
2213 c
->argv
= c
->mbargv
;
2214 c
->mbargv
= auxargv
;
2217 c
->argc
= c
->mbargc
;
2218 c
->mbargc
= auxargc
;
2220 /* We need to set bulklen to something different than -1
2221 * in order for the code below to process the command without
2222 * to try to read the last argument of a bulk command as
2223 * a special argument. */
2225 /* continue below and process the command */
2232 /* -- end of multi bulk commands processing -- */
2234 /* The QUIT command is handled as a special case. Normal command
2235 * procs are unable to close the client connection safely */
2236 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2241 /* Now lookup the command and check ASAP about trivial error conditions
2242 * such wrong arity, bad command name and so forth. */
2243 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2246 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2247 (char*)c
->argv
[0]->ptr
));
2250 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2251 (c
->argc
< -cmd
->arity
)) {
2253 sdscatprintf(sdsempty(),
2254 "-ERR wrong number of arguments for '%s' command\r\n",
2258 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2259 /* This is a bulk command, we have to read the last argument yet. */
2260 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2262 decrRefCount(c
->argv
[c
->argc
-1]);
2263 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2265 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2270 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2271 /* It is possible that the bulk read is already in the
2272 * buffer. Check this condition and handle it accordingly.
2273 * This is just a fast path, alternative to call processInputBuffer().
2274 * It's a good idea since the code is small and this condition
2275 * happens most of the times. */
2276 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2277 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2279 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2281 /* Otherwise return... there is to read the last argument
2282 * from the socket. */
2286 /* Let's try to encode the bulk object to save space. */
2287 if (cmd
->flags
& REDIS_CMD_BULK
)
2288 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2290 /* Check if the user is authenticated */
2291 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2292 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2297 /* Handle the maxmemory directive */
2298 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2299 zmalloc_used_memory() > server
.maxmemory
)
2301 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2306 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2307 if (dictSize(c
->pubsub_channels
) > 0 &&
2308 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2309 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2310 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2315 /* Exec the command */
2316 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2317 queueMultiCommand(c
,cmd
);
2318 addReply(c
,shared
.queued
);
2320 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2321 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2325 /* Prepare the client for the next command */
2330 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2335 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2336 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2337 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2338 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2341 if (argc
<= REDIS_STATIC_ARGS
) {
2344 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2347 lenobj
= createObject(REDIS_STRING
,
2348 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2349 lenobj
->refcount
= 0;
2350 outv
[outc
++] = lenobj
;
2351 for (j
= 0; j
< argc
; j
++) {
2352 lenobj
= createObject(REDIS_STRING
,
2353 sdscatprintf(sdsempty(),"$%lu\r\n",
2354 (unsigned long) stringObjectLen(argv
[j
])));
2355 lenobj
->refcount
= 0;
2356 outv
[outc
++] = lenobj
;
2357 outv
[outc
++] = argv
[j
];
2358 outv
[outc
++] = shared
.crlf
;
2361 /* Increment all the refcounts at start and decrement at end in order to
2362 * be sure to free objects if there is no slave in a replication state
2363 * able to be feed with commands */
2364 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2365 listRewind(slaves
,&li
);
2366 while((ln
= listNext(&li
))) {
2367 redisClient
*slave
= ln
->value
;
2369 /* Don't feed slaves that are still waiting for BGSAVE to start */
2370 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2372 /* Feed all the other slaves, MONITORs and so on */
2373 if (slave
->slaveseldb
!= dictid
) {
2377 case 0: selectcmd
= shared
.select0
; break;
2378 case 1: selectcmd
= shared
.select1
; break;
2379 case 2: selectcmd
= shared
.select2
; break;
2380 case 3: selectcmd
= shared
.select3
; break;
2381 case 4: selectcmd
= shared
.select4
; break;
2382 case 5: selectcmd
= shared
.select5
; break;
2383 case 6: selectcmd
= shared
.select6
; break;
2384 case 7: selectcmd
= shared
.select7
; break;
2385 case 8: selectcmd
= shared
.select8
; break;
2386 case 9: selectcmd
= shared
.select9
; break;
2388 selectcmd
= createObject(REDIS_STRING
,
2389 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2390 selectcmd
->refcount
= 0;
2393 addReply(slave
,selectcmd
);
2394 slave
->slaveseldb
= dictid
;
2396 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2398 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2399 if (outv
!= static_outv
) zfree(outv
);
2402 static void processInputBuffer(redisClient
*c
) {
2404 /* Before to process the input buffer, make sure the client is not
2405 * waitig for a blocking operation such as BLPOP. Note that the first
2406 * iteration the client is never blocked, otherwise the processInputBuffer
2407 * would not be called at all, but after the execution of the first commands
2408 * in the input buffer the client may be blocked, and the "goto again"
2409 * will try to reiterate. The following line will make it return asap. */
2410 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2411 if (c
->bulklen
== -1) {
2412 /* Read the first line of the query */
2413 char *p
= strchr(c
->querybuf
,'\n');
2420 query
= c
->querybuf
;
2421 c
->querybuf
= sdsempty();
2422 querylen
= 1+(p
-(query
));
2423 if (sdslen(query
) > querylen
) {
2424 /* leave data after the first line of the query in the buffer */
2425 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2427 *p
= '\0'; /* remove "\n" */
2428 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2429 sdsupdatelen(query
);
2431 /* Now we can split the query in arguments */
2432 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2435 if (c
->argv
) zfree(c
->argv
);
2436 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2438 for (j
= 0; j
< argc
; j
++) {
2439 if (sdslen(argv
[j
])) {
2440 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2448 /* Execute the command. If the client is still valid
2449 * after processCommand() return and there is something
2450 * on the query buffer try to process the next command. */
2451 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2453 /* Nothing to process, argc == 0. Just process the query
2454 * buffer if it's not empty or return to the caller */
2455 if (sdslen(c
->querybuf
)) goto again
;
2458 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2459 redisLog(REDIS_VERBOSE
, "Client protocol error");
2464 /* Bulk read handling. Note that if we are at this point
2465 the client already sent a command terminated with a newline,
2466 we are reading the bulk data that is actually the last
2467 argument of the command. */
2468 int qbl
= sdslen(c
->querybuf
);
2470 if (c
->bulklen
<= qbl
) {
2471 /* Copy everything but the final CRLF as final argument */
2472 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2474 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2475 /* Process the command. If the client is still valid after
2476 * the processing and there is more data in the buffer
2477 * try to parse it. */
2478 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2484 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2485 redisClient
*c
= (redisClient
*) privdata
;
2486 char buf
[REDIS_IOBUF_LEN
];
2489 REDIS_NOTUSED(mask
);
2491 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2493 if (errno
== EAGAIN
) {
2496 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2500 } else if (nread
== 0) {
2501 redisLog(REDIS_VERBOSE
, "Client closed connection");
2506 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2507 c
->lastinteraction
= time(NULL
);
2511 processInputBuffer(c
);
2514 static int selectDb(redisClient
*c
, int id
) {
2515 if (id
< 0 || id
>= server
.dbnum
)
2517 c
->db
= &server
.db
[id
];
2521 static void *dupClientReplyValue(void *o
) {
2522 incrRefCount((robj
*)o
);
/* Match method for lists of string objects: non-zero when
 * compareStringObjects() reports the two objects as equal. */
static int listMatchObjects(void *a, void *b) {
    int cmp = compareStringObjects(a,b);

    return cmp == 0;
}
2530 static redisClient
*createClient(int fd
) {
2531 redisClient
*c
= zmalloc(sizeof(*c
));
2533 anetNonBlock(NULL
,fd
);
2534 anetTcpNoDelay(NULL
,fd
);
2535 if (!c
) return NULL
;
2538 c
->querybuf
= sdsempty();
2547 c
->lastinteraction
= time(NULL
);
2548 c
->authenticated
= 0;
2549 c
->replstate
= REDIS_REPL_NONE
;
2550 c
->reply
= listCreate();
2551 listSetFreeMethod(c
->reply
,decrRefCount
);
2552 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2553 c
->blockingkeys
= NULL
;
2554 c
->blockingkeysnum
= 0;
2555 c
->io_keys
= listCreate();
2556 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2557 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2558 c
->pubsub_patterns
= listCreate();
2559 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2560 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2561 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2562 readQueryFromClient
, c
) == AE_ERR
) {
2566 listAddNodeTail(server
.clients
,c
);
2567 initClientMultiState(c
);
2571 static void addReply(redisClient
*c
, robj
*obj
) {
2572 if (listLength(c
->reply
) == 0 &&
2573 (c
->replstate
== REDIS_REPL_NONE
||
2574 c
->replstate
== REDIS_REPL_ONLINE
) &&
2575 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2576 sendReplyToClient
, c
) == AE_ERR
) return;
2578 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2579 obj
= dupStringObject(obj
);
2580 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2582 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2585 static void addReplySds(redisClient
*c
, sds s
) {
2586 robj
*o
= createObject(REDIS_STRING
,s
);
2591 static void addReplyDouble(redisClient
*c
, double d
) {
2594 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2595 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2596 (unsigned long) strlen(buf
),buf
));
2599 static void addReplyLong(redisClient
*c
, long l
) {
2604 addReply(c
,shared
.czero
);
2606 } else if (l
== 1) {
2607 addReply(c
,shared
.cone
);
2610 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2611 addReplySds(c
,sdsnewlen(buf
,len
));
2614 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2619 addReply(c
,shared
.czero
);
2621 } else if (ll
== 1) {
2622 addReply(c
,shared
.cone
);
2625 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2626 addReplySds(c
,sdsnewlen(buf
,len
));
2629 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2634 addReply(c
,shared
.czero
);
2636 } else if (ul
== 1) {
2637 addReply(c
,shared
.cone
);
2640 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2641 addReplySds(c
,sdsnewlen(buf
,len
));
2644 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2647 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2648 len
= sdslen(obj
->ptr
);
2650 long n
= (long)obj
->ptr
;
2652 /* Compute how many bytes will take this integer as a radix 10 string */
2658 while((n
= n
/10) != 0) {
2662 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2665 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2666 addReplyBulkLen(c
,obj
);
2668 addReply(c
,shared
.crlf
);
2671 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2672 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2674 addReply(c
,shared
.nullbulk
);
2676 robj
*o
= createStringObject(s
,strlen(s
));
2682 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2687 REDIS_NOTUSED(mask
);
2688 REDIS_NOTUSED(privdata
);
2690 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2691 if (cfd
== AE_ERR
) {
2692 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2695 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2696 if ((c
= createClient(cfd
)) == NULL
) {
2697 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2698 close(cfd
); /* May be already closed, just ingore errors */
2701 /* If maxclient directive is set and this is one client more... close the
2702 * connection. Note that we create the client instead to check before
2703 * for this condition, since now the socket is already set in nonblocking
2704 * mode and we can send an error for free using the Kernel I/O */
2705 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2706 char *err
= "-ERR max number of clients reached\r\n";
2708 /* That's a best effort error message, don't check write errors */
2709 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2710 /* Nothing to do, Just to avoid the warning... */
2715 server
.stat_numconnections
++;
2718 /* ======================= Redis objects implementation ===================== */
2720 static robj
*createObject(int type
, void *ptr
) {
2723 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2724 if (listLength(server
.objfreelist
)) {
2725 listNode
*head
= listFirst(server
.objfreelist
);
2726 o
= listNodeValue(head
);
2727 listDelNode(server
.objfreelist
,head
);
2728 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2730 if (server
.vm_enabled
) {
2731 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2732 o
= zmalloc(sizeof(*o
));
2734 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2738 o
->encoding
= REDIS_ENCODING_RAW
;
2741 if (server
.vm_enabled
) {
2742 /* Note that this code may run in the context of an I/O thread
2743 * and accessing to server.unixtime in theory is an error
2744 * (no locks). But in practice this is safe, and even if we read
2745 * garbage Redis will not fail, as it's just a statistical info */
2746 o
->vm
.atime
= server
.unixtime
;
2747 o
->storage
= REDIS_VM_MEMORY
;
2752 static robj
*createStringObject(char *ptr
, size_t len
) {
2753 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2756 static robj
*dupStringObject(robj
*o
) {
2757 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2758 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2761 static robj
*createListObject(void) {
2762 list
*l
= listCreate();
2764 listSetFreeMethod(l
,decrRefCount
);
2765 return createObject(REDIS_LIST
,l
);
2768 static robj
*createSetObject(void) {
2769 dict
*d
= dictCreate(&setDictType
,NULL
);
2770 return createObject(REDIS_SET
,d
);
2773 static robj
*createHashObject(void) {
2774 /* All the Hashes start as zipmaps. Will be automatically converted
2775 * into hash tables if there are enough elements or big elements
2777 unsigned char *zm
= zipmapNew();
2778 robj
*o
= createObject(REDIS_HASH
,zm
);
2779 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2783 static robj
*createZsetObject(void) {
2784 zset
*zs
= zmalloc(sizeof(*zs
));
2786 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2787 zs
->zsl
= zslCreate();
2788 return createObject(REDIS_ZSET
,zs
);
2791 static void freeStringObject(robj
*o
) {
2792 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2797 static void freeListObject(robj
*o
) {
2798 listRelease((list
*) o
->ptr
);
2801 static void freeSetObject(robj
*o
) {
2802 dictRelease((dict
*) o
->ptr
);
2805 static void freeZsetObject(robj
*o
) {
2808 dictRelease(zs
->dict
);
2813 static void freeHashObject(robj
*o
) {
2814 switch (o
->encoding
) {
2815 case REDIS_ENCODING_HT
:
2816 dictRelease((dict
*) o
->ptr
);
2818 case REDIS_ENCODING_ZIPMAP
:
2827 static void incrRefCount(robj
*o
) {
2831 static void decrRefCount(void *obj
) {
2834 /* Object is a key of a swapped out value, or in the process of being
2836 if (server
.vm_enabled
&&
2837 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2839 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2840 redisAssert(o
->type
== REDIS_STRING
);
2841 freeStringObject(o
);
2842 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2843 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2844 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2845 !listAddNodeHead(server
.objfreelist
,o
))
2847 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2848 server
.vm_stats_swapped_objects
--;
2851 /* Object is in memory, or in the process of being swapped out. */
2852 if (--(o
->refcount
) == 0) {
2853 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2854 vmCancelThreadedIOJob(obj
);
2856 case REDIS_STRING
: freeStringObject(o
); break;
2857 case REDIS_LIST
: freeListObject(o
); break;
2858 case REDIS_SET
: freeSetObject(o
); break;
2859 case REDIS_ZSET
: freeZsetObject(o
); break;
2860 case REDIS_HASH
: freeHashObject(o
); break;
2861 default: redisAssert(0); break;
2863 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2864 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2865 !listAddNodeHead(server
.objfreelist
,o
))
2867 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2871 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2872 dictEntry
*de
= dictFind(db
->dict
,key
);
2874 robj
*key
= dictGetEntryKey(de
);
2875 robj
*val
= dictGetEntryVal(de
);
2877 if (server
.vm_enabled
) {
2878 if (key
->storage
== REDIS_VM_MEMORY
||
2879 key
->storage
== REDIS_VM_SWAPPING
)
2881 /* If we were swapping the object out, stop it, this key
2883 if (key
->storage
== REDIS_VM_SWAPPING
)
2884 vmCancelThreadedIOJob(key
);
2885 /* Update the access time of the key for the aging algorithm. */
2886 key
->vm
.atime
= server
.unixtime
;
2888 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2890 /* Our value was swapped on disk. Bring it at home. */
2891 redisAssert(val
== NULL
);
2892 val
= vmLoadObject(key
);
2893 dictGetEntryVal(de
) = val
;
2895 /* Clients blocked by the VM subsystem may be waiting for
2897 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2906 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2907 expireIfNeeded(db
,key
);
2908 return lookupKey(db
,key
);
2911 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2912 deleteIfVolatile(db
,key
);
2913 return lookupKey(db
,key
);
2916 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2917 robj
*o
= lookupKeyRead(c
->db
, key
);
2918 if (!o
) addReply(c
,reply
);
2922 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2923 robj
*o
= lookupKeyWrite(c
->db
, key
);
2924 if (!o
) addReply(c
,reply
);
2928 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2929 if (o
->type
!= type
) {
2930 addReply(c
,shared
.wrongtypeerr
);
2936 static int deleteKey(redisDb
*db
, robj
*key
) {
2939 /* We need to protect key from destruction: after the first dictDelete()
2940 * it may happen that 'key' is no longer valid if we don't increment
2941 * it's count. This may happen when we get the object reference directly
2942 * from the hash table with dictRandomKey() or dict iterators */
2944 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2945 retval
= dictDelete(db
->dict
,key
);
2948 return retval
== DICT_OK
;
2951 /* Check if the nul-terminated string 's' can be represented by a long
2952 * (that is, is a number that fits into long without any other space or
2953 * character before or after the digits).
2955 * If so, the function returns REDIS_OK and *longval is set to the value
2956 * of the number. Otherwise REDIS_ERR is returned */
2957 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2958 char buf
[32], *endptr
;
2962 value
= strtol(s
, &endptr
, 10);
2963 if (endptr
[0] != '\0') return REDIS_ERR
;
2964 slen
= snprintf(buf
,32,"%ld",value
);
2966 /* If the number converted back into a string is not identical
2967 * then it's not possible to encode the string as integer */
2968 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2969 if (longval
) *longval
= value
;
2973 /* Try to encode a string object in order to save space */
2974 static robj
*tryObjectEncoding(robj
*o
) {
2978 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2979 return o
; /* Already encoded */
2981 /* It's not safe to encode shared objects: shared objects can be shared
2982 * everywhere in the "object space" of Redis. Encoded objects can only
2983 * appear as "values" (and not, for instance, as keys) */
2984 if (o
->refcount
> 1) return o
;
2986 /* Currently we try to encode only strings */
2987 redisAssert(o
->type
== REDIS_STRING
);
2989 /* Check if we can represent this string as a long integer */
2990 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
2992 /* Ok, this object can be encoded */
2993 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2995 incrRefCount(shared
.integers
[value
]);
2996 return shared
.integers
[value
];
2998 o
->encoding
= REDIS_ENCODING_INT
;
3000 o
->ptr
= (void*) value
;
3005 /* Get a decoded version of an encoded object (returned as a new object).
3006 * If the object is already raw-encoded just increment the ref count. */
3007 static robj
*getDecodedObject(robj
*o
) {
3010 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3014 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3017 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3018 dec
= createStringObject(buf
,strlen(buf
));
3021 redisAssert(1 != 1);
3025 /* Compare two string objects via strcmp() or alike.
3026 * Note that the objects may be integer-encoded. In such a case we
3027 * use snprintf() to get a string representation of the numbers on the stack
3028 * and compare the strings, it's much faster than calling getDecodedObject().
3030 * Important note: if objects are not integer encoded, but binary-safe strings,
3031 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3033 static int compareStringObjects(robj
*a
, robj
*b
) {
3034 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3035 char bufa
[128], bufb
[128], *astr
, *bstr
;
3038 if (a
== b
) return 0;
3039 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3040 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3046 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3047 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3053 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3056 static size_t stringObjectLen(robj
*o
) {
3057 redisAssert(o
->type
== REDIS_STRING
);
3058 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3059 return sdslen(o
->ptr
);
3063 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3067 static int getDoubleFromObject(redisClient
*c
, robj
*o
, double *value
) {
3071 if (o
&& o
->type
!= REDIS_STRING
) {
3072 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3078 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3079 parsedValue
= strtod(o
->ptr
, &eptr
);
3080 else if (o
->encoding
== REDIS_ENCODING_INT
)
3081 parsedValue
= (long)o
->ptr
;
3083 redisAssert(1 != 1);
3085 if (eptr
!= NULL
&& *eptr
!= '\0') {
3086 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3090 *value
= parsedValue
;
3095 static int getLongLongFromObject(redisClient
*c
, robj
*o
, long long *value
) {
3096 long long parsedValue
;
3099 if (o
&& o
->type
!= REDIS_STRING
) {
3100 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3106 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3107 parsedValue
= strtoll(o
->ptr
, &eptr
, 10);
3108 else if (o
->encoding
== REDIS_ENCODING_INT
)
3109 parsedValue
= (long)o
->ptr
;
3111 redisAssert(1 != 1);
3113 if (eptr
!= NULL
&& *eptr
!= '\0') {
3114 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3118 *value
= parsedValue
;
3123 static int getLongFromObject(redisClient
*c
, robj
*o
, long *value
) {
3124 long long actualValue
;
3126 if (getLongLongFromObject(c
, o
, &actualValue
) != REDIS_OK
) return REDIS_ERR
;
3128 if (actualValue
< LONG_MIN
|| actualValue
> LONG_MAX
) {
3129 addReplySds(c
,sdsnew("-ERR value is out of range\r\n"));
3133 *value
= actualValue
;
3138 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' to 'fp'. Returns 0 on success, -1 if the byte
 * could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte
 * order. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;
    size_t written = fwrite(&stamp,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3151 /* check rdbLoadLen() comments for more info */
3152 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3153 unsigned char buf
[2];
3156 /* Save a 6 bit len */
3157 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3158 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3159 } else if (len
< (1<<14)) {
3160 /* Save a 14 bit len */
3161 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3163 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3165 /* Save a 32 bit len */
3166 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3167 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3169 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3174 /* String objects in the form "2391" "-100" without any space and with a
3175 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3176 * encoded as integers to save space */
3177 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3179 char *endptr
, buf
[32];
3181 /* Check if it's possible to encode this value as a number */
3182 value
= strtoll(s
, &endptr
, 10);
3183 if (endptr
[0] != '\0') return 0;
3184 snprintf(buf
,32,"%lld",value
);
3186 /* If the number converted back into a string is not identical
3187 * then it's not possible to encode the string as integer */
3188 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3190 /* Finally check if it fits in our ranges */
3191 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3192 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3193 enc
[1] = value
&0xFF;
3195 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3196 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3197 enc
[1] = value
&0xFF;
3198 enc
[2] = (value
>>8)&0xFF;
3200 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3201 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3202 enc
[1] = value
&0xFF;
3203 enc
[2] = (value
>>8)&0xFF;
3204 enc
[3] = (value
>>16)&0xFF;
3205 enc
[4] = (value
>>24)&0xFF;
3212 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3213 size_t comprlen
, outlen
;
3217 /* We require at least four bytes compression for this to be worth it */
3218 if (len
<= 4) return 0;
3220 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3221 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3222 if (comprlen
== 0) {
3226 /* Data compressed! Let's save it on disk */
3227 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3228 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3229 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3230 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3231 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3240 /* Save a string objet as [len][data] on disk. If the object is a string
3241 * representation of an integer value we try to safe it in a special form */
3242 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3245 /* Try integer encoding */
3247 unsigned char buf
[5];
3248 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3249 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3254 /* Try LZF compression - under 20 bytes it's unable to compress even
3255 * aaaaaaaaaaaaaaaaaa so skip it */
3256 if (server
.rdbcompression
&& len
> 20) {
3259 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3260 if (retval
== -1) return -1;
3261 if (retval
> 0) return 0;
3262 /* retval == 0 means data can't be compressed, save the old way */
3265 /* Store verbatim */
3266 if (rdbSaveLen(fp
,len
) == -1) return -1;
3267 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3271 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3272 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3275 /* Avoid incr/decr ref count business when possible.
3276 * This plays well with copy-on-write given that we are probably
3277 * in a child process (BGSAVE). Also this makes sure key objects
3278 * of swapped objects are not incRefCount-ed (an assert does not allow
3279 * this in order to avoid bugs) */
3280 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3281 obj
= getDecodedObject(obj
);
3282 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3285 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions, in which case no string follows:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len = 1;

    if (isnan(val)) {
        buf[0] = 253;
    } else if (!isfinite(val)) {
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        /* Textual form after the length byte. */
        snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    return (fwrite(buf,len,1,fp) == 0) ? -1 : 0;
}
3317 /* Save a Redis object. */
3318 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3319 if (o
->type
== REDIS_STRING
) {
3320 /* Save a string value */
3321 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3322 } else if (o
->type
== REDIS_LIST
) {
3323 /* Save a list value */
3324 list
*list
= o
->ptr
;
3328 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3329 listRewind(list
,&li
);
3330 while((ln
= listNext(&li
))) {
3331 robj
*eleobj
= listNodeValue(ln
);
3333 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3335 } else if (o
->type
== REDIS_SET
) {
3336 /* Save a set value */
3338 dictIterator
*di
= dictGetIterator(set
);
3341 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3342 while((de
= dictNext(di
)) != NULL
) {
3343 robj
*eleobj
= dictGetEntryKey(de
);
3345 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3347 dictReleaseIterator(di
);
3348 } else if (o
->type
== REDIS_ZSET
) {
3349 /* Save a set value */
3351 dictIterator
*di
= dictGetIterator(zs
->dict
);
3354 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3355 while((de
= dictNext(di
)) != NULL
) {
3356 robj
*eleobj
= dictGetEntryKey(de
);
3357 double *score
= dictGetEntryVal(de
);
3359 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3360 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3362 dictReleaseIterator(di
);
3363 } else if (o
->type
== REDIS_HASH
) {
3364 /* Save a hash value */
3365 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3366 unsigned char *p
= zipmapRewind(o
->ptr
);
3367 unsigned int count
= zipmapLen(o
->ptr
);
3368 unsigned char *key
, *val
;
3369 unsigned int klen
, vlen
;
3371 if (rdbSaveLen(fp
,count
) == -1) return -1;
3372 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3373 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3374 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3377 dictIterator
*di
= dictGetIterator(o
->ptr
);
3380 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3381 while((de
= dictNext(di
)) != NULL
) {
3382 robj
*key
= dictGetEntryKey(de
);
3383 robj
*val
= dictGetEntryVal(de
);
3385 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3386 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3388 dictReleaseIterator(di
);
3396 /* Return the length the object will have on disk if saved with
3397 * the rdbSaveObject() function. Currently we use a trick to get
3398 * this length with very little changes to the code. In the future
3399 * we could switch to a faster solution. */
3400 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3401 if (fp
== NULL
) fp
= server
.devnull
;
3403 assert(rdbSaveObject(fp
,o
) != 1);
3407 /* Return the number of pages required to save this object in the swap file */
3408 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3409 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3411 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3414 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3415 static int rdbSave(char *filename
) {
3416 dictIterator
*di
= NULL
;
3421 time_t now
= time(NULL
);
3423 /* Wait for I/O therads to terminate, just in case this is a
3424 * foreground-saving, to avoid seeking the swap file descriptor at the
3426 if (server
.vm_enabled
)
3427 waitEmptyIOJobsQueue();
3429 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3430 fp
= fopen(tmpfile
,"w");
3432 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3435 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3436 for (j
= 0; j
< server
.dbnum
; j
++) {
3437 redisDb
*db
= server
.db
+j
;
3439 if (dictSize(d
) == 0) continue;
3440 di
= dictGetIterator(d
);
3446 /* Write the SELECT DB opcode */
3447 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3448 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3450 /* Iterate this DB writing every entry */
3451 while((de
= dictNext(di
)) != NULL
) {
3452 robj
*key
= dictGetEntryKey(de
);
3453 robj
*o
= dictGetEntryVal(de
);
3454 time_t expiretime
= getExpire(db
,key
);
3456 /* Save the expire time */
3457 if (expiretime
!= -1) {
3458 /* If this key is already expired skip it */
3459 if (expiretime
< now
) continue;
3460 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3461 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3463 /* Save the key and associated value. This requires special
3464 * handling if the value is swapped out. */
3465 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3466 key
->storage
== REDIS_VM_SWAPPING
) {
3467 /* Save type, key, value */
3468 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3469 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3470 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3472 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3474 /* Get a preview of the object in memory */
3475 po
= vmPreviewObject(key
);
3476 /* Save type, key, value */
3477 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3478 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3479 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3480 /* Remove the loaded object from memory */
3484 dictReleaseIterator(di
);
3487 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3489 /* Make sure data will not remain on the OS's output buffers */
3494 /* Use RENAME to make sure the DB file is changed atomically only
3495 * if the generate DB file is ok. */
3496 if (rename(tmpfile
,filename
) == -1) {
3497 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3501 redisLog(REDIS_NOTICE
,"DB saved on disk");
3503 server
.lastsave
= time(NULL
);
3509 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3510 if (di
) dictReleaseIterator(di
);
3514 static int rdbSaveBackground(char *filename
) {
3517 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3518 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3519 if ((childpid
= fork()) == 0) {
3521 if (server
.vm_enabled
) vmReopenSwapFile();
3523 if (rdbSave(filename
) == REDIS_OK
) {
3530 if (childpid
== -1) {
3531 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3535 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3536 server
.bgsavechildpid
= childpid
;
3537 updateDictResizePolicy();
3540 return REDIS_OK
; /* unreached */
/* Remove the temp file named after the process id 'childpid' (the same
 * "temp-<pid>.rdb" pattern used when saving). */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,256,"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a one byte type from 'fp'. Returns the byte as a non negative int,
 * or -1 if nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a 4 byte signed integer time from 'fp' (host byte order).
 * Returns the time, or -1 if the read failed. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) == 0) return -1;
    return (time_t) stamp;
}
3562 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3563 * of this file for a description of how this are stored on disk.
3565 * isencoded is set to 1 if the readed length is not actually a length but
3566 * an "encoding type", check the above comments for more info */
3567 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3568 unsigned char buf
[2];
3572 if (isencoded
) *isencoded
= 0;
3573 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3574 type
= (buf
[0]&0xC0)>>6;
3575 if (type
== REDIS_RDB_6BITLEN
) {
3576 /* Read a 6 bit len */
3578 } else if (type
== REDIS_RDB_ENCVAL
) {
3579 /* Read a 6 bit len encoding type */
3580 if (isencoded
) *isencoded
= 1;
3582 } else if (type
== REDIS_RDB_14BITLEN
) {
3583 /* Read a 14 bit len */
3584 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3585 return ((buf
[0]&0x3F)<<8)|buf
[1];
3587 /* Read a 32 bit len */
3588 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3593 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3594 unsigned char enc
[4];
3597 if (enctype
== REDIS_RDB_ENC_INT8
) {
3598 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3599 val
= (signed char)enc
[0];
3600 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3602 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3603 v
= enc
[0]|(enc
[1]<<8);
3605 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3607 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3608 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3611 val
= 0; /* anti-warning */
3614 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3617 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3618 unsigned int len
, clen
;
3619 unsigned char *c
= NULL
;
3622 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3623 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3624 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3625 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3626 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3627 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3629 return createObject(REDIS_STRING
,val
);
3636 static robj
*rdbLoadStringObject(FILE*fp
) {
3641 len
= rdbLoadLen(fp
,&isencoded
);
3644 case REDIS_RDB_ENC_INT8
:
3645 case REDIS_RDB_ENC_INT16
:
3646 case REDIS_RDB_ENC_INT32
:
3647 return rdbLoadIntegerObject(fp
,len
);
3648 case REDIS_RDB_ENC_LZF
:
3649 return rdbLoadLzfStringObject(fp
);
3655 if (len
== REDIS_RDB_LENERR
) return NULL
;
3656 val
= sdsnewlen(NULL
,len
);
3657 if (len
&& fread(val
,len
,1,fp
) == 0) {
3661 return createObject(REDIS_STRING
,val
);
3664 /* For information about double serialization check rdbSaveDoubleValue() */
3665 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3669 if (fread(&len
,1,1,fp
) == 0) return -1;
3671 case 255: *val
= R_NegInf
; return 0;
3672 case 254: *val
= R_PosInf
; return 0;
3673 case 253: *val
= R_Nan
; return 0;
3675 if (fread(buf
,len
,1,fp
) == 0) return -1;
3677 sscanf(buf
, "%lg", val
);
3682 /* Load a Redis object of the specified type from the specified file.
3683 * On success a newly allocated object is returned, otherwise NULL. */
3684 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3687 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3688 if (type
== REDIS_STRING
) {
3689 /* Read string value */
3690 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3691 o
= tryObjectEncoding(o
);
3692 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3693 /* Read list/set value */
3696 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3697 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3698 /* It's faster to expand the dict to the right size asap in order
3699 * to avoid rehashing */
3700 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3701 dictExpand(o
->ptr
,listlen
);
3702 /* Load every single element of the list/set */
3706 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3707 ele
= tryObjectEncoding(ele
);
3708 if (type
== REDIS_LIST
) {
3709 listAddNodeTail((list
*)o
->ptr
,ele
);
3711 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3714 } else if (type
== REDIS_ZSET
) {
3715 /* Read list/set value */
3719 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3720 o
= createZsetObject();
3722 /* Load every single element of the list/set */
3725 double *score
= zmalloc(sizeof(double));
3727 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3728 ele
= tryObjectEncoding(ele
);
3729 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3730 dictAdd(zs
->dict
,ele
,score
);
3731 zslInsert(zs
->zsl
,*score
,ele
);
3732 incrRefCount(ele
); /* added to skiplist */
3734 } else if (type
== REDIS_HASH
) {
3737 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3738 o
= createHashObject();
3739 /* Too many entries? Use an hash table. */
3740 if (hashlen
> server
.hash_max_zipmap_entries
)
3741 convertToRealHash(o
);
3742 /* Load every key/value, then set it into the zipmap or hash
3743 * table, as needed. */
3747 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3748 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3749 /* If we are using a zipmap and there are too big values
3750 * the object is converted to real hash table encoding. */
3751 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3752 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3753 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3755 convertToRealHash(o
);
3758 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3759 unsigned char *zm
= o
->ptr
;
3761 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3762 val
->ptr
,sdslen(val
->ptr
),NULL
);
3767 key
= tryObjectEncoding(key
);
3768 val
= tryObjectEncoding(val
);
3769 dictAdd((dict
*)o
->ptr
,key
,val
);
3778 static int rdbLoad(char *filename
) {
3780 robj
*keyobj
= NULL
;
3782 int type
, retval
, rdbver
;
3783 dict
*d
= server
.db
[0].dict
;
3784 redisDb
*db
= server
.db
+0;
3786 time_t expiretime
= -1, now
= time(NULL
);
3787 long long loadedkeys
= 0;
3789 fp
= fopen(filename
,"r");
3790 if (!fp
) return REDIS_ERR
;
3791 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3793 if (memcmp(buf
,"REDIS",5) != 0) {
3795 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3798 rdbver
= atoi(buf
+5);
3801 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3808 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3809 if (type
== REDIS_EXPIRETIME
) {
3810 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3811 /* We read the time so we need to read the object type again */
3812 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3814 if (type
== REDIS_EOF
) break;
3815 /* Handle SELECT DB opcode as a special case */
3816 if (type
== REDIS_SELECTDB
) {
3817 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3819 if (dbid
>= (unsigned)server
.dbnum
) {
3820 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3823 db
= server
.db
+dbid
;
3828 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3830 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3831 /* Add the new object in the hash table */
3832 retval
= dictAdd(d
,keyobj
,o
);
3833 if (retval
== DICT_ERR
) {
3834 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3837 /* Set the expire time if needed */
3838 if (expiretime
!= -1) {
3839 setExpire(db
,keyobj
,expiretime
);
3840 /* Delete this key if already expired */
3841 if (expiretime
< now
) deleteKey(db
,keyobj
);
3845 /* Handle swapping while loading big datasets when VM is on */
3847 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3848 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3849 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3856 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3857 if (keyobj
) decrRefCount(keyobj
);
3858 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3860 return REDIS_ERR
; /* Just to avoid warning */
3863 /*================================== Commands =============================== */
3865 static void authCommand(redisClient
*c
) {
3866 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3867 c
->authenticated
= 1;
3868 addReply(c
,shared
.ok
);
3870 c
->authenticated
= 0;
3871 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3875 static void pingCommand(redisClient
*c
) {
3876 addReply(c
,shared
.pong
);
3879 static void echoCommand(redisClient
*c
) {
3880 addReplyBulk(c
,c
->argv
[1]);
3883 /*=================================== Strings =============================== */
3885 static void setGenericCommand(redisClient
*c
, int nx
) {
3888 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3889 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3890 if (retval
== DICT_ERR
) {
3892 /* If the key is about a swapped value, we want a new key object
3893 * to overwrite the old. So we delete the old key in the database.
3894 * This will also make sure that swap pages about the old object
3895 * will be marked as free. */
3896 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3897 incrRefCount(c
->argv
[1]);
3898 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3899 incrRefCount(c
->argv
[2]);
3901 addReply(c
,shared
.czero
);
3905 incrRefCount(c
->argv
[1]);
3906 incrRefCount(c
->argv
[2]);
3909 removeExpire(c
->db
,c
->argv
[1]);
3910 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3913 static void setCommand(redisClient
*c
) {
3914 setGenericCommand(c
,0);
3917 static void setnxCommand(redisClient
*c
) {
3918 setGenericCommand(c
,1);
3921 static int getGenericCommand(redisClient
*c
) {
3924 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3927 if (o
->type
!= REDIS_STRING
) {
3928 addReply(c
,shared
.wrongtypeerr
);
3936 static void getCommand(redisClient
*c
) {
3937 getGenericCommand(c
);
3940 static void getsetCommand(redisClient
*c
) {
3941 if (getGenericCommand(c
) == REDIS_ERR
) return;
3942 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3943 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3945 incrRefCount(c
->argv
[1]);
3947 incrRefCount(c
->argv
[2]);
3949 removeExpire(c
->db
,c
->argv
[1]);
3952 static void mgetCommand(redisClient
*c
) {
3955 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3956 for (j
= 1; j
< c
->argc
; j
++) {
3957 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3959 addReply(c
,shared
.nullbulk
);
3961 if (o
->type
!= REDIS_STRING
) {
3962 addReply(c
,shared
.nullbulk
);
3970 static void msetGenericCommand(redisClient
*c
, int nx
) {
3971 int j
, busykeys
= 0;
3973 if ((c
->argc
% 2) == 0) {
3974 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3977 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3978 * set nothing at all if at least one already key exists. */
3980 for (j
= 1; j
< c
->argc
; j
+= 2) {
3981 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3987 addReply(c
, shared
.czero
);
3991 for (j
= 1; j
< c
->argc
; j
+= 2) {
3994 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
3995 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3996 if (retval
== DICT_ERR
) {
3997 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3998 incrRefCount(c
->argv
[j
+1]);
4000 incrRefCount(c
->argv
[j
]);
4001 incrRefCount(c
->argv
[j
+1]);
4003 removeExpire(c
->db
,c
->argv
[j
]);
4005 server
.dirty
+= (c
->argc
-1)/2;
4006 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4009 static void msetCommand(redisClient
*c
) {
4010 msetGenericCommand(c
,0);
4013 static void msetnxCommand(redisClient
*c
) {
4014 msetGenericCommand(c
,1);
4017 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4022 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4024 if (getLongLongFromObject(c
, o
, &value
) != REDIS_OK
) return;
4027 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4028 o
= tryObjectEncoding(o
);
4029 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4030 if (retval
== DICT_ERR
) {
4031 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4032 removeExpire(c
->db
,c
->argv
[1]);
4034 incrRefCount(c
->argv
[1]);
4037 addReply(c
,shared
.colon
);
4039 addReply(c
,shared
.crlf
);
4042 static void incrCommand(redisClient
*c
) {
4043 incrDecrCommand(c
,1);
4046 static void decrCommand(redisClient
*c
) {
4047 incrDecrCommand(c
,-1);
4050 static void incrbyCommand(redisClient
*c
) {
4053 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4055 incrDecrCommand(c
,incr
);
4058 static void decrbyCommand(redisClient
*c
) {
4061 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4063 incrDecrCommand(c
,-incr
);
4066 static void appendCommand(redisClient
*c
) {
4071 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4073 /* Create the key */
4074 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4075 incrRefCount(c
->argv
[1]);
4076 incrRefCount(c
->argv
[2]);
4077 totlen
= stringObjectLen(c
->argv
[2]);
4081 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4084 o
= dictGetEntryVal(de
);
4085 if (o
->type
!= REDIS_STRING
) {
4086 addReply(c
,shared
.wrongtypeerr
);
4089 /* If the object is specially encoded or shared we have to make
4091 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4092 robj
*decoded
= getDecodedObject(o
);
4094 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4095 decrRefCount(decoded
);
4096 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4099 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4100 o
->ptr
= sdscatlen(o
->ptr
,
4101 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4103 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4104 (unsigned long) c
->argv
[2]->ptr
);
4106 totlen
= sdslen(o
->ptr
);
4109 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4112 static void substrCommand(redisClient
*c
) {
4114 long start
= atoi(c
->argv
[2]->ptr
);
4115 long end
= atoi(c
->argv
[3]->ptr
);
4116 size_t rangelen
, strlen
;
4119 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4120 checkType(c
,o
,REDIS_STRING
)) return;
4122 o
= getDecodedObject(o
);
4123 strlen
= sdslen(o
->ptr
);
4125 /* convert negative indexes */
4126 if (start
< 0) start
= strlen
+start
;
4127 if (end
< 0) end
= strlen
+end
;
4128 if (start
< 0) start
= 0;
4129 if (end
< 0) end
= 0;
4131 /* indexes sanity checks */
4132 if (start
> end
|| (size_t)start
>= strlen
) {
4133 /* Out of range start or start > end result in null reply */
4134 addReply(c
,shared
.nullbulk
);
4138 if ((size_t)end
>= strlen
) end
= strlen
-1;
4139 rangelen
= (end
-start
)+1;
4141 /* Return the result */
4142 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4143 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4144 addReplySds(c
,range
);
4145 addReply(c
,shared
.crlf
);
4149 /* ========================= Type agnostic commands ========================= */
4151 static void delCommand(redisClient
*c
) {
4154 for (j
= 1; j
< c
->argc
; j
++) {
4155 if (deleteKey(c
->db
,c
->argv
[j
])) {
4160 addReplyLong(c
,deleted
);
4163 static void existsCommand(redisClient
*c
) {
4164 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4167 static void selectCommand(redisClient
*c
) {
4168 int id
= atoi(c
->argv
[1]->ptr
);
4170 if (selectDb(c
,id
) == REDIS_ERR
) {
4171 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4173 addReply(c
,shared
.ok
);
4177 static void randomkeyCommand(redisClient
*c
) {
4181 de
= dictGetRandomKey(c
->db
->dict
);
4182 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4185 addReply(c
,shared
.plus
);
4186 addReply(c
,shared
.crlf
);
4188 addReply(c
,shared
.plus
);
4189 addReply(c
,dictGetEntryKey(de
));
4190 addReply(c
,shared
.crlf
);
4194 static void keysCommand(redisClient
*c
) {
4197 sds pattern
= c
->argv
[1]->ptr
;
4198 int plen
= sdslen(pattern
);
4199 unsigned long numkeys
= 0;
4200 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4202 di
= dictGetIterator(c
->db
->dict
);
4204 decrRefCount(lenobj
);
4205 while((de
= dictNext(di
)) != NULL
) {
4206 robj
*keyobj
= dictGetEntryKey(de
);
4208 sds key
= keyobj
->ptr
;
4209 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4210 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4211 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4212 addReplyBulk(c
,keyobj
);
4217 dictReleaseIterator(di
);
4218 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4221 static void dbsizeCommand(redisClient
*c
) {
4223 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4226 static void lastsaveCommand(redisClient
*c
) {
4228 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4231 static void typeCommand(redisClient
*c
) {
4235 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4240 case REDIS_STRING
: type
= "+string"; break;
4241 case REDIS_LIST
: type
= "+list"; break;
4242 case REDIS_SET
: type
= "+set"; break;
4243 case REDIS_ZSET
: type
= "+zset"; break;
4244 case REDIS_HASH
: type
= "+hash"; break;
4245 default: type
= "+unknown"; break;
4248 addReplySds(c
,sdsnew(type
));
4249 addReply(c
,shared
.crlf
);
4252 static void saveCommand(redisClient
*c
) {
4253 if (server
.bgsavechildpid
!= -1) {
4254 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4257 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4258 addReply(c
,shared
.ok
);
4260 addReply(c
,shared
.err
);
4264 static void bgsaveCommand(redisClient
*c
) {
4265 if (server
.bgsavechildpid
!= -1) {
4266 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4269 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4270 char *status
= "+Background saving started\r\n";
4271 addReplySds(c
,sdsnew(status
));
4273 addReply(c
,shared
.err
);
4277 static void shutdownCommand(redisClient
*c
) {
4278 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4279 /* Kill the saving child if there is a background saving in progress.
4280 We want to avoid race conditions, for instance our saving child may
4281 overwrite the synchronous save performed by SHUTDOWN. */
4282 if (server
.bgsavechildpid
!= -1) {
4283 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4284 kill(server
.bgsavechildpid
,SIGKILL
);
4285 rdbRemoveTempFile(server
.bgsavechildpid
);
4287 if (server
.appendonly
) {
4288 /* Append only file: fsync() the AOF and exit */
4289 fsync(server
.appendfd
);
4290 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4293 /* Snapshotting. Perform a SYNC SAVE and exit */
4294 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4295 if (server
.daemonize
)
4296 unlink(server
.pidfile
);
4297 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4298 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4299 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4302 /* Ooops.. error saving! The best we can do is to continue
4303 * operating. Note that if there was a background saving process,
4304 * in the next cron() Redis will be notified that the background
4305 * saving aborted, handling special stuff like slaves pending for
4306 * synchronization... */
4307 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4309 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4314 static void renameGenericCommand(redisClient
*c
, int nx
) {
4317 /* To use the same key as src and dst is probably an error */
4318 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4319 addReply(c
,shared
.sameobjecterr
);
4323 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4327 deleteIfVolatile(c
->db
,c
->argv
[2]);
4328 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4331 addReply(c
,shared
.czero
);
4334 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4336 incrRefCount(c
->argv
[2]);
4338 deleteKey(c
->db
,c
->argv
[1]);
4340 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4343 static void renameCommand(redisClient
*c
) {
4344 renameGenericCommand(c
,0);
4347 static void renamenxCommand(redisClient
*c
) {
4348 renameGenericCommand(c
,1);
4351 static void moveCommand(redisClient
*c
) {
4356 /* Obtain source and target DB pointers */
4359 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4360 addReply(c
,shared
.outofrangeerr
);
4364 selectDb(c
,srcid
); /* Back to the source DB */
4366 /* If the target DB of the move is the same as the
4367 * source DB, it is probably an error. */
4369 addReply(c
,shared
.sameobjecterr
);
4373 /* Check if the element exists and get a reference */
4374 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4376 addReply(c
,shared
.czero
);
4380 /* Try to add the element to the target DB */
4381 deleteIfVolatile(dst
,c
->argv
[1]);
4382 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4383 addReply(c
,shared
.czero
);
4386 incrRefCount(c
->argv
[1]);
4389 /* OK! key moved, free the entry in the source DB */
4390 deleteKey(src
,c
->argv
[1]);
4392 addReply(c
,shared
.cone
);
4395 /* =================================== Lists ================================ */
4396 static void pushGenericCommand(redisClient
*c
, int where
) {
4400 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4402 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4403 addReply(c
,shared
.cone
);
4406 lobj
= createListObject();
4408 if (where
== REDIS_HEAD
) {
4409 listAddNodeHead(list
,c
->argv
[2]);
4411 listAddNodeTail(list
,c
->argv
[2]);
4413 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4414 incrRefCount(c
->argv
[1]);
4415 incrRefCount(c
->argv
[2]);
4417 if (lobj
->type
!= REDIS_LIST
) {
4418 addReply(c
,shared
.wrongtypeerr
);
4421 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4422 addReply(c
,shared
.cone
);
4426 if (where
== REDIS_HEAD
) {
4427 listAddNodeHead(list
,c
->argv
[2]);
4429 listAddNodeTail(list
,c
->argv
[2]);
4431 incrRefCount(c
->argv
[2]);
4434 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4437 static void lpushCommand(redisClient
*c
) {
4438 pushGenericCommand(c
,REDIS_HEAD
);
4441 static void rpushCommand(redisClient
*c
) {
4442 pushGenericCommand(c
,REDIS_TAIL
);
4445 static void llenCommand(redisClient
*c
) {
4449 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4450 checkType(c
,o
,REDIS_LIST
)) return;
4453 addReplyUlong(c
,listLength(l
));
4456 static void lindexCommand(redisClient
*c
) {
4458 int index
= atoi(c
->argv
[2]->ptr
);
4462 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4463 checkType(c
,o
,REDIS_LIST
)) return;
4466 ln
= listIndex(list
, index
);
4468 addReply(c
,shared
.nullbulk
);
4470 robj
*ele
= listNodeValue(ln
);
4471 addReplyBulk(c
,ele
);
4475 static void lsetCommand(redisClient
*c
) {
4477 int index
= atoi(c
->argv
[2]->ptr
);
4481 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4482 checkType(c
,o
,REDIS_LIST
)) return;
4485 ln
= listIndex(list
, index
);
4487 addReply(c
,shared
.outofrangeerr
);
4489 robj
*ele
= listNodeValue(ln
);
4492 listNodeValue(ln
) = c
->argv
[3];
4493 incrRefCount(c
->argv
[3]);
4494 addReply(c
,shared
.ok
);
4499 static void popGenericCommand(redisClient
*c
, int where
) {
4504 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4505 checkType(c
,o
,REDIS_LIST
)) return;
4508 if (where
== REDIS_HEAD
)
4509 ln
= listFirst(list
);
4511 ln
= listLast(list
);
4514 addReply(c
,shared
.nullbulk
);
4516 robj
*ele
= listNodeValue(ln
);
4517 addReplyBulk(c
,ele
);
4518 listDelNode(list
,ln
);
4519 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4524 static void lpopCommand(redisClient
*c
) {
4525 popGenericCommand(c
,REDIS_HEAD
);
4528 static void rpopCommand(redisClient
*c
) {
4529 popGenericCommand(c
,REDIS_TAIL
);
4532 static void lrangeCommand(redisClient
*c
) {
4534 int start
= atoi(c
->argv
[2]->ptr
);
4535 int end
= atoi(c
->argv
[3]->ptr
);
4542 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4543 || checkType(c
,o
,REDIS_LIST
)) return;
4545 llen
= listLength(list
);
4547 /* convert negative indexes */
4548 if (start
< 0) start
= llen
+start
;
4549 if (end
< 0) end
= llen
+end
;
4550 if (start
< 0) start
= 0;
4551 if (end
< 0) end
= 0;
4553 /* indexes sanity checks */
4554 if (start
> end
|| start
>= llen
) {
4555 /* Out of range start or start > end result in empty list */
4556 addReply(c
,shared
.emptymultibulk
);
4559 if (end
>= llen
) end
= llen
-1;
4560 rangelen
= (end
-start
)+1;
4562 /* Return the result in form of a multi-bulk reply */
4563 ln
= listIndex(list
, start
);
4564 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4565 for (j
= 0; j
< rangelen
; j
++) {
4566 ele
= listNodeValue(ln
);
4567 addReplyBulk(c
,ele
);
4572 static void ltrimCommand(redisClient
*c
) {
4574 int start
= atoi(c
->argv
[2]->ptr
);
4575 int end
= atoi(c
->argv
[3]->ptr
);
4577 int j
, ltrim
, rtrim
;
4581 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4582 checkType(c
,o
,REDIS_LIST
)) return;
4584 llen
= listLength(list
);
4586 /* convert negative indexes */
4587 if (start
< 0) start
= llen
+start
;
4588 if (end
< 0) end
= llen
+end
;
4589 if (start
< 0) start
= 0;
4590 if (end
< 0) end
= 0;
4592 /* indexes sanity checks */
4593 if (start
> end
|| start
>= llen
) {
4594 /* Out of range start or start > end result in empty list */
4598 if (end
>= llen
) end
= llen
-1;
4603 /* Remove list elements to perform the trim */
4604 for (j
= 0; j
< ltrim
; j
++) {
4605 ln
= listFirst(list
);
4606 listDelNode(list
,ln
);
4608 for (j
= 0; j
< rtrim
; j
++) {
4609 ln
= listLast(list
);
4610 listDelNode(list
,ln
);
4612 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4614 addReply(c
,shared
.ok
);
4617 static void lremCommand(redisClient
*c
) {
4620 listNode
*ln
, *next
;
4621 int toremove
= atoi(c
->argv
[2]->ptr
);
4625 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4626 checkType(c
,o
,REDIS_LIST
)) return;
4630 toremove
= -toremove
;
4633 ln
= fromtail
? list
->tail
: list
->head
;
4635 robj
*ele
= listNodeValue(ln
);
4637 next
= fromtail
? ln
->prev
: ln
->next
;
4638 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4639 listDelNode(list
,ln
);
4642 if (toremove
&& removed
== toremove
) break;
4646 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4647 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4650 /* These are the semantics of this command:
4651 * RPOPLPUSH srclist dstlist:
4652 * IF LLEN(srclist) > 0
4653 * element = RPOP srclist
4654 * LPUSH dstlist element
4661 * The idea is to be able to get an element from a list in a reliable way
4662 * since the element is not just returned but pushed against another list
4663 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4665 static void rpoplpushcommand(redisClient
*c
) {
4670 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4671 checkType(c
,sobj
,REDIS_LIST
)) return;
4672 srclist
= sobj
->ptr
;
4673 ln
= listLast(srclist
);
4676 addReply(c
,shared
.nullbulk
);
4678 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4679 robj
*ele
= listNodeValue(ln
);
4682 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4683 addReply(c
,shared
.wrongtypeerr
);
4687 /* Add the element to the target list (unless it's directly
4688 * passed to some BLPOP-ing client) */
4689 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4691 /* Create the list if the key does not exist */
4692 dobj
= createListObject();
4693 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4694 incrRefCount(c
->argv
[2]);
4696 dstlist
= dobj
->ptr
;
4697 listAddNodeHead(dstlist
,ele
);
4701 /* Send the element to the client as reply as well */
4702 addReplyBulk(c
,ele
);
4704 /* Finally remove the element from the source list */
4705 listDelNode(srclist
,ln
);
4706 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4711 /* ==================================== Sets ================================ */
4713 static void saddCommand(redisClient
*c
) {
4716 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4718 set
= createSetObject();
4719 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4720 incrRefCount(c
->argv
[1]);
4722 if (set
->type
!= REDIS_SET
) {
4723 addReply(c
,shared
.wrongtypeerr
);
4727 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4728 incrRefCount(c
->argv
[2]);
4730 addReply(c
,shared
.cone
);
4732 addReply(c
,shared
.czero
);
4736 static void sremCommand(redisClient
*c
) {
4739 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4740 checkType(c
,set
,REDIS_SET
)) return;
4742 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4744 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4745 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4746 addReply(c
,shared
.cone
);
4748 addReply(c
,shared
.czero
);
4752 static void smoveCommand(redisClient
*c
) {
4753 robj
*srcset
, *dstset
;
4755 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4756 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4758 /* If the source key does not exist return 0, if it's of the wrong type
4760 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4761 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4764 /* Error if the destination key is not a set as well */
4765 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4766 addReply(c
,shared
.wrongtypeerr
);
4769 /* Remove the element from the source set */
4770 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4771 /* Key not found in the src set! return zero */
4772 addReply(c
,shared
.czero
);
4775 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4776 deleteKey(c
->db
,c
->argv
[1]);
4778 /* Add the element to the destination set */
4780 dstset
= createSetObject();
4781 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4782 incrRefCount(c
->argv
[2]);
4784 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4785 incrRefCount(c
->argv
[3]);
4786 addReply(c
,shared
.cone
);
4789 static void sismemberCommand(redisClient
*c
) {
4792 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4793 checkType(c
,set
,REDIS_SET
)) return;
4795 if (dictFind(set
->ptr
,c
->argv
[2]))
4796 addReply(c
,shared
.cone
);
4798 addReply(c
,shared
.czero
);
4801 static void scardCommand(redisClient
*c
) {
4805 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4806 checkType(c
,o
,REDIS_SET
)) return;
4809 addReplyUlong(c
,dictSize(s
));
4812 static void spopCommand(redisClient
*c
) {
4816 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4817 checkType(c
,set
,REDIS_SET
)) return;
4819 de
= dictGetRandomKey(set
->ptr
);
4821 addReply(c
,shared
.nullbulk
);
4823 robj
*ele
= dictGetEntryKey(de
);
4825 addReplyBulk(c
,ele
);
4826 dictDelete(set
->ptr
,ele
);
4827 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4828 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4833 static void srandmemberCommand(redisClient
*c
) {
4837 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4838 checkType(c
,set
,REDIS_SET
)) return;
4840 de
= dictGetRandomKey(set
->ptr
);
4842 addReply(c
,shared
.nullbulk
);
4844 robj
*ele
= dictGetEntryKey(de
);
4846 addReplyBulk(c
,ele
);
4850 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4851 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4853 return dictSize(*d1
)-dictSize(*d2
);
4856 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4857 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4860 robj
*lenobj
= NULL
, *dstset
= NULL
;
4861 unsigned long j
, cardinality
= 0;
4863 for (j
= 0; j
< setsnum
; j
++) {
4867 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4868 lookupKeyRead(c
->db
,setskeys
[j
]);
4872 if (deleteKey(c
->db
,dstkey
))
4874 addReply(c
,shared
.czero
);
4876 addReply(c
,shared
.emptymultibulk
);
4880 if (setobj
->type
!= REDIS_SET
) {
4882 addReply(c
,shared
.wrongtypeerr
);
4885 dv
[j
] = setobj
->ptr
;
4887 /* Sort sets from the smallest to largest, this will improve our
4888 * algorithm's performance */
4889 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4891 /* The first thing we should output is the total number of elements...
4892 * since this is a multi-bulk write, but at this stage we don't know
4893 * the intersection set size, so we use a trick, append an empty object
4894 * to the output list and save the pointer to later modify it with the
4897 lenobj
= createObject(REDIS_STRING
,NULL
);
4899 decrRefCount(lenobj
);
4901 /* If we have a target key where to store the resulting set
4902 * create this key with an empty set inside */
4903 dstset
= createSetObject();
4906 /* Iterate all the elements of the first (smallest) set, and test
4907 * the element against all the other sets, if at least one set does
4908 * not include the element it is discarded */
4909 di
= dictGetIterator(dv
[0]);
4911 while((de
= dictNext(di
)) != NULL
) {
4914 for (j
= 1; j
< setsnum
; j
++)
4915 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4917 continue; /* at least one set does not contain the member */
4918 ele
= dictGetEntryKey(de
);
4920 addReplyBulk(c
,ele
);
4923 dictAdd(dstset
->ptr
,ele
,NULL
);
4927 dictReleaseIterator(di
);
4930 /* Store the resulting set into the target, if the intersection
4931 * is not an empty set. */
4932 deleteKey(c
->db
,dstkey
);
4933 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4934 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4935 incrRefCount(dstkey
);
4936 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4938 decrRefCount(dstset
);
4939 addReply(c
,shared
.czero
);
4943 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4948 static void sinterCommand(redisClient
*c
) {
4949 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4952 static void sinterstoreCommand(redisClient
*c
) {
4953 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4956 #define REDIS_OP_UNION 0
4957 #define REDIS_OP_DIFF 1
4958 #define REDIS_OP_INTER 2
4960 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4961 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4964 robj
*dstset
= NULL
;
4965 int j
, cardinality
= 0;
4967 for (j
= 0; j
< setsnum
; j
++) {
4971 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4972 lookupKeyRead(c
->db
,setskeys
[j
]);
4977 if (setobj
->type
!= REDIS_SET
) {
4979 addReply(c
,shared
.wrongtypeerr
);
4982 dv
[j
] = setobj
->ptr
;
4985 /* We need a temp set object to store our union. If the dstkey
4986 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4987 * this set object will be the resulting object to set into the target key. */
4988 dstset
= createSetObject();
4990 /* Iterate all the elements of all the sets, add every element a single
4991 * time to the result set */
4992 for (j
= 0; j
< setsnum
; j
++) {
4993 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4994 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4996 di
= dictGetIterator(dv
[j
]);
4998 while((de
= dictNext(di
)) != NULL
) {
5001 /* dictAdd will not add the same element multiple times */
5002 ele
= dictGetEntryKey(de
);
5003 if (op
== REDIS_OP_UNION
|| j
== 0) {
5004 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5008 } else if (op
== REDIS_OP_DIFF
) {
5009 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5014 dictReleaseIterator(di
);
5016 /* result set is empty? Exit asap. */
5017 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5020 /* Output the content of the resulting set, if not in STORE mode */
5022 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5023 di
= dictGetIterator(dstset
->ptr
);
5024 while((de
= dictNext(di
)) != NULL
) {
5027 ele
= dictGetEntryKey(de
);
5028 addReplyBulk(c
,ele
);
5030 dictReleaseIterator(di
);
5031 decrRefCount(dstset
);
5033 /* If we have a target key where to store the resulting set
5034 * create this key with the result set inside */
5035 deleteKey(c
->db
,dstkey
);
5036 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5037 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5038 incrRefCount(dstkey
);
5039 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5041 decrRefCount(dstset
);
5042 addReply(c
,shared
.czero
);
5049 static void sunionCommand(redisClient
*c
) {
5050 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5053 static void sunionstoreCommand(redisClient
*c
) {
5054 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5057 static void sdiffCommand(redisClient
*c
) {
5058 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5061 static void sdiffstoreCommand(redisClient
*c
) {
5062 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5065 /* ==================================== ZSets =============================== */
5067 /* ZSETs are ordered sets using two data structures to hold the same elements
5068 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5071 * The elements are added to a hash table mapping Redis objects to scores.
5072 * At the same time the elements are added to a skip list mapping scores
5073 * to Redis objects (so objects are sorted by scores in this "view"). */
5075 /* This skiplist implementation is almost a C translation of the original
5076 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5077 * Alternative to Balanced Trees", modified in three ways:
5078 * a) this implementation allows for repeated values.
5079 * b) the comparison is not just by key (our 'score') but by satellite data.
5080 * c) there is a back pointer, so it's a doubly linked list with the back
5081 * pointers being only at "level 1". This allows traversing the list
5082 * from tail to head, useful for ZREVRANGE. */
5084 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5085 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5087 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5089 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5095 static zskiplist
*zslCreate(void) {
5099 zsl
= zmalloc(sizeof(*zsl
));
5102 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5103 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5104 zsl
->header
->forward
[j
] = NULL
;
5106 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5107 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5108 zsl
->header
->span
[j
] = 0;
5110 zsl
->header
->backward
= NULL
;
5115 static void zslFreeNode(zskiplistNode
*node
) {
5116 decrRefCount(node
->obj
);
5117 zfree(node
->forward
);
5122 static void zslFree(zskiplist
*zsl
) {
5123 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5125 zfree(zsl
->header
->forward
);
5126 zfree(zsl
->header
->span
);
5129 next
= node
->forward
[0];
5136 static int zslRandomLevel(void) {
5138 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5140 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5143 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5144 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5145 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5149 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5150 /* store rank that is crossed to reach the insert position */
5151 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5153 while (x
->forward
[i
] &&
5154 (x
->forward
[i
]->score
< score
||
5155 (x
->forward
[i
]->score
== score
&&
5156 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5157 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5162 /* we assume the key is not already inside, since we allow duplicated
5163 * scores, and the re-insertion of score and redis object should never
5164 * happen since the caller of zslInsert() should test in the hash table
5165 * if the element is already inside or not. */
5166 level
= zslRandomLevel();
5167 if (level
> zsl
->level
) {
5168 for (i
= zsl
->level
; i
< level
; i
++) {
5170 update
[i
] = zsl
->header
;
5171 update
[i
]->span
[i
-1] = zsl
->length
;
5175 x
= zslCreateNode(level
,score
,obj
);
5176 for (i
= 0; i
< level
; i
++) {
5177 x
->forward
[i
] = update
[i
]->forward
[i
];
5178 update
[i
]->forward
[i
] = x
;
5180 /* update span covered by update[i] as x is inserted here */
5182 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5183 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5187 /* increment span for untouched levels */
5188 for (i
= level
; i
< zsl
->level
; i
++) {
5189 update
[i
]->span
[i
-1]++;
5192 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5194 x
->forward
[0]->backward
= x
;
5200 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5201 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5203 for (i
= 0; i
< zsl
->level
; i
++) {
5204 if (update
[i
]->forward
[i
] == x
) {
5206 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5208 update
[i
]->forward
[i
] = x
->forward
[i
];
5210 /* invariant: i > 0, because update[0]->forward[0]
5211 * is always equal to x */
5212 update
[i
]->span
[i
-1] -= 1;
5215 if (x
->forward
[0]) {
5216 x
->forward
[0]->backward
= x
->backward
;
5218 zsl
->tail
= x
->backward
;
5220 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5225 /* Delete an element with matching score/object from the skiplist. */
5226 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5227 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5231 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5232 while (x
->forward
[i
] &&
5233 (x
->forward
[i
]->score
< score
||
5234 (x
->forward
[i
]->score
== score
&&
5235 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5239 /* We may have multiple elements with the same score, what we need
5240 * is to find the element with both the right score and object. */
5242 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5243 zslDeleteNode(zsl
, x
, update
);
5247 return 0; /* not found */
5249 return 0; /* not found */
5252 /* Delete all the elements with score between min and max from the skiplist.
5253 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5254 * Note that this function takes the reference to the hash table view of the
5255 * sorted set, in order to remove the elements from the hash table too. */
5256 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5257 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5258 unsigned long removed
= 0;
5262 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5263 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5267 /* We may have multiple elements with the same score, what we need
5268 * is to find the element with both the right score and object. */
5270 while (x
&& x
->score
<= max
) {
5271 zskiplistNode
*next
= x
->forward
[0];
5272 zslDeleteNode(zsl
, x
, update
);
5273 dictDelete(dict
,x
->obj
);
5278 return removed
; /* not found */
5281 /* Delete all the elements with rank between start and end from the skiplist.
5282 * Start and end are inclusive. Note that start and end need to be 1-based */
5283 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5284 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5285 unsigned long traversed
= 0, removed
= 0;
5289 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5290 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5291 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5299 while (x
&& traversed
<= end
) {
5300 zskiplistNode
*next
= x
->forward
[0];
5301 zslDeleteNode(zsl
, x
, update
);
5302 dictDelete(dict
,x
->obj
);
5311 /* Find the first node having a score equal or greater than the specified one.
5312 * Returns NULL if there is no match. */
5313 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5318 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5319 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5322 /* We may have multiple elements with the same score, what we need
5323 * is to find the element with both the right score and object. */
5324 return x
->forward
[0];
5327 /* Find the rank for an element by both score and key.
5328 * Returns 0 when the element cannot be found, rank otherwise.
5329 * Note that the rank is 1-based due to the span of zsl->header to the
5331 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5333 unsigned long rank
= 0;
5337 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5338 while (x
->forward
[i
] &&
5339 (x
->forward
[i
]->score
< score
||
5340 (x
->forward
[i
]->score
== score
&&
5341 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5342 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5346 /* x might be equal to zsl->header, so test if obj is non-NULL */
5347 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5354 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5355 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5357 unsigned long traversed
= 0;
5361 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5362 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5364 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5367 if (traversed
== rank
) {
5374 /* The actual Z-commands implementations */
5376 /* This generic command implements both ZADD and ZINCRBY.
5377 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5378 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5379 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5384 zsetobj
= lookupKeyWrite(c
->db
,key
);
5385 if (zsetobj
== NULL
) {
5386 zsetobj
= createZsetObject();
5387 dictAdd(c
->db
->dict
,key
,zsetobj
);
5390 if (zsetobj
->type
!= REDIS_ZSET
) {
5391 addReply(c
,shared
.wrongtypeerr
);
5397 /* Ok now since we implement both ZADD and ZINCRBY here the code
5398 * needs to handle the two different conditions. It's all about setting
5399 * '*score', that is, the new score to set, to the right value. */
5400 score
= zmalloc(sizeof(double));
5404 /* Read the old score. If the element was not present starts from 0 */
5405 de
= dictFind(zs
->dict
,ele
);
5407 double *oldscore
= dictGetEntryVal(de
);
5408 *score
= *oldscore
+ scoreval
;
5416 /* What follows is a simple remove and re-insert operation that is common
5417 * to both ZADD and ZINCRBY... */
5418 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5419 /* case 1: New element */
5420 incrRefCount(ele
); /* added to hash */
5421 zslInsert(zs
->zsl
,*score
,ele
);
5422 incrRefCount(ele
); /* added to skiplist */
5425 addReplyDouble(c
,*score
);
5427 addReply(c
,shared
.cone
);
5432 /* case 2: Score update operation */
5433 de
= dictFind(zs
->dict
,ele
);
5434 redisAssert(de
!= NULL
);
5435 oldscore
= dictGetEntryVal(de
);
5436 if (*score
!= *oldscore
) {
5439 /* Remove and insert the element in the skip list with new score */
5440 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5441 redisAssert(deleted
!= 0);
5442 zslInsert(zs
->zsl
,*score
,ele
);
5444 /* Update the score in the hash table */
5445 dictReplace(zs
->dict
,ele
,score
);
5451 addReplyDouble(c
,*score
);
5453 addReply(c
,shared
.czero
);
5457 static void zaddCommand(redisClient
*c
) {
5460 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5462 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
5465 static void zincrbyCommand(redisClient
*c
) {
5468 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5470 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5473 static void zremCommand(redisClient
*c
) {
5480 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5481 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5484 de
= dictFind(zs
->dict
,c
->argv
[2]);
5486 addReply(c
,shared
.czero
);
5489 /* Delete from the skiplist */
5490 oldscore
= dictGetEntryVal(de
);
5491 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5492 redisAssert(deleted
!= 0);
5494 /* Delete from the hash table */
5495 dictDelete(zs
->dict
,c
->argv
[2]);
5496 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5497 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5499 addReply(c
,shared
.cone
);
5502 static void zremrangebyscoreCommand(redisClient
*c
) {
5509 if ((getDoubleFromObject(c
, c
->argv
[2], &min
) != REDIS_OK
) ||
5510 (getDoubleFromObject(c
, c
->argv
[3], &max
) != REDIS_OK
)) return;
5512 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5513 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5516 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5517 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5518 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5519 server
.dirty
+= deleted
;
5520 addReplyLong(c
,deleted
);
5523 static void zremrangebyrankCommand(redisClient
*c
) {
5531 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5532 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5534 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5535 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5537 llen
= zs
->zsl
->length
;
5539 /* convert negative indexes */
5540 if (start
< 0) start
= llen
+start
;
5541 if (end
< 0) end
= llen
+end
;
5542 if (start
< 0) start
= 0;
5543 if (end
< 0) end
= 0;
5545 /* indexes sanity checks */
5546 if (start
> end
|| start
>= llen
) {
5547 addReply(c
,shared
.czero
);
5550 if (end
>= llen
) end
= llen
-1;
5552 /* increment start and end because zsl*Rank functions
5553 * use 1-based rank */
5554 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5555 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5556 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5557 server
.dirty
+= deleted
;
5558 addReplyLong(c
, deleted
);
5566 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5567 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5568 unsigned long size1
, size2
;
5569 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5570 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5571 return size1
- size2
;
5574 #define REDIS_AGGR_SUM 1
5575 #define REDIS_AGGR_MIN 2
5576 #define REDIS_AGGR_MAX 3
5578 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5579 if (aggregate
== REDIS_AGGR_SUM
) {
5580 *target
= *target
+ val
;
5581 } else if (aggregate
== REDIS_AGGR_MIN
) {
5582 *target
= val
< *target
? val
: *target
;
5583 } else if (aggregate
== REDIS_AGGR_MAX
) {
5584 *target
= val
> *target
? val
: *target
;
5587 redisAssert(0 != 0);
5591 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5593 int aggregate
= REDIS_AGGR_SUM
;
5600 /* expect zsetnum input keys to be given */
5601 zsetnum
= atoi(c
->argv
[2]->ptr
);
5603 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5607 /* test if the expected number of keys would overflow */
5608 if (3+zsetnum
> c
->argc
) {
5609 addReply(c
,shared
.syntaxerr
);
5613 /* read keys to be used for input */
5614 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5615 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5616 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5620 if (zsetobj
->type
!= REDIS_ZSET
) {
5622 addReply(c
,shared
.wrongtypeerr
);
5625 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5628 /* default all weights to 1 */
5629 src
[i
].weight
= 1.0;
5632 /* parse optional extra arguments */
5634 int remaining
= c
->argc
- j
;
5637 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5639 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5640 if (getDoubleFromObject(c
, c
->argv
[j
], &src
[i
].weight
) != REDIS_OK
)
5643 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5645 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5646 aggregate
= REDIS_AGGR_SUM
;
5647 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5648 aggregate
= REDIS_AGGR_MIN
;
5649 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5650 aggregate
= REDIS_AGGR_MAX
;
5653 addReply(c
,shared
.syntaxerr
);
5659 addReply(c
,shared
.syntaxerr
);
5665 /* sort sets from the smallest to largest, this will improve our
5666 * algorithm's performance */
5667 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5669 dstobj
= createZsetObject();
5670 dstzset
= dstobj
->ptr
;
5672 if (op
== REDIS_OP_INTER
) {
5673 /* skip going over all entries if the smallest zset is NULL or empty */
5674 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5675 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5676 * from small to large, all src[i > 0].dict are non-empty too */
5677 di
= dictGetIterator(src
[0].dict
);
5678 while((de
= dictNext(di
)) != NULL
) {
5679 double *score
= zmalloc(sizeof(double)), value
;
5680 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5682 for (j
= 1; j
< zsetnum
; j
++) {
5683 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5685 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5686 zunionInterAggregate(score
, value
, aggregate
);
5692 /* skip entry when not present in every source dict */
5696 robj
*o
= dictGetEntryKey(de
);
5697 dictAdd(dstzset
->dict
,o
,score
);
5698 incrRefCount(o
); /* added to dictionary */
5699 zslInsert(dstzset
->zsl
,*score
,o
);
5700 incrRefCount(o
); /* added to skiplist */
5703 dictReleaseIterator(di
);
5705 } else if (op
== REDIS_OP_UNION
) {
5706 for (i
= 0; i
< zsetnum
; i
++) {
5707 if (!src
[i
].dict
) continue;
5709 di
= dictGetIterator(src
[i
].dict
);
5710 while((de
= dictNext(di
)) != NULL
) {
5711 /* skip key when already processed */
5712 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5714 double *score
= zmalloc(sizeof(double)), value
;
5715 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5717 /* because the zsets are sorted by size, its only possible
5718 * for sets at larger indices to hold this entry */
5719 for (j
= (i
+1); j
< zsetnum
; j
++) {
5720 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5722 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5723 zunionInterAggregate(score
, value
, aggregate
);
5727 robj
*o
= dictGetEntryKey(de
);
5728 dictAdd(dstzset
->dict
,o
,score
);
5729 incrRefCount(o
); /* added to dictionary */
5730 zslInsert(dstzset
->zsl
,*score
,o
);
5731 incrRefCount(o
); /* added to skiplist */
5733 dictReleaseIterator(di
);
5736 /* unknown operator */
5737 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5740 deleteKey(c
->db
,dstkey
);
5741 if (dstzset
->zsl
->length
) {
5742 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5743 incrRefCount(dstkey
);
5744 addReplyLong(c
, dstzset
->zsl
->length
);
5747 decrRefCount(dstobj
);
5748 addReply(c
, shared
.czero
);
5753 static void zunionCommand(redisClient
*c
) {
5754 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
5757 static void zinterCommand(redisClient
*c
) {
5758 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5761 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5773 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5774 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5776 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5778 } else if (c
->argc
>= 5) {
5779 addReply(c
,shared
.syntaxerr
);
5783 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5784 || checkType(c
,o
,REDIS_ZSET
)) return;
5789 /* convert negative indexes */
5790 if (start
< 0) start
= llen
+start
;
5791 if (end
< 0) end
= llen
+end
;
5792 if (start
< 0) start
= 0;
5793 if (end
< 0) end
= 0;
5795 /* indexes sanity checks */
5796 if (start
> end
|| start
>= llen
) {
5797 /* Out of range start or start > end result in empty list */
5798 addReply(c
,shared
.emptymultibulk
);
5801 if (end
>= llen
) end
= llen
-1;
5802 rangelen
= (end
-start
)+1;
5804 /* check if starting point is trivial, before searching
5805 * the element in log(N) time */
5807 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5810 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5813 /* Return the result in form of a multi-bulk reply */
5814 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5815 withscores
? (rangelen
*2) : rangelen
));
5816 for (j
= 0; j
< rangelen
; j
++) {
5818 addReplyBulk(c
,ele
);
5820 addReplyDouble(c
,ln
->score
);
5821 ln
= reverse
? ln
->backward
: ln
->forward
[0];
5825 static void zrangeCommand(redisClient
*c
) {
5826 zrangeGenericCommand(c
,0);
5829 static void zrevrangeCommand(redisClient
*c
) {
5830 zrangeGenericCommand(c
,1);
5833 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5834 * If justcount is non-zero, just the count is returned. */
5835 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5838 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5839 int offset
= 0, limit
= -1;
5843 /* Parse the min-max interval. If one of the values is prefixed
5844 * by the "(" character, it's considered "open". For instance
5845 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5846 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5847 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5848 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5851 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5853 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5854 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5857 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5860 /* Parse "WITHSCORES": note that if the command was called with
5861 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5862 * enter the following paths to parse WITHSCORES and LIMIT. */
5863 if (c
->argc
== 5 || c
->argc
== 8) {
5864 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5869 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5873 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5878 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5879 addReply(c
,shared
.syntaxerr
);
5881 } else if (c
->argc
== (7 + withscores
)) {
5882 offset
= atoi(c
->argv
[5]->ptr
);
5883 limit
= atoi(c
->argv
[6]->ptr
);
5884 if (offset
< 0) offset
= 0;
5887 /* Ok, lookup the key and get the range */
5888 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5890 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5892 if (o
->type
!= REDIS_ZSET
) {
5893 addReply(c
,shared
.wrongtypeerr
);
5895 zset
*zsetobj
= o
->ptr
;
5896 zskiplist
*zsl
= zsetobj
->zsl
;
5898 robj
*ele
, *lenobj
= NULL
;
5899 unsigned long rangelen
= 0;
5901 /* Get the first node with the score >= min, or with
5902 * score > min if 'minex' is true. */
5903 ln
= zslFirstWithScore(zsl
,min
);
5904 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5907 /* No element matching the speciifed interval */
5908 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5912 /* We don't know in advance how many matching elements there
5913 * are in the list, so we push this object that will represent
5914 * the multi-bulk length in the output buffer, and will "fix"
5917 lenobj
= createObject(REDIS_STRING
,NULL
);
5919 decrRefCount(lenobj
);
5922 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5925 ln
= ln
->forward
[0];
5928 if (limit
== 0) break;
5931 addReplyBulk(c
,ele
);
5933 addReplyDouble(c
,ln
->score
);
5935 ln
= ln
->forward
[0];
5937 if (limit
> 0) limit
--;
5940 addReplyLong(c
,(long)rangelen
);
5942 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5943 withscores
? (rangelen
*2) : rangelen
);
5949 static void zrangebyscoreCommand(redisClient
*c
) {
5950 genericZrangebyscoreCommand(c
,0);
5953 static void zcountCommand(redisClient
*c
) {
5954 genericZrangebyscoreCommand(c
,1);
5957 static void zcardCommand(redisClient
*c
) {
5961 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5962 checkType(c
,o
,REDIS_ZSET
)) return;
5965 addReplyUlong(c
,zs
->zsl
->length
);
5968 static void zscoreCommand(redisClient
*c
) {
5973 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5974 checkType(c
,o
,REDIS_ZSET
)) return;
5977 de
= dictFind(zs
->dict
,c
->argv
[2]);
5979 addReply(c
,shared
.nullbulk
);
5981 double *score
= dictGetEntryVal(de
);
5983 addReplyDouble(c
,*score
);
5987 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5995 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5996 checkType(c
,o
,REDIS_ZSET
)) return;
6000 de
= dictFind(zs
->dict
,c
->argv
[2]);
6002 addReply(c
,shared
.nullbulk
);
6006 score
= dictGetEntryVal(de
);
6007 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6010 addReplyLong(c
, zsl
->length
- rank
);
6012 addReplyLong(c
, rank
-1);
6015 addReply(c
,shared
.nullbulk
);
6019 static void zrankCommand(redisClient
*c
) {
6020 zrankGenericCommand(c
, 0);
6023 static void zrevrankCommand(redisClient
*c
) {
6024 zrankGenericCommand(c
, 1);
6027 /* =================================== Hashes =============================== */
6028 static void hsetCommand(redisClient
*c
) {
6030 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6033 o
= createHashObject();
6034 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6035 incrRefCount(c
->argv
[1]);
6037 if (o
->type
!= REDIS_HASH
) {
6038 addReply(c
,shared
.wrongtypeerr
);
6042 /* We want to convert the zipmap into an hash table right now if the
6043 * entry to be added is too big. Note that we check if the object
6044 * is integer encoded before to try fetching the length in the test below.
6045 * This is because integers are small, but currently stringObjectLen()
6046 * performs a slow conversion: not worth it. */
6047 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
6048 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
6049 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
6050 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
6051 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
6053 convertToRealHash(o
);
6056 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6057 unsigned char *zm
= o
->ptr
;
6058 robj
*valobj
= getDecodedObject(c
->argv
[3]);
6060 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6061 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
6062 decrRefCount(valobj
);
6065 /* And here there is the second check for hash conversion. */
6066 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6067 convertToRealHash(o
);
6069 c
->argv
[2] = tryObjectEncoding(c
->argv
[2]);
6070 /* note that c->argv[3] is already encoded, as the latest arg
6071 * of a bulk command is always integer encoded if possible. */
6072 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
6073 incrRefCount(c
->argv
[2]);
6077 incrRefCount(c
->argv
[3]);
6080 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
6083 static void hmsetCommand(redisClient
*c
) {
6085 robj
*o
, *key
, *val
;
6087 if ((c
->argc
% 2) == 1) {
6088 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6092 if ((o
= lookupKeyWrite(c
->db
,c
->argv
[1])) == NULL
) {
6093 o
= createHashObject();
6094 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6095 incrRefCount(c
->argv
[1]);
6097 if (o
->type
!= REDIS_HASH
) {
6098 addReply(c
,shared
.wrongtypeerr
);
6103 /* We want to convert the zipmap into an hash table right now if the
6104 * entry to be added is too big. */
6105 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6106 for (i
= 2; i
< c
->argc
; i
+=2) {
6107 if ((c
->argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6108 sdslen(c
->argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) ||
6109 (c
->argv
[i
+1]->encoding
== REDIS_ENCODING_RAW
&&
6110 sdslen(c
->argv
[i
+1]->ptr
) > server
.hash_max_zipmap_value
)) {
6111 convertToRealHash(o
);
6117 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6118 unsigned char *zm
= o
->ptr
;
6120 for (i
= 2; i
< c
->argc
; i
+=2) {
6121 key
= getDecodedObject(c
->argv
[i
]);
6122 val
= getDecodedObject(c
->argv
[i
+1]);
6123 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
6124 val
->ptr
,sdslen(val
->ptr
),NULL
);
6130 /* And here there is the second check for hash conversion. */
6131 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6132 convertToRealHash(o
);
6134 for (i
= 2; i
< c
->argc
; i
+=2) {
6135 key
= tryObjectEncoding(c
->argv
[i
]);
6136 val
= tryObjectEncoding(c
->argv
[i
+1]);
6137 if (dictReplace(o
->ptr
,key
,val
)) {
6144 addReply(c
, shared
.ok
);
6147 static void hincrbyCommand(redisClient
*c
) {
6148 long long value
= 0, incr
= 0;
6149 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6152 o
= createHashObject();
6153 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6154 incrRefCount(c
->argv
[1]);
6156 if (o
->type
!= REDIS_HASH
) {
6157 addReply(c
,shared
.wrongtypeerr
);
6162 if (getLongLongFromObject(c
, c
->argv
[3], &incr
) != REDIS_OK
) return;
6164 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6165 unsigned char *zm
= o
->ptr
;
6166 unsigned char *zval
;
6169 /* Find value if already present in hash */
6170 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6172 /* strtoll needs the char* to have a trailing \0, but
6173 * the zipmap doesn't include them. */
6174 sds szval
= sdsnewlen(zval
, zvlen
);
6175 value
= strtoll(szval
,NULL
,10);
6180 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6181 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6182 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6186 /* Check if the zipmap needs to be converted. */
6187 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6188 convertToRealHash(o
);
6193 /* Find value if already present in hash */
6194 de
= dictFind(o
->ptr
,c
->argv
[2]);
6196 hval
= dictGetEntryVal(de
);
6197 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6198 value
= strtoll(hval
->ptr
,NULL
,10);
6199 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6200 value
= (long)hval
->ptr
;
6202 redisAssert(1 != 1);
6206 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6207 hval
= tryObjectEncoding(hval
);
6208 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6209 incrRefCount(c
->argv
[2]);
6214 addReplyLongLong(c
, value
);
6217 static void hgetCommand(redisClient
*c
) {
6220 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6221 checkType(c
,o
,REDIS_HASH
)) return;
6223 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6224 unsigned char *zm
= o
->ptr
;
6229 field
= getDecodedObject(c
->argv
[2]);
6230 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6231 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6232 addReplySds(c
,sdsnewlen(val
,vlen
));
6233 addReply(c
,shared
.crlf
);
6234 decrRefCount(field
);
6237 addReply(c
,shared
.nullbulk
);
6238 decrRefCount(field
);
6242 struct dictEntry
*de
;
6244 de
= dictFind(o
->ptr
,c
->argv
[2]);
6246 addReply(c
,shared
.nullbulk
);
6248 robj
*e
= dictGetEntryVal(de
);
6255 static void hmgetCommand(redisClient
*c
) {
6258 robj
*o
= lookupKeyRead(c
->db
, c
->argv
[1]);
6260 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6261 for (i
= 2; i
< c
->argc
; i
++) {
6262 addReply(c
,shared
.nullbulk
);
6266 if (o
->type
!= REDIS_HASH
) {
6267 addReply(c
,shared
.wrongtypeerr
);
6272 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6273 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6274 unsigned char *zm
= o
->ptr
;
6279 for (i
= 2; i
< c
->argc
; i
++) {
6280 field
= getDecodedObject(c
->argv
[i
]);
6281 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
),&v
,&vlen
)) {
6282 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6283 addReplySds(c
,sdsnewlen(v
,vlen
));
6284 addReply(c
,shared
.crlf
);
6286 addReply(c
,shared
.nullbulk
);
6288 decrRefCount(field
);
6293 for (i
= 2; i
< c
->argc
; i
++) {
6294 de
= dictFind(o
->ptr
,c
->argv
[i
]);
6296 addReplyBulk(c
,(robj
*)dictGetEntryVal(de
));
6298 addReply(c
,shared
.nullbulk
);
6304 static void hdelCommand(redisClient
*c
) {
6308 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6309 checkType(c
,o
,REDIS_HASH
)) return;
6311 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6312 robj
*field
= getDecodedObject(c
->argv
[2]);
6314 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6315 (unsigned char*) field
->ptr
,
6316 sdslen(field
->ptr
), &deleted
);
6317 decrRefCount(field
);
6318 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6319 deleteKey(c
->db
,c
->argv
[1]);
6321 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6322 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6323 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6325 if (deleted
) server
.dirty
++;
6326 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
6329 static void hlenCommand(redisClient
*c
) {
6333 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6334 checkType(c
,o
,REDIS_HASH
)) return;
6336 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6337 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6338 addReplyUlong(c
,len
);
6341 #define REDIS_GETALL_KEYS 1
6342 #define REDIS_GETALL_VALS 2
6343 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6345 unsigned long count
= 0;
6347 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6348 || checkType(c
,o
,REDIS_HASH
)) return;
6350 lenobj
= createObject(REDIS_STRING
,NULL
);
6352 decrRefCount(lenobj
);
6354 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6355 unsigned char *p
= zipmapRewind(o
->ptr
);
6356 unsigned char *field
, *val
;
6357 unsigned int flen
, vlen
;
6359 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6362 if (flags
& REDIS_GETALL_KEYS
) {
6363 aux
= createStringObject((char*)field
,flen
);
6364 addReplyBulk(c
,aux
);
6368 if (flags
& REDIS_GETALL_VALS
) {
6369 aux
= createStringObject((char*)val
,vlen
);
6370 addReplyBulk(c
,aux
);
6376 dictIterator
*di
= dictGetIterator(o
->ptr
);
6379 while((de
= dictNext(di
)) != NULL
) {
6380 robj
*fieldobj
= dictGetEntryKey(de
);
6381 robj
*valobj
= dictGetEntryVal(de
);
6383 if (flags
& REDIS_GETALL_KEYS
) {
6384 addReplyBulk(c
,fieldobj
);
6387 if (flags
& REDIS_GETALL_VALS
) {
6388 addReplyBulk(c
,valobj
);
6392 dictReleaseIterator(di
);
6394 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
6397 static void hkeysCommand(redisClient
*c
) {
6398 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
6401 static void hvalsCommand(redisClient
*c
) {
6402 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
6405 static void hgetallCommand(redisClient
*c
) {
6406 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
6409 static void hexistsCommand(redisClient
*c
) {
6413 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6414 checkType(c
,o
,REDIS_HASH
)) return;
6416 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6418 unsigned char *zm
= o
->ptr
;
6420 field
= getDecodedObject(c
->argv
[2]);
6421 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6422 decrRefCount(field
);
6424 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6426 addReply(c
,exists
? shared
.cone
: shared
.czero
);
6429 static void convertToRealHash(robj
*o
) {
6430 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6431 unsigned int klen
, vlen
;
6432 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6434 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6435 p
= zipmapRewind(zm
);
6436 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6437 robj
*keyobj
, *valobj
;
6439 keyobj
= createStringObject((char*)key
,klen
);
6440 valobj
= createStringObject((char*)val
,vlen
);
6441 keyobj
= tryObjectEncoding(keyobj
);
6442 valobj
= tryObjectEncoding(valobj
);
6443 dictAdd(dict
,keyobj
,valobj
);
6445 o
->encoding
= REDIS_ENCODING_HT
;
6450 /* ========================= Non type-specific commands ==================== */
6452 static void flushdbCommand(redisClient
*c
) {
6453 server
.dirty
+= dictSize(c
->db
->dict
);
6454 dictEmpty(c
->db
->dict
);
6455 dictEmpty(c
->db
->expires
);
6456 addReply(c
,shared
.ok
);
6459 static void flushallCommand(redisClient
*c
) {
6460 server
.dirty
+= emptyDb();
6461 addReply(c
,shared
.ok
);
6462 if (server
.bgsavechildpid
!= -1) {
6463 kill(server
.bgsavechildpid
,SIGKILL
);
6464 rdbRemoveTempFile(server
.bgsavechildpid
);
6466 rdbSave(server
.dbfilename
);
6470 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6471 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6473 so
->pattern
= pattern
;
6477 /* Return the value associated to the key with a name obtained
6478 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6479 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6483 int prefixlen
, sublen
, postfixlen
;
6484 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6488 char buf
[REDIS_SORTKEY_MAX
+1];
6491 /* If the pattern is "#" return the substitution object itself in order
6492 * to implement the "SORT ... GET #" feature. */
6493 spat
= pattern
->ptr
;
6494 if (spat
[0] == '#' && spat
[1] == '\0') {
6498 /* The substitution object may be specially encoded. If so we create
6499 * a decoded object on the fly. Otherwise getDecodedObject will just
6500 * increment the ref count, that we'll decrement later. */
6501 subst
= getDecodedObject(subst
);
6504 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6505 p
= strchr(spat
,'*');
6507 decrRefCount(subst
);
6512 sublen
= sdslen(ssub
);
6513 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6514 memcpy(keyname
.buf
,spat
,prefixlen
);
6515 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6516 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6517 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6518 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6520 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6521 decrRefCount(subst
);
6523 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6524 return lookupKeyRead(db
,&keyobj
);
6527 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6528 * the additional parameter is not standard but a BSD-specific we have to
6529 * pass sorting parameters via the global 'server' structure */
6530 static int sortCompare(const void *s1
, const void *s2
) {
6531 const redisSortObject
*so1
= s1
, *so2
= s2
;
6534 if (!server
.sort_alpha
) {
6535 /* Numeric sorting. Here it's trivial as we precomputed scores */
6536 if (so1
->u
.score
> so2
->u
.score
) {
6538 } else if (so1
->u
.score
< so2
->u
.score
) {
6544 /* Alphanumeric sorting */
6545 if (server
.sort_bypattern
) {
6546 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6547 /* At least one compare object is NULL */
6548 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6550 else if (so1
->u
.cmpobj
== NULL
)
6555 /* We have both the objects, use strcoll */
6556 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6559 /* Compare elements directly */
6562 dec1
= getDecodedObject(so1
->obj
);
6563 dec2
= getDecodedObject(so2
->obj
);
6564 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6569 return server
.sort_desc
? -cmp
: cmp
;
6572 /* The SORT command is the most complex command in Redis. Warning: this code
6573 * is optimized for speed and a bit less for readability */
6574 static void sortCommand(redisClient
*c
) {
6577 int desc
= 0, alpha
= 0;
6578 int limit_start
= 0, limit_count
= -1, start
, end
;
6579 int j
, dontsort
= 0, vectorlen
;
6580 int getop
= 0; /* GET operation counter */
6581 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6582 redisSortObject
*vector
; /* Resulting vector to sort */
6584 /* Lookup the key to sort. It must be of the right types */
6585 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6586 if (sortval
== NULL
) {
6587 addReply(c
,shared
.emptymultibulk
);
6590 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6591 sortval
->type
!= REDIS_ZSET
)
6593 addReply(c
,shared
.wrongtypeerr
);
6597 /* Create a list of operations to perform for every sorted element.
6598 * Operations can be GET/DEL/INCR/DECR */
6599 operations
= listCreate();
6600 listSetFreeMethod(operations
,zfree
);
6603 /* Now we need to protect sortval incrementing its count, in the future
6604 * SORT may have options able to overwrite/delete keys during the sorting
6605 * and the sorted key itself may get destroied */
6606 incrRefCount(sortval
);
6608 /* The SORT command has an SQL-alike syntax, parse it */
6609 while(j
< c
->argc
) {
6610 int leftargs
= c
->argc
-j
-1;
6611 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6613 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6615 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6617 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6618 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6619 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6621 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6622 storekey
= c
->argv
[j
+1];
6624 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6625 sortby
= c
->argv
[j
+1];
6626 /* If the BY pattern does not contain '*', i.e. it is constant,
6627 * we don't need to sort nor to lookup the weight keys. */
6628 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6630 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6631 listAddNodeTail(operations
,createSortOperation(
6632 REDIS_SORT_GET
,c
->argv
[j
+1]));
6636 decrRefCount(sortval
);
6637 listRelease(operations
);
6638 addReply(c
,shared
.syntaxerr
);
6644 /* Load the sorting vector with all the objects to sort */
6645 switch(sortval
->type
) {
6646 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6647 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6648 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6649 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6651 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6654 if (sortval
->type
== REDIS_LIST
) {
6655 list
*list
= sortval
->ptr
;
6659 listRewind(list
,&li
);
6660 while((ln
= listNext(&li
))) {
6661 robj
*ele
= ln
->value
;
6662 vector
[j
].obj
= ele
;
6663 vector
[j
].u
.score
= 0;
6664 vector
[j
].u
.cmpobj
= NULL
;
6672 if (sortval
->type
== REDIS_SET
) {
6675 zset
*zs
= sortval
->ptr
;
6679 di
= dictGetIterator(set
);
6680 while((setele
= dictNext(di
)) != NULL
) {
6681 vector
[j
].obj
= dictGetEntryKey(setele
);
6682 vector
[j
].u
.score
= 0;
6683 vector
[j
].u
.cmpobj
= NULL
;
6686 dictReleaseIterator(di
);
6688 redisAssert(j
== vectorlen
);
6690 /* Now it's time to load the right scores in the sorting vector */
6691 if (dontsort
== 0) {
6692 for (j
= 0; j
< vectorlen
; j
++) {
6696 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6697 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6699 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6701 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6702 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6704 /* Don't need to decode the object if it's
6705 * integer-encoded (the only encoding supported) so
6706 * far. We can just cast it */
6707 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6708 vector
[j
].u
.score
= (long)byval
->ptr
;
6710 redisAssert(1 != 1);
6715 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6716 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6718 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6719 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6721 redisAssert(1 != 1);
6728 /* We are ready to sort the vector... perform a bit of sanity check
6729 * on the LIMIT option too. We'll use a partial version of quicksort. */
6730 start
= (limit_start
< 0) ? 0 : limit_start
;
6731 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6732 if (start
>= vectorlen
) {
6733 start
= vectorlen
-1;
6736 if (end
>= vectorlen
) end
= vectorlen
-1;
6738 if (dontsort
== 0) {
6739 server
.sort_desc
= desc
;
6740 server
.sort_alpha
= alpha
;
6741 server
.sort_bypattern
= sortby
? 1 : 0;
6742 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6743 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6745 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6748 /* Send command output to the output buffer, performing the specified
6749 * GET/DEL/INCR/DECR operations if any. */
6750 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6751 if (storekey
== NULL
) {
6752 /* STORE option not specified, sent the sorting result to client */
6753 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6754 for (j
= start
; j
<= end
; j
++) {
6758 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6759 listRewind(operations
,&li
);
6760 while((ln
= listNext(&li
))) {
6761 redisSortOperation
*sop
= ln
->value
;
6762 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6765 if (sop
->type
== REDIS_SORT_GET
) {
6766 if (!val
|| val
->type
!= REDIS_STRING
) {
6767 addReply(c
,shared
.nullbulk
);
6769 addReplyBulk(c
,val
);
6772 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6777 robj
*listObject
= createListObject();
6778 list
*listPtr
= (list
*) listObject
->ptr
;
6780 /* STORE option specified, set the sorting result as a List object */
6781 for (j
= start
; j
<= end
; j
++) {
6786 listAddNodeTail(listPtr
,vector
[j
].obj
);
6787 incrRefCount(vector
[j
].obj
);
6789 listRewind(operations
,&li
);
6790 while((ln
= listNext(&li
))) {
6791 redisSortOperation
*sop
= ln
->value
;
6792 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6795 if (sop
->type
== REDIS_SORT_GET
) {
6796 if (!val
|| val
->type
!= REDIS_STRING
) {
6797 listAddNodeTail(listPtr
,createStringObject("",0));
6799 listAddNodeTail(listPtr
,val
);
6803 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6807 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6808 incrRefCount(storekey
);
6810 /* Note: we add 1 because the DB is dirty anyway since even if the
6811 * SORT result is empty a new key is set and maybe the old content
6813 server
.dirty
+= 1+outputlen
;
6814 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6818 decrRefCount(sortval
);
6819 listRelease(operations
);
6820 for (j
= 0; j
< vectorlen
; j
++) {
6821 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6822 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer; the caller must make it large enough for
 * the longest output (a 20 digit byte count, the unit letter and the
 * terminating null byte). 'n' is the amount of bytes to format.
 *
 * Amounts below 1024 are printed as an exact byte count, larger ones are
 * scaled to K, M, G or T and printed with two decimals. Amounts too large
 * for the T range fall back to an exact byte count, so 's' is always
 * written and never left uninitialized. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else {
        /* Bigger than the largest supported unit: print the raw count
         * instead of leaving the buffer untouched. */
        sprintf(s,"%lluB",n);
    }
}
6848 /* Create the string returned by the INFO command. This is decoupled
6849 * from the INFO command itself as we need to report the same information
6850 * on memory corruption problems. */
6851 static sds
genRedisInfoString(void) {
6853 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6857 bytesToHuman(hmem
,zmalloc_used_memory());
6858 info
= sdscatprintf(sdsempty(),
6859 "redis_version:%s\r\n"
6861 "multiplexing_api:%s\r\n"
6862 "process_id:%ld\r\n"
6863 "uptime_in_seconds:%ld\r\n"
6864 "uptime_in_days:%ld\r\n"
6865 "connected_clients:%d\r\n"
6866 "connected_slaves:%d\r\n"
6867 "blocked_clients:%d\r\n"
6868 "used_memory:%zu\r\n"
6869 "used_memory_human:%s\r\n"
6870 "changes_since_last_save:%lld\r\n"
6871 "bgsave_in_progress:%d\r\n"
6872 "last_save_time:%ld\r\n"
6873 "bgrewriteaof_in_progress:%d\r\n"
6874 "total_connections_received:%lld\r\n"
6875 "total_commands_processed:%lld\r\n"
6876 "expired_keys:%lld\r\n"
6877 "hash_max_zipmap_entries:%ld\r\n"
6878 "hash_max_zipmap_value:%ld\r\n"
6879 "pubsub_channels:%ld\r\n"
6880 "pubsub_patterns:%u\r\n"
6884 (sizeof(long) == 8) ? "64" : "32",
6889 listLength(server
.clients
)-listLength(server
.slaves
),
6890 listLength(server
.slaves
),
6891 server
.blpop_blocked_clients
,
6892 zmalloc_used_memory(),
6895 server
.bgsavechildpid
!= -1,
6897 server
.bgrewritechildpid
!= -1,
6898 server
.stat_numconnections
,
6899 server
.stat_numcommands
,
6900 server
.stat_expiredkeys
,
6901 server
.hash_max_zipmap_entries
,
6902 server
.hash_max_zipmap_value
,
6903 dictSize(server
.pubsub_channels
),
6904 listLength(server
.pubsub_patterns
),
6905 server
.vm_enabled
!= 0,
6906 server
.masterhost
== NULL
? "master" : "slave"
6908 if (server
.masterhost
) {
6909 info
= sdscatprintf(info
,
6910 "master_host:%s\r\n"
6911 "master_port:%d\r\n"
6912 "master_link_status:%s\r\n"
6913 "master_last_io_seconds_ago:%d\r\n"
6916 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6918 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6921 if (server
.vm_enabled
) {
6923 info
= sdscatprintf(info
,
6924 "vm_conf_max_memory:%llu\r\n"
6925 "vm_conf_page_size:%llu\r\n"
6926 "vm_conf_pages:%llu\r\n"
6927 "vm_stats_used_pages:%llu\r\n"
6928 "vm_stats_swapped_objects:%llu\r\n"
6929 "vm_stats_swappin_count:%llu\r\n"
6930 "vm_stats_swappout_count:%llu\r\n"
6931 "vm_stats_io_newjobs_len:%lu\r\n"
6932 "vm_stats_io_processing_len:%lu\r\n"
6933 "vm_stats_io_processed_len:%lu\r\n"
6934 "vm_stats_io_active_threads:%lu\r\n"
6935 "vm_stats_blocked_clients:%lu\r\n"
6936 ,(unsigned long long) server
.vm_max_memory
,
6937 (unsigned long long) server
.vm_page_size
,
6938 (unsigned long long) server
.vm_pages
,
6939 (unsigned long long) server
.vm_stats_used_pages
,
6940 (unsigned long long) server
.vm_stats_swapped_objects
,
6941 (unsigned long long) server
.vm_stats_swapins
,
6942 (unsigned long long) server
.vm_stats_swapouts
,
6943 (unsigned long) listLength(server
.io_newjobs
),
6944 (unsigned long) listLength(server
.io_processing
),
6945 (unsigned long) listLength(server
.io_processed
),
6946 (unsigned long) server
.io_active_threads
,
6947 (unsigned long) server
.vm_blocked_clients
6951 for (j
= 0; j
< server
.dbnum
; j
++) {
6952 long long keys
, vkeys
;
6954 keys
= dictSize(server
.db
[j
].dict
);
6955 vkeys
= dictSize(server
.db
[j
].expires
);
6956 if (keys
|| vkeys
) {
6957 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6964 static void infoCommand(redisClient
*c
) {
6965 sds info
= genRedisInfoString();
6966 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6967 (unsigned long)sdslen(info
)));
6968 addReplySds(c
,info
);
6969 addReply(c
,shared
.crlf
);
6972 static void monitorCommand(redisClient
*c
) {
6973 /* ignore MONITOR if aleady slave or in monitor mode */
6974 if (c
->flags
& REDIS_SLAVE
) return;
6976 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6978 listAddNodeTail(server
.monitors
,c
);
6979 addReply(c
,shared
.ok
);
6982 /* ================================= Expire ================================= */
6983 static int removeExpire(redisDb
*db
, robj
*key
) {
6984 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6991 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6992 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7000 /* Return the expire time of the specified key, or -1 if no expire
7001 * is associated with this key (i.e. the key is non volatile) */
7002 static time_t getExpire(redisDb
*db
, robj
*key
) {
7005 /* No expire? return ASAP */
7006 if (dictSize(db
->expires
) == 0 ||
7007 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7009 return (time_t) dictGetEntryVal(de
);
7012 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7016 /* No expire? return ASAP */
7017 if (dictSize(db
->expires
) == 0 ||
7018 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7020 /* Lookup the expire */
7021 when
= (time_t) dictGetEntryVal(de
);
7022 if (time(NULL
) <= when
) return 0;
7024 /* Delete the key */
7025 dictDelete(db
->expires
,key
);
7026 server
.stat_expiredkeys
++;
7027 return dictDelete(db
->dict
,key
) == DICT_OK
;
7030 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7033 /* No expire? return ASAP */
7034 if (dictSize(db
->expires
) == 0 ||
7035 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7037 /* Delete the key */
7039 server
.stat_expiredkeys
++;
7040 dictDelete(db
->expires
,key
);
7041 return dictDelete(db
->dict
,key
) == DICT_OK
;
7044 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7048 if (getLongFromObject(c
, param
, &seconds
) != REDIS_OK
) return;
7052 de
= dictFind(c
->db
->dict
,key
);
7054 addReply(c
,shared
.czero
);
7058 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7059 addReply(c
, shared
.cone
);
7062 time_t when
= time(NULL
)+seconds
;
7063 if (setExpire(c
->db
,key
,when
)) {
7064 addReply(c
,shared
.cone
);
7067 addReply(c
,shared
.czero
);
7073 static void expireCommand(redisClient
*c
) {
7074 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7077 static void expireatCommand(redisClient
*c
) {
7078 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7081 static void ttlCommand(redisClient
*c
) {
7085 expire
= getExpire(c
->db
,c
->argv
[1]);
7087 ttl
= (int) (expire
-time(NULL
));
7088 if (ttl
< 0) ttl
= -1;
7090 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7093 /* ================================ MULTI/EXEC ============================== */
7095 /* Client state initialization for MULTI/EXEC */
7096 static void initClientMultiState(redisClient
*c
) {
7097 c
->mstate
.commands
= NULL
;
7098 c
->mstate
.count
= 0;
7101 /* Release all the resources associated with MULTI/EXEC state */
7102 static void freeClientMultiState(redisClient
*c
) {
7105 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7107 multiCmd
*mc
= c
->mstate
.commands
+j
;
7109 for (i
= 0; i
< mc
->argc
; i
++)
7110 decrRefCount(mc
->argv
[i
]);
7113 zfree(c
->mstate
.commands
);
7116 /* Add a new command into the MULTI commands queue */
7117 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7121 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7122 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7123 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7126 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7127 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7128 for (j
= 0; j
< c
->argc
; j
++)
7129 incrRefCount(mc
->argv
[j
]);
7133 static void multiCommand(redisClient
*c
) {
7134 c
->flags
|= REDIS_MULTI
;
7135 addReply(c
,shared
.ok
);
7138 static void discardCommand(redisClient
*c
) {
7139 if (!(c
->flags
& REDIS_MULTI
)) {
7140 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7144 freeClientMultiState(c
);
7145 initClientMultiState(c
);
7146 c
->flags
&= (~REDIS_MULTI
);
7147 addReply(c
,shared
.ok
);
7150 static void execCommand(redisClient
*c
) {
7155 if (!(c
->flags
& REDIS_MULTI
)) {
7156 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7160 orig_argv
= c
->argv
;
7161 orig_argc
= c
->argc
;
7162 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7163 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7164 c
->argc
= c
->mstate
.commands
[j
].argc
;
7165 c
->argv
= c
->mstate
.commands
[j
].argv
;
7166 call(c
,c
->mstate
.commands
[j
].cmd
);
7168 c
->argv
= orig_argv
;
7169 c
->argc
= orig_argc
;
7170 freeClientMultiState(c
);
7171 initClientMultiState(c
);
7172 c
->flags
&= (~REDIS_MULTI
);
7175 /* =========================== Blocking Operations ========================= */
7177 /* Currently Redis blocking operations support is limited to list POP ops,
7178 * so the current implementation is not fully generic, but it is also not
7179 * completely specific so it will not require a rewrite to support new
7180 * kind of blocking operations in the future.
7182 * Still it's important to note that list blocking operations can be already
7183 * used as a notification mechanism in order to implement other blocking
7184 * operations at application level, so there must be a very strong evidence
7185 * of usefulness and generality before new blocking operations are implemented.
7187 * This is how the current blocking POP works, we use BLPOP as example:
7188 * - If the user calls BLPOP and the key exists and contains a non empty list
7189 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7190 * if there is no need to block.
7191 * - If instead BLPOP is called and the key does not exist or the list is
7192 * empty we need to block. In order to do so we remove the notification for
7193 * new data to read in the client socket (so that we'll not serve new
7194 * requests if the blocking request is not served). Also we put the client
7195 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7196 * blocking for these keys.
7197 * - If a PUSH operation against a key with blocked clients waiting is
7198 * performed, we serve the first in the list: basically instead of pushing
7199 * the new element inside the list we return it to the (first / oldest)
7200 * blocking client, unblock the client, and remove it from the list.
7202 * The above comment and the source code should be enough in order to understand
7203 * the implementation and modify / fix it later.
7206 /* Set a client in blocking mode for the specified key, with the specified
7208 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7213 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7214 c
->blockingkeysnum
= numkeys
;
7215 c
->blockingto
= timeout
;
7216 for (j
= 0; j
< numkeys
; j
++) {
7217 /* Add the key in the client structure, to map clients -> keys */
7218 c
->blockingkeys
[j
] = keys
[j
];
7219 incrRefCount(keys
[j
]);
7221 /* And in the other "side", to map keys -> clients */
7222 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7226 /* For every key we take a list of clients blocked for it */
7228 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7229 incrRefCount(keys
[j
]);
7230 assert(retval
== DICT_OK
);
7232 l
= dictGetEntryVal(de
);
7234 listAddNodeTail(l
,c
);
7236 /* Mark the client as a blocked client */
7237 c
->flags
|= REDIS_BLOCKED
;
7238 server
.blpop_blocked_clients
++;
7241 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7242 static void unblockClientWaitingData(redisClient
*c
) {
7247 assert(c
->blockingkeys
!= NULL
);
7248 /* The client may wait for multiple keys, so unblock it for every key. */
7249 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7250 /* Remove this client from the list of clients waiting for this key. */
7251 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7253 l
= dictGetEntryVal(de
);
7254 listDelNode(l
,listSearchKey(l
,c
));
7255 /* If the list is empty we need to remove it to avoid wasting memory */
7256 if (listLength(l
) == 0)
7257 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7258 decrRefCount(c
->blockingkeys
[j
]);
7260 /* Cleanup the client structure */
7261 zfree(c
->blockingkeys
);
7262 c
->blockingkeys
= NULL
;
7263 c
->flags
&= (~REDIS_BLOCKED
);
7264 server
.blpop_blocked_clients
--;
7265 /* We want to process data if there is some command waiting
7266 * in the input buffer. Note that this is safe even if
7267 * unblockClientWaitingData() gets called from freeClient() because
7268 * freeClient() will be smart enough to call this function
7269 * *after* c->querybuf was set to NULL. */
7270 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7273 /* This should be called from any function PUSHing into lists.
7274 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7275 * 'ele' is the element pushed.
7277 * If the function returns 0 there was no client waiting for a list push
7280 * If the function returns 1 there was a client waiting for a list push
7281 * against this key, the element was passed to this client thus it's not
7282 * needed to actually add it to the list and the caller should return asap. */
7283 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7284 struct dictEntry
*de
;
7285 redisClient
*receiver
;
7289 de
= dictFind(c
->db
->blockingkeys
,key
);
7290 if (de
== NULL
) return 0;
7291 l
= dictGetEntryVal(de
);
7294 receiver
= ln
->value
;
7296 addReplySds(receiver
,sdsnew("*2\r\n"));
7297 addReplyBulk(receiver
,key
);
7298 addReplyBulk(receiver
,ele
);
7299 unblockClientWaitingData(receiver
);
7303 /* Blocking RPOP/LPOP */
7304 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7309 for (j
= 1; j
< c
->argc
-1; j
++) {
7310 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7312 if (o
->type
!= REDIS_LIST
) {
7313 addReply(c
,shared
.wrongtypeerr
);
7316 list
*list
= o
->ptr
;
7317 if (listLength(list
) != 0) {
7318 /* If the list contains elements fall back to the usual
7319 * non-blocking POP operation */
7320 robj
*argv
[2], **orig_argv
;
7323 /* We need to alter the command arguments before to call
7324 * popGenericCommand() as the command takes a single key. */
7325 orig_argv
= c
->argv
;
7326 orig_argc
= c
->argc
;
7327 argv
[1] = c
->argv
[j
];
7331 /* Also the return value is different, we need to output
7332 * the multi bulk reply header and the key name. The
7333 * "real" command will add the last element (the value)
7334 * for us. If this souds like an hack to you it's just
7335 * because it is... */
7336 addReplySds(c
,sdsnew("*2\r\n"));
7337 addReplyBulk(c
,argv
[1]);
7338 popGenericCommand(c
,where
);
7340 /* Fix the client structure with the original stuff */
7341 c
->argv
= orig_argv
;
7342 c
->argc
= orig_argc
;
7348 /* If the list is empty or the key does not exists we must block */
7349 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7350 if (timeout
> 0) timeout
+= time(NULL
);
7351 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7354 static void blpopCommand(redisClient
*c
) {
7355 blockingPopGenericCommand(c
,REDIS_HEAD
);
7358 static void brpopCommand(redisClient
*c
) {
7359 blockingPopGenericCommand(c
,REDIS_TAIL
);
7362 /* =============================== Replication ============================= */
7364 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7365 ssize_t nwritten
, ret
= size
;
7366 time_t start
= time(NULL
);
7370 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7371 nwritten
= write(fd
,ptr
,size
);
7372 if (nwritten
== -1) return -1;
7376 if ((time(NULL
)-start
) > timeout
) {
7384 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7385 ssize_t nread
, totread
= 0;
7386 time_t start
= time(NULL
);
7390 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7391 nread
= read(fd
,ptr
,size
);
7392 if (nread
== -1) return -1;
7397 if ((time(NULL
)-start
) > timeout
) {
7405 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7412 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
7415 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
7426 static void syncCommand(redisClient
*c
) {
7427 /* ignore SYNC if aleady slave or in monitor mode */
7428 if (c
->flags
& REDIS_SLAVE
) return;
7430 /* SYNC can't be issued when the server has pending data to send to
7431 * the client about already issued commands. We need a fresh reply
7432 * buffer registering the differences between the BGSAVE and the current
7433 * dataset, so that we can copy to other slaves if needed. */
7434 if (listLength(c
->reply
) != 0) {
7435 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7439 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7440 /* Here we need to check if there is a background saving operation
7441 * in progress, or if it is required to start one */
7442 if (server
.bgsavechildpid
!= -1) {
7443 /* Ok a background save is in progress. Let's check if it is a good
7444 * one for replication, i.e. if there is another slave that is
7445 * registering differences since the server forked to save */
7450 listRewind(server
.slaves
,&li
);
7451 while((ln
= listNext(&li
))) {
7453 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7456 /* Perfect, the server is already registering differences for
7457 * another slave. Set the right state, and copy the buffer. */
7458 listRelease(c
->reply
);
7459 c
->reply
= listDup(slave
->reply
);
7460 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7461 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7463 /* No way, we need to wait for the next BGSAVE in order to
7464 * register differences */
7465 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7466 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7469 /* Ok we don't have a BGSAVE in progress, let's start one */
7470 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7471 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7472 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7473 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7476 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7479 c
->flags
|= REDIS_SLAVE
;
7481 listAddNodeTail(server
.slaves
,c
);
7485 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7486 redisClient
*slave
= privdata
;
7488 REDIS_NOTUSED(mask
);
7489 char buf
[REDIS_IOBUF_LEN
];
7490 ssize_t nwritten
, buflen
;
7492 if (slave
->repldboff
== 0) {
7493 /* Write the bulk write count before to transfer the DB. In theory here
7494 * we don't know how much room there is in the output buffer of the
7495 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7496 * operations) will never be smaller than the few bytes we need. */
7499 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7501 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7509 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7510 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7512 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7513 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7517 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7518 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7523 slave
->repldboff
+= nwritten
;
7524 if (slave
->repldboff
== slave
->repldbsize
) {
7525 close(slave
->repldbfd
);
7526 slave
->repldbfd
= -1;
7527 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7528 slave
->replstate
= REDIS_REPL_ONLINE
;
7529 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7530 sendReplyToClient
, slave
) == AE_ERR
) {
7534 addReplySds(slave
,sdsempty());
7535 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7539 /* This function is called at the end of every background saving.
7540 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7541 * otherwise REDIS_ERR is passed to the function.
7543 * The goal of this function is to handle slaves waiting for a successful
7544 * background saving in order to perform non-blocking synchronization. */
7545 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7547 int startbgsave
= 0;
7550 listRewind(server
.slaves
,&li
);
7551 while((ln
= listNext(&li
))) {
7552 redisClient
*slave
= ln
->value
;
7554 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7556 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7557 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7558 struct redis_stat buf
;
7560 if (bgsaveerr
!= REDIS_OK
) {
7562 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7565 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7566 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7568 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7571 slave
->repldboff
= 0;
7572 slave
->repldbsize
= buf
.st_size
;
7573 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7574 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7575 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7582 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7585 listRewind(server
.slaves
,&li
);
7586 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7587 while((ln
= listNext(&li
))) {
7588 redisClient
*slave
= ln
->value
;
7590 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7597 static int syncWithMaster(void) {
7598 char buf
[1024], tmpfile
[256], authcmd
[1024];
7600 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7601 int dfd
, maxtries
= 5;
7604 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7609 /* AUTH with the master if required. */
7610 if(server
.masterauth
) {
7611 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7612 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7614 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7618 /* Read the AUTH result. */
7619 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7621 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7625 if (buf
[0] != '+') {
7627 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7632 /* Issue the SYNC command */
7633 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7635 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7639 /* Read the bulk write count */
7640 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7642 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7646 if (buf
[0] != '$') {
7648 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7651 dumpsize
= strtol(buf
+1,NULL
,10);
7652 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7653 /* Read the bulk write data on a temp file */
7655 snprintf(tmpfile
,256,
7656 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7657 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7658 if (dfd
!= -1) break;
7663 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7667 int nread
, nwritten
;
7669 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7671 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7677 nwritten
= write(dfd
,buf
,nread
);
7678 if (nwritten
== -1) {
7679 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7687 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7688 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7694 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7695 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7699 server
.master
= createClient(fd
);
7700 server
.master
->flags
|= REDIS_MASTER
;
7701 server
.master
->authenticated
= 1;
7702 server
.replstate
= REDIS_REPL_CONNECTED
;
7706 static void slaveofCommand(redisClient
*c
) {
7707 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7708 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7709 if (server
.masterhost
) {
7710 sdsfree(server
.masterhost
);
7711 server
.masterhost
= NULL
;
7712 if (server
.master
) freeClient(server
.master
);
7713 server
.replstate
= REDIS_REPL_NONE
;
7714 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7717 sdsfree(server
.masterhost
);
7718 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7719 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7720 if (server
.master
) freeClient(server
.master
);
7721 server
.replstate
= REDIS_REPL_CONNECT
;
7722 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7723 server
.masterhost
, server
.masterport
);
7725 addReply(c
,shared
.ok
);
7728 /* ============================ Maxmemory directive ======================== */
7730 /* Try to free one object from the pre-allocated objects free list.
7731 * This is useful under low mem conditions as by default we take 1 million
7732 * free objects allocated. On success REDIS_OK is returned, otherwise
7734 static int tryFreeOneObjectFromFreelist(void) {
7737 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7738 if (listLength(server
.objfreelist
)) {
7739 listNode
*head
= listFirst(server
.objfreelist
);
7740 o
= listNodeValue(head
);
7741 listDelNode(server
.objfreelist
,head
);
7742 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7746 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7751 /* This function gets called when 'maxmemory' is set on the config file to limit
7752 * the max memory used by the server, and we are out of memory.
7753 * This function will try to, in order:
7755 * - Free objects from the free list
7756 * - Try to remove keys with an EXPIRE set
7758 * If it is not possible to free enough memory to reach used-memory < maxmemory
7759 * the server will start refusing commands that will enlarge even more the
7762 static void freeMemoryIfNeeded(void) {
7763 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7764 int j
, k
, freed
= 0;
7766 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7767 for (j
= 0; j
< server
.dbnum
; j
++) {
7769 robj
*minkey
= NULL
;
7770 struct dictEntry
*de
;
7772 if (dictSize(server
.db
[j
].expires
)) {
7774 /* From a sample of three keys drop the one nearest to
7775 * the natural expire */
7776 for (k
= 0; k
< 3; k
++) {
7779 de
= dictGetRandomKey(server
.db
[j
].expires
);
7780 t
= (time_t) dictGetEntryVal(de
);
7781 if (minttl
== -1 || t
< minttl
) {
7782 minkey
= dictGetEntryKey(de
);
7786 deleteKey(server
.db
+j
,minkey
);
7789 if (!freed
) return; /* nothing to free... */
7793 /* ============================== Append Only file ========================== */
7795 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7796 sds buf
= sdsempty();
7802 /* The DB this command was targetting is not the same as the last command
7803 * we appendend. To issue a SELECT command is needed. */
7804 if (dictid
!= server
.appendseldb
) {
7807 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7808 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7809 (unsigned long)strlen(seldb
),seldb
);
7810 server
.appendseldb
= dictid
;
7813 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7814 * EXPIREs into EXPIREATs calls */
7815 if (cmd
->proc
== expireCommand
) {
7818 tmpargv
[0] = createStringObject("EXPIREAT",8);
7819 tmpargv
[1] = argv
[1];
7820 incrRefCount(argv
[1]);
7821 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7822 tmpargv
[2] = createObject(REDIS_STRING
,
7823 sdscatprintf(sdsempty(),"%ld",when
));
7827 /* Append the actual command */
7828 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7829 for (j
= 0; j
< argc
; j
++) {
7832 o
= getDecodedObject(o
);
7833 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7834 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7835 buf
= sdscatlen(buf
,"\r\n",2);
7839 /* Free the objects from the modified argv for EXPIREAT */
7840 if (cmd
->proc
== expireCommand
) {
7841 for (j
= 0; j
< 3; j
++)
7842 decrRefCount(argv
[j
]);
7845 /* We want to perform a single write. This should be guaranteed atomic
7846 * at least if the filesystem we are writing is a real physical one.
7847 * While this will save us against the server being killed I don't think
7848 * there is much to do about the whole server stopping for power problems
7850 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7851 if (nwritten
!= (signed)sdslen(buf
)) {
7852 /* Ooops, we are in troubles. The best thing to do for now is
7853 * to simply exit instead to give the illusion that everything is
7854 * working as expected. */
7855 if (nwritten
== -1) {
7856 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7858 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7862 /* If a background append only file rewriting is in progress we want to
7863 * accumulate the differences between the child DB and the current one
7864 * in a buffer, so that when the child process will do its work we
7865 * can append the differences to the new append only file. */
7866 if (server
.bgrewritechildpid
!= -1)
7867 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7871 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7872 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7873 now
-server
.lastfsync
> 1))
7875 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7876 server
.lastfsync
= now
;
7880 /* In Redis commands are always executed in the context of a client, so in
7881 * order to load the append only file we need to create a fake client.
 *
 * The client structure is allocated with zmalloc(). It gets an empty
 * query buffer and an empty reply list whose nodes are released with
 * decrRefCount and duplicated with dupClientReplyValue.
 * NOTE(review): the remaining field initialisations and the return
 * statement are not visible in this excerpt; presumably the new client
 * is returned and must be released with freeFakeClient() - confirm. */
7882 static struct redisClient
*createFakeClient(void) {
7883 struct redisClient
*c
= zmalloc(sizeof(*c
));
7887 c
->querybuf
= sdsempty();
7891 /* We set the fake client as a slave waiting for the synchronization
7892 * so that Redis will not try to send replies to this client. */
7893 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7894 c
->reply
= listCreate();
7895 listSetFreeMethod(c
->reply
,decrRefCount
);
7896 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7900 static void freeFakeClient(struct redisClient
*c
) {
7901 sdsfree(c
->querybuf
);
7902 listRelease(c
->reply
);
7906 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
7907 * error (the append only file is zero-length) REDIS_ERR is returned. On
7908 * fatal error an error message is logged and the program exists. */
7909 int loadAppendOnlyFile(char *filename
) {
7910 struct redisClient
*fakeClient
;
7911 FILE *fp
= fopen(filename
,"r");
7912 struct redis_stat sb
;
7913 unsigned long long loadedkeys
= 0;
7915 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7919 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7923 fakeClient
= createFakeClient();
7930 struct redisCommand
*cmd
;
7932 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7938 if (buf
[0] != '*') goto fmterr
;
7940 argv
= zmalloc(sizeof(robj
*)*argc
);
7941 for (j
= 0; j
< argc
; j
++) {
7942 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7943 if (buf
[0] != '$') goto fmterr
;
7944 len
= strtol(buf
+1,NULL
,10);
7945 argsds
= sdsnewlen(NULL
,len
);
7946 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7947 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7948 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7951 /* Command lookup */
7952 cmd
= lookupCommand(argv
[0]->ptr
);
7954 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7957 /* Try object encoding */
7958 if (cmd
->flags
& REDIS_CMD_BULK
)
7959 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
7960 /* Run the command in the context of a fake client */
7961 fakeClient
->argc
= argc
;
7962 fakeClient
->argv
= argv
;
7963 cmd
->proc(fakeClient
);
7964 /* Discard the reply objects list from the fake client */
7965 while(listLength(fakeClient
->reply
))
7966 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7967 /* Clean up, ready for the next command */
7968 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7970 /* Handle swapping while loading big datasets when VM is on */
7972 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7973 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7974 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7979 freeFakeClient(fakeClient
);
7984 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7986 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7990 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7994 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7995 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7999 /* Avoid the incr/decr ref count business if possible to help
8000 * copy-on-write (we are often in a child process when this function
8002 * Also makes sure that key objects don't get incrRefCount-ed when VM
8004 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8005 obj
= getDecodedObject(obj
);
8008 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8009 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8010 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8012 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8013 if (decrrc
) decrRefCount(obj
);
8016 if (decrrc
) decrRefCount(obj
);
/* Write binary-safe string into a file in the bulkformat
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (it may contain any byte, and is
 * not accessed when 'len' is zero). Returns 1 on success, 0 if any of the
 * writes fails. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: it must be formatted with %lu, not %ld. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * The value is formatted with "%.17g". Returns 1 on success, 0 if a
 * write fails. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload already carries the trailing CRLF, which does not count
     * for the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 if a write fails. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload already carries the trailing CRLF, which does not count
     * for the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
8054 /* Write a sequence of commands able to fully rebuild the dataset into
8055 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8056 static int rewriteAppendOnlyFile(char *filename
) {
8057 dictIterator
*di
= NULL
;
8062 time_t now
= time(NULL
);
8064 /* Note that we have to use a different temp name here compared to the
8065 * one used by rewriteAppendOnlyFileBackground() function. */
8066 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8067 fp
= fopen(tmpfile
,"w");
8069 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8072 for (j
= 0; j
< server
.dbnum
; j
++) {
8073 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8074 redisDb
*db
= server
.db
+j
;
8076 if (dictSize(d
) == 0) continue;
8077 di
= dictGetIterator(d
);
8083 /* SELECT the new DB */
8084 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8085 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8087 /* Iterate this DB writing every entry */
8088 while((de
= dictNext(di
)) != NULL
) {
8093 key
= dictGetEntryKey(de
);
8094 /* If the value for this key is swapped, load a preview in memory.
8095 * We use a "swapped" flag to remember if we need to free the
8096 * value object instead to just increment the ref count anyway
8097 * in order to avoid copy-on-write of pages if we are forked() */
8098 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8099 key
->storage
== REDIS_VM_SWAPPING
) {
8100 o
= dictGetEntryVal(de
);
8103 o
= vmPreviewObject(key
);
8106 expiretime
= getExpire(db
,key
);
8108 /* Save the key and associated value */
8109 if (o
->type
== REDIS_STRING
) {
8110 /* Emit a SET command */
8111 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8112 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8114 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8115 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8116 } else if (o
->type
== REDIS_LIST
) {
8117 /* Emit the RPUSHes needed to rebuild the list */
8118 list
*list
= o
->ptr
;
8122 listRewind(list
,&li
);
8123 while((ln
= listNext(&li
))) {
8124 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8125 robj
*eleobj
= listNodeValue(ln
);
8127 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8128 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8129 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8131 } else if (o
->type
== REDIS_SET
) {
8132 /* Emit the SADDs needed to rebuild the set */
8134 dictIterator
*di
= dictGetIterator(set
);
8137 while((de
= dictNext(di
)) != NULL
) {
8138 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8139 robj
*eleobj
= dictGetEntryKey(de
);
8141 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8142 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8143 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8145 dictReleaseIterator(di
);
8146 } else if (o
->type
== REDIS_ZSET
) {
8147 /* Emit the ZADDs needed to rebuild the sorted set */
8149 dictIterator
*di
= dictGetIterator(zs
->dict
);
8152 while((de
= dictNext(di
)) != NULL
) {
8153 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8154 robj
*eleobj
= dictGetEntryKey(de
);
8155 double *score
= dictGetEntryVal(de
);
8157 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8158 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8159 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8160 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8162 dictReleaseIterator(di
);
8163 } else if (o
->type
== REDIS_HASH
) {
8164 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8166 /* Emit the HSETs needed to rebuild the hash */
8167 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8168 unsigned char *p
= zipmapRewind(o
->ptr
);
8169 unsigned char *field
, *val
;
8170 unsigned int flen
, vlen
;
8172 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8173 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8174 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8175 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8177 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8181 dictIterator
*di
= dictGetIterator(o
->ptr
);
8184 while((de
= dictNext(di
)) != NULL
) {
8185 robj
*field
= dictGetEntryKey(de
);
8186 robj
*val
= dictGetEntryVal(de
);
8188 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8189 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8190 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8191 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8193 dictReleaseIterator(di
);
8198 /* Save the expire time */
8199 if (expiretime
!= -1) {
8200 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8201 /* If this key is already expired skip it */
8202 if (expiretime
< now
) continue;
8203 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8204 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8205 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8207 if (swapped
) decrRefCount(o
);
8209 dictReleaseIterator(di
);
8212 /* Make sure data will not remain on the OS's output buffers */
8217 /* Use RENAME to make sure the DB file is changed atomically only
8218 * if the generate DB file is ok. */
8219 if (rename(tmpfile
,filename
) == -1) {
8220 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8224 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8230 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8231 if (di
) dictReleaseIterator(di
);
8235 /* This is how rewriting of the append only file in background works:
8237 * 1) The user calls BGREWRITEAOF
8238 * 2) Redis calls this function, that forks():
8239 * 2a) the child rewrites the append only file in a temp file.
8240 * 2b) the parent accumulates differences in server.bgrewritebuf.
8241 * 3) When the child finished '2a' exits.
8242 * 4) The parent will trap the exit code, if it's OK, will append the
8243 * data accumulated into server.bgrewritebuf into the temp file, and
8244 * finally will rename(2) the temp file in the actual file name.
8245 * Then the new file is reopened as the new append only file. Profit!
 *
 * REDIS_ERR is returned without forking when a background rewrite is
 * already in progress (server.bgrewritechildpid != -1). When VM is
 * enabled the function first waits for the I/O jobs queue to be empty,
 * and the child reopens the swap file before rewriting. The child writes
 * to "temp-rewriteaof-bg-<pid>.aof", where <pid> is its own pid. In the
 * parent a failed fork() is logged; otherwise the child pid is stored in
 * server.bgrewritechildpid and REDIS_OK is returned.
 * NOTE(review): the child exit calls and the parent's return on fork
 * failure are not visible in this excerpt - confirm against the file.
8247 static int rewriteAppendOnlyFileBackground(void) {
8250 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8251 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8252 if ((childpid
= fork()) == 0) {
8256 if (server
.vm_enabled
) vmReopenSwapFile();
8258 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8259 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8266 if (childpid
== -1) {
8267 redisLog(REDIS_WARNING
,
8268 "Can't rewrite append only file in background: fork: %s",
8272 redisLog(REDIS_NOTICE
,
8273 "Background append only file rewriting started by pid %d",childpid
);
8274 server
.bgrewritechildpid
= childpid
;
8275 updateDictResizePolicy();
8276 /* We set appendseldb to -1 in order to force the next call to the
8277 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8278 * accumulated by the parent into server.bgrewritebuf will start
8279 * with a SELECT statement and it will be safe to merge. */
8280 server
.appendseldb
= -1;
8283 return REDIS_OK
; /* unreached */
8286 static void bgrewriteaofCommand(redisClient
*c
) {
8287 if (server
.bgrewritechildpid
!= -1) {
8288 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8291 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8292 char *status
= "+Background append only file rewriting started\r\n";
8293 addReplySds(c
,sdsnew(status
));
8295 addReply(c
,shared
.err
);
/* Remove the temp file "temp-rewriteaof-bg-<childpid>.aof", that is the
 * name used by the background rewrite child with the given pid. The
 * result of unlink(2) is ignored. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];
    int pid = (int) childpid;

    snprintf(path,256,"temp-rewriteaof-bg-%d.aof", pid);
    unlink(path);
}
8306 /* Virtual Memory is composed mainly of two subsystems:
8307 * - Blocking Virtual Memory
8308 * - Threaded Virtual Memory I/O
8309 * The two parts are not fully decoupled, but functions are split among two
8310 * different sections of the source code (delimited by comments) in order to
8311 * make more clear what functionality is about the blocking VM and what about
8312 * the threaded (not blocking) VM.
8316 * Redis VM is a blocking VM (one that blocks reading swapped values from
8317 * disk into memory when a value swapped out is needed in memory) that is made
8318 * unblocking by trying to examine the command argument vector in order to
8319 * load in background values that will likely be needed in order to exec
8320 * the command. The command is executed only once all the relevant keys
8321 * are loaded into memory.
8323 * This basically is almost as simple as a blocking VM, but almost as parallel
8324 * as a fully non-blocking VM.
8327 /* =================== Virtual Memory - Blocking Side ====================== */
8329 /* substitute the first occurrence of '%p' with the process pid in the
8330 * swap file name. */
8331 static void expandVmSwapFilename(void) {
8332 char *p
= strstr(server
.vm_swap_file
,"%p");
8338 new = sdscat(new,server
.vm_swap_file
);
8339 new = sdscatprintf(new,"%ld",(long) getpid());
8340 new = sdscat(new,p
+2);
8341 zfree(server
.vm_swap_file
);
8342 server
.vm_swap_file
= new;
8345 static void vmInit(void) {
8350 if (server
.vm_max_threads
!= 0)
8351 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8353 expandVmSwapFilename();
8354 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8355 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8356 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8358 if (server
.vm_fp
== NULL
) {
8359 redisLog(REDIS_WARNING
,
8360 "Impossible to open the swap file: %s. Exiting.",
8364 server
.vm_fd
= fileno(server
.vm_fp
);
8365 server
.vm_next_page
= 0;
8366 server
.vm_near_pages
= 0;
8367 server
.vm_stats_used_pages
= 0;
8368 server
.vm_stats_swapped_objects
= 0;
8369 server
.vm_stats_swapouts
= 0;
8370 server
.vm_stats_swapins
= 0;
8371 totsize
= server
.vm_pages
*server
.vm_page_size
;
8372 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8373 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8374 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8378 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8380 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8381 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8382 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8383 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8385 /* Initialize threaded I/O (used by Virtual Memory) */
8386 server
.io_newjobs
= listCreate();
8387 server
.io_processing
= listCreate();
8388 server
.io_processed
= listCreate();
8389 server
.io_ready_clients
= listCreate();
8390 pthread_mutex_init(&server
.io_mutex
,NULL
);
8391 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8392 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8393 server
.io_active_threads
= 0;
8394 if (pipe(pipefds
) == -1) {
8395 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8399 server
.io_ready_pipe_read
= pipefds
[0];
8400 server
.io_ready_pipe_write
= pipefds
[1];
8401 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8402 /* LZF requires a lot of stack */
8403 pthread_attr_init(&server
.io_threads_attr
);
8404 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8405 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8406 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8407 /* Listen for events in the threaded I/O pipe */
8408 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8409 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8410 oom("creating file event");
8413 /* Mark the page as used */
8414 static void vmMarkPageUsed(off_t page
) {
8415 off_t byte
= page
/8;
8417 redisAssert(vmFreePage(page
) == 1);
8418 server
.vm_bitmap
[byte
] |= 1<<bit
;
8421 /* Mark N contiguous pages as used, with 'page' being the first. */
8422 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8425 for (j
= 0; j
< count
; j
++)
8426 vmMarkPageUsed(page
+j
);
8427 server
.vm_stats_used_pages
+= count
;
8428 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8429 (long long)count
, (long long)page
);
8432 /* Mark the page as free */
8433 static void vmMarkPageFree(off_t page
) {
8434 off_t byte
= page
/8;
8436 redisAssert(vmFreePage(page
) == 0);
8437 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8440 /* Mark N contiguous pages as free, with 'page' being the first. */
8441 static void vmMarkPagesFree(off_t page
, off_t count
) {
8444 for (j
= 0; j
< count
; j
++)
8445 vmMarkPageFree(page
+j
);
8446 server
.vm_stats_used_pages
-= count
;
8447 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8448 (long long)count
, (long long)page
);
8451 /* Test if the page is free */
8452 static int vmFreePage(off_t page
) {
8453 off_t byte
= page
/8;
8455 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8458 /* Find N contiguous free pages storing the first page of the cluster in *first.
8459 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8460 * REDIS_ERR is returned.
8462 * This function uses a simple algorithm: we try to allocate
8463 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8464 * again from the start of the swap file searching for free spaces.
8466 * If it looks pretty clear that there are no free pages near our offset
8467 * we try to find less populated places doing a forward jump of
8468 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8469 * without hurry, and then we jump again and so forth...
8471 * This function can be improved using a free list to avoid to guess
8472 * too much, since we could collect data about freed pages.
8474 * note: I implemented this function just after watching an episode of
8475 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8477 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8478 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8480 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8481 server
.vm_near_pages
= 0;
8482 server
.vm_next_page
= 0;
8484 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8485 base
= server
.vm_next_page
;
8487 while(offset
< server
.vm_pages
) {
8488 off_t
this = base
+offset
;
8490 /* If we overflow, restart from page zero */
8491 if (this >= server
.vm_pages
) {
8492 this -= server
.vm_pages
;
8494 /* Just overflowed, what we found on tail is no longer
8495 * interesting, as it's no longer contiguous. */
8499 if (vmFreePage(this)) {
8500 /* This is a free page */
8502 /* Already got N free pages? Return to the caller, with success */
8504 *first
= this-(n
-1);
8505 server
.vm_next_page
= this+1;
8506 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8510 /* The current one is not a free page */
8514 /* Fast-forward if the current page is not free and we already
8515 * searched enough near this place. */
8517 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8518 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8520 /* Note that even if we rewind after the jump, we are don't need
8521 * to make sure numfree is set to zero as we only jump *if* it
8522 * is set to zero. */
8524 /* Otherwise just check the next page */
8531 /* Write the specified object at the specified page of the swap file */
8532 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8533 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8534 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8535 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8536 redisLog(REDIS_WARNING
,
8537 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8541 rdbSaveObject(server
.vm_fp
,o
);
8542 fflush(server
.vm_fp
);
8543 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8547 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8548 * needed to later retrieve the object into the key object.
8549 * If we can't find enough contiguous empty pages to swap the object on disk
8550 * REDIS_ERR is returned. */
8551 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8552 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8555 assert(key
->storage
== REDIS_VM_MEMORY
);
8556 assert(key
->refcount
== 1);
8557 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8558 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8559 key
->vm
.page
= page
;
8560 key
->vm
.usedpages
= pages
;
8561 key
->storage
= REDIS_VM_SWAPPED
;
8562 key
->vtype
= val
->type
;
8563 decrRefCount(val
); /* Deallocate the object from memory. */
8564 vmMarkPagesUsed(page
,pages
);
8565 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8566 (unsigned char*) key
->ptr
,
8567 (unsigned long long) page
, (unsigned long long) pages
);
8568 server
.vm_stats_swapped_objects
++;
8569 server
.vm_stats_swapouts
++;
/* Read an object of the given 'type' from the swap file, starting at the
 * offset of the page 'page' (page * server.vm_page_size).
 *
 * The swap file mutex is taken before seeking, when VM is enabled, and
 * released after the object is loaded with rdbLoadObject(). A warning is
 * logged if it is not possible to seek or to load the object.
 * NOTE(review): what happens after those warnings, and the return
 * statement, are not visible in this excerpt; the messages say
 * "Unrecoverable", so presumably the process is terminated and on
 * success the loaded object is returned - confirm. */
8573 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8576 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8577 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8578 redisLog(REDIS_WARNING
,
8579 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8583 o
= rdbLoadObject(type
,server
.vm_fp
);
8585 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8588 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8592 /* Load the value object relative to the 'key' object from swap to memory.
8593 * The newly allocated object is returned.
8595 * If preview is true the unserialized object is returned to the caller but
8596 * no changes are made to the key object, nor the pages are marked as freed */
8597 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8600 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8601 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8603 key
->storage
= REDIS_VM_MEMORY
;
8604 key
->vm
.atime
= server
.unixtime
;
8605 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8606 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8607 (unsigned char*) key
->ptr
);
8608 server
.vm_stats_swapped_objects
--;
8610 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8611 (unsigned char*) key
->ptr
);
8613 server
.vm_stats_swapins
++;
8617 /* Plain object loading, from swap to memory */
8618 static robj
*vmLoadObject(robj
*key
) {
8619 /* If we are loading the object in background, stop it, we
8620 * need to load this object synchronously ASAP. */
8621 if (key
->storage
== REDIS_VM_LOADING
)
8622 vmCancelThreadedIOJob(key
);
8623 return vmGenericLoadObject(key
,0);
8626 /* Just load the value on disk, without to modify the key.
8627 * This is useful when we want to perform some operation on the value
8628 * without to really bring it from swap to memory, like while saving the
8629 * dataset or rewriting the append only log. */
8630 static robj
*vmPreviewObject(robj
*key
) {
8631 return vmGenericLoadObject(key
,1);
8634 /* How a good candidate is this object for swapping?
8635 * The better candidate it is, the greater the returned value.
8637 * Currently we try to perform a fast estimation of the object size in
8638 * memory, and combine it with aging informations.
8640 * Basically swappability = idle-time * log(estimated size)
8642 * Bigger objects are preferred over smaller objects, but not
8643 * proportionally, this is why we use the logarithm. This algorithm is
8644 * just a first try and will probably be tuned later.
 *
 * The idle time ('age') is server.unixtime minus the object access time
 * o->vm.atime: when it is zero or negative the function returns 0. The
 * returned value is age*log(1+asize), where 'asize' is the estimated
 * size. For aggregate types the size is not computed visiting every
 * element: a single element (the first node of a list, a random entry of
 * a dict, the first entry returned by zipmapNext() for a zipmap) is
 * measured and the result is multiplied by the number of elements. */
8645 static double computeObjectSwappability(robj
*o
) {
8646 time_t age
= server
.unixtime
- o
->vm
.atime
;
8650 struct dictEntry
*de
;
8653 if (age
<= 0) return 0;
8656 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8659 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8664 listNode
*ln
= listFirst(l
);
8666 asize
= sizeof(list
);
8668 robj
*ele
= ln
->value
;
8671 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8672 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8674 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8679 z
= (o
->type
== REDIS_ZSET
);
8680 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8682 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8683 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8688 de
= dictGetRandomKey(d
);
8689 ele
= dictGetEntryKey(de
);
8690 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8691 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8693 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8694 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8698 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8699 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8700 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8701 unsigned int klen
, vlen
;
8702 unsigned char *key
, *val
;
8704 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8708 asize
= len
*(klen
+vlen
+3);
8709 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8711 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8716 de
= dictGetRandomKey(d
);
8717 ele
= dictGetEntryKey(de
);
8718 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8719 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8721 ele
= dictGetEntryVal(de
);
8722 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8723 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8725 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8730 return (double)age
*log(1+asize
);
8733 /* Try to swap an object that's a good candidate for swapping.
8734 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8735 * to swap any object at all.
8737 * If 'usethreads' is true, Redis will try to swap the object in background
8738 * using I/O threads.
 *
 * Candidates are random keys sampled from every non empty DB (up to five
 * counted tries per DB): the one with the greatest value returned by
 * computeObjectSwappability() wins. Keys that are not in memory are
 * skipped, and so are values with a reference count other than one when
 * threaded VM is on (server.vm_max_threads != 0). REDIS_ERR is returned
 * when no candidate is found. A selected key that is shared
 * (refcount > 1) is replaced in the dict entry by a private copy before
 * swapping. In the blocking path the value stored in the dict entry is
 * set to NULL once the object is swapped with success. */
8739 static int vmSwapOneObject(int usethreads
) {
8741 struct dictEntry
*best
= NULL
;
8742 double best_swappability
= 0;
8743 redisDb
*best_db
= NULL
;
8746 for (j
= 0; j
< server
.dbnum
; j
++) {
8747 redisDb
*db
= server
.db
+j
;
8748 /* Why maxtries is set to 100?
8749 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8750 * are swappable objects */
8753 if (dictSize(db
->dict
) == 0) continue;
8754 for (i
= 0; i
< 5; i
++) {
8756 double swappability
;
8758 if (maxtries
) maxtries
--;
8759 de
= dictGetRandomKey(db
->dict
);
8760 key
= dictGetEntryKey(de
);
8761 val
= dictGetEntryVal(de
);
8762 /* Only swap objects that are currently in memory.
8764 * Also don't swap shared objects if threaded VM is on, as we
8765 * try to ensure that the main thread does not touch the
8766 * object while the I/O thread is using it, but we can't
8767 * control other keys without adding additional mutex. */
8768 if (key
->storage
!= REDIS_VM_MEMORY
||
8769 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8770 if (maxtries
) i
--; /* don't count this try */
8773 swappability
= computeObjectSwappability(val
);
8774 if (!best
|| swappability
> best_swappability
) {
8776 best_swappability
= swappability
;
8781 if (best
== NULL
) return REDIS_ERR
;
8782 key
= dictGetEntryKey(best
);
8783 val
= dictGetEntryVal(best
);
8785 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8786 key
->ptr
, best_swappability
);
8788 /* Unshare the key if needed */
8789 if (key
->refcount
> 1) {
8790 robj
*newkey
= dupStringObject(key
);
8792 key
= dictGetEntryKey(best
) = newkey
;
8796 vmSwapObjectThreaded(key
,val
,best_db
);
8799 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8800 dictGetEntryVal(best
) = NULL
;
/* Swap one object out synchronously: vmSwapOneObject() without I/O threads.
 * Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the I/O threads: vmSwapOneObject() in threaded
 * mode. Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8816 /* Return true if it's safe to swap out objects in a given moment.
8817 * Basically we don't want to swap objects out while there is a BGSAVE
8818 * or a BGAEOREWRITE running in backgroud. */
8819 static int vmCanSwapOut(void) {
8820 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8823 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8824 * and was deleted. Otherwise 0 is returned. */
8825 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8829 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8830 foundkey
= dictGetEntryKey(de
);
8831 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8836 /* =================== Virtual Memory - Threaded I/O ======================= */
8838 static void freeIOJob(iojob
*j
) {
8839 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8840 j
->type
== REDIS_IOJOB_DO_SWAP
||
8841 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8842 decrRefCount(j
->val
);
8843 /* We don't decrRefCount the j->key field as we did't incremented
8844 * the count creating IO Jobs. This is because the key field here is
8845 * just used as an indentifier and if a key is removed the Job should
8846 * never be touched again. */
8850 /* Every time a thread finished a Job, it writes a byte into the write side
8851 * of an unix pipe in order to "awake" the main thread, and this function
8853 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8857 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8859 REDIS_NOTUSED(mask
);
8860 REDIS_NOTUSED(privdata
);
8862 /* For every byte we read in the read side of the pipe, there is one
8863 * I/O job completed to process. */
8864 while((retval
= read(fd
,buf
,1)) == 1) {
8868 struct dictEntry
*de
;
8870 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8872 /* Get the processed element (the oldest one) */
8874 assert(listLength(server
.io_processed
) != 0);
8875 if (toprocess
== -1) {
8876 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8877 if (toprocess
<= 0) toprocess
= 1;
8879 ln
= listFirst(server
.io_processed
);
8881 listDelNode(server
.io_processed
,ln
);
8883 /* If this job is marked as canceled, just ignore it */
8888 /* Post process it in the main thread, as there are things we
8889 * can do just here to avoid race conditions and/or invasive locks */
8890 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8891 de
= dictFind(j
->db
->dict
,j
->key
);
8893 key
= dictGetEntryKey(de
);
8894 if (j
->type
== REDIS_IOJOB_LOAD
) {
8897 /* Key loaded, bring it at home */
8898 key
->storage
= REDIS_VM_MEMORY
;
8899 key
->vm
.atime
= server
.unixtime
;
8900 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8901 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8902 (unsigned char*) key
->ptr
);
8903 server
.vm_stats_swapped_objects
--;
8904 server
.vm_stats_swapins
++;
8905 dictGetEntryVal(de
) = j
->val
;
8906 incrRefCount(j
->val
);
8909 /* Handle clients waiting for this key to be loaded. */
8910 handleClientsBlockedOnSwappedKey(db
,key
);
8911 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8912 /* Now we know the amount of pages required to swap this object.
8913 * Let's find some space for it, and queue this task again
8914 * rebranded as REDIS_IOJOB_DO_SWAP. */
8915 if (!vmCanSwapOut() ||
8916 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8918 /* Ooops... no space or we can't swap as there is
8919 * a fork()ed Redis trying to save stuff on disk. */
8921 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8923 /* Note that we need to mark this pages as used now,
8924 * if the job will be canceled, we'll mark them as freed
8926 vmMarkPagesUsed(j
->page
,j
->pages
);
8927 j
->type
= REDIS_IOJOB_DO_SWAP
;
8932 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8935 /* Key swapped. We can finally free some memory. */
8936 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8937 printf("key->storage: %d\n",key
->storage
);
8938 printf("key->name: %s\n",(char*)key
->ptr
);
8939 printf("key->refcount: %d\n",key
->refcount
);
8940 printf("val: %p\n",(void*)j
->val
);
8941 printf("val->type: %d\n",j
->val
->type
);
8942 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8944 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8945 val
= dictGetEntryVal(de
);
8946 key
->vm
.page
= j
->page
;
8947 key
->vm
.usedpages
= j
->pages
;
8948 key
->storage
= REDIS_VM_SWAPPED
;
8949 key
->vtype
= j
->val
->type
;
8950 decrRefCount(val
); /* Deallocate the object from memory. */
8951 dictGetEntryVal(de
) = NULL
;
8952 redisLog(REDIS_DEBUG
,
8953 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8954 (unsigned char*) key
->ptr
,
8955 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8956 server
.vm_stats_swapped_objects
++;
8957 server
.vm_stats_swapouts
++;
8959 /* Put a few more swap requests in queue if we are still
8961 if (trytoswap
&& vmCanSwapOut() &&
8962 zmalloc_used_memory() > server
.vm_max_memory
)
8967 more
= listLength(server
.io_newjobs
) <
8968 (unsigned) server
.vm_max_threads
;
8970 /* Don't waste CPU time if swappable objects are rare. */
8971 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8979 if (processed
== toprocess
) return;
8981 if (retval
< 0 && errno
!= EAGAIN
) {
8982 redisLog(REDIS_WARNING
,
8983 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8988 static void lockThreadedIO(void) {
8989 pthread_mutex_lock(&server
.io_mutex
);
8992 static void unlockThreadedIO(void) {
8993 pthread_mutex_unlock(&server
.io_mutex
);
8996 /* Remove the specified object from the threaded I/O queue if still not
8997 * processed, otherwise make sure to flag it as canceled. */
8998 static void vmCancelThreadedIOJob(robj
*o
) {
9000 server
.io_newjobs
, /* 0 */
9001 server
.io_processing
, /* 1 */
9002 server
.io_processed
/* 2 */
9006 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9009 /* Search for a matching key in one of the queues */
9010 for (i
= 0; i
< 3; i
++) {
9014 listRewind(lists
[i
],&li
);
9015 while ((ln
= listNext(&li
)) != NULL
) {
9016 iojob
*job
= ln
->value
;
9018 if (job
->canceled
) continue; /* Skip this, already canceled. */
9019 if (job
->key
== o
) {
9020 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9021 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9022 /* Mark the pages as free since the swap didn't happened
9023 * or happened but is now discarded. */
9024 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9025 vmMarkPagesFree(job
->page
,job
->pages
);
9026 /* Cancel the job. It depends on the list the job is
9029 case 0: /* io_newjobs */
9030 /* If the job was yet not processed the best thing to do
9031 * is to remove it from the queue at all */
9033 listDelNode(lists
[i
],ln
);
9035 case 1: /* io_processing */
9036 /* Oh Shi- the thread is messing with the Job:
9038 * Probably it's accessing the object if this is a
9039 * PREPARE_SWAP or DO_SWAP job.
9040 * If it's a LOAD job it may be reading from disk and
9041 * if we don't wait for the job to terminate before to
9042 * cancel it, maybe in a few microseconds data can be
9043 * corrupted in this pages. So the short story is:
9045 * Better to wait for the job to move into the
9046 * next queue (processed)... */
9048 /* We try again and again until the job is completed. */
9050 /* But let's wait some time for the I/O thread
9051 * to finish with this job. After all this condition
9052 * should be very rare. */
9055 case 2: /* io_processed */
9056 /* The job was already processed, that's easy...
9057 * just mark it as canceled so that we'll ignore it
9058 * when processing completed jobs. */
9062 /* Finally we have to adjust the storage type of the object
9063 * in order to "UNDO" the operaiton. */
9064 if (o
->storage
== REDIS_VM_LOADING
)
9065 o
->storage
= REDIS_VM_SWAPPED
;
9066 else if (o
->storage
== REDIS_VM_SWAPPING
)
9067 o
->storage
= REDIS_VM_MEMORY
;
9074 assert(1 != 1); /* We should never reach this */
9077 static void *IOThreadEntryPoint(void *arg
) {
9082 pthread_detach(pthread_self());
9084 /* Get a new job to process */
9086 if (listLength(server
.io_newjobs
) == 0) {
9087 /* No new jobs in queue, exit. */
9088 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9089 (long) pthread_self());
9090 server
.io_active_threads
--;
9094 ln
= listFirst(server
.io_newjobs
);
9096 listDelNode(server
.io_newjobs
,ln
);
9097 /* Add the job in the processing queue */
9098 j
->thread
= pthread_self();
9099 listAddNodeTail(server
.io_processing
,j
);
9100 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9102 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9103 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9105 /* Process the Job */
9106 if (j
->type
== REDIS_IOJOB_LOAD
) {
9107 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9108 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9109 FILE *fp
= fopen("/dev/null","w+");
9110 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9112 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9113 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9117 /* Done: insert the job into the processed queue */
9118 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9119 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9121 listDelNode(server
.io_processing
,ln
);
9122 listAddNodeTail(server
.io_processed
,j
);
9125 /* Signal the main thread there is new stuff to process */
9126 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9128 return NULL
; /* never reached */
9131 static void spawnIOThread(void) {
9133 sigset_t mask
, omask
;
9137 sigaddset(&mask
,SIGCHLD
);
9138 sigaddset(&mask
,SIGHUP
);
9139 sigaddset(&mask
,SIGPIPE
);
9140 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9141 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9142 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9146 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9147 server
.io_active_threads
++;
9150 /* We need to wait for the last thread to exit before we are able to
9151 * fork() in order to BGSAVE or BGREWRITEAOF. */
9152 static void waitEmptyIOJobsQueue(void) {
9154 int io_processed_len
;
9157 if (listLength(server
.io_newjobs
) == 0 &&
9158 listLength(server
.io_processing
) == 0 &&
9159 server
.io_active_threads
== 0)
9164 /* While waiting for empty jobs queue condition we post-process some
9165 * finshed job, as I/O threads may be hanging trying to write against
9166 * the io_ready_pipe_write FD but there are so much pending jobs that
9168 io_processed_len
= listLength(server
.io_processed
);
9170 if (io_processed_len
) {
9171 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9172 usleep(1000); /* 1 millisecond */
9174 usleep(10000); /* 10 milliseconds */
9179 static void vmReopenSwapFile(void) {
9180 /* Note: we don't close the old one as we are in the child process
9181 * and don't want to mess at all with the original file object. */
9182 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9183 if (server
.vm_fp
== NULL
) {
9184 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9185 server
.vm_swap_file
);
9188 server
.vm_fd
= fileno(server
.vm_fp
);
9191 /* This function must be called while with threaded IO locked */
9192 static void queueIOJob(iojob
*j
) {
9193 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9194 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9195 listAddNodeTail(server
.io_newjobs
,j
);
9196 if (server
.io_active_threads
< server
.vm_max_threads
)
9200 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9203 assert(key
->storage
== REDIS_VM_MEMORY
);
9204 assert(key
->refcount
== 1);
9206 j
= zmalloc(sizeof(*j
));
9207 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9213 j
->thread
= (pthread_t
) -1;
9214 key
->storage
= REDIS_VM_SWAPPING
;
9222 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9224 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9225 * If there is not already a job loading the key, it is craeted.
9226 * The key is added to the io_keys list in the client structure, and also
9227 * in the hash table mapping swapped keys to waiting clients, that is,
9228 * server.io_waited_keys. */
9229 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9230 struct dictEntry
*de
;
9234 /* If the key does not exist or is already in RAM we don't need to
9235 * block the client at all. */
9236 de
= dictFind(c
->db
->dict
,key
);
9237 if (de
== NULL
) return 0;
9238 o
= dictGetEntryKey(de
);
9239 if (o
->storage
== REDIS_VM_MEMORY
) {
9241 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9242 /* We were swapping the key, undo it! */
9243 vmCancelThreadedIOJob(o
);
9247 /* OK: the key is either swapped, or being loaded just now. */
9249 /* Add the key to the list of keys this client is waiting for.
9250 * This maps clients to keys they are waiting for. */
9251 listAddNodeTail(c
->io_keys
,key
);
9254 /* Add the client to the swapped keys => clients waiting map. */
9255 de
= dictFind(c
->db
->io_keys
,key
);
9259 /* For every key we take a list of clients blocked for it */
9261 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9263 assert(retval
== DICT_OK
);
9265 l
= dictGetEntryVal(de
);
9267 listAddNodeTail(l
,c
);
9269 /* Are we already loading the key from disk? If not create a job */
9270 if (o
->storage
== REDIS_VM_SWAPPED
) {
9273 o
->storage
= REDIS_VM_LOADING
;
9274 j
= zmalloc(sizeof(*j
));
9275 j
->type
= REDIS_IOJOB_LOAD
;
9278 j
->key
->vtype
= o
->vtype
;
9279 j
->page
= o
->vm
.page
;
9282 j
->thread
= (pthread_t
) -1;
9290 /* Preload keys needed for the ZUNION and ZINTER commands. */
9291 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9293 num
= atoi(c
->argv
[2]->ptr
);
9294 for (i
= 0; i
< num
; i
++) {
9295 waitForSwappedKey(c
,c
->argv
[3+i
]);
9299 /* Is this client attempting to run a command against swapped keys?
9300 * If so, block it ASAP, load the keys in background, then resume it.
9302 * The important idea about this function is that it can fail! If keys will
9303 * still be swapped when the client is resumed, this key lookups will
9304 * just block loading keys from disk. In practical terms this should only
9305 * happen with SORT BY command or if there is a bug in this function.
9307 * Return 1 if the client is marked as blocked, 0 if the client can
9308 * continue as the keys it is going to access appear to be in memory. */
9309 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9312 if (cmd
->vm_preload_proc
!= NULL
) {
9313 cmd
->vm_preload_proc(c
);
9315 if (cmd
->vm_firstkey
== 0) return 0;
9316 last
= cmd
->vm_lastkey
;
9317 if (last
< 0) last
= c
->argc
+last
;
9318 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9319 waitForSwappedKey(c
,c
->argv
[j
]);
9322 /* If the client was blocked for at least one key, mark it as blocked. */
9323 if (listLength(c
->io_keys
)) {
9324 c
->flags
|= REDIS_IO_WAIT
;
9325 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9326 server
.vm_blocked_clients
++;
9333 /* Remove the 'key' from the list of blocked keys for a given client.
9335 * The function returns 1 when there are no longer blocking keys after
9336 * the current one was removed (and the client can be unblocked). */
9337 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9341 struct dictEntry
*de
;
9343 /* Remove the key from the list of keys this client is waiting for. */
9344 listRewind(c
->io_keys
,&li
);
9345 while ((ln
= listNext(&li
)) != NULL
) {
9346 if (compareStringObjects(ln
->value
,key
) == 0) {
9347 listDelNode(c
->io_keys
,ln
);
9353 /* Remove the client form the key => waiting clients map. */
9354 de
= dictFind(c
->db
->io_keys
,key
);
9356 l
= dictGetEntryVal(de
);
9357 ln
= listSearchKey(l
,c
);
9360 if (listLength(l
) == 0)
9361 dictDelete(c
->db
->io_keys
,key
);
9363 return listLength(c
->io_keys
) == 0;
9366 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9367 struct dictEntry
*de
;
9372 de
= dictFind(db
->io_keys
,key
);
9375 l
= dictGetEntryVal(de
);
9376 len
= listLength(l
);
9377 /* Note: we can't use something like while(listLength(l)) as the list
9378 * can be freed by the calling function when we remove the last element. */
9381 redisClient
*c
= ln
->value
;
9383 if (dontWaitForSwappedKey(c
,key
)) {
9384 /* Put the client in the list of clients ready to go as we
9385 * loaded all the keys about it. */
9386 listAddNodeTail(server
.io_ready_clients
,c
);
9391 /* =========================== Remote Configuration ========================= */
9393 static void configSetCommand(redisClient
*c
) {
9394 robj
*o
= getDecodedObject(c
->argv
[3]);
9395 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9396 zfree(server
.dbfilename
);
9397 server
.dbfilename
= zstrdup(o
->ptr
);
9398 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9399 zfree(server
.requirepass
);
9400 server
.requirepass
= zstrdup(o
->ptr
);
9401 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9402 zfree(server
.masterauth
);
9403 server
.masterauth
= zstrdup(o
->ptr
);
9404 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9405 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9407 addReplySds(c
,sdscatprintf(sdsempty(),
9408 "-ERR not supported CONFIG parameter %s\r\n",
9409 (char*)c
->argv
[2]->ptr
));
9414 addReply(c
,shared
.ok
);
9417 static void configGetCommand(redisClient
*c
) {
9418 robj
*o
= getDecodedObject(c
->argv
[2]);
9419 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9420 char *pattern
= o
->ptr
;
9424 decrRefCount(lenobj
);
9426 if (stringmatch(pattern
,"dbfilename",0)) {
9427 addReplyBulkCString(c
,"dbfilename");
9428 addReplyBulkCString(c
,server
.dbfilename
);
9431 if (stringmatch(pattern
,"requirepass",0)) {
9432 addReplyBulkCString(c
,"requirepass");
9433 addReplyBulkCString(c
,server
.requirepass
);
9436 if (stringmatch(pattern
,"masterauth",0)) {
9437 addReplyBulkCString(c
,"masterauth");
9438 addReplyBulkCString(c
,server
.masterauth
);
9441 if (stringmatch(pattern
,"maxmemory",0)) {
9444 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9445 addReplyBulkCString(c
,"maxmemory");
9446 addReplyBulkCString(c
,buf
);
9450 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9453 static void configCommand(redisClient
*c
) {
9454 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9455 if (c
->argc
!= 4) goto badarity
;
9456 configSetCommand(c
);
9457 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9458 if (c
->argc
!= 3) goto badarity
;
9459 configGetCommand(c
);
9460 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9461 if (c
->argc
!= 2) goto badarity
;
9462 server
.stat_numcommands
= 0;
9463 server
.stat_numconnections
= 0;
9464 server
.stat_expiredkeys
= 0;
9465 server
.stat_starttime
= time(NULL
);
9466 addReply(c
,shared
.ok
);
9468 addReplySds(c
,sdscatprintf(sdsempty(),
9469 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9474 addReplySds(c
,sdscatprintf(sdsempty(),
9475 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9476 (char*) c
->argv
[1]->ptr
));
9479 /* =========================== Pubsub implementation ======================== */
9481 static void freePubsubPattern(void *p
) {
9482 pubsubPattern
*pat
= p
;
9484 decrRefCount(pat
->pattern
);
9488 static int listMatchPubsubPattern(void *a
, void *b
) {
9489 pubsubPattern
*pa
= a
, *pb
= b
;
9491 return (pa
->client
== pb
->client
) &&
9492 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9495 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9496 * 0 if the client was already subscribed to that channel. */
9497 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9498 struct dictEntry
*de
;
9499 list
*clients
= NULL
;
9502 /* Add the channel to the client -> channels hash table */
9503 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9505 incrRefCount(channel
);
9506 /* Add the client to the channel -> list of clients hash table */
9507 de
= dictFind(server
.pubsub_channels
,channel
);
9509 clients
= listCreate();
9510 dictAdd(server
.pubsub_channels
,channel
,clients
);
9511 incrRefCount(channel
);
9513 clients
= dictGetEntryVal(de
);
9515 listAddNodeTail(clients
,c
);
9517 /* Notify the client */
9518 addReply(c
,shared
.mbulk3
);
9519 addReply(c
,shared
.subscribebulk
);
9520 addReplyBulk(c
,channel
);
9521 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9525 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9526 * 0 if the client was not subscribed to the specified channel. */
9527 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9528 struct dictEntry
*de
;
9533 /* Remove the channel from the client -> channels hash table */
9534 incrRefCount(channel
); /* channel may be just a pointer to the same object
9535 we have in the hash tables. Protect it... */
9536 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9538 /* Remove the client from the channel -> clients list hash table */
9539 de
= dictFind(server
.pubsub_channels
,channel
);
9541 clients
= dictGetEntryVal(de
);
9542 ln
= listSearchKey(clients
,c
);
9544 listDelNode(clients
,ln
);
9545 if (listLength(clients
) == 0) {
9546 /* Free the list and associated hash entry at all if this was
9547 * the latest client, so that it will be possible to abuse
9548 * Redis PUBSUB creating millions of channels. */
9549 dictDelete(server
.pubsub_channels
,channel
);
9552 /* Notify the client */
9554 addReply(c
,shared
.mbulk3
);
9555 addReply(c
,shared
.unsubscribebulk
);
9556 addReplyBulk(c
,channel
);
9557 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9558 listLength(c
->pubsub_patterns
));
9561 decrRefCount(channel
); /* it is finally safe to release it */
9565 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9566 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9569 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9572 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9573 incrRefCount(pattern
);
9574 pat
= zmalloc(sizeof(*pat
));
9575 pat
->pattern
= getDecodedObject(pattern
);
9577 listAddNodeTail(server
.pubsub_patterns
,pat
);
9579 /* Notify the client */
9580 addReply(c
,shared
.mbulk3
);
9581 addReply(c
,shared
.psubscribebulk
);
9582 addReplyBulk(c
,pattern
);
9583 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9587 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9588 * 0 if the client was not subscribed to the specified channel. */
9589 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9594 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9595 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9597 listDelNode(c
->pubsub_patterns
,ln
);
9599 pat
.pattern
= pattern
;
9600 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9601 listDelNode(server
.pubsub_patterns
,ln
);
9603 /* Notify the client */
9605 addReply(c
,shared
.mbulk3
);
9606 addReply(c
,shared
.punsubscribebulk
);
9607 addReplyBulk(c
,pattern
);
9608 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9609 listLength(c
->pubsub_patterns
));
9611 decrRefCount(pattern
);
9615 /* Unsubscribe from all the channels. Return the number of channels the
9616 * client was subscribed from. */
9617 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9618 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9622 while((de
= dictNext(di
)) != NULL
) {
9623 robj
*channel
= dictGetEntryKey(de
);
9625 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9627 dictReleaseIterator(di
);
9631 /* Unsubscribe from all the patterns. Return the number of patterns the
9632 * client was subscribed from. */
9633 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9638 listRewind(c
->pubsub_patterns
,&li
);
9639 while ((ln
= listNext(&li
)) != NULL
) {
9640 robj
*pattern
= ln
->value
;
9642 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9647 /* Publish a message */
9648 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9650 struct dictEntry
*de
;
9654 /* Send to clients listening for that channel */
9655 de
= dictFind(server
.pubsub_channels
,channel
);
9657 list
*list
= dictGetEntryVal(de
);
9661 listRewind(list
,&li
);
9662 while ((ln
= listNext(&li
)) != NULL
) {
9663 redisClient
*c
= ln
->value
;
9665 addReply(c
,shared
.mbulk3
);
9666 addReply(c
,shared
.messagebulk
);
9667 addReplyBulk(c
,channel
);
9668 addReplyBulk(c
,message
);
9672 /* Send to clients listening to matching channels */
9673 if (listLength(server
.pubsub_patterns
)) {
9674 listRewind(server
.pubsub_patterns
,&li
);
9675 channel
= getDecodedObject(channel
);
9676 while ((ln
= listNext(&li
)) != NULL
) {
9677 pubsubPattern
*pat
= ln
->value
;
9679 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9680 sdslen(pat
->pattern
->ptr
),
9681 (char*)channel
->ptr
,
9682 sdslen(channel
->ptr
),0)) {
9683 addReply(pat
->client
,shared
.mbulk3
);
9684 addReply(pat
->client
,shared
.messagebulk
);
9685 addReplyBulk(pat
->client
,channel
);
9686 addReplyBulk(pat
->client
,message
);
9690 decrRefCount(channel
);
9695 static void subscribeCommand(redisClient
*c
) {
9698 for (j
= 1; j
< c
->argc
; j
++)
9699 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9702 static void unsubscribeCommand(redisClient
*c
) {
9704 pubsubUnsubscribeAllChannels(c
,1);
9709 for (j
= 1; j
< c
->argc
; j
++)
9710 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9714 static void psubscribeCommand(redisClient
*c
) {
9717 for (j
= 1; j
< c
->argc
; j
++)
9718 pubsubSubscribePattern(c
,c
->argv
[j
]);
9721 static void punsubscribeCommand(redisClient
*c
) {
9723 pubsubUnsubscribeAllPatterns(c
,1);
9728 for (j
= 1; j
< c
->argc
; j
++)
9729 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9733 static void publishCommand(redisClient
*c
) {
9734 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9735 addReplyLong(c
,receivers
);
9738 /* ================================= Debugging ============================== */
9740 static void debugCommand(redisClient
*c
) {
9741 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9743 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9744 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9745 addReply(c
,shared
.err
);
9749 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9750 addReply(c
,shared
.err
);
9753 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9754 addReply(c
,shared
.ok
);
9755 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9757 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9758 addReply(c
,shared
.err
);
9761 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9762 addReply(c
,shared
.ok
);
9763 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9764 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9768 addReply(c
,shared
.nokeyerr
);
9771 key
= dictGetEntryKey(de
);
9772 val
= dictGetEntryVal(de
);
9773 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9774 key
->storage
== REDIS_VM_SWAPPING
)) {
9778 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9779 strenc
= strencoding
[val
->encoding
];
9781 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9784 addReplySds(c
,sdscatprintf(sdsempty(),
9785 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9786 "encoding:%s serializedlength:%lld\r\n",
9787 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9788 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9790 addReplySds(c
,sdscatprintf(sdsempty(),
9791 "+Key at:%p refcount:%d, value swapped at: page %llu "
9792 "using %llu pages\r\n",
9793 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9794 (unsigned long long) key
->vm
.usedpages
));
9796 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9797 lookupKeyRead(c
->db
,c
->argv
[2]);
9798 addReply(c
,shared
.ok
);
9799 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9800 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9803 if (!server
.vm_enabled
) {
9804 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9808 addReply(c
,shared
.nokeyerr
);
9811 key
= dictGetEntryKey(de
);
9812 val
= dictGetEntryVal(de
);
9813 /* If the key is shared we want to create a copy */
9814 if (key
->refcount
> 1) {
9815 robj
*newkey
= dupStringObject(key
);
9817 key
= dictGetEntryKey(de
) = newkey
;
9820 if (key
->storage
!= REDIS_VM_MEMORY
) {
9821 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9822 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9823 dictGetEntryVal(de
) = NULL
;
9824 addReply(c
,shared
.ok
);
9826 addReply(c
,shared
.err
);
9829 addReplySds(c
,sdsnew(
9830 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9834 static void _redisAssert(char *estr
, char *file
, int line
) {
9835 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9836 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9837 #ifdef HAVE_BACKTRACE
9838 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9843 /* =================================== Main! ================================ */
/* Read the overcommit policy from /proc/sys/vm/overcommit_memory.
 *
 * Returns the non-negative integer found in the file, or -1 if the file
 * can't be opened or read, or if its content is not a valid non-negative
 * integer. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    /* atoi() would turn unparsable content into 0, the very value the
     * caller warns about: report a parse failure as an error instead. */
    value = strtol(buf,&end,10);
    if (end == buf || value < 0 || value != (long)(int)value) return -1;
    return (int)value;
}
9860 void linuxOvercommitMemoryWarning(void) {
9861 if (linuxOvercommitMemoryValue() == 0) {
9862 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9865 #endif /* __linux__ */
9867 static void daemonize(void) {
9871 if (fork() != 0) exit(0); /* parent exits */
9872 setsid(); /* create a new session */
9874 /* Every output goes to /dev/null. If Redis is daemonized but
9875 * the 'logfile' is set to 'stdout' in the configuration file
9876 * it will not log at all. */
9877 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9878 dup2(fd
, STDIN_FILENO
);
9879 dup2(fd
, STDOUT_FILENO
);
9880 dup2(fd
, STDERR_FILENO
);
9881 if (fd
> STDERR_FILENO
) close(fd
);
9883 /* Try to write the pid file */
9884 fp
= fopen(server
.pidfile
,"w");
9886 fprintf(fp
,"%d\n",getpid());
9891 static void version() {
9892 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line usage on standard error and exit with status 1. */
static void usage(void) {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr," ./redis-server - (read config from stdin)\n");
    exit(1);
}
/* Server entry point.
 *
 * Sequence visible in this function:
 *  - argv[1] is matched against "-v"/"--version" (version()) and "--help"
 *    (usage()); then the save parameters are reset and argv[1] is loaded as
 *    the configuration file. A separate branch handles argc > 2, and a
 *    warning is logged when no configuration file was specified.
 *  - daemonize() runs when server.daemonize is set.
 *  - The startup notice is logged and linuxOvercommitMemoryWarning() is
 *    called.
 *  - The dataset is loaded with loadAppendOnlyFile(server.appendfilename)
 *    or rdbLoad(server.dbfilename), selected via server.appendonly, and the
 *    elapsed seconds are logged on REDIS_OK.
 *  - beforeSleep is registered on the event loop server.el, and the loop
 *    is deleted at the end.
 *
 * NOTE(review): the load-time messages format "time(NULL)-start" with %ld;
 * confirm the expression has type long on every supported platform. */
9902 int main(int argc
, char **argv
) {
9907 if (strcmp(argv
[1], "-v") == 0 ||
9908 strcmp(argv
[1], "--version") == 0) version();
9909 if (strcmp(argv
[1], "--help") == 0) usage();
9910 resetServerSaveParams();
9911 loadServerConfig(argv
[1]);
9912 } else if ((argc
> 2)) {
9915 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9917 if (server
.daemonize
) daemonize();
9919 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9921 linuxOvercommitMemoryWarning();
9924 if (server
.appendonly
) {
9925 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9926 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9928 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9929 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9931 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9932 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9934 aeDeleteEventLoop(server
.el
);
9938 /* ============================= Backtrace support ========================= */
9940 #ifdef HAVE_BACKTRACE
9941 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return the saved instruction pointer stored in the signal context 'uc',
 * cast to void*.
 *
 * The field holding it differs per platform, so the lookup is selected with
 * preprocessor conditionals: FreeBSD (mc_eip), dietlibc (eip), Mac OS X
 * before and from 10.6 (__ss.__rip or __ss.__eip), Linux on x86 / x86_64
 * (gregs[REG_EIP]) and Linux on IA64 (sc_ip).
 *
 * segvHandler() treats a NULL result as "no address available". */
9943 static void *getMcontextEip(ucontext_t
*uc
) {
9944 #if defined(__FreeBSD__)
9945 return (void*) uc
->uc_mcontext
.mc_eip
;
9946 #elif defined(__dietlibc__)
9947 return (void*) uc
->uc_mcontext
.eip
;
9948 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9950 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9952 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9954 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9955 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9956 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9958 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9960 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
/* NOTE(review): the same REG_EIP index is used for 32 and 64 bit builds;
 * confirm the 64 bit libc headers define it (glibc names the x86_64
 * register REG_RIP). */
9961 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9962 #elif defined(__ia64__) /* Linux IA64 */
9963 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Crash handler installed by setupSigSegvAction() with SA_SIGINFO.
 *
 * sig    - number of the signal being handled, reported in the log banner.
 * info   - unused (REDIS_NOTUSED).
 * secret - the ucontext_t of the interrupted code, used to recover the
 *          faulting instruction pointer through getMcontextEip().
 *
 * The handler logs a banner with REDIS_VERSION and the signal number, logs
 * the string produced by genRedisInfoString(), then captures up to 100
 * frames with backtrace(). Frame 1 is overwritten with the address from the
 * signal context when one is available. Frames are logged starting at
 * index 1: the raw backtrace_symbols() text is used when findFuncName()
 * finds no name or when the offset parsed after '+' in that text is smaller
 * than the one findFuncName() reported; the other log call prints the name
 * and offset from findFuncName().
 *
 * Heap memory obtained here is deliberately never released, because the
 * allocator state may be corrupted. */
9969 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9971 char **messages
= NULL
;
9972 int i
, trace_size
= 0;
9973 unsigned long offset
=0;
9974 ucontext_t
*uc
= (ucontext_t
*) secret
;
9976 REDIS_NOTUSED(info
);
9978 redisLog(REDIS_WARNING
,
9979 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9980 infostring
= genRedisInfoString();
9981 redisLog(REDIS_WARNING
, "%s",infostring
);
9982 /* It's not safe to sdsfree() the returned string under memory
9983 * corruption conditions. Let it leak as we are going to abort */
9985 trace_size
= backtrace(trace
, 100);
9986 /* overwrite sigaction with caller's address */
9987 if (getMcontextEip(uc
) != NULL
) {
9988 trace
[1] = getMcontextEip(uc
);
9990 messages
= backtrace_symbols(trace
, trace_size
);
/* NOTE(review): no NULL check on the backtrace_symbols() result is visible
 * before messages[i] is dereferenced below - confirm. */
9992 for (i
=1; i
<trace_size
; ++i
) {
9993 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9995 p
= strchr(messages
[i
],'+');
9996 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9997 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
/* NOTE(review): the offset is cast to unsigned int but formatted with %d;
 * %u would match the argument type. */
9999 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10002 /* free(messages); Don't call free() with possibly corrupted memory. */
10006 static void setupSigSegvAction(void) {
10007 struct sigaction act
;
10009 sigemptyset (&act
.sa_mask
);
10010 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10011 * is used. Otherwise, sa_handler is used */
10012 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10013 act
.sa_sigaction
= segvHandler
;
10014 sigaction (SIGSEGV
, &act
, NULL
);
10015 sigaction (SIGBUS
, &act
, NULL
);
10016 sigaction (SIGFPE
, &act
, NULL
);
10017 sigaction (SIGILL
, &act
, NULL
);
10018 sigaction (SIGBUS
, &act
, NULL
);
10022 #include "staticsymbols.h"
10023 /* This function tries to convert a pointer into a function name. It is
 * used in order to provide a backtrace under segmentation fault that is
 * able to display functions declared as static (otherwise the backtrace is
 * useless).
 *
 * symsTable is scanned until an entry with a zero 'pointer' field is found.
 * An entry is a candidate when its address is not above 'pointer', and the
 * candidate with the smallest distance to 'pointer' is the one selected.
 * A 'pointer' value of (unsigned long)-1 never matches.
 *
 * Returns the 'name' field of the selected entry, or NULL when no entry
 * qualified.
 *
 * NOTE(review): 'offset' presumably receives the distance from the start of
 * the selected symbol (segvHandler() prints it) - confirm against the full
 * function body. */
10026 static char *findFuncName(void *pointer
, unsigned long *offset
){
10028 unsigned long off
, minoff
= 0;
10030 /* Try to match against the Symbol with the smallest offset */
10031 for (i
=0; symsTable
[i
].pointer
; i
++) {
10032 unsigned long lp
= (unsigned long) pointer
;
10034 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10035 off
=lp
-symsTable
[i
].pointer
;
10036 if (ret
< 0 || off
< minoff
) {
10042 if (ret
== -1) return NULL
;
10044 return symsTable
[ret
].name
;
10046 #else /* HAVE_BACKTRACE */
/* Variant compiled when HAVE_BACKTRACE is not defined.
 * NOTE(review): the body appears to be empty, i.e. no crash handler is
 * installed in this configuration - confirm. */
10047 static void setupSigSegvAction(void) {
10049 #endif /* HAVE_BACKTRACE */