2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
/* Static server configuration: compile-time defaults and limits. */
#define REDIS_SERVERPORT                6379    /* TCP port */
#define REDIS_MAXIDLETIME               (60*5)  /* Default client timeout */
#define REDIS_IOBUF_LEN                 1024
#define REDIS_LOADBUF_LEN               1024
#define REDIS_STATIC_ARGS               8
#define REDIS_DEFAULT_DBNUM             16
#define REDIS_CONFIGLINE_MAX            1024
#define REDIS_OBJFREELIST_MAX           1000000 /* Upper bound on cached objects */
#define REDIS_MAX_SYNC_TIME             60      /* A slave can't take longer to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON    10      /* Try to expire 10 keys per loop */
#define REDIS_MAX_WRITE_PER_EVENT       (1024*64)
#define REDIS_REQUEST_MAX_SIZE          (1024*1024*256) /* Max bytes in inline command */
/* When more than REDIS_WRITEV_THRESHOLD write packets are pending,
 * writev is used. */
#define REDIS_WRITEV_THRESHOLD      3
/* Upper bound on the iovecs handed to a single writev call. */
#define REDIS_WRITEV_IOVEC_COUNT    256

/* Hash table parameters */
#define REDIS_HT_MINFILL            10  /* Minimal hash table fill: 10% */
#define REDIS_CMD_BULK              1   /* Bulk write command */
#define REDIS_CMD_INLINE            2   /* Inline command */
/* REDIS_CMD_DENYOOM deserves a longer comment: every command marked with
 * this flag returns an error when the 'maxmemory' option is set in the
 * config file and the server is using more than maxmemory bytes of memory.
 * In short, these commands are denied on low memory conditions. */
#define REDIS_CMD_DENYOOM           4
#define REDIS_CMD_FORCE_REPLICATION 8   /* Force replication even if dirty is 0 */
/* Value assigned to an object's 'type' field for strings
 * (see initStaticStringObject). */
#define REDIS_STRING            0

/* Object encodings. Some kinds of objects, like Strings and Hashes, can be
 * internally represented in multiple ways. The 'encoding' field of the
 * object is set to one of these values. */
#define REDIS_ENCODING_RAW      0   /* Raw representation */
#define REDIS_ENCODING_INT      1   /* Encoded as integer */
#define REDIS_ENCODING_ZIPMAP   2   /* Encoded as zipmap */
#define REDIS_ENCODING_HT       3   /* Encoded as a hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
/* Object types that only appear when dumping to disk */
#define REDIS_EXPIRETIME    253
#define REDIS_SELECTDB      254
#define REDIS_EOF           255
/* Defines related to the dump file format. Storing 32 bit lengths for short
 * keys requires a lot of space, so the most significant 2 bits of the first
 * byte are checked to interpret the length:
 *
 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
 * 11|000000 this means: a specially encoded object will follow. The six bit
 *           number specifies the kind of object that follows.
 *           See the REDIS_RDB_ENC_* defines.
 *
 * Lengths up to 63 are stored using a single byte; most DB keys, and many
 * values, will fit inside. */
#define REDIS_RDB_6BITLEN   0
#define REDIS_RDB_14BITLEN  1
#define REDIS_RDB_32BITLEN  2
#define REDIS_RDB_ENCVAL    3
#define REDIS_RDB_LENERR    UINT_MAX
/* When the length of a string object stored on disk has the first two bits
 * set, the remaining six bits specify a special encoding for the object
 * according to the following defines: */
#define REDIS_RDB_ENC_INT8  0   /* 8 bit signed integer */
#define REDIS_RDB_ENC_INT16 1   /* 16 bit signed integer */
#define REDIS_RDB_ENC_INT32 2   /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF   3   /* string compressed with LZF */
/* Values of the virtual memory object->where field. */
#define REDIS_VM_MEMORY     0   /* The object is in memory */
#define REDIS_VM_SWAPPED    1   /* The object is on disk */
#define REDIS_VM_SWAPPING   2   /* Redis is swapping this object to disk */
#define REDIS_VM_LOADING    3   /* Redis is loading this object from disk */
/* Virtual memory static configuration.
 * See vmFindContiguousPages() to learn more about these magic numbers. */
#define REDIS_VM_MAX_NEAR_PAGES     65536
#define REDIS_VM_MAX_RANDOM_JUMP    4096
#define REDIS_VM_MAX_THREADS        32
#define REDIS_THREAD_STACK_SIZE     (1024*1024*4)
/* The *percentage* of completed I/O jobs to process each time the handler
 * is called. Virtual Memory I/O operations are performed by threads, but
 * once completed they must be processed by the main thread in order to
 * take effect. */
#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
#define REDIS_SLAVE     1   /* This client is a slave server */
#define REDIS_MASTER    2   /* This client is a master server */
#define REDIS_MONITOR   4   /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI     8   /* This client is in a MULTI context */
#define REDIS_BLOCKED   16  /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT   32  /* The client is waiting for Virtual Memory I/O */
/* Slave replication state - slave side */
#define REDIS_REPL_NONE                 0   /* No active replication */
#define REDIS_REPL_CONNECT              1   /* Must connect to master */
#define REDIS_REPL_CONNECTED            2   /* Connected to master */

/* Slave replication state - as seen by the master.
 * In the SEND_BULK and ONLINE states the slave receives new updates in its
 * output queue. In the WAIT_BGSAVE state the server is instead waiting to
 * start the next background save in order to send updates to it. */
#define REDIS_REPL_WAIT_BGSAVE_START    3   /* master waits bgsave to start feeding it */
#define REDIS_REPL_WAIT_BGSAVE_END      4   /* master waits bgsave to start bulk DB transmission */
#define REDIS_REPL_SEND_BULK            5   /* master is sending the bulk DB */
#define REDIS_REPL_ONLINE               6   /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
/* Sort operations */
#define REDIS_SORT_GET      0
#define REDIS_SORT_ASC      1
#define REDIS_SORT_DESC     2
#define REDIS_SORTKEY_MAX   1024
/* NOTE(review): presumably log verbosity levels, lowest to highest; the
 * heading for this group is not visible here - confirm against the logger. */
#define REDIS_DEBUG     0
#define REDIS_VERBOSE   1
#define REDIS_NOTICE    2
#define REDIS_WARNING   3
/* Anti-warning macro: evaluates V and discards the result so the compiler
 * does not complain about an unused parameter or variable.
 * The argument is parenthesized so that the cast applies to the whole
 * expression (e.g. REDIS_NOTUSED(a + b)) and not only to its first operand. */
#define REDIS_NOTUSED(V) ((void) (V))
#define ZSKIPLIST_MAXLEVEL  32      /* Should be enough for 2^32 elements */
#define ZSKIPLIST_P         0.25    /* Skiplist P = 1/4 */
/* Append only defines */
#define APPENDFSYNC_NO                  0
#define APPENDFSYNC_ALWAYS              1
#define APPENDFSYNC_EVERYSEC            2

/* Hashes related defaults */
#define REDIS_HASH_MAX_ZIPMAP_ENTRIES   64
#define REDIS_HASH_MAX_ZIPMAP_VALUE     512
/* Custom assert. When the expression _e is false, the stringified
 * expression, the source file and the line number are handed to
 * _redisAssert(), after which the process terminates via _exit(1).
 * When _e is true the macro evaluates to a void no-op.
 * (Per the original note, this form exists because a stack trace can be
 * printed.) */
static void _redisAssert(char *estr, char *file, int line);
#define redisAssert(_e) \
    ((_e) ? (void)0 : (_redisAssert(#_e, __FILE__, __LINE__), _exit(1)))
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to take this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* The last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 #define REDIS_SHARED_INTEGERS 10000
509 struct sharedObjectsStruct
{
510 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
511 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
512 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
513 *outofrangeerr
, *plus
,
514 *select0
, *select1
, *select2
, *select3
, *select4
,
515 *select5
, *select6
, *select7
, *select8
, *select9
,
516 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
517 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
/* Global variables that are actually used as constants. These doubles are
 * used for on-disk serialization of double values and are given their
 * values at runtime, to avoid strange compiler optimizations. */
static double R_Zero;
static double R_PosInf;
static double R_NegInf;
static double R_Nan;
/* VM threaded I/O request message types */
#define REDIS_IOJOB_LOAD            0   /* Load from disk to memory */
#define REDIS_IOJOB_PREPARE_SWAP    1   /* Compute needed pages */
#define REDIS_IOJOB_DO_SWAP         2   /* Swap from memory to disk */
530 typedef struct iojob
{
531 int type
; /* Request type, REDIS_IOJOB_* */
532 redisDb
*db
;/* Redis database */
533 robj
*key
; /* This I/O request is about swapping this key */
534 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
535 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
536 off_t page
; /* Swap page where to read/write the object */
537 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
538 int canceled
; /* True if this command was canceled by blocking side of VM */
539 pthread_t thread
; /* ID of the thread processing this entry */
542 /*================================ Prototypes =============================== */
544 static void freeStringObject(robj
*o
);
545 static void freeListObject(robj
*o
);
546 static void freeSetObject(robj
*o
);
547 static void decrRefCount(void *o
);
548 static robj
*createObject(int type
, void *ptr
);
549 static void freeClient(redisClient
*c
);
550 static int rdbLoad(char *filename
);
551 static void addReply(redisClient
*c
, robj
*obj
);
552 static void addReplySds(redisClient
*c
, sds s
);
553 static void incrRefCount(robj
*o
);
554 static int rdbSaveBackground(char *filename
);
555 static robj
*createStringObject(char *ptr
, size_t len
);
556 static robj
*dupStringObject(robj
*o
);
557 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
558 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
559 static int syncWithMaster(void);
560 static robj
*tryObjectEncoding(robj
*o
);
561 static robj
*getDecodedObject(robj
*o
);
562 static int removeExpire(redisDb
*db
, robj
*key
);
563 static int expireIfNeeded(redisDb
*db
, robj
*key
);
564 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
565 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
566 static int deleteKey(redisDb
*db
, robj
*key
);
567 static time_t getExpire(redisDb
*db
, robj
*key
);
568 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
569 static void updateSlavesWaitingBgsave(int bgsaveerr
);
570 static void freeMemoryIfNeeded(void);
571 static int processCommand(redisClient
*c
);
572 static void setupSigSegvAction(void);
573 static void rdbRemoveTempFile(pid_t childpid
);
574 static void aofRemoveTempFile(pid_t childpid
);
575 static size_t stringObjectLen(robj
*o
);
576 static void processInputBuffer(redisClient
*c
);
577 static zskiplist
*zslCreate(void);
578 static void zslFree(zskiplist
*zsl
);
579 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
580 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
581 static void initClientMultiState(redisClient
*c
);
582 static void freeClientMultiState(redisClient
*c
);
583 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
584 static void unblockClientWaitingData(redisClient
*c
);
585 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
586 static void vmInit(void);
587 static void vmMarkPagesFree(off_t page
, off_t count
);
588 static robj
*vmLoadObject(robj
*key
);
589 static robj
*vmPreviewObject(robj
*key
);
590 static int vmSwapOneObjectBlocking(void);
591 static int vmSwapOneObjectThreaded(void);
592 static int vmCanSwapOut(void);
593 static int tryFreeOneObjectFromFreelist(void);
594 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
596 static void vmCancelThreadedIOJob(robj
*o
);
597 static void lockThreadedIO(void);
598 static void unlockThreadedIO(void);
599 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
600 static void freeIOJob(iojob
*j
);
601 static void queueIOJob(iojob
*j
);
602 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
603 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
604 static void waitEmptyIOJobsQueue(void);
605 static void vmReopenSwapFile(void);
606 static int vmFreePage(off_t page
);
607 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
608 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
609 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
610 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
611 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
612 static struct redisCommand
*lookupCommand(char *name
);
613 static void call(redisClient
*c
, struct redisCommand
*cmd
);
614 static void resetClient(redisClient
*c
);
615 static void convertToRealHash(robj
*o
);
616 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
617 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
618 static void freePubsubPattern(void *p
);
619 static int listMatchPubsubPattern(void *a
, void *b
);
620 static int compareStringObjects(robj
*a
, robj
*b
);
623 static void authCommand(redisClient
*c
);
624 static void pingCommand(redisClient
*c
);
625 static void echoCommand(redisClient
*c
);
626 static void setCommand(redisClient
*c
);
627 static void setnxCommand(redisClient
*c
);
628 static void getCommand(redisClient
*c
);
629 static void delCommand(redisClient
*c
);
630 static void existsCommand(redisClient
*c
);
631 static void incrCommand(redisClient
*c
);
632 static void decrCommand(redisClient
*c
);
633 static void incrbyCommand(redisClient
*c
);
634 static void decrbyCommand(redisClient
*c
);
635 static void selectCommand(redisClient
*c
);
636 static void randomkeyCommand(redisClient
*c
);
637 static void keysCommand(redisClient
*c
);
638 static void dbsizeCommand(redisClient
*c
);
639 static void lastsaveCommand(redisClient
*c
);
640 static void saveCommand(redisClient
*c
);
641 static void bgsaveCommand(redisClient
*c
);
642 static void bgrewriteaofCommand(redisClient
*c
);
643 static void shutdownCommand(redisClient
*c
);
644 static void moveCommand(redisClient
*c
);
645 static void renameCommand(redisClient
*c
);
646 static void renamenxCommand(redisClient
*c
);
647 static void lpushCommand(redisClient
*c
);
648 static void rpushCommand(redisClient
*c
);
649 static void lpopCommand(redisClient
*c
);
650 static void rpopCommand(redisClient
*c
);
651 static void llenCommand(redisClient
*c
);
652 static void lindexCommand(redisClient
*c
);
653 static void lrangeCommand(redisClient
*c
);
654 static void ltrimCommand(redisClient
*c
);
655 static void typeCommand(redisClient
*c
);
656 static void lsetCommand(redisClient
*c
);
657 static void saddCommand(redisClient
*c
);
658 static void sremCommand(redisClient
*c
);
659 static void smoveCommand(redisClient
*c
);
660 static void sismemberCommand(redisClient
*c
);
661 static void scardCommand(redisClient
*c
);
662 static void spopCommand(redisClient
*c
);
663 static void srandmemberCommand(redisClient
*c
);
664 static void sinterCommand(redisClient
*c
);
665 static void sinterstoreCommand(redisClient
*c
);
666 static void sunionCommand(redisClient
*c
);
667 static void sunionstoreCommand(redisClient
*c
);
668 static void sdiffCommand(redisClient
*c
);
669 static void sdiffstoreCommand(redisClient
*c
);
670 static void syncCommand(redisClient
*c
);
671 static void flushdbCommand(redisClient
*c
);
672 static void flushallCommand(redisClient
*c
);
673 static void sortCommand(redisClient
*c
);
674 static void lremCommand(redisClient
*c
);
675 static void rpoplpushcommand(redisClient
*c
);
676 static void infoCommand(redisClient
*c
);
677 static void mgetCommand(redisClient
*c
);
678 static void monitorCommand(redisClient
*c
);
679 static void expireCommand(redisClient
*c
);
680 static void expireatCommand(redisClient
*c
);
681 static void getsetCommand(redisClient
*c
);
682 static void ttlCommand(redisClient
*c
);
683 static void slaveofCommand(redisClient
*c
);
684 static void debugCommand(redisClient
*c
);
685 static void msetCommand(redisClient
*c
);
686 static void msetnxCommand(redisClient
*c
);
687 static void zaddCommand(redisClient
*c
);
688 static void zincrbyCommand(redisClient
*c
);
689 static void zrangeCommand(redisClient
*c
);
690 static void zrangebyscoreCommand(redisClient
*c
);
691 static void zcountCommand(redisClient
*c
);
692 static void zrevrangeCommand(redisClient
*c
);
693 static void zcardCommand(redisClient
*c
);
694 static void zremCommand(redisClient
*c
);
695 static void zscoreCommand(redisClient
*c
);
696 static void zremrangebyscoreCommand(redisClient
*c
);
697 static void multiCommand(redisClient
*c
);
698 static void execCommand(redisClient
*c
);
699 static void discardCommand(redisClient
*c
);
700 static void blpopCommand(redisClient
*c
);
701 static void brpopCommand(redisClient
*c
);
702 static void appendCommand(redisClient
*c
);
703 static void substrCommand(redisClient
*c
);
704 static void zrankCommand(redisClient
*c
);
705 static void zrevrankCommand(redisClient
*c
);
706 static void hsetCommand(redisClient
*c
);
707 static void hgetCommand(redisClient
*c
);
708 static void hmsetCommand(redisClient
*c
);
709 static void hmgetCommand(redisClient
*c
);
710 static void hdelCommand(redisClient
*c
);
711 static void hlenCommand(redisClient
*c
);
712 static void zremrangebyrankCommand(redisClient
*c
);
713 static void zunionCommand(redisClient
*c
);
714 static void zinterCommand(redisClient
*c
);
715 static void hkeysCommand(redisClient
*c
);
716 static void hvalsCommand(redisClient
*c
);
717 static void hgetallCommand(redisClient
*c
);
718 static void hexistsCommand(redisClient
*c
);
719 static void configCommand(redisClient
*c
);
720 static void hincrbyCommand(redisClient
*c
);
721 static void subscribeCommand(redisClient
*c
);
722 static void unsubscribeCommand(redisClient
*c
);
723 static void psubscribeCommand(redisClient
*c
);
724 static void punsubscribeCommand(redisClient
*c
);
725 static void publishCommand(redisClient
*c
);
727 /*================================= Globals ================================= */
730 static struct redisServer server
; /* server global state */
731 static struct redisCommand cmdTable
[] = {
732 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
733 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
734 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
735 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
736 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
738 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
739 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
740 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
742 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
743 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
751 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
754 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
755 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
758 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
759 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
761 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
763 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
764 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
765 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
766 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
767 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
768 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
769 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
772 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
775 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
776 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
781 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
785 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
788 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
789 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
792 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
794 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
795 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
799 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
800 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
812 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
820 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
828 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
831 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
836 {NULL
,NULL
,0,0,NULL
,0,0,0}
839 /*============================ Utility functions ============================ */
841 /* Glob-style pattern matching. */
842 static int stringmatchlen(const char *pattern
, int patternLen
,
843 const char *string
, int stringLen
, int nocase
)
848 while (pattern
[1] == '*') {
853 return 1; /* match */
855 if (stringmatchlen(pattern
+1, patternLen
-1,
856 string
, stringLen
, nocase
))
857 return 1; /* match */
861 return 0; /* no match */
865 return 0; /* no match */
875 not = pattern
[0] == '^';
882 if (pattern
[0] == '\\') {
885 if (pattern
[0] == string
[0])
887 } else if (pattern
[0] == ']') {
889 } else if (patternLen
== 0) {
893 } else if (pattern
[1] == '-' && patternLen
>= 3) {
894 int start
= pattern
[0];
895 int end
= pattern
[2];
903 start
= tolower(start
);
909 if (c
>= start
&& c
<= end
)
913 if (pattern
[0] == string
[0])
916 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
926 return 0; /* no match */
932 if (patternLen
>= 2) {
939 if (pattern
[0] != string
[0])
940 return 0; /* no match */
942 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
943 return 0; /* no match */
951 if (stringLen
== 0) {
952 while(*pattern
== '*') {
959 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs.
 *
 * The lengths of both `pattern` and `string` are computed with strlen(),
 * and `nocase` is forwarded unchanged. The return value is exactly what
 * stringmatchlen() returns for those arguments. Neither pointer is checked
 * for NULL before strlen() is called. */
964 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
965 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
968 static void redisLog(int level
, const char *fmt
, ...) {
972 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
976 if (level
>= server
.verbosity
) {
982 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
983 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
984 vfprintf(fp
, fmt
, ap
);
990 if (server
.logfile
) fclose(fp
);
993 /*====================== Hash table type implementation ==================== */
995 /* This is a hash table type that uses the SDS dynamic strings library as
996 * keys and redis objects as values (objects can hold SDS strings,
999 static void dictVanillaFree(void *privdata
, void *val
)
1001 DICT_NOTUSED(privdata
);
1005 static void dictListDestructor(void *privdata
, void *val
)
1007 DICT_NOTUSED(privdata
);
1008 listRelease((list
*)val
);
1011 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1015 DICT_NOTUSED(privdata
);
1017 l1
= sdslen((sds
)key1
);
1018 l2
= sdslen((sds
)key2
);
1019 if (l1
!= l2
) return 0;
1020 return memcmp(key1
, key2
, l1
) == 0;
1023 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1025 DICT_NOTUSED(privdata
);
1027 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1031 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1034 const robj
*o1
= key1
, *o2
= key2
;
1035 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Hash callback for dictionaries whose keys are robj pointers.
 *
 * `key` is interpreted as a `const robj *`; its `ptr` field is cast to sds
 * and hashed over sdslen() bytes with dictGenHashFunction(). The object's
 * encoding is not inspected here.
 * NOTE(review): this presumably requires keys to be raw (unencoded) string
 * objects, since `ptr` is used as an sds without any check -- confirm
 * against the callers that insert keys. */
1038 static unsigned int dictObjHash(const void *key
) {
1039 const robj
*o
= key
;
1040 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1043 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1046 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1049 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1050 o2
->encoding
== REDIS_ENCODING_INT
&&
1051 o1
->ptr
== o2
->ptr
) return 1;
1053 o1
= getDecodedObject(o1
);
1054 o2
= getDecodedObject(o2
);
1055 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1061 static unsigned int dictEncObjHash(const void *key
) {
1062 robj
*o
= (robj
*) key
;
1064 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1065 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1067 if (o
->encoding
== REDIS_ENCODING_INT
) {
1071 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1072 return dictGenHashFunction((unsigned char*)buf
, len
);
1076 o
= getDecodedObject(o
);
1077 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1084 /* Sets type and expires */
1085 static dictType setDictType
= {
1086 dictEncObjHash
, /* hash function */
1089 dictEncObjKeyCompare
, /* key compare */
1090 dictRedisObjectDestructor
, /* key destructor */
1091 NULL
/* val destructor */
1094 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1095 static dictType zsetDictType
= {
1096 dictEncObjHash
, /* hash function */
1099 dictEncObjKeyCompare
, /* key compare */
1100 dictRedisObjectDestructor
, /* key destructor */
1101 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1105 static dictType dbDictType
= {
1106 dictObjHash
, /* hash function */
1109 dictObjKeyCompare
, /* key compare */
1110 dictRedisObjectDestructor
, /* key destructor */
1111 dictRedisObjectDestructor
/* val destructor */
1115 static dictType keyptrDictType
= {
1116 dictObjHash
, /* hash function */
1119 dictObjKeyCompare
, /* key compare */
1120 dictRedisObjectDestructor
, /* key destructor */
1121 NULL
/* val destructor */
1124 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1125 static dictType hashDictType
= {
1126 dictEncObjHash
, /* hash function */
1129 dictEncObjKeyCompare
, /* key compare */
1130 dictRedisObjectDestructor
, /* key destructor */
1131 dictRedisObjectDestructor
/* val destructor */
1134 /* Keylist hash table type has unencoded redis objects as keys and
1135 * lists as values. It's used for blocking operations (BLPOP) and to
1136 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1137 static dictType keylistDictType
= {
1138 dictObjHash
, /* hash function */
1141 dictObjKeyCompare
, /* key compare */
1142 dictRedisObjectDestructor
, /* key destructor */
1143 dictListDestructor
/* val destructor */
1146 static void version();
1148 /* ========================= Random utility functions ======================= */
1150 /* Redis generally does not try to recover from out of memory conditions
1151 * when allocating objects or strings, it is not clear if it will be possible
1152 * to report this condition to the client since the networking layer itself
1153 * is based on heap allocation for send buffers, so we simply abort.
1154 * At least the code will be simpler to read... */
1155 static void oom(const char *msg
) {
1156 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1161 /* ====================== Redis server networking stuff ===================== */
1162 static void closeTimedoutClients(void) {
1165 time_t now
= time(NULL
);
1168 listRewind(server
.clients
,&li
);
1169 while ((ln
= listNext(&li
)) != NULL
) {
1170 c
= listNodeValue(ln
);
1171 if (server
.maxidletime
&&
1172 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1173 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1174 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1175 listLength(c
->pubsub_patterns
) == 0 &&
1176 (now
- c
->lastinteraction
> server
.maxidletime
))
1178 redisLog(REDIS_VERBOSE
,"Closing idle client");
1180 } else if (c
->flags
& REDIS_BLOCKED
) {
1181 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1182 addReply(c
,shared
.nullmultibulk
);
1183 unblockClientWaitingData(c
);
/* Tell whether the hash table `dict` is sparse enough to be worth shrinking.
 *
 * Returns non-zero only when all of the following hold:
 *   - the table has allocated slots (dictSlots() != 0),
 *   - it contains at least one element (dictSize() != 0),
 *   - it has more than DICT_HT_INITIAL_SIZE slots,
 *   - the fill percentage, used*100/size, is below REDIS_HT_MINFILL.
 * Returns 0 otherwise. */
1189 static int htNeedsResize(dict
*dict
) {
1190 long long size
, used
;
1192 size
= dictSlots(dict
);
1193 used
= dictSize(dict
);
/* `size` is tested for non-zero first, so the division below is never
 * reached with size == 0. The percentage uses integer division. */
1194 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1195 (used
*100/size
< REDIS_HT_MINFILL
));
1198 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1199 * we resize the hash table to save memory */
1200 static void tryResizeHashTables(void) {
/* Walk every configured database (server.dbnum of them). */
1203 for (j
= 0; j
< server
.dbnum
; j
++) {
/* Main keyspace table of database j: resize only if htNeedsResize()
 * reports it as under-filled. */
1204 if (htNeedsResize(server
.db
[j
].dict
))
1205 dictResize(server
.db
[j
].dict
);
/* Same check, applied independently to the expires table of database j. */
1206 if (htNeedsResize(server
.db
[j
].expires
))
1207 dictResize(server
.db
[j
].expires
);
1211 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1212 void backgroundSaveDoneHandler(int statloc
) {
1213 int exitcode
= WEXITSTATUS(statloc
);
1214 int bysignal
= WIFSIGNALED(statloc
);
1216 if (!bysignal
&& exitcode
== 0) {
1217 redisLog(REDIS_NOTICE
,
1218 "Background saving terminated with success");
1220 server
.lastsave
= time(NULL
);
1221 } else if (!bysignal
&& exitcode
!= 0) {
1222 redisLog(REDIS_WARNING
, "Background saving error");
1224 redisLog(REDIS_WARNING
,
1225 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1226 rdbRemoveTempFile(server
.bgsavechildpid
);
1228 server
.bgsavechildpid
= -1;
1229 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1230 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1231 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1234 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1236 void backgroundRewriteDoneHandler(int statloc
) {
1237 int exitcode
= WEXITSTATUS(statloc
);
1238 int bysignal
= WIFSIGNALED(statloc
);
1240 if (!bysignal
&& exitcode
== 0) {
1244 redisLog(REDIS_NOTICE
,
1245 "Background append only file rewriting terminated with success");
1246 /* Now it's time to flush the differences accumulated by the parent */
1247 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1248 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1250 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1253 /* Flush our data... */
1254 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1255 (signed) sdslen(server
.bgrewritebuf
)) {
1256 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1260 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1261 /* Now our work is to rename the temp file into the stable file. And
1262 * switch the file descriptor used by the server for append only. */
1263 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1264 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1268 /* Mission completed... almost */
1269 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1270 if (server
.appendfd
!= -1) {
1271 /* If append only is actually enabled... */
1272 close(server
.appendfd
);
1273 server
.appendfd
= fd
;
1275 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1276 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1278 /* If append only is disabled we just generate a dump in this
1279 * format. Why not? */
1282 } else if (!bysignal
&& exitcode
!= 0) {
1283 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1285 redisLog(REDIS_WARNING
,
1286 "Background append only file rewriting terminated by signal %d",
1290 sdsfree(server
.bgrewritebuf
);
1291 server
.bgrewritebuf
= sdsempty();
1292 aofRemoveTempFile(server
.bgrewritechildpid
);
1293 server
.bgrewritechildpid
= -1;
1296 /* This function is called once a background process of some kind terminates,
1297 * as we want to avoid resizing the hash tables when there is a child in order
1298 * to play well with copy-on-write (otherwise when a resize happens lots of
1299 * memory pages are copied). The goal of this function is to update the ability
1300 * for dict.c to resize the hash tables according to whether or not we have
1301 * running children. */
1302 static void updateDictResizePolicy(void) {
1303 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1306 dictDisableResize();
1309 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1310 int j
, loops
= server
.cronloops
++;
1311 REDIS_NOTUSED(eventLoop
);
1313 REDIS_NOTUSED(clientData
);
1315 /* We take a cached value of the unix time in the global state because
1316 * with virtual memory and aging there is a need to store the current time
1317 * in objects at every object access, and accuracy is not needed.
1318 * To access a global var is faster than calling time(NULL) */
1319 server
.unixtime
= time(NULL
);
1321 /* Show some info about non-empty databases */
1322 for (j
= 0; j
< server
.dbnum
; j
++) {
1323 long long size
, used
, vkeys
;
1325 size
= dictSlots(server
.db
[j
].dict
);
1326 used
= dictSize(server
.db
[j
].dict
);
1327 vkeys
= dictSize(server
.db
[j
].expires
);
1328 if (!(loops
% 50) && (used
|| vkeys
)) {
1329 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1330 /* dictPrintStats(server.dict); */
1334 /* We don't want to resize the hash tables while a background saving
1335 * is in progress: the saving child is created using fork() that is
1336 * implemented with a copy-on-write semantic in most modern systems, so
1337 * if we resize the HT while there is the saving child at work actually
1338 * a lot of memory movements in the parent will cause a lot of pages
1340 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1343 tryResizeHashTables();
1346 /* Show information about connected clients */
1347 if (!(loops
% 50)) {
1348 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1349 listLength(server
.clients
)-listLength(server
.slaves
),
1350 listLength(server
.slaves
),
1351 zmalloc_used_memory());
1354 /* Close connections of timedout clients */
1355 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1356 closeTimedoutClients();
1358 /* Check if a background saving or AOF rewrite in progress terminated */
1359 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1363 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1364 if (pid
== server
.bgsavechildpid
) {
1365 backgroundSaveDoneHandler(statloc
);
1367 backgroundRewriteDoneHandler(statloc
);
1369 updateDictResizePolicy();
1372 /* If there is not a background saving in progress check if
1373 * we have to save now */
1374 time_t now
= time(NULL
);
1375 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1376 struct saveparam
*sp
= server
.saveparams
+j
;
1378 if (server
.dirty
>= sp
->changes
&&
1379 now
-server
.lastsave
> sp
->seconds
) {
1380 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1381 sp
->changes
, sp
->seconds
);
1382 rdbSaveBackground(server
.dbfilename
);
1388 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1389 * will use few CPU cycles if there are few expiring keys, otherwise
1390 * it will get more aggressive to avoid that too much memory is used by
1391 * keys that can be removed from the keyspace. */
1392 for (j
= 0; j
< server
.dbnum
; j
++) {
1394 redisDb
*db
= server
.db
+j
;
1396 /* Continue to expire if at the end of the cycle more than 25%
1397 * of the keys were expired. */
1399 long num
= dictSize(db
->expires
);
1400 time_t now
= time(NULL
);
1403 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1404 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1409 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1410 t
= (time_t) dictGetEntryVal(de
);
1412 deleteKey(db
,dictGetEntryKey(de
));
1414 server
.stat_expiredkeys
++;
1417 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1420 /* Swap a few keys on disk if we are over the memory limit and VM
1421 * is enabled. Try to free objects from the free list first. */
1422 if (vmCanSwapOut()) {
1423 while (server
.vm_enabled
&& zmalloc_used_memory() >
1424 server
.vm_max_memory
)
1428 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1429 retval
= (server
.vm_max_threads
== 0) ?
1430 vmSwapOneObjectBlocking() :
1431 vmSwapOneObjectThreaded();
1432 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1433 zmalloc_used_memory() >
1434 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1436 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1438 /* Note that when using threaded I/O we free just one object,
1439 * because anyway when the I/O thread in charge to swap this
1440 * object out will finish, the handler of completed jobs
1441 * will try to swap more objects if we are still out of memory. */
1442 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1446 /* Check if we should connect to a MASTER */
1447 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1448 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1449 if (syncWithMaster() == REDIS_OK
) {
1450 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1456 /* This function gets called every time Redis is entering the
1457 * main loop of the event driven library, that is, before sleeping
1458 * while waiting for ready file descriptors. */
1459 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1460 REDIS_NOTUSED(eventLoop
);
1462 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1466 listRewind(server
.io_ready_clients
,&li
);
1467 while((ln
= listNext(&li
))) {
1468 redisClient
*c
= ln
->value
;
1469 struct redisCommand
*cmd
;
1471 /* Resume the client. */
1472 listDelNode(server
.io_ready_clients
,ln
);
1473 c
->flags
&= (~REDIS_IO_WAIT
);
1474 server
.vm_blocked_clients
--;
1475 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1476 readQueryFromClient
, c
);
1477 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1478 assert(cmd
!= NULL
);
1481 /* There may be more data to process in the input buffer. */
1482 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1483 processInputBuffer(c
);
1488 static void createSharedObjects(void) {
1491 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1492 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1493 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1494 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1495 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1496 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1497 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1498 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1499 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1500 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1501 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1502 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1503 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1504 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1505 "-ERR no such key\r\n"));
1506 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1507 "-ERR syntax error\r\n"));
1508 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1509 "-ERR source and destination objects are the same\r\n"));
1510 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1511 "-ERR index out of range\r\n"));
1512 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1513 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1514 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1515 shared
.select0
= createStringObject("select 0\r\n",10);
1516 shared
.select1
= createStringObject("select 1\r\n",10);
1517 shared
.select2
= createStringObject("select 2\r\n",10);
1518 shared
.select3
= createStringObject("select 3\r\n",10);
1519 shared
.select4
= createStringObject("select 4\r\n",10);
1520 shared
.select5
= createStringObject("select 5\r\n",10);
1521 shared
.select6
= createStringObject("select 6\r\n",10);
1522 shared
.select7
= createStringObject("select 7\r\n",10);
1523 shared
.select8
= createStringObject("select 8\r\n",10);
1524 shared
.select9
= createStringObject("select 9\r\n",10);
1525 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1526 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1527 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1528 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1529 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1530 shared
.mbulk3
= createStringObject("*3\r\n",4);
1531 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1532 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1533 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Append one save point to the server.saveparams array.
 *
 * `seconds` and `changes` are stored in a new element at the end of the
 * array, and server.saveparamslen is incremented by one. */
1537 static void appendServerSaveParams(time_t seconds
, int changes
) {
/* Grow the array by exactly one struct saveparam. The zrealloc() result is
 * assigned straight back to server.saveparams with no NULL check.
 * NOTE(review): presumably zrealloc() never returns NULL (aborts on OOM)
 * -- confirm in zmalloc.c. */
1538 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
/* saveparamslen still holds the old length here, i.e. the index of the
 * freshly added slot. */
1539 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1540 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1541 server
.saveparamslen
++;
/* Drop every configured save point: free the server.saveparams array and
 * reset it to the empty state (NULL pointer, length 0), so that a later
 * appendServerSaveParams() starts a new array from scratch. */
1544 static void resetServerSaveParams() {
1545 zfree(server
.saveparams
);
/* Clear the pointer so the freed array is not referenced again. */
1546 server
.saveparams
= NULL
;
1547 server
.saveparamslen
= 0;
1550 static void initServerConfig() {
1551 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1552 server
.port
= REDIS_SERVERPORT
;
1553 server
.verbosity
= REDIS_VERBOSE
;
1554 server
.maxidletime
= REDIS_MAXIDLETIME
;
1555 server
.saveparams
= NULL
;
1556 server
.logfile
= NULL
; /* NULL = log on standard output */
1557 server
.bindaddr
= NULL
;
1558 server
.glueoutputbuf
= 1;
1559 server
.daemonize
= 0;
1560 server
.appendonly
= 0;
1561 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1562 server
.lastfsync
= time(NULL
);
1563 server
.appendfd
= -1;
1564 server
.appendseldb
= -1; /* Make sure the first time will not match */
1565 server
.pidfile
= zstrdup("/var/run/redis.pid");
1566 server
.dbfilename
= zstrdup("dump.rdb");
1567 server
.appendfilename
= zstrdup("appendonly.aof");
1568 server
.requirepass
= NULL
;
1569 server
.shareobjects
= 0;
1570 server
.rdbcompression
= 1;
1571 server
.maxclients
= 0;
1572 server
.blpop_blocked_clients
= 0;
1573 server
.maxmemory
= 0;
1574 server
.vm_enabled
= 0;
1575 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1576 server
.vm_page_size
= 256; /* 256 bytes per page */
1577 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1578 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1579 server
.vm_max_threads
= 4;
1580 server
.vm_blocked_clients
= 0;
1581 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1582 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1584 resetServerSaveParams();
1586 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1587 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1588 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1589 /* Replication related */
1591 server
.masterauth
= NULL
;
1592 server
.masterhost
= NULL
;
1593 server
.masterport
= 6379;
1594 server
.master
= NULL
;
1595 server
.replstate
= REDIS_REPL_NONE
;
1597 /* Double constants initialization */
1599 R_PosInf
= 1.0/R_Zero
;
1600 R_NegInf
= -1.0/R_Zero
;
1601 R_Nan
= R_Zero
/R_Zero
;
1604 static void initServer() {
1607 signal(SIGHUP
, SIG_IGN
);
1608 signal(SIGPIPE
, SIG_IGN
);
1609 setupSigSegvAction();
1611 server
.devnull
= fopen("/dev/null","w");
1612 if (server
.devnull
== NULL
) {
1613 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1616 server
.clients
= listCreate();
1617 server
.slaves
= listCreate();
1618 server
.monitors
= listCreate();
1619 server
.objfreelist
= listCreate();
1620 createSharedObjects();
1621 server
.el
= aeCreateEventLoop();
1622 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1623 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1624 if (server
.fd
== -1) {
1625 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1628 for (j
= 0; j
< server
.dbnum
; j
++) {
1629 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1630 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1631 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1632 if (server
.vm_enabled
)
1633 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1634 server
.db
[j
].id
= j
;
1636 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1637 server
.pubsub_patterns
= listCreate();
1638 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1639 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1640 server
.cronloops
= 0;
1641 server
.bgsavechildpid
= -1;
1642 server
.bgrewritechildpid
= -1;
1643 server
.bgrewritebuf
= sdsempty();
1644 server
.lastsave
= time(NULL
);
1646 server
.stat_numcommands
= 0;
1647 server
.stat_numconnections
= 0;
1648 server
.stat_expiredkeys
= 0;
1649 server
.stat_starttime
= time(NULL
);
1650 server
.unixtime
= time(NULL
);
1651 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1652 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1653 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1655 if (server
.appendonly
) {
1656 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1657 if (server
.appendfd
== -1) {
1658 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1664 if (server
.vm_enabled
) vmInit();
1667 /* Empty the whole database */
1668 static long long emptyDb() {
/* Running total of keys found in the main dict of each database before it
 * is emptied.
 * NOTE(review): presumably this is the function's return value; the return
 * statement is not visible in this view -- confirm. */
1670 long long removed
= 0;
1672 for (j
= 0; j
< server
.dbnum
; j
++) {
/* Count the keys first, because dictEmpty() below removes them. Only the
 * main dict is counted; entries of the expires dict are not added. */
1673 removed
+= dictSize(server
.db
[j
].dict
);
1674 dictEmpty(server
.db
[j
].dict
);
1675 dictEmpty(server
.db
[j
].expires
);
/* Convert a "yes"/"no" configuration value to an integer.
 *
 * The comparison is case-insensitive (strcasecmp): "yes" yields 1 and
 * "no" yields 0.
 * NOTE(review): the result for any other input is not visible in this
 * view; callers in loadServerConfig() compare the result against -1, so
 * that is presumably the fallback value -- confirm. */
1680 static int yesnotoi(char *s
) {
1681 if (!strcasecmp(s
,"yes")) return 1;
1682 else if (!strcasecmp(s
,"no")) return 0;
1686 /* I agree, this is a very rudimentary way to load a configuration...
1687 will improve later if the config gets more complex */
1688 static void loadServerConfig(char *filename
) {
1690 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1694 if (filename
[0] == '-' && filename
[1] == '\0')
1697 if ((fp
= fopen(filename
,"r")) == NULL
) {
1698 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1703 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1709 line
= sdstrim(line
," \t\r\n");
1711 /* Skip comments and blank lines*/
1712 if (line
[0] == '#' || line
[0] == '\0') {
1717 /* Split into arguments */
1718 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1719 sdstolower(argv
[0]);
1721 /* Execute config directives */
1722 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1723 server
.maxidletime
= atoi(argv
[1]);
1724 if (server
.maxidletime
< 0) {
1725 err
= "Invalid timeout value"; goto loaderr
;
1727 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1728 server
.port
= atoi(argv
[1]);
1729 if (server
.port
< 1 || server
.port
> 65535) {
1730 err
= "Invalid port"; goto loaderr
;
1732 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1733 server
.bindaddr
= zstrdup(argv
[1]);
1734 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1735 int seconds
= atoi(argv
[1]);
1736 int changes
= atoi(argv
[2]);
1737 if (seconds
< 1 || changes
< 0) {
1738 err
= "Invalid save parameters"; goto loaderr
;
1740 appendServerSaveParams(seconds
,changes
);
1741 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1742 if (chdir(argv
[1]) == -1) {
1743 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1744 argv
[1], strerror(errno
));
1747 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1748 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1749 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1750 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1751 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1753 err
= "Invalid log level. Must be one of debug, notice, warning";
1756 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1759 server
.logfile
= zstrdup(argv
[1]);
1760 if (!strcasecmp(server
.logfile
,"stdout")) {
1761 zfree(server
.logfile
);
1762 server
.logfile
= NULL
;
1764 if (server
.logfile
) {
1765 /* Test if we are able to open the file. The server will not
1766 * be able to abort just for this problem later... */
1767 logfp
= fopen(server
.logfile
,"a");
1768 if (logfp
== NULL
) {
1769 err
= sdscatprintf(sdsempty(),
1770 "Can't open the log file: %s", strerror(errno
));
1775 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1776 server
.dbnum
= atoi(argv
[1]);
1777 if (server
.dbnum
< 1) {
1778 err
= "Invalid number of databases"; goto loaderr
;
1780 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1781 loadServerConfig(argv
[1]);
1782 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1783 server
.maxclients
= atoi(argv
[1]);
1784 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1785 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1786 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1787 server
.masterhost
= sdsnew(argv
[1]);
1788 server
.masterport
= atoi(argv
[2]);
1789 server
.replstate
= REDIS_REPL_CONNECT
;
1790 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1791 server
.masterauth
= zstrdup(argv
[1]);
1792 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1793 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1794 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1796 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1797 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1798 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1800 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1801 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1802 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1804 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1805 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1806 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1808 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1809 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1810 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1812 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1813 if (!strcasecmp(argv
[1],"no")) {
1814 server
.appendfsync
= APPENDFSYNC_NO
;
1815 } else if (!strcasecmp(argv
[1],"always")) {
1816 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1817 } else if (!strcasecmp(argv
[1],"everysec")) {
1818 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1820 err
= "argument must be 'no', 'always' or 'everysec'";
1823 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1824 server
.requirepass
= zstrdup(argv
[1]);
1825 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1826 zfree(server
.pidfile
);
1827 server
.pidfile
= zstrdup(argv
[1]);
1828 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1829 zfree(server
.dbfilename
);
1830 server
.dbfilename
= zstrdup(argv
[1]);
1831 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1832 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1833 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1835 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1836 zfree(server
.vm_swap_file
);
1837 server
.vm_swap_file
= zstrdup(argv
[1]);
1838 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1839 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1840 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1841 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1842 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1843 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1844 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1845 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1846 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1847 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1848 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1849 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1850 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1851 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1853 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1855 for (j
= 0; j
< argc
; j
++)
1860 if (fp
!= stdin
) fclose(fp
);
1864 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1865 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1866 fprintf(stderr
, ">>> '%s'\n", line
);
1867 fprintf(stderr
, "%s\n", err
);
1871 static void freeClientArgv(redisClient
*c
) {
1874 for (j
= 0; j
< c
->argc
; j
++)
1875 decrRefCount(c
->argv
[j
]);
1876 for (j
= 0; j
< c
->mbargc
; j
++)
1877 decrRefCount(c
->mbargv
[j
]);
1882 static void freeClient(redisClient
*c
) {
1885 /* Note that if the client we are freeing is blocked into a blocking
1886 * call, we have to set querybuf to NULL *before* to call
1887 * unblockClientWaitingData() to avoid processInputBuffer() will get
1888 * called. Also it is important to remove the file events after
1889 * this, because this call adds the READABLE event. */
1890 sdsfree(c
->querybuf
);
1892 if (c
->flags
& REDIS_BLOCKED
)
1893 unblockClientWaitingData(c
);
1895 /* Unsubscribe from all the pubsub channels */
1896 pubsubUnsubscribeAllChannels(c
,0);
1897 pubsubUnsubscribeAllPatterns(c
,0);
1898 dictRelease(c
->pubsub_channels
);
1899 listRelease(c
->pubsub_patterns
);
1900 /* Obvious cleanup */
1901 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1902 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1903 listRelease(c
->reply
);
1906 /* Remove from the list of clients */
1907 ln
= listSearchKey(server
.clients
,c
);
1908 redisAssert(ln
!= NULL
);
1909 listDelNode(server
.clients
,ln
);
1910 /* Remove from the list of clients waiting for swapped keys */
1911 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1912 ln
= listSearchKey(server
.io_ready_clients
,c
);
1914 listDelNode(server
.io_ready_clients
,ln
);
1915 server
.vm_blocked_clients
--;
1918 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1919 ln
= listFirst(c
->io_keys
);
1920 dontWaitForSwappedKey(c
,ln
->value
);
1922 listRelease(c
->io_keys
);
1923 /* Master/slave cleanup */
1924 if (c
->flags
& REDIS_SLAVE
) {
1925 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1927 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1928 ln
= listSearchKey(l
,c
);
1929 redisAssert(ln
!= NULL
);
1932 if (c
->flags
& REDIS_MASTER
) {
1933 server
.master
= NULL
;
1934 server
.replstate
= REDIS_REPL_CONNECT
;
1936 /* Release memory */
1939 freeClientMultiState(c
);
1943 #define GLUEREPLY_UP_TO (1024)
1944 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1946 char buf
[GLUEREPLY_UP_TO
];
1951 listRewind(c
->reply
,&li
);
1952 while((ln
= listNext(&li
))) {
1956 objlen
= sdslen(o
->ptr
);
1957 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1958 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1960 listDelNode(c
->reply
,ln
);
1962 if (copylen
== 0) return;
1966 /* Now the output buffer is empty, add the new single element */
1967 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1968 listAddNodeHead(c
->reply
,o
);
1971 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1972 redisClient
*c
= privdata
;
1973 int nwritten
= 0, totwritten
= 0, objlen
;
1976 REDIS_NOTUSED(mask
);
1978 /* Use writev() if we have enough buffers to send */
1979 if (!server
.glueoutputbuf
&&
1980 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1981 !(c
->flags
& REDIS_MASTER
))
1983 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1987 while(listLength(c
->reply
)) {
1988 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1989 glueReplyBuffersIfNeeded(c
);
1991 o
= listNodeValue(listFirst(c
->reply
));
1992 objlen
= sdslen(o
->ptr
);
1995 listDelNode(c
->reply
,listFirst(c
->reply
));
1999 if (c
->flags
& REDIS_MASTER
) {
2000 /* Don't reply to a master */
2001 nwritten
= objlen
- c
->sentlen
;
2003 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2004 if (nwritten
<= 0) break;
2006 c
->sentlen
+= nwritten
;
2007 totwritten
+= nwritten
;
2008 /* If we fully sent the object on head go to the next one */
2009 if (c
->sentlen
== objlen
) {
2010 listDelNode(c
->reply
,listFirst(c
->reply
));
2013 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2014 * bytes: in a single threaded server it's a good idea to serve
2015 * other clients as well, even if a very large request comes from a
2016 * super fast link that is always able to accept data (in a real world
2017 * scenario think about 'KEYS *' against the loopback interface) */
2018 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2020 if (nwritten
== -1) {
2021 if (errno
== EAGAIN
) {
2024 redisLog(REDIS_VERBOSE
,
2025 "Error writing to client: %s", strerror(errno
));
2030 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2031 if (listLength(c
->reply
) == 0) {
2033 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2037 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2039 redisClient
*c
= privdata
;
2040 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2042 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2043 int offset
, ion
= 0;
2045 REDIS_NOTUSED(mask
);
2048 while (listLength(c
->reply
)) {
2049 offset
= c
->sentlen
;
2053 /* fill-in the iov[] array */
2054 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2055 o
= listNodeValue(node
);
2056 objlen
= sdslen(o
->ptr
);
2058 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2061 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2062 break; /* no more iovecs */
2064 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2065 iov
[ion
].iov_len
= objlen
- offset
;
2066 willwrite
+= objlen
- offset
;
2067 offset
= 0; /* just for the first item */
2074 /* write all collected blocks at once */
2075 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2076 if (errno
!= EAGAIN
) {
2077 redisLog(REDIS_VERBOSE
,
2078 "Error writing to client: %s", strerror(errno
));
2085 totwritten
+= nwritten
;
2086 offset
= c
->sentlen
;
2088 /* remove written robjs from c->reply */
2089 while (nwritten
&& listLength(c
->reply
)) {
2090 o
= listNodeValue(listFirst(c
->reply
));
2091 objlen
= sdslen(o
->ptr
);
2093 if(nwritten
>= objlen
- offset
) {
2094 listDelNode(c
->reply
, listFirst(c
->reply
));
2095 nwritten
-= objlen
- offset
;
2099 c
->sentlen
+= nwritten
;
2107 c
->lastinteraction
= time(NULL
);
2109 if (listLength(c
->reply
) == 0) {
2111 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2115 static struct redisCommand
*lookupCommand(char *name
) {
2117 while(cmdTable
[j
].name
!= NULL
) {
2118 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2124 /* resetClient prepare the client to process the next command */
2125 static void resetClient(redisClient
*c
) {
2131 /* Call() is the core of Redis execution of a command */
2132 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2135 dirty
= server
.dirty
;
2137 dirty
= server
.dirty
-dirty
;
2139 if (server
.appendonly
&& dirty
)
2140 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2141 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2142 listLength(server
.slaves
))
2143 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2144 if (listLength(server
.monitors
))
2145 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2146 server
.stat_numcommands
++;
2149 /* If this function gets called we already read a whole
2150 * command, arguments are in the client argv/argc fields.
2151 * processCommand() executes the command or prepares the
2152 * server for a bulk read from the client.
2154 * If 1 is returned the client is still alive and valid and
2155 * other operations can be performed by the caller. Otherwise
2156 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2157 static int processCommand(redisClient
*c
) {
2158 struct redisCommand
*cmd
;
2160 /* Free some memory if needed (maxmemory setting) */
2161 if (server
.maxmemory
) freeMemoryIfNeeded();
2163 /* Handle the multi bulk command type. This is an alternative protocol
2164 * supported by Redis in order to receive commands that are composed of
2165 * multiple binary-safe "bulk" arguments. The latency of processing is
2166 * a bit higher but this allows things like multi-sets, so if this
2167 * protocol is used only for MSET and similar commands this is a big win. */
2168 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2169 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2170 if (c
->multibulk
<= 0) {
2174 decrRefCount(c
->argv
[c
->argc
-1]);
2178 } else if (c
->multibulk
) {
2179 if (c
->bulklen
== -1) {
2180 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2181 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2185 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2186 decrRefCount(c
->argv
[0]);
2187 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2189 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2194 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2198 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2199 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2203 if (c
->multibulk
== 0) {
2207 /* Here we need to swap the multi-bulk argc/argv with the
2208 * normal argc/argv of the client structure. */
2210 c
->argv
= c
->mbargv
;
2211 c
->mbargv
= auxargv
;
2214 c
->argc
= c
->mbargc
;
2215 c
->mbargc
= auxargc
;
2217 /* We need to set bulklen to something different than -1
2218 * in order for the code below to process the command without
2219 * to try to read the last argument of a bulk command as
2220 * a special argument. */
2222 /* continue below and process the command */
2229 /* -- end of multi bulk commands processing -- */
2231 /* The QUIT command is handled as a special case. Normal command
2232 * procs are unable to close the client connection safely */
2233 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2238 /* Now lookup the command and check ASAP about trivial error conditions
2239 * such wrong arity, bad command name and so forth. */
2240 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2243 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2244 (char*)c
->argv
[0]->ptr
));
2247 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2248 (c
->argc
< -cmd
->arity
)) {
2250 sdscatprintf(sdsempty(),
2251 "-ERR wrong number of arguments for '%s' command\r\n",
2255 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2256 /* This is a bulk command, we have to read the last argument yet. */
2257 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2259 decrRefCount(c
->argv
[c
->argc
-1]);
2260 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2262 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2267 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2268 /* It is possible that the bulk read is already in the
2269 * buffer. Check this condition and handle it accordingly.
2270 * This is just a fast path, alternative to call processInputBuffer().
2271 * It's a good idea since the code is small and this condition
2272 * happens most of the times. */
2273 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2274 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2276 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2278 /* Otherwise return... there is to read the last argument
2279 * from the socket. */
2283 /* Let's try to encode the bulk object to save space. */
2284 if (cmd
->flags
& REDIS_CMD_BULK
)
2285 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2287 /* Check if the user is authenticated */
2288 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2289 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2294 /* Handle the maxmemory directive */
2295 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2296 zmalloc_used_memory() > server
.maxmemory
)
2298 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2303 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2304 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2306 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2307 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2308 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2313 /* Exec the command */
2314 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2315 queueMultiCommand(c
,cmd
);
2316 addReply(c
,shared
.queued
);
2318 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2319 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2323 /* Prepare the client for the next command */
2328 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2333 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2334 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2335 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2336 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2339 if (argc
<= REDIS_STATIC_ARGS
) {
2342 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2345 lenobj
= createObject(REDIS_STRING
,
2346 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2347 lenobj
->refcount
= 0;
2348 outv
[outc
++] = lenobj
;
2349 for (j
= 0; j
< argc
; j
++) {
2350 lenobj
= createObject(REDIS_STRING
,
2351 sdscatprintf(sdsempty(),"$%lu\r\n",
2352 (unsigned long) stringObjectLen(argv
[j
])));
2353 lenobj
->refcount
= 0;
2354 outv
[outc
++] = lenobj
;
2355 outv
[outc
++] = argv
[j
];
2356 outv
[outc
++] = shared
.crlf
;
2359 /* Increment all the refcounts at start and decrement at end in order to
2360 * be sure to free objects if there is no slave in a replication state
2361 * able to be feed with commands */
2362 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2363 listRewind(slaves
,&li
);
2364 while((ln
= listNext(&li
))) {
2365 redisClient
*slave
= ln
->value
;
2367 /* Don't feed slaves that are still waiting for BGSAVE to start */
2368 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2370 /* Feed all the other slaves, MONITORs and so on */
2371 if (slave
->slaveseldb
!= dictid
) {
2375 case 0: selectcmd
= shared
.select0
; break;
2376 case 1: selectcmd
= shared
.select1
; break;
2377 case 2: selectcmd
= shared
.select2
; break;
2378 case 3: selectcmd
= shared
.select3
; break;
2379 case 4: selectcmd
= shared
.select4
; break;
2380 case 5: selectcmd
= shared
.select5
; break;
2381 case 6: selectcmd
= shared
.select6
; break;
2382 case 7: selectcmd
= shared
.select7
; break;
2383 case 8: selectcmd
= shared
.select8
; break;
2384 case 9: selectcmd
= shared
.select9
; break;
2386 selectcmd
= createObject(REDIS_STRING
,
2387 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2388 selectcmd
->refcount
= 0;
2391 addReply(slave
,selectcmd
);
2392 slave
->slaveseldb
= dictid
;
2394 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2396 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2397 if (outv
!= static_outv
) zfree(outv
);
2400 static void processInputBuffer(redisClient
*c
) {
2402 /* Before to process the input buffer, make sure the client is not
2403 * waitig for a blocking operation such as BLPOP. Note that the first
2404 * iteration the client is never blocked, otherwise the processInputBuffer
2405 * would not be called at all, but after the execution of the first commands
2406 * in the input buffer the client may be blocked, and the "goto again"
2407 * will try to reiterate. The following line will make it return asap. */
2408 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2409 if (c
->bulklen
== -1) {
2410 /* Read the first line of the query */
2411 char *p
= strchr(c
->querybuf
,'\n');
2418 query
= c
->querybuf
;
2419 c
->querybuf
= sdsempty();
2420 querylen
= 1+(p
-(query
));
2421 if (sdslen(query
) > querylen
) {
2422 /* leave data after the first line of the query in the buffer */
2423 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2425 *p
= '\0'; /* remove "\n" */
2426 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2427 sdsupdatelen(query
);
2429 /* Now we can split the query in arguments */
2430 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2433 if (c
->argv
) zfree(c
->argv
);
2434 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2436 for (j
= 0; j
< argc
; j
++) {
2437 if (sdslen(argv
[j
])) {
2438 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2446 /* Execute the command. If the client is still valid
2447 * after processCommand() return and there is something
2448 * on the query buffer try to process the next command. */
2449 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2451 /* Nothing to process, argc == 0. Just process the query
2452 * buffer if it's not empty or return to the caller */
2453 if (sdslen(c
->querybuf
)) goto again
;
2456 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2457 redisLog(REDIS_VERBOSE
, "Client protocol error");
2462 /* Bulk read handling. Note that if we are at this point
2463 the client already sent a command terminated with a newline,
2464 we are reading the bulk data that is actually the last
2465 argument of the command. */
2466 int qbl
= sdslen(c
->querybuf
);
2468 if (c
->bulklen
<= qbl
) {
2469 /* Copy everything but the final CRLF as final argument */
2470 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2472 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2473 /* Process the command. If the client is still valid after
2474 * the processing and there is more data in the buffer
2475 * try to parse it. */
2476 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2482 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2483 redisClient
*c
= (redisClient
*) privdata
;
2484 char buf
[REDIS_IOBUF_LEN
];
2487 REDIS_NOTUSED(mask
);
2489 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2491 if (errno
== EAGAIN
) {
2494 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2498 } else if (nread
== 0) {
2499 redisLog(REDIS_VERBOSE
, "Client closed connection");
2504 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2505 c
->lastinteraction
= time(NULL
);
2509 processInputBuffer(c
);
2512 static int selectDb(redisClient
*c
, int id
) {
2513 if (id
< 0 || id
>= server
.dbnum
)
2515 c
->db
= &server
.db
[id
];
2519 static void *dupClientReplyValue(void *o
) {
2520 incrRefCount((robj
*)o
);
/* List match callback: returns non-zero when compareStringObjects()
 * reports no difference between the two objects, zero otherwise. */
static int listMatchObjects(void *a, void *b) {
    int differ = compareStringObjects(a,b);

    return !differ;
}
2528 static redisClient
*createClient(int fd
) {
2529 redisClient
*c
= zmalloc(sizeof(*c
));
2531 anetNonBlock(NULL
,fd
);
2532 anetTcpNoDelay(NULL
,fd
);
2533 if (!c
) return NULL
;
2536 c
->querybuf
= sdsempty();
2545 c
->lastinteraction
= time(NULL
);
2546 c
->authenticated
= 0;
2547 c
->replstate
= REDIS_REPL_NONE
;
2548 c
->reply
= listCreate();
2549 listSetFreeMethod(c
->reply
,decrRefCount
);
2550 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2551 c
->blockingkeys
= NULL
;
2552 c
->blockingkeysnum
= 0;
2553 c
->io_keys
= listCreate();
2554 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2555 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2556 c
->pubsub_patterns
= listCreate();
2557 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2558 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2559 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2560 readQueryFromClient
, c
) == AE_ERR
) {
2564 listAddNodeTail(server
.clients
,c
);
2565 initClientMultiState(c
);
2569 static void addReply(redisClient
*c
, robj
*obj
) {
2570 if (listLength(c
->reply
) == 0 &&
2571 (c
->replstate
== REDIS_REPL_NONE
||
2572 c
->replstate
== REDIS_REPL_ONLINE
) &&
2573 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2574 sendReplyToClient
, c
) == AE_ERR
) return;
2576 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2577 obj
= dupStringObject(obj
);
2578 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2580 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2583 static void addReplySds(redisClient
*c
, sds s
) {
2584 robj
*o
= createObject(REDIS_STRING
,s
);
2589 static void addReplyDouble(redisClient
*c
, double d
) {
2592 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2593 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2594 (unsigned long) strlen(buf
),buf
));
2597 static void addReplyLong(redisClient
*c
, long l
) {
2602 addReply(c
,shared
.czero
);
2604 } else if (l
== 1) {
2605 addReply(c
,shared
.cone
);
2608 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2609 addReplySds(c
,sdsnewlen(buf
,len
));
2612 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2617 addReply(c
,shared
.czero
);
2619 } else if (ll
== 1) {
2620 addReply(c
,shared
.cone
);
2623 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2624 addReplySds(c
,sdsnewlen(buf
,len
));
2627 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2632 addReply(c
,shared
.czero
);
2634 } else if (ul
== 1) {
2635 addReply(c
,shared
.cone
);
2638 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2639 addReplySds(c
,sdsnewlen(buf
,len
));
2642 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2645 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2646 len
= sdslen(obj
->ptr
);
2648 long n
= (long)obj
->ptr
;
2650 /* Compute how many bytes will take this integer as a radix 10 string */
2656 while((n
= n
/10) != 0) {
2660 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2663 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2664 addReplyBulkLen(c
,obj
);
2666 addReply(c
,shared
.crlf
);
2669 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2670 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2672 addReply(c
,shared
.nullbulk
);
2674 robj
*o
= createStringObject(s
,strlen(s
));
2680 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2685 REDIS_NOTUSED(mask
);
2686 REDIS_NOTUSED(privdata
);
2688 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2689 if (cfd
== AE_ERR
) {
2690 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2693 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2694 if ((c
= createClient(cfd
)) == NULL
) {
2695 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2696 close(cfd
); /* May be already closed, just ingore errors */
2699 /* If maxclient directive is set and this is one client more... close the
2700 * connection. Note that we create the client instead to check before
2701 * for this condition, since now the socket is already set in nonblocking
2702 * mode and we can send an error for free using the Kernel I/O */
2703 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2704 char *err
= "-ERR max number of clients reached\r\n";
2706 /* That's a best effort error message, don't check write errors */
2707 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2708 /* Nothing to do, Just to avoid the warning... */
2713 server
.stat_numconnections
++;
2716 /* ======================= Redis objects implementation ===================== */
2718 static robj
*createObject(int type
, void *ptr
) {
2721 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2722 if (listLength(server
.objfreelist
)) {
2723 listNode
*head
= listFirst(server
.objfreelist
);
2724 o
= listNodeValue(head
);
2725 listDelNode(server
.objfreelist
,head
);
2726 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2728 if (server
.vm_enabled
) {
2729 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2730 o
= zmalloc(sizeof(*o
));
2732 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2736 o
->encoding
= REDIS_ENCODING_RAW
;
2739 if (server
.vm_enabled
) {
2740 /* Note that this code may run in the context of an I/O thread
2741 * and accessing to server.unixtime in theory is an error
2742 * (no locks). But in practice this is safe, and even if we read
2743 * garbage Redis will not fail, as it's just a statistical info */
2744 o
->vm
.atime
= server
.unixtime
;
2745 o
->storage
= REDIS_VM_MEMORY
;
2750 static robj
*createStringObject(char *ptr
, size_t len
) {
2751 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2754 static robj
*dupStringObject(robj
*o
) {
2755 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2756 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2759 static robj
*createListObject(void) {
2760 list
*l
= listCreate();
2762 listSetFreeMethod(l
,decrRefCount
);
2763 return createObject(REDIS_LIST
,l
);
2766 static robj
*createSetObject(void) {
2767 dict
*d
= dictCreate(&setDictType
,NULL
);
2768 return createObject(REDIS_SET
,d
);
2771 static robj
*createHashObject(void) {
2772 /* All the Hashes start as zipmaps. Will be automatically converted
2773 * into hash tables if there are enough elements or big elements
2775 unsigned char *zm
= zipmapNew();
2776 robj
*o
= createObject(REDIS_HASH
,zm
);
2777 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2781 static robj
*createZsetObject(void) {
2782 zset
*zs
= zmalloc(sizeof(*zs
));
2784 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2785 zs
->zsl
= zslCreate();
2786 return createObject(REDIS_ZSET
,zs
);
2789 static void freeStringObject(robj
*o
) {
2790 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2795 static void freeListObject(robj
*o
) {
2796 listRelease((list
*) o
->ptr
);
2799 static void freeSetObject(robj
*o
) {
2800 dictRelease((dict
*) o
->ptr
);
2803 static void freeZsetObject(robj
*o
) {
2806 dictRelease(zs
->dict
);
2811 static void freeHashObject(robj
*o
) {
2812 switch (o
->encoding
) {
2813 case REDIS_ENCODING_HT
:
2814 dictRelease((dict
*) o
->ptr
);
2816 case REDIS_ENCODING_ZIPMAP
:
2825 static void incrRefCount(robj
*o
) {
2829 static void decrRefCount(void *obj
) {
2832 /* Object is a key of a swapped out value, or in the process of being
2834 if (server
.vm_enabled
&&
2835 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2837 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2838 redisAssert(o
->type
== REDIS_STRING
);
2839 freeStringObject(o
);
2840 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2841 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2842 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2843 !listAddNodeHead(server
.objfreelist
,o
))
2845 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2846 server
.vm_stats_swapped_objects
--;
2849 /* Object is in memory, or in the process of being swapped out. */
2850 if (--(o
->refcount
) == 0) {
2851 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2852 vmCancelThreadedIOJob(obj
);
2854 case REDIS_STRING
: freeStringObject(o
); break;
2855 case REDIS_LIST
: freeListObject(o
); break;
2856 case REDIS_SET
: freeSetObject(o
); break;
2857 case REDIS_ZSET
: freeZsetObject(o
); break;
2858 case REDIS_HASH
: freeHashObject(o
); break;
2859 default: redisAssert(0); break;
2861 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2862 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2863 !listAddNodeHead(server
.objfreelist
,o
))
2865 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2869 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2870 dictEntry
*de
= dictFind(db
->dict
,key
);
2872 robj
*key
= dictGetEntryKey(de
);
2873 robj
*val
= dictGetEntryVal(de
);
2875 if (server
.vm_enabled
) {
2876 if (key
->storage
== REDIS_VM_MEMORY
||
2877 key
->storage
== REDIS_VM_SWAPPING
)
2879 /* If we were swapping the object out, stop it, this key
2881 if (key
->storage
== REDIS_VM_SWAPPING
)
2882 vmCancelThreadedIOJob(key
);
2883 /* Update the access time of the key for the aging algorithm. */
2884 key
->vm
.atime
= server
.unixtime
;
2886 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2888 /* Our value was swapped on disk. Bring it at home. */
2889 redisAssert(val
== NULL
);
2890 val
= vmLoadObject(key
);
2891 dictGetEntryVal(de
) = val
;
2893 /* Clients blocked by the VM subsystem may be waiting for
2895 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2904 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2905 expireIfNeeded(db
,key
);
2906 return lookupKey(db
,key
);
2909 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2910 deleteIfVolatile(db
,key
);
2911 return lookupKey(db
,key
);
2914 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2915 robj
*o
= lookupKeyRead(c
->db
, key
);
2916 if (!o
) addReply(c
,reply
);
2920 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2921 robj
*o
= lookupKeyWrite(c
->db
, key
);
2922 if (!o
) addReply(c
,reply
);
2926 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2927 if (o
->type
!= type
) {
2928 addReply(c
,shared
.wrongtypeerr
);
2934 static int deleteKey(redisDb
*db
, robj
*key
) {
2937 /* We need to protect key from destruction: after the first dictDelete()
2938 * it may happen that 'key' is no longer valid if we don't increment
2939 * it's count. This may happen when we get the object reference directly
2940 * from the hash table with dictRandomKey() or dict iterators */
2942 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2943 retval
= dictDelete(db
->dict
,key
);
2946 return retval
== DICT_OK
;
2949 /* Check if the nul-terminated string 's' can be represented by a long
2950 * (that is, is a number that fits into long without any other space or
2951 * character before or after the digits).
2953 * If so, the function returns REDIS_OK and *longval is set to the value
2954 * of the number. Otherwise REDIS_ERR is returned */
2955 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2956 char buf
[32], *endptr
;
2960 value
= strtol(s
, &endptr
, 10);
2961 if (endptr
[0] != '\0') return REDIS_ERR
;
2962 slen
= snprintf(buf
,32,"%ld",value
);
2964 /* If the number converted back into a string is not identical
2965 * then it's not possible to encode the string as integer */
2966 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2967 if (longval
) *longval
= value
;
2971 /* Try to encode a string object in order to save space */
2972 static robj
*tryObjectEncoding(robj
*o
) {
2976 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2977 return o
; /* Already encoded */
2979 /* It's not safe to encode shared objects: shared objects can be shared
2980 * everywhere in the "object space" of Redis. Encoded objects can only
2981 * appear as "values" (and not, for instance, as keys) */
2982 if (o
->refcount
> 1) return o
;
2984 /* Currently we try to encode only strings */
2985 redisAssert(o
->type
== REDIS_STRING
);
2987 /* Check if we can represent this string as a long integer */
2988 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
2990 /* Ok, this object can be encoded */
2991 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2993 incrRefCount(shared
.integers
[value
]);
2994 return shared
.integers
[value
];
2996 o
->encoding
= REDIS_ENCODING_INT
;
2998 o
->ptr
= (void*) value
;
3003 /* Get a decoded version of an encoded object (returned as a new object).
3004 * If the object is already raw-encoded just increment the ref count. */
3005 static robj
*getDecodedObject(robj
*o
) {
3008 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3012 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3015 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3016 dec
= createStringObject(buf
,strlen(buf
));
3019 redisAssert(1 != 1);
3023 /* Compare two string objects via strcmp() or alike.
3024 * Note that the objects may be integer-encoded. In such a case we
3025 * use snprintf() to get a string representation of the numbers on the stack
3026 * and compare the strings, it's much faster than calling getDecodedObject().
3028 * Important note: if objects are not integer encoded, but binary-safe strings,
3029 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3031 static int compareStringObjects(robj
*a
, robj
*b
) {
3032 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3033 char bufa
[128], bufb
[128], *astr
, *bstr
;
3036 if (a
== b
) return 0;
3037 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3038 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3044 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3045 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3051 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3054 static size_t stringObjectLen(robj
*o
) {
3055 redisAssert(o
->type
== REDIS_STRING
);
3056 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3057 return sdslen(o
->ptr
);
3061 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3065 static int getDoubleFromObject(redisClient
*c
, robj
*o
, double *value
) {
3069 if (o
&& o
->type
!= REDIS_STRING
) {
3070 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3076 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3077 parsedValue
= strtod(o
->ptr
, &eptr
);
3078 else if (o
->encoding
== REDIS_ENCODING_INT
)
3079 parsedValue
= (long)o
->ptr
;
3081 redisAssert(1 != 1);
3083 if (eptr
!= NULL
&& *eptr
!= '\0') {
3084 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3088 *value
= parsedValue
;
3093 static int getLongLongFromObject(redisClient
*c
, robj
*o
, long long *value
) {
3094 long long parsedValue
;
3097 if (o
&& o
->type
!= REDIS_STRING
) {
3098 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3104 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3105 parsedValue
= strtoll(o
->ptr
, &eptr
, 10);
3106 else if (o
->encoding
== REDIS_ENCODING_INT
)
3107 parsedValue
= (long)o
->ptr
;
3109 redisAssert(1 != 1);
3111 if (eptr
!= NULL
&& *eptr
!= '\0') {
3112 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3116 *value
= parsedValue
;
3121 static int getLongFromObject(redisClient
*c
, robj
*o
, long *value
) {
3122 long long actualValue
;
3124 if (getLongLongFromObject(c
, o
, &actualValue
) != REDIS_OK
) return REDIS_ERR
;
3126 if (actualValue
< LONG_MIN
|| actualValue
> LONG_MAX
) {
3127 addReplySds(c
,sdsnew("-ERR value is out of range\r\n"));
3131 *value
= actualValue
;
3136 /*============================ RDB saving/loading =========================== */
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    const size_t written = fwrite(&type, 1, 1, fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer, in the byte order
 * of the host. Note that 't' is truncated to 32 bits.
 * Returns 0 on success, -1 if the value could not be written. */
static int rdbSaveTime(FILE *fp, time_t t) {
    const int32_t seconds = (int32_t) t;

    return (fwrite(&seconds, 4, 1, fp) == 0) ? -1 : 0;
}
3149 /* check rdbLoadLen() comments for more info */
3150 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3151 unsigned char buf
[2];
3154 /* Save a 6 bit len */
3155 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3156 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3157 } else if (len
< (1<<14)) {
3158 /* Save a 14 bit len */
3159 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3161 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3163 /* Save a 32 bit len */
3164 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3165 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3167 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3172 /* String objects in the form "2391" "-100" without any space and with a
3173 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3174 * encoded as integers to save space */
3175 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3177 char *endptr
, buf
[32];
3179 /* Check if it's possible to encode this value as a number */
3180 value
= strtoll(s
, &endptr
, 10);
3181 if (endptr
[0] != '\0') return 0;
3182 snprintf(buf
,32,"%lld",value
);
3184 /* If the number converted back into a string is not identical
3185 * then it's not possible to encode the string as integer */
3186 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3188 /* Finally check if it fits in our ranges */
3189 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3190 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3191 enc
[1] = value
&0xFF;
3193 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3194 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3195 enc
[1] = value
&0xFF;
3196 enc
[2] = (value
>>8)&0xFF;
3198 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3199 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3200 enc
[1] = value
&0xFF;
3201 enc
[2] = (value
>>8)&0xFF;
3202 enc
[3] = (value
>>16)&0xFF;
3203 enc
[4] = (value
>>24)&0xFF;
3210 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3211 size_t comprlen
, outlen
;
3215 /* We require at least four bytes compression for this to be worth it */
3216 if (len
<= 4) return 0;
3218 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3219 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3220 if (comprlen
== 0) {
3224 /* Data compressed! Let's save it on disk */
3225 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3226 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3227 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3228 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3229 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3238 /* Save a string object as [len][data] on disk. If the object is a string
3239 * representation of an integer value we try to save it in a special form */
3240 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3243 /* Try integer encoding */
3245 unsigned char buf
[5];
3246 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3247 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3252 /* Try LZF compression - under 20 bytes it's unable to compress even
3253 * aaaaaaaaaaaaaaaaaa so skip it */
3254 if (server
.rdbcompression
&& len
> 20) {
3257 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3258 if (retval
== -1) return -1;
3259 if (retval
> 0) return 0;
3260 /* retval == 0 means data can't be compressed, save the old way */
3263 /* Store verbatim */
3264 if (rdbSaveLen(fp
,len
) == -1) return -1;
3265 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3269 /* Like rdbSaveRawString() but handles encoded objects */
3270 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3273 /* Avoid incr/decr ref count business when possible.
3274 * This plays well with copy-on-write given that we are probably
3275 * in a child process (BGSAVE). Also this makes sure key objects
3276 * of swapped objects are not incRefCount-ed (an assert does not allow
3277 * this in order to avoid bugs) */
3278 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3279 obj
= getDecodedObject(obj
);
3280 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3283 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3288 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3289 * 8 bit integer specifying the length of the representation.
3290 * This 8 bit integer has special values in order to specify the following
3296 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3297 unsigned char buf
[128];
3303 } else if (!isfinite(val
)) {
3305 buf
[0] = (val
< 0) ? 255 : 254;
3307 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3308 buf
[0] = strlen((char*)buf
+1);
3311 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3315 /* Save a Redis object. */
3316 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3317 if (o
->type
== REDIS_STRING
) {
3318 /* Save a string value */
3319 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3320 } else if (o
->type
== REDIS_LIST
) {
3321 /* Save a list value */
3322 list
*list
= o
->ptr
;
3326 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3327 listRewind(list
,&li
);
3328 while((ln
= listNext(&li
))) {
3329 robj
*eleobj
= listNodeValue(ln
);
3331 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3333 } else if (o
->type
== REDIS_SET
) {
3334 /* Save a set value */
3336 dictIterator
*di
= dictGetIterator(set
);
3339 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3340 while((de
= dictNext(di
)) != NULL
) {
3341 robj
*eleobj
= dictGetEntryKey(de
);
3343 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3345 dictReleaseIterator(di
);
3346 } else if (o
->type
== REDIS_ZSET
) {
3347 /* Save a set value */
3349 dictIterator
*di
= dictGetIterator(zs
->dict
);
3352 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3353 while((de
= dictNext(di
)) != NULL
) {
3354 robj
*eleobj
= dictGetEntryKey(de
);
3355 double *score
= dictGetEntryVal(de
);
3357 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3358 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3360 dictReleaseIterator(di
);
3361 } else if (o
->type
== REDIS_HASH
) {
3362 /* Save a hash value */
3363 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3364 unsigned char *p
= zipmapRewind(o
->ptr
);
3365 unsigned int count
= zipmapLen(o
->ptr
);
3366 unsigned char *key
, *val
;
3367 unsigned int klen
, vlen
;
3369 if (rdbSaveLen(fp
,count
) == -1) return -1;
3370 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3371 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3372 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3375 dictIterator
*di
= dictGetIterator(o
->ptr
);
3378 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3379 while((de
= dictNext(di
)) != NULL
) {
3380 robj
*key
= dictGetEntryKey(de
);
3381 robj
*val
= dictGetEntryVal(de
);
3383 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3384 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3386 dictReleaseIterator(di
);
3394 /* Return the length the object will have on disk if saved with
3395 * the rdbSaveObject() function. Currently we use a trick to get
3396 * this length with very little changes to the code. In the future
3397 * we could switch to a faster solution. */
3398 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3399 if (fp
== NULL
) fp
= server
.devnull
;
3401 assert(rdbSaveObject(fp
,o
) != 1);
3405 /* Return the number of pages required to save this object in the swap file */
3406 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3407 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3409 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3412 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3413 static int rdbSave(char *filename
) {
3414 dictIterator
*di
= NULL
;
3419 time_t now
= time(NULL
);
3421 /* Wait for I/O therads to terminate, just in case this is a
3422 * foreground-saving, to avoid seeking the swap file descriptor at the
3424 if (server
.vm_enabled
)
3425 waitEmptyIOJobsQueue();
3427 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3428 fp
= fopen(tmpfile
,"w");
3430 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3433 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3434 for (j
= 0; j
< server
.dbnum
; j
++) {
3435 redisDb
*db
= server
.db
+j
;
3437 if (dictSize(d
) == 0) continue;
3438 di
= dictGetIterator(d
);
3444 /* Write the SELECT DB opcode */
3445 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3446 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3448 /* Iterate this DB writing every entry */
3449 while((de
= dictNext(di
)) != NULL
) {
3450 robj
*key
= dictGetEntryKey(de
);
3451 robj
*o
= dictGetEntryVal(de
);
3452 time_t expiretime
= getExpire(db
,key
);
3454 /* Save the expire time */
3455 if (expiretime
!= -1) {
3456 /* If this key is already expired skip it */
3457 if (expiretime
< now
) continue;
3458 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3459 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3461 /* Save the key and associated value. This requires special
3462 * handling if the value is swapped out. */
3463 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3464 key
->storage
== REDIS_VM_SWAPPING
) {
3465 /* Save type, key, value */
3466 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3467 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3468 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3470 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3472 /* Get a preview of the object in memory */
3473 po
= vmPreviewObject(key
);
3474 /* Save type, key, value */
3475 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3476 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3477 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3478 /* Remove the loaded object from memory */
3482 dictReleaseIterator(di
);
3485 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3487 /* Make sure data will not remain on the OS's output buffers */
3492 /* Use RENAME to make sure the DB file is changed atomically only
3493 * if the generate DB file is ok. */
3494 if (rename(tmpfile
,filename
) == -1) {
3495 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3499 redisLog(REDIS_NOTICE
,"DB saved on disk");
3501 server
.lastsave
= time(NULL
);
3507 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3508 if (di
) dictReleaseIterator(di
);
3512 static int rdbSaveBackground(char *filename
) {
3515 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3516 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3517 if ((childpid
= fork()) == 0) {
3519 if (server
.vm_enabled
) vmReopenSwapFile();
3521 if (rdbSave(filename
) == REDIS_OK
) {
3528 if (childpid
== -1) {
3529 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3533 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3534 server
.bgsavechildpid
= childpid
;
3535 updateDictResizePolicy();
3538 return REDIS_OK
; /* unreached */
3541 static void rdbRemoveTempFile(pid_t childpid
) {
3544 snprintf(tmpfile
,256,"temp-%d.rdb", (int) childpid
);
/* Read a single byte from 'fp' and return it as a non negative integer.
 * Returns -1 if the byte could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char opcode;

    if (fread(&opcode, 1, 1, fp) != 1) return -1;
    return opcode;
}
/* Read a 4 byte signed integer (host byte order) from 'fp' and return it
 * as a time_t. Returns -1 if the value could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;

    if (fread(&seconds, 4, 1, fp) != 1) return -1;
    return (time_t) seconds;
}
3560 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3561 * of this file for a description of how these are stored on disk.
3563 * isencoded is set to 1 if the value read is not actually a length but
3564 * an "encoding type", check the above comments for more info */
3565 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3566 unsigned char buf
[2];
3570 if (isencoded
) *isencoded
= 0;
3571 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3572 type
= (buf
[0]&0xC0)>>6;
3573 if (type
== REDIS_RDB_6BITLEN
) {
3574 /* Read a 6 bit len */
3576 } else if (type
== REDIS_RDB_ENCVAL
) {
3577 /* Read a 6 bit len encoding type */
3578 if (isencoded
) *isencoded
= 1;
3580 } else if (type
== REDIS_RDB_14BITLEN
) {
3581 /* Read a 14 bit len */
3582 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3583 return ((buf
[0]&0x3F)<<8)|buf
[1];
3585 /* Read a 32 bit len */
3586 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3591 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3592 unsigned char enc
[4];
3595 if (enctype
== REDIS_RDB_ENC_INT8
) {
3596 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3597 val
= (signed char)enc
[0];
3598 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3600 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3601 v
= enc
[0]|(enc
[1]<<8);
3603 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3605 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3606 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3609 val
= 0; /* anti-warning */
3612 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3615 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3616 unsigned int len
, clen
;
3617 unsigned char *c
= NULL
;
3620 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3621 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3622 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3623 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3624 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3625 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3627 return createObject(REDIS_STRING
,val
);
3634 static robj
*rdbLoadStringObject(FILE*fp
) {
3639 len
= rdbLoadLen(fp
,&isencoded
);
3642 case REDIS_RDB_ENC_INT8
:
3643 case REDIS_RDB_ENC_INT16
:
3644 case REDIS_RDB_ENC_INT32
:
3645 return rdbLoadIntegerObject(fp
,len
);
3646 case REDIS_RDB_ENC_LZF
:
3647 return rdbLoadLzfStringObject(fp
);
3653 if (len
== REDIS_RDB_LENERR
) return NULL
;
3654 val
= sdsnewlen(NULL
,len
);
3655 if (len
&& fread(val
,len
,1,fp
) == 0) {
3659 return createObject(REDIS_STRING
,val
);
3662 /* For information about double serialization check rdbSaveDoubleValue() */
3663 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3667 if (fread(&len
,1,1,fp
) == 0) return -1;
3669 case 255: *val
= R_NegInf
; return 0;
3670 case 254: *val
= R_PosInf
; return 0;
3671 case 253: *val
= R_Nan
; return 0;
3673 if (fread(buf
,len
,1,fp
) == 0) return -1;
3675 sscanf(buf
, "%lg", val
);
3680 /* Load a Redis object of the specified type from the specified file.
3681 * On success a newly allocated object is returned, otherwise NULL. */
3682 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3685 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3686 if (type
== REDIS_STRING
) {
3687 /* Read string value */
3688 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3689 o
= tryObjectEncoding(o
);
3690 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3691 /* Read list/set value */
3694 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3695 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3696 /* It's faster to expand the dict to the right size asap in order
3697 * to avoid rehashing */
3698 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3699 dictExpand(o
->ptr
,listlen
);
3700 /* Load every single element of the list/set */
3704 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3705 ele
= tryObjectEncoding(ele
);
3706 if (type
== REDIS_LIST
) {
3707 listAddNodeTail((list
*)o
->ptr
,ele
);
3709 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3712 } else if (type
== REDIS_ZSET
) {
3713 /* Read list/set value */
3717 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3718 o
= createZsetObject();
3720 /* Load every single element of the list/set */
3723 double *score
= zmalloc(sizeof(double));
3725 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3726 ele
= tryObjectEncoding(ele
);
3727 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3728 dictAdd(zs
->dict
,ele
,score
);
3729 zslInsert(zs
->zsl
,*score
,ele
);
3730 incrRefCount(ele
); /* added to skiplist */
3732 } else if (type
== REDIS_HASH
) {
3735 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3736 o
= createHashObject();
3737 /* Too many entries? Use an hash table. */
3738 if (hashlen
> server
.hash_max_zipmap_entries
)
3739 convertToRealHash(o
);
3740 /* Load every key/value, then set it into the zipmap or hash
3741 * table, as needed. */
3745 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3746 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3747 /* If we are using a zipmap and there are too big values
3748 * the object is converted to real hash table encoding. */
3749 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3750 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3751 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3753 convertToRealHash(o
);
3756 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3757 unsigned char *zm
= o
->ptr
;
3759 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3760 val
->ptr
,sdslen(val
->ptr
),NULL
);
3765 key
= tryObjectEncoding(key
);
3766 val
= tryObjectEncoding(val
);
3767 dictAdd((dict
*)o
->ptr
,key
,val
);
3776 static int rdbLoad(char *filename
) {
3778 robj
*keyobj
= NULL
;
3780 int type
, retval
, rdbver
;
3781 dict
*d
= server
.db
[0].dict
;
3782 redisDb
*db
= server
.db
+0;
3784 time_t expiretime
= -1, now
= time(NULL
);
3785 long long loadedkeys
= 0;
3787 fp
= fopen(filename
,"r");
3788 if (!fp
) return REDIS_ERR
;
3789 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3791 if (memcmp(buf
,"REDIS",5) != 0) {
3793 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3796 rdbver
= atoi(buf
+5);
3799 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3806 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3807 if (type
== REDIS_EXPIRETIME
) {
3808 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3809 /* We read the time so we need to read the object type again */
3810 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3812 if (type
== REDIS_EOF
) break;
3813 /* Handle SELECT DB opcode as a special case */
3814 if (type
== REDIS_SELECTDB
) {
3815 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3817 if (dbid
>= (unsigned)server
.dbnum
) {
3818 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3821 db
= server
.db
+dbid
;
3826 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3828 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3829 /* Add the new object in the hash table */
3830 retval
= dictAdd(d
,keyobj
,o
);
3831 if (retval
== DICT_ERR
) {
3832 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3835 /* Set the expire time if needed */
3836 if (expiretime
!= -1) {
3837 setExpire(db
,keyobj
,expiretime
);
3838 /* Delete this key if already expired */
3839 if (expiretime
< now
) deleteKey(db
,keyobj
);
3843 /* Handle swapping while loading big datasets when VM is on */
3845 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3846 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3847 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3854 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3855 if (keyobj
) decrRefCount(keyobj
);
3856 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3858 return REDIS_ERR
; /* Just to avoid warning */
3861 /*================================== Commands =============================== */
3863 static void authCommand(redisClient
*c
) {
3864 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3865 c
->authenticated
= 1;
3866 addReply(c
,shared
.ok
);
3868 c
->authenticated
= 0;
3869 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3873 static void pingCommand(redisClient
*c
) {
3874 addReply(c
,shared
.pong
);
3877 static void echoCommand(redisClient
*c
) {
3878 addReplyBulk(c
,c
->argv
[1]);
3881 /*=================================== Strings =============================== */
3883 static void setGenericCommand(redisClient
*c
, int nx
) {
3886 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3887 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3888 if (retval
== DICT_ERR
) {
3890 /* If the key is about a swapped value, we want a new key object
3891 * to overwrite the old. So we delete the old key in the database.
3892 * This will also make sure that swap pages about the old object
3893 * will be marked as free. */
3894 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3895 incrRefCount(c
->argv
[1]);
3896 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3897 incrRefCount(c
->argv
[2]);
3899 addReply(c
,shared
.czero
);
3903 incrRefCount(c
->argv
[1]);
3904 incrRefCount(c
->argv
[2]);
3907 removeExpire(c
->db
,c
->argv
[1]);
3908 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3911 static void setCommand(redisClient
*c
) {
3912 setGenericCommand(c
,0);
3915 static void setnxCommand(redisClient
*c
) {
3916 setGenericCommand(c
,1);
3919 static int getGenericCommand(redisClient
*c
) {
3922 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3925 if (o
->type
!= REDIS_STRING
) {
3926 addReply(c
,shared
.wrongtypeerr
);
3934 static void getCommand(redisClient
*c
) {
3935 getGenericCommand(c
);
3938 static void getsetCommand(redisClient
*c
) {
3939 if (getGenericCommand(c
) == REDIS_ERR
) return;
3940 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3941 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3943 incrRefCount(c
->argv
[1]);
3945 incrRefCount(c
->argv
[2]);
3947 removeExpire(c
->db
,c
->argv
[1]);
3950 static void mgetCommand(redisClient
*c
) {
3953 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3954 for (j
= 1; j
< c
->argc
; j
++) {
3955 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3957 addReply(c
,shared
.nullbulk
);
3959 if (o
->type
!= REDIS_STRING
) {
3960 addReply(c
,shared
.nullbulk
);
3968 static void msetGenericCommand(redisClient
*c
, int nx
) {
3969 int j
, busykeys
= 0;
3971 if ((c
->argc
% 2) == 0) {
3972 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3975 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3976 * set nothing at all if at least one already key exists. */
3978 for (j
= 1; j
< c
->argc
; j
+= 2) {
3979 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3985 addReply(c
, shared
.czero
);
3989 for (j
= 1; j
< c
->argc
; j
+= 2) {
3992 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
3993 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3994 if (retval
== DICT_ERR
) {
3995 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3996 incrRefCount(c
->argv
[j
+1]);
3998 incrRefCount(c
->argv
[j
]);
3999 incrRefCount(c
->argv
[j
+1]);
4001 removeExpire(c
->db
,c
->argv
[j
]);
4003 server
.dirty
+= (c
->argc
-1)/2;
4004 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4007 static void msetCommand(redisClient
*c
) {
4008 msetGenericCommand(c
,0);
4011 static void msetnxCommand(redisClient
*c
) {
4012 msetGenericCommand(c
,1);
4015 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4020 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4022 if (getLongLongFromObject(c
, o
, &value
) != REDIS_OK
) return;
4025 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4026 o
= tryObjectEncoding(o
);
4027 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4028 if (retval
== DICT_ERR
) {
4029 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4030 removeExpire(c
->db
,c
->argv
[1]);
4032 incrRefCount(c
->argv
[1]);
4035 addReply(c
,shared
.colon
);
4037 addReply(c
,shared
.crlf
);
4040 static void incrCommand(redisClient
*c
) {
4041 incrDecrCommand(c
,1);
4044 static void decrCommand(redisClient
*c
) {
4045 incrDecrCommand(c
,-1);
4048 static void incrbyCommand(redisClient
*c
) {
4051 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4053 incrDecrCommand(c
,incr
);
4056 static void decrbyCommand(redisClient
*c
) {
4059 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4061 incrDecrCommand(c
,-incr
);
4064 static void appendCommand(redisClient
*c
) {
4069 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4071 /* Create the key */
4072 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4073 incrRefCount(c
->argv
[1]);
4074 incrRefCount(c
->argv
[2]);
4075 totlen
= stringObjectLen(c
->argv
[2]);
4079 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4082 o
= dictGetEntryVal(de
);
4083 if (o
->type
!= REDIS_STRING
) {
4084 addReply(c
,shared
.wrongtypeerr
);
4087 /* If the object is specially encoded or shared we have to make
4089 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4090 robj
*decoded
= getDecodedObject(o
);
4092 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4093 decrRefCount(decoded
);
4094 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4097 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4098 o
->ptr
= sdscatlen(o
->ptr
,
4099 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4101 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4102 (unsigned long) c
->argv
[2]->ptr
);
4104 totlen
= sdslen(o
->ptr
);
4107 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4110 static void substrCommand(redisClient
*c
) {
4112 long start
= atoi(c
->argv
[2]->ptr
);
4113 long end
= atoi(c
->argv
[3]->ptr
);
4114 size_t rangelen
, strlen
;
4117 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4118 checkType(c
,o
,REDIS_STRING
)) return;
4120 o
= getDecodedObject(o
);
4121 strlen
= sdslen(o
->ptr
);
4123 /* convert negative indexes */
4124 if (start
< 0) start
= strlen
+start
;
4125 if (end
< 0) end
= strlen
+end
;
4126 if (start
< 0) start
= 0;
4127 if (end
< 0) end
= 0;
4129 /* indexes sanity checks */
4130 if (start
> end
|| (size_t)start
>= strlen
) {
4131 /* Out of range start or start > end result in null reply */
4132 addReply(c
,shared
.nullbulk
);
4136 if ((size_t)end
>= strlen
) end
= strlen
-1;
4137 rangelen
= (end
-start
)+1;
4139 /* Return the result */
4140 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4141 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4142 addReplySds(c
,range
);
4143 addReply(c
,shared
.crlf
);
4147 /* ========================= Type agnostic commands ========================= */
4149 static void delCommand(redisClient
*c
) {
4152 for (j
= 1; j
< c
->argc
; j
++) {
4153 if (deleteKey(c
->db
,c
->argv
[j
])) {
4158 addReplyLong(c
,deleted
);
4161 static void existsCommand(redisClient
*c
) {
4162 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4165 static void selectCommand(redisClient
*c
) {
4166 int id
= atoi(c
->argv
[1]->ptr
);
4168 if (selectDb(c
,id
) == REDIS_ERR
) {
4169 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4171 addReply(c
,shared
.ok
);
4175 static void randomkeyCommand(redisClient
*c
) {
4179 de
= dictGetRandomKey(c
->db
->dict
);
4180 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4183 addReply(c
,shared
.plus
);
4184 addReply(c
,shared
.crlf
);
4186 addReply(c
,shared
.plus
);
4187 addReply(c
,dictGetEntryKey(de
));
4188 addReply(c
,shared
.crlf
);
4192 static void keysCommand(redisClient
*c
) {
4195 sds pattern
= c
->argv
[1]->ptr
;
4196 int plen
= sdslen(pattern
);
4197 unsigned long numkeys
= 0;
4198 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4200 di
= dictGetIterator(c
->db
->dict
);
4202 decrRefCount(lenobj
);
4203 while((de
= dictNext(di
)) != NULL
) {
4204 robj
*keyobj
= dictGetEntryKey(de
);
4206 sds key
= keyobj
->ptr
;
4207 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4208 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4209 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4210 addReplyBulk(c
,keyobj
);
4215 dictReleaseIterator(di
);
4216 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4219 static void dbsizeCommand(redisClient
*c
) {
4221 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4224 static void lastsaveCommand(redisClient
*c
) {
4226 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4229 static void typeCommand(redisClient
*c
) {
4233 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4238 case REDIS_STRING
: type
= "+string"; break;
4239 case REDIS_LIST
: type
= "+list"; break;
4240 case REDIS_SET
: type
= "+set"; break;
4241 case REDIS_ZSET
: type
= "+zset"; break;
4242 case REDIS_HASH
: type
= "+hash"; break;
4243 default: type
= "+unknown"; break;
4246 addReplySds(c
,sdsnew(type
));
4247 addReply(c
,shared
.crlf
);
4250 static void saveCommand(redisClient
*c
) {
4251 if (server
.bgsavechildpid
!= -1) {
4252 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4255 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4256 addReply(c
,shared
.ok
);
4258 addReply(c
,shared
.err
);
4262 static void bgsaveCommand(redisClient
*c
) {
4263 if (server
.bgsavechildpid
!= -1) {
4264 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4267 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4268 char *status
= "+Background saving started\r\n";
4269 addReplySds(c
,sdsnew(status
));
4271 addReply(c
,shared
.err
);
4275 static void shutdownCommand(redisClient
*c
) {
4276 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4277 /* Kill the saving child if there is a background saving in progress.
4278 We want to avoid race conditions, for instance our saving child may
4279 overwrite the synchronous saving did by SHUTDOWN. */
4280 if (server
.bgsavechildpid
!= -1) {
4281 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4282 kill(server
.bgsavechildpid
,SIGKILL
);
4283 rdbRemoveTempFile(server
.bgsavechildpid
);
4285 if (server
.appendonly
) {
4286 /* Append only file: fsync() the AOF and exit */
4287 fsync(server
.appendfd
);
4288 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4291 /* Snapshotting. Perform a SYNC SAVE and exit */
4292 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4293 if (server
.daemonize
)
4294 unlink(server
.pidfile
);
4295 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4296 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4297 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4300 /* Ooops.. error saving! The best we can do is to continue
4301 * operating. Note that if there was a background saving process,
4302 * in the next cron() Redis will be notified that the background
4303 * saving aborted, handling special stuff like slaves pending for
4304 * synchronization... */
4305 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4307 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4312 static void renameGenericCommand(redisClient
*c
, int nx
) {
4315 /* To use the same key as src and dst is probably an error */
4316 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4317 addReply(c
,shared
.sameobjecterr
);
4321 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4325 deleteIfVolatile(c
->db
,c
->argv
[2]);
4326 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4329 addReply(c
,shared
.czero
);
4332 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4334 incrRefCount(c
->argv
[2]);
4336 deleteKey(c
->db
,c
->argv
[1]);
4338 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4341 static void renameCommand(redisClient
*c
) {
4342 renameGenericCommand(c
,0);
4345 static void renamenxCommand(redisClient
*c
) {
4346 renameGenericCommand(c
,1);
4349 static void moveCommand(redisClient
*c
) {
4354 /* Obtain source and target DB pointers */
4357 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4358 addReply(c
,shared
.outofrangeerr
);
4362 selectDb(c
,srcid
); /* Back to the source DB */
4364 /* If the user is moving using as target the same
4365 * DB as the source DB it is probably an error. */
4367 addReply(c
,shared
.sameobjecterr
);
4371 /* Check if the element exists and get a reference */
4372 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4374 addReply(c
,shared
.czero
);
4378 /* Try to add the element to the target DB */
4379 deleteIfVolatile(dst
,c
->argv
[1]);
4380 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4381 addReply(c
,shared
.czero
);
4384 incrRefCount(c
->argv
[1]);
4387 /* OK! key moved, free the entry in the source DB */
4388 deleteKey(src
,c
->argv
[1]);
4390 addReply(c
,shared
.cone
);
4393 /* =================================== Lists ================================ */
4394 static void pushGenericCommand(redisClient
*c
, int where
) {
4398 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4400 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4401 addReply(c
,shared
.cone
);
4404 lobj
= createListObject();
4406 if (where
== REDIS_HEAD
) {
4407 listAddNodeHead(list
,c
->argv
[2]);
4409 listAddNodeTail(list
,c
->argv
[2]);
4411 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4412 incrRefCount(c
->argv
[1]);
4413 incrRefCount(c
->argv
[2]);
4415 if (lobj
->type
!= REDIS_LIST
) {
4416 addReply(c
,shared
.wrongtypeerr
);
4419 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4420 addReply(c
,shared
.cone
);
4424 if (where
== REDIS_HEAD
) {
4425 listAddNodeHead(list
,c
->argv
[2]);
4427 listAddNodeTail(list
,c
->argv
[2]);
4429 incrRefCount(c
->argv
[2]);
4432 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4435 static void lpushCommand(redisClient
*c
) {
4436 pushGenericCommand(c
,REDIS_HEAD
);
4439 static void rpushCommand(redisClient
*c
) {
4440 pushGenericCommand(c
,REDIS_TAIL
);
4443 static void llenCommand(redisClient
*c
) {
4447 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4448 checkType(c
,o
,REDIS_LIST
)) return;
4451 addReplyUlong(c
,listLength(l
));
4454 static void lindexCommand(redisClient
*c
) {
4456 int index
= atoi(c
->argv
[2]->ptr
);
4460 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4461 checkType(c
,o
,REDIS_LIST
)) return;
4464 ln
= listIndex(list
, index
);
4466 addReply(c
,shared
.nullbulk
);
4468 robj
*ele
= listNodeValue(ln
);
4469 addReplyBulk(c
,ele
);
4473 static void lsetCommand(redisClient
*c
) {
4475 int index
= atoi(c
->argv
[2]->ptr
);
4479 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4480 checkType(c
,o
,REDIS_LIST
)) return;
4483 ln
= listIndex(list
, index
);
4485 addReply(c
,shared
.outofrangeerr
);
4487 robj
*ele
= listNodeValue(ln
);
4490 listNodeValue(ln
) = c
->argv
[3];
4491 incrRefCount(c
->argv
[3]);
4492 addReply(c
,shared
.ok
);
4497 static void popGenericCommand(redisClient
*c
, int where
) {
4502 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4503 checkType(c
,o
,REDIS_LIST
)) return;
4506 if (where
== REDIS_HEAD
)
4507 ln
= listFirst(list
);
4509 ln
= listLast(list
);
4512 addReply(c
,shared
.nullbulk
);
4514 robj
*ele
= listNodeValue(ln
);
4515 addReplyBulk(c
,ele
);
4516 listDelNode(list
,ln
);
4517 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4522 static void lpopCommand(redisClient
*c
) {
4523 popGenericCommand(c
,REDIS_HEAD
);
4526 static void rpopCommand(redisClient
*c
) {
4527 popGenericCommand(c
,REDIS_TAIL
);
4530 static void lrangeCommand(redisClient
*c
) {
4532 int start
= atoi(c
->argv
[2]->ptr
);
4533 int end
= atoi(c
->argv
[3]->ptr
);
4540 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4541 || checkType(c
,o
,REDIS_LIST
)) return;
4543 llen
= listLength(list
);
4545 /* convert negative indexes */
4546 if (start
< 0) start
= llen
+start
;
4547 if (end
< 0) end
= llen
+end
;
4548 if (start
< 0) start
= 0;
4549 if (end
< 0) end
= 0;
4551 /* indexes sanity checks */
4552 if (start
> end
|| start
>= llen
) {
4553 /* Out of range start or start > end result in empty list */
4554 addReply(c
,shared
.emptymultibulk
);
4557 if (end
>= llen
) end
= llen
-1;
4558 rangelen
= (end
-start
)+1;
4560 /* Return the result in form of a multi-bulk reply */
4561 ln
= listIndex(list
, start
);
4562 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4563 for (j
= 0; j
< rangelen
; j
++) {
4564 ele
= listNodeValue(ln
);
4565 addReplyBulk(c
,ele
);
4570 static void ltrimCommand(redisClient
*c
) {
4572 int start
= atoi(c
->argv
[2]->ptr
);
4573 int end
= atoi(c
->argv
[3]->ptr
);
4575 int j
, ltrim
, rtrim
;
4579 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4580 checkType(c
,o
,REDIS_LIST
)) return;
4582 llen
= listLength(list
);
4584 /* convert negative indexes */
4585 if (start
< 0) start
= llen
+start
;
4586 if (end
< 0) end
= llen
+end
;
4587 if (start
< 0) start
= 0;
4588 if (end
< 0) end
= 0;
4590 /* indexes sanity checks */
4591 if (start
> end
|| start
>= llen
) {
4592 /* Out of range start or start > end result in empty list */
4596 if (end
>= llen
) end
= llen
-1;
4601 /* Remove list elements to perform the trim */
4602 for (j
= 0; j
< ltrim
; j
++) {
4603 ln
= listFirst(list
);
4604 listDelNode(list
,ln
);
4606 for (j
= 0; j
< rtrim
; j
++) {
4607 ln
= listLast(list
);
4608 listDelNode(list
,ln
);
4610 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4612 addReply(c
,shared
.ok
);
4615 static void lremCommand(redisClient
*c
) {
4618 listNode
*ln
, *next
;
4619 int toremove
= atoi(c
->argv
[2]->ptr
);
4623 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4624 checkType(c
,o
,REDIS_LIST
)) return;
4628 toremove
= -toremove
;
4631 ln
= fromtail
? list
->tail
: list
->head
;
4633 robj
*ele
= listNodeValue(ln
);
4635 next
= fromtail
? ln
->prev
: ln
->next
;
4636 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4637 listDelNode(list
,ln
);
4640 if (toremove
&& removed
== toremove
) break;
4644 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4645 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4648 /* This is the semantic of this command:
4649 * RPOPLPUSH srclist dstlist:
4650 * IF LLEN(srclist) > 0
4651 * element = RPOP srclist
4652 * LPUSH dstlist element
4659 * The idea is to be able to get an element from a list in a reliable way
4660 * since the element is not just returned but pushed against another list
4661 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4663 static void rpoplpushcommand(redisClient
*c
) {
4668 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4669 checkType(c
,sobj
,REDIS_LIST
)) return;
4670 srclist
= sobj
->ptr
;
4671 ln
= listLast(srclist
);
4674 addReply(c
,shared
.nullbulk
);
4676 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4677 robj
*ele
= listNodeValue(ln
);
4680 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4681 addReply(c
,shared
.wrongtypeerr
);
4685 /* Add the element to the target list (unless it's directly
4686 * passed to some BLPOP-ing client) */
4687 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4689 /* Create the list if the key does not exist */
4690 dobj
= createListObject();
4691 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4692 incrRefCount(c
->argv
[2]);
4694 dstlist
= dobj
->ptr
;
4695 listAddNodeHead(dstlist
,ele
);
4699 /* Send the element to the client as reply as well */
4700 addReplyBulk(c
,ele
);
4702 /* Finally remove the element from the source list */
4703 listDelNode(srclist
,ln
);
4704 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4709 /* ==================================== Sets ================================ */
4711 static void saddCommand(redisClient
*c
) {
4714 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4716 set
= createSetObject();
4717 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4718 incrRefCount(c
->argv
[1]);
4720 if (set
->type
!= REDIS_SET
) {
4721 addReply(c
,shared
.wrongtypeerr
);
4725 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4726 incrRefCount(c
->argv
[2]);
4728 addReply(c
,shared
.cone
);
4730 addReply(c
,shared
.czero
);
4734 static void sremCommand(redisClient
*c
) {
4737 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4738 checkType(c
,set
,REDIS_SET
)) return;
4740 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4742 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4743 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4744 addReply(c
,shared
.cone
);
4746 addReply(c
,shared
.czero
);
4750 static void smoveCommand(redisClient
*c
) {
4751 robj
*srcset
, *dstset
;
4753 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4754 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4756 /* If the source key does not exist return 0, if it's of the wrong type
4758 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4759 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4762 /* Error if the destination key is not a set as well */
4763 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4764 addReply(c
,shared
.wrongtypeerr
);
4767 /* Remove the element from the source set */
4768 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4769 /* Key not found in the src set! return zero */
4770 addReply(c
,shared
.czero
);
4773 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4774 deleteKey(c
->db
,c
->argv
[1]);
4776 /* Add the element to the destination set */
4778 dstset
= createSetObject();
4779 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4780 incrRefCount(c
->argv
[2]);
4782 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4783 incrRefCount(c
->argv
[3]);
4784 addReply(c
,shared
.cone
);
4787 static void sismemberCommand(redisClient
*c
) {
4790 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4791 checkType(c
,set
,REDIS_SET
)) return;
4793 if (dictFind(set
->ptr
,c
->argv
[2]))
4794 addReply(c
,shared
.cone
);
4796 addReply(c
,shared
.czero
);
4799 static void scardCommand(redisClient
*c
) {
4803 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4804 checkType(c
,o
,REDIS_SET
)) return;
4807 addReplyUlong(c
,dictSize(s
));
4810 static void spopCommand(redisClient
*c
) {
4814 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4815 checkType(c
,set
,REDIS_SET
)) return;
4817 de
= dictGetRandomKey(set
->ptr
);
4819 addReply(c
,shared
.nullbulk
);
4821 robj
*ele
= dictGetEntryKey(de
);
4823 addReplyBulk(c
,ele
);
4824 dictDelete(set
->ptr
,ele
);
4825 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4826 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4831 static void srandmemberCommand(redisClient
*c
) {
4835 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4836 checkType(c
,set
,REDIS_SET
)) return;
4838 de
= dictGetRandomKey(set
->ptr
);
4840 addReply(c
,shared
.nullbulk
);
4842 robj
*ele
= dictGetEntryKey(de
);
4844 addReplyBulk(c
,ele
);
4848 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4849 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4851 return dictSize(*d1
)-dictSize(*d2
);
4854 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4855 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4858 robj
*lenobj
= NULL
, *dstset
= NULL
;
4859 unsigned long j
, cardinality
= 0;
4861 for (j
= 0; j
< setsnum
; j
++) {
4865 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4866 lookupKeyRead(c
->db
,setskeys
[j
]);
4870 if (deleteKey(c
->db
,dstkey
))
4872 addReply(c
,shared
.czero
);
4874 addReply(c
,shared
.emptymultibulk
);
4878 if (setobj
->type
!= REDIS_SET
) {
4880 addReply(c
,shared
.wrongtypeerr
);
4883 dv
[j
] = setobj
->ptr
;
4885 /* Sort sets from the smallest to largest, this will improve our
4886 * algorithm's performance */
4887 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4889 /* The first thing we should output is the total number of elements...
4890 * since this is a multi-bulk write, but at this stage we don't know
4891 * the intersection set size, so we use a trick, append an empty object
4892 * to the output list and save the pointer to later modify it with the
4895 lenobj
= createObject(REDIS_STRING
,NULL
);
4897 decrRefCount(lenobj
);
4899 /* If we have a target key where to store the resulting set
4900 * create this key with an empty set inside */
4901 dstset
= createSetObject();
4904 /* Iterate all the elements of the first (smallest) set, and test
4905 * the element against all the other sets, if at least one set does
4906 * not include the element it is discarded */
4907 di
= dictGetIterator(dv
[0]);
4909 while((de
= dictNext(di
)) != NULL
) {
4912 for (j
= 1; j
< setsnum
; j
++)
4913 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4915 continue; /* at least one set does not contain the member */
4916 ele
= dictGetEntryKey(de
);
4918 addReplyBulk(c
,ele
);
4921 dictAdd(dstset
->ptr
,ele
,NULL
);
4925 dictReleaseIterator(di
);
4928 /* Store the resulting set into the target, if the intersection
4929 * is not an empty set. */
4930 deleteKey(c
->db
,dstkey
);
4931 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4932 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4933 incrRefCount(dstkey
);
4934 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4936 decrRefCount(dstset
);
4937 addReply(c
,shared
.czero
);
4941 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4946 static void sinterCommand(redisClient
*c
) {
4947 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4950 static void sinterstoreCommand(redisClient
*c
) {
4951 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4954 #define REDIS_OP_UNION 0
4955 #define REDIS_OP_DIFF 1
4956 #define REDIS_OP_INTER 2
4958 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4959 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4962 robj
*dstset
= NULL
;
4963 int j
, cardinality
= 0;
4965 for (j
= 0; j
< setsnum
; j
++) {
4969 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4970 lookupKeyRead(c
->db
,setskeys
[j
]);
4975 if (setobj
->type
!= REDIS_SET
) {
4977 addReply(c
,shared
.wrongtypeerr
);
4980 dv
[j
] = setobj
->ptr
;
4983 /* We need a temp set object to store our union. If the dstkey
4984 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4985 * this set object will be the resulting object to set into the target key*/
4986 dstset
= createSetObject();
4988 /* Iterate all the elements of all the sets, add every element a single
4989 * time to the result set */
4990 for (j
= 0; j
< setsnum
; j
++) {
4991 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4992 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4994 di
= dictGetIterator(dv
[j
]);
4996 while((de
= dictNext(di
)) != NULL
) {
4999 /* dictAdd will not add the same element multiple times */
5000 ele
= dictGetEntryKey(de
);
5001 if (op
== REDIS_OP_UNION
|| j
== 0) {
5002 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5006 } else if (op
== REDIS_OP_DIFF
) {
5007 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5012 dictReleaseIterator(di
);
5014 /* result set is empty? Exit asap. */
5015 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5018 /* Output the content of the resulting set, if not in STORE mode */
5020 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5021 di
= dictGetIterator(dstset
->ptr
);
5022 while((de
= dictNext(di
)) != NULL
) {
5025 ele
= dictGetEntryKey(de
);
5026 addReplyBulk(c
,ele
);
5028 dictReleaseIterator(di
);
5029 decrRefCount(dstset
);
5031 /* If we have a target key where to store the resulting set
5032 * create this key with the result set inside */
5033 deleteKey(c
->db
,dstkey
);
5034 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5035 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5036 incrRefCount(dstkey
);
5037 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5039 decrRefCount(dstset
);
5040 addReply(c
,shared
.czero
);
5047 static void sunionCommand(redisClient
*c
) {
5048 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5051 static void sunionstoreCommand(redisClient
*c
) {
5052 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5055 static void sdiffCommand(redisClient
*c
) {
5056 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5059 static void sdiffstoreCommand(redisClient
*c
) {
5060 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5063 /* ==================================== ZSets =============================== */
5065 /* ZSETs are ordered sets using two data structures to hold the same elements
5066 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5069 * The elements are added to a hash table mapping Redis objects to scores.
5070 * At the same time the elements are added to a skip list mapping scores
5071 * to Redis objects (so objects are sorted by scores in this "view"). */
5073 /* This skiplist implementation is almost a C translation of the original
5074 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5075 * Alternative to Balanced Trees", modified in three ways:
5076 * a) this implementation allows for repeated values.
5077 * b) the comparison is not just by key (our 'score') but by satellite data.
5078 * c) there is a back pointer, so it's a doubly linked list with the back
5079 * pointers being only at "level 1". This allows to traverse the list
5080 * from tail to head, useful for ZREVRANGE. */
5082 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5083 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5085 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5087 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5093 static zskiplist
*zslCreate(void) {
5097 zsl
= zmalloc(sizeof(*zsl
));
5100 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5101 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5102 zsl
->header
->forward
[j
] = NULL
;
5104 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5105 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5106 zsl
->header
->span
[j
] = 0;
5108 zsl
->header
->backward
= NULL
;
5113 static void zslFreeNode(zskiplistNode
*node
) {
5114 decrRefCount(node
->obj
);
5115 zfree(node
->forward
);
5120 static void zslFree(zskiplist
*zsl
) {
5121 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5123 zfree(zsl
->header
->forward
);
5124 zfree(zsl
->header
->span
);
5127 next
= node
->forward
[0];
5134 static int zslRandomLevel(void) {
5136 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5138 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5141 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5142 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5143 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5147 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5148 /* store rank that is crossed to reach the insert position */
5149 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5151 while (x
->forward
[i
] &&
5152 (x
->forward
[i
]->score
< score
||
5153 (x
->forward
[i
]->score
== score
&&
5154 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5155 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5160 /* we assume the key is not already inside, since we allow duplicated
5161 * scores, and the re-insertion of score and redis object should never
5162 * happen since the caller of zslInsert() should test in the hash table
5163 * if the element is already inside or not. */
5164 level
= zslRandomLevel();
5165 if (level
> zsl
->level
) {
5166 for (i
= zsl
->level
; i
< level
; i
++) {
5168 update
[i
] = zsl
->header
;
5169 update
[i
]->span
[i
-1] = zsl
->length
;
5173 x
= zslCreateNode(level
,score
,obj
);
5174 for (i
= 0; i
< level
; i
++) {
5175 x
->forward
[i
] = update
[i
]->forward
[i
];
5176 update
[i
]->forward
[i
] = x
;
5178 /* update span covered by update[i] as x is inserted here */
5180 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5181 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5185 /* increment span for untouched levels */
5186 for (i
= level
; i
< zsl
->level
; i
++) {
5187 update
[i
]->span
[i
-1]++;
5190 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5192 x
->forward
[0]->backward
= x
;
5198 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5199 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5201 for (i
= 0; i
< zsl
->level
; i
++) {
5202 if (update
[i
]->forward
[i
] == x
) {
5204 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5206 update
[i
]->forward
[i
] = x
->forward
[i
];
5208 /* invariant: i > 0, because update[0]->forward[0]
5209 * is always equal to x */
5210 update
[i
]->span
[i
-1] -= 1;
5213 if (x
->forward
[0]) {
5214 x
->forward
[0]->backward
= x
->backward
;
5216 zsl
->tail
= x
->backward
;
5218 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5223 /* Delete an element with matching score/object from the skiplist. */
5224 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5225 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5229 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5230 while (x
->forward
[i
] &&
5231 (x
->forward
[i
]->score
< score
||
5232 (x
->forward
[i
]->score
== score
&&
5233 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5237 /* We may have multiple elements with the same score, what we need
5238 * is to find the element with both the right score and object. */
5240 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5241 zslDeleteNode(zsl
, x
, update
);
5245 return 0; /* not found */
5247 return 0; /* not found */
5250 /* Delete all the elements with score between min and max from the skiplist.
5251 * Min and max are inclusive, so an element with score >= min && score <= max
5251 * is deleted.
5252 * Note that this function takes the reference to the hash table view of the
5253 * sorted set, in order to remove the elements from the hash table too. */
5254 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5255 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5256 unsigned long removed
= 0;
5260 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5261 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5265 /* The search above stops just before the first node with score >= min:
5266 * delete nodes from there on for as long as their score is <= max. */
5268 while (x
&& x
->score
<= max
) {
5269 zskiplistNode
*next
= x
->forward
[0];
5270 zslDeleteNode(zsl
, x
, update
);
5271 dictDelete(dict
,x
->obj
);
5276 return removed
; /* not found */
5279 /* Delete all the elements with rank between start and end from the skiplist.
5280 * Start and end are inclusive. Note that start and end need to be 1-based */
5281 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5282 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5283 unsigned long traversed
= 0, removed
= 0;
5287 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5288 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5289 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5297 while (x
&& traversed
<= end
) {
5298 zskiplistNode
*next
= x
->forward
[0];
5299 zslDeleteNode(zsl
, x
, update
);
5300 dictDelete(dict
,x
->obj
);
5309 /* Find the first node having a score equal or greater than the specified one.
5310 * Returns NULL if there is no match. */
5311 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5316 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5317 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5320 /* x is now the last node with a score < 'score': its level-0 successor is
5321 * the first node with a score >= 'score', or NULL if there is none. */
5322 return x
->forward
[0];
5325 /* Find the rank for an element by both score and key.
5326 * Returns 0 when the element cannot be found, rank otherwise.
5327 * Note that the rank is 1-based due to the span of zsl->header to the
5329 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5331 unsigned long rank
= 0;
5335 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5336 while (x
->forward
[i
] &&
5337 (x
->forward
[i
]->score
< score
||
5338 (x
->forward
[i
]->score
== score
&&
5339 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5340 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5344 /* x might be equal to zsl->header, so test if obj is non-NULL */
5345 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5352 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5353 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5355 unsigned long traversed
= 0;
5359 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5360 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5362 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5365 if (traversed
== rank
) {
5372 /* The actual Z-commands implementations */
5374 /* This generic command implements both ZADD and ZINCRBY.
5375 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5376 * the increment if the operation is a ZINCRBY (doincrement == 1). */
/* Visible flow: look up 'key' for writing (creating a new zset object and
 * adding it to c->db->dict when the lookup returns NULL), reply with
 * shared.wrongtypeerr when the object is not a REDIS_ZSET, compute the new
 * score into a zmalloc'ed double, then either add 'ele' as a new member or
 * move it inside the skiplist when its score changed.
 * NOTE(review): several lines (local declarations, else branches, the
 * assignment of 'zs', any early returns) are missing from this listing. */
5377 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5382 zsetobj
= lookupKeyWrite(c
->db
,key
);
5383 if (zsetobj
== NULL
) {
5384 zsetobj
= createZsetObject();
5385 dictAdd(c
->db
->dict
,key
,zsetobj
);
5388 if (zsetobj
->type
!= REDIS_ZSET
) {
5389 addReply(c
,shared
.wrongtypeerr
);
5395 /* Ok now since we implement both ZADD and ZINCRBY here the code
5396 * needs to handle the two different conditions. It's all about setting
5397 * '*score', that is, the new score to set, to the right value. */
5398 score
= zmalloc(sizeof(double));
5402 /* Read the old score. If the element was not present starts from 0 */
5403 de
= dictFind(zs
->dict
,ele
);
5405 double *oldscore
= dictGetEntryVal(de
);
5406 *score
= *oldscore
+ scoreval
;
5414 /* What follows is a simple remove and re-insert operation that is common
5415 * to both ZADD and ZINCRBY... */
/* dictAdd() returning DICT_OK is used as the "element is new" test. */
5416 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5417 /* case 1: New element */
5418 incrRefCount(ele
); /* added to hash */
5419 zslInsert(zs
->zsl
,*score
,ele
);
5420 incrRefCount(ele
); /* added to skiplist */
5423 addReplyDouble(c
,*score
);
5425 addReply(c
,shared
.cone
);
5430 /* case 2: Score update operation */
5431 de
= dictFind(zs
->dict
,ele
);
5432 redisAssert(de
!= NULL
);
5433 oldscore
= dictGetEntryVal(de
);
5434 if (*score
!= *oldscore
) {
5437 /* Remove and insert the element in the skip list with new score */
5438 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5439 redisAssert(deleted
!= 0);
5440 zslInsert(zs
->zsl
,*score
,ele
);
5442 /* Update the score in the hash table */
5443 dictReplace(zs
->dict
,ele
,score
);
5449 addReplyDouble(c
,*score
);
5451 addReply(c
,shared
.czero
);
/* Parses c->argv[2] as a double (returning early when
 * getDoubleFromObject() does not yield REDIS_OK) and forwards to
 * zaddGenericCommand() with key c->argv[1], element c->argv[3] and
 * doincrement == 0. */
5455 static void zaddCommand(redisClient
*c
) {
5458 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5460 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
/* Same argument handling as zaddCommand(), but calls
 * zaddGenericCommand() with doincrement == 1 so that c->argv[2] is used
 * as an increment. */
5463 static void zincrbyCommand(redisClient
*c
) {
5466 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5468 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* Removes member c->argv[2] from the sorted set stored at c->argv[1].
 * The key is looked up for writing (shared.czero is the reply when the
 * lookup fails) and type-checked against REDIS_ZSET. The member is deleted
 * from the skiplist first, then from the dict; the key itself is deleted
 * when the dict becomes empty. Replies shared.cone after a removal.
 * NOTE(review): the declarations and the test guarding the first
 * shared.czero reply are not visible in this listing; presumably it is
 * the "member not found" branch - confirm against the full source. */
5471 static void zremCommand(redisClient
*c
) {
5478 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5479 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5482 de
= dictFind(zs
->dict
,c
->argv
[2]);
5484 addReply(c
,shared
.czero
);
5487 /* Delete from the skiplist */
5488 oldscore
= dictGetEntryVal(de
);
5489 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5490 redisAssert(deleted
!= 0);
5492 /* Delete from the hash table */
5493 dictDelete(zs
->dict
,c
->argv
[2]);
5494 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5495 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5497 addReply(c
,shared
.cone
);
/* Parses c->argv[2] and c->argv[3] as doubles 'min' and 'max', looks up
 * the zset at c->argv[1] for writing, and hands the range to
 * zslDeleteRangeByScore() together with the zset's dict. The dict is
 * resized when htNeedsResize() says so, the key is deleted when the dict
 * is empty, server.dirty grows by the number of deleted elements and that
 * number is sent as the reply. */
5500 static void zremrangebyscoreCommand(redisClient
*c
) {
5507 if ((getDoubleFromObject(c
, c
->argv
[2], &min
) != REDIS_OK
) ||
5508 (getDoubleFromObject(c
, c
->argv
[3], &max
) != REDIS_OK
)) return;
5510 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5511 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5514 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5515 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5516 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5517 server
.dirty
+= deleted
;
5518 addReplyLong(c
,deleted
);
/* Removes the elements whose 0-based rank lies in [start, end], where both
 * bounds come from c->argv[2] and c->argv[3] and may be negative (they are
 * then offset by the skiplist length and clamped at 0). An empty range
 * replies shared.czero; otherwise 'end' is clamped to llen-1 and the range
 * is passed, shifted by one, to zslDeleteRangeByRank(). The bookkeeping
 * after the deletion mirrors zremrangebyscoreCommand(). */
5521 static void zremrangebyrankCommand(redisClient
*c
) {
5529 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5530 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5532 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5533 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5535 llen
= zs
->zsl
->length
;
5537 /* convert negative indexes */
5538 if (start
< 0) start
= llen
+start
;
5539 if (end
< 0) end
= llen
+end
;
5540 if (start
< 0) start
= 0;
5541 if (end
< 0) end
= 0;
5543 /* indexes sanity checks */
5544 if (start
> end
|| start
>= llen
) {
5545 addReply(c
,shared
.czero
);
5548 if (end
>= llen
) end
= llen
-1;
5550 /* increment start and end because zsl*Rank functions
5551 * use 1-based rank */
5552 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5553 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5554 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5555 server
.dirty
+= deleted
;
5556 addReplyLong(c
, deleted
);
5564 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5565 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5566 unsigned long size1
, size2
;
5567 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5568 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5569 return size1
- size2
;
/* Aggregation modes accepted by zunionInterAggregate(). */
5572 #define REDIS_AGGR_SUM 1
5573 #define REDIS_AGGR_MIN 2
5574 #define REDIS_AGGR_MAX 3
/* Folds 'val' into '*target' according to 'aggregate': REDIS_AGGR_SUM adds
 * it, REDIS_AGGR_MIN keeps the smaller of the two values, REDIS_AGGR_MAX
 * keeps the larger one. The trailing redisAssert(0 != 0) always fails;
 * presumably it sits in the final else branch for unknown modes - that
 * branch line is not visible in this listing. */
5576 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5577 if (aggregate
== REDIS_AGGR_SUM
) {
5578 *target
= *target
+ val
;
5579 } else if (aggregate
== REDIS_AGGR_MIN
) {
5580 *target
= val
< *target
? val
: *target
;
5581 } else if (aggregate
== REDIS_AGGR_MAX
) {
5582 *target
= val
> *target
? val
: *target
;
5585 redisAssert(0 != 0);
/* Shared implementation behind zunionCommand() and zinterCommand().
 *
 * c:      client issuing the command; c->argv[2] holds the number of input
 *         keys, the keys themselves start at c->argv[3], optionally
 *         followed by "weights" and/or "aggregate" clauses.
 * dstkey: key under which the resulting zset is stored.
 * op:     REDIS_OP_INTER or REDIS_OP_UNION (anything else trips the
 *         redisAssert near the end).
 *
 * The source dicts are collected in 'src', sorted by cardinality, combined
 * into a freshly created zset, and the result replaces 'dstkey'. The reply
 * is the length of the result, or shared.czero when it is empty.
 * NOTE(review): many lines (declarations, error-path cleanup, several
 * if/else lines) are missing from this listing, so cleanup of 'src' and of
 * skipped 'score' allocations cannot be confirmed from here. */
5589 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5591 int aggregate
= REDIS_AGGR_SUM
;
5598 /* expect zsetnum input keys to be given */
/* NOTE(review): atoi() gives no way to detect a non-numeric argument. */
5599 zsetnum
= atoi(c
->argv
[2]->ptr
);
5601 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5605 /* test if the expected number of keys would overflow */
5606 if (3+zsetnum
> c
->argc
) {
5607 addReply(c
,shared
.syntaxerr
);
5611 /* read keys to be used for input */
5612 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5613 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5614 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5618 if (zsetobj
->type
!= REDIS_ZSET
) {
5620 addReply(c
,shared
.wrongtypeerr
);
5623 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5626 /* default all weights to 1 */
5627 src
[i
].weight
= 1.0;
5630 /* parse optional extra arguments */
5632 int remaining
= c
->argc
- j
;
/* "weights" must be followed by one value per input key. */
5635 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5637 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5638 if (getDoubleFromObject(c
, c
->argv
[j
], &src
[i
].weight
) != REDIS_OK
)
5641 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5643 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5644 aggregate
= REDIS_AGGR_SUM
;
5645 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5646 aggregate
= REDIS_AGGR_MIN
;
5647 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5648 aggregate
= REDIS_AGGR_MAX
;
5651 addReply(c
,shared
.syntaxerr
);
5657 addReply(c
,shared
.syntaxerr
);
5663 /* sort sets from the smallest to largest, this will improve our
5664 * algorithm's performance */
5665 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5667 dstobj
= createZsetObject();
5668 dstzset
= dstobj
->ptr
;
5670 if (op
== REDIS_OP_INTER
) {
5671 /* skip going over all entries if the smallest zset is NULL or empty */
5672 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5673 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5674 * from small to large, all src[i > 0].dict are non-empty too */
5675 di
= dictGetIterator(src
[0].dict
);
5676 while((de
= dictNext(di
)) != NULL
) {
/* Start from the weighted score in the smallest set, then fold in the
 * weighted score found in each of the other sets. */
5677 double *score
= zmalloc(sizeof(double)), value
;
5678 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5680 for (j
= 1; j
< zsetnum
; j
++) {
5681 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5683 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5684 zunionInterAggregate(score
, value
, aggregate
);
5690 /* skip entry when not present in every source dict */
5694 robj
*o
= dictGetEntryKey(de
);
5695 dictAdd(dstzset
->dict
,o
,score
);
5696 incrRefCount(o
); /* added to dictionary */
5697 zslInsert(dstzset
->zsl
,*score
,o
);
5698 incrRefCount(o
); /* added to skiplist */
5701 dictReleaseIterator(di
);
5703 } else if (op
== REDIS_OP_UNION
) {
5704 for (i
= 0; i
< zsetnum
; i
++) {
5705 if (!src
[i
].dict
) continue;
5707 di
= dictGetIterator(src
[i
].dict
);
5708 while((de
= dictNext(di
)) != NULL
) {
5709 /* skip key when already processed */
5710 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5712 double *score
= zmalloc(sizeof(double)), value
;
5713 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5715 /* because the zsets are sorted by size, it's only possible
5716 * for sets at larger indices to hold this entry */
5717 for (j
= (i
+1); j
< zsetnum
; j
++) {
5718 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5720 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5721 zunionInterAggregate(score
, value
, aggregate
);
5725 robj
*o
= dictGetEntryKey(de
);
5726 dictAdd(dstzset
->dict
,o
,score
);
5727 incrRefCount(o
); /* added to dictionary */
5728 zslInsert(dstzset
->zsl
,*score
,o
);
5729 incrRefCount(o
); /* added to skiplist */
5731 dictReleaseIterator(di
);
5734 /* unknown operator */
5735 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
/* Drop whatever was stored at dstkey before installing the result. */
5738 deleteKey(c
->db
,dstkey
);
5739 if (dstzset
->zsl
->length
) {
5740 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5741 incrRefCount(dstkey
);
5742 addReplyLong(c
, dstzset
->zsl
->length
);
5745 decrRefCount(dstobj
);
5746 addReply(c
, shared
.czero
);
/* Runs zunionInterGenericCommand() with c->argv[1] as destination key and
 * REDIS_OP_UNION as the operation. */
5751 static void zunionCommand(redisClient
*c
) {
5752 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
/* Runs zunionInterGenericCommand() with c->argv[1] as destination key and
 * REDIS_OP_INTER as the operation. */
5755 static void zinterCommand(redisClient
*c
) {
5756 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
/* Replies with the elements of the zset at c->argv[1] whose 0-based index
 * lies between c->argv[2] and c->argv[3] (negative indexes are offset by
 * 'llen' and clamped at 0). A fifth argument must be "withscores",
 * otherwise shared.syntaxerr is sent. When 'reverse' is non-zero the list
 * is walked through the 'backward' links, otherwise through forward[0].
 * NOTE(review): declarations, the assignments of 'zsl', 'llen', 'ele' and
 * 'withscores', and the reverse/forward branch lines are not visible in
 * this listing. */
5759 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5771 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5772 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5774 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5776 } else if (c
->argc
>= 5) {
5777 addReply(c
,shared
.syntaxerr
);
5781 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5782 || checkType(c
,o
,REDIS_ZSET
)) return;
5787 /* convert negative indexes */
5788 if (start
< 0) start
= llen
+start
;
5789 if (end
< 0) end
= llen
+end
;
5790 if (start
< 0) start
= 0;
5791 if (end
< 0) end
= 0;
5793 /* indexes sanity checks */
5794 if (start
> end
|| start
>= llen
) {
5795 /* Out of range start or start > end result in empty list */
5796 addReply(c
,shared
.emptymultibulk
);
5799 if (end
>= llen
) end
= llen
-1;
5800 rangelen
= (end
-start
)+1;
5802 /* check if starting point is trivial, before searching
5803 * the element in log(N) time */
/* Two starting-node computations follow: one beginning at zsl->tail (rank
 * llen-start) and one at the first node after the header (rank start+1). */
5805 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5808 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5811 /* Return the result in form of a multi-bulk reply */
5812 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5813 withscores
? (rangelen
*2) : rangelen
));
5814 for (j
= 0; j
< rangelen
; j
++) {
5816 addReplyBulk(c
,ele
);
5818 addReplyDouble(c
,ln
->score
);
5819 ln
= reverse
? ln
->backward
: ln
->forward
[0];
/* Calls zrangeGenericCommand() with reverse == 0. */
5823 static void zrangeCommand(redisClient
*c
) {
5824 zrangeGenericCommand(c
,0);
/* Calls zrangeGenericCommand() with reverse == 1. */
5827 static void zrevrangeCommand(redisClient
*c
) {
5828 zrangeGenericCommand(c
,1);
5831 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5832 * If justcount is non-zero, just the count is returned. */
/* Arguments read from the client: c->argv[1] is the key, c->argv[2] and
 * c->argv[3] the min and max scores (a leading '(' makes the bound
 * exclusive), optionally followed by "limit" offset count and/or a trailing
 * "withscores".
 * NOTE(review): min and max are parsed with strtod(..., NULL) and the
 * LIMIT values with atoi(), so malformed numbers are not rejected by the
 * visible code.
 * NOTE(review): many lines of this function (declarations, else branches,
 * the tail of one comment) are missing from this listing. */
5833 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5836 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5837 int offset
= 0, limit
= -1;
5841 /* Parse the min-max interval. If one of the values is prefixed
5842 * by the "(" character, it's considered "open". For instance
5843 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5844 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5845 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5846 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5849 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5851 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5852 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5855 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5858 /* Parse "WITHSCORES": note that if the command was called with
5859 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5860 * enter the following paths to parse WITHSCORES and LIMIT. */
5861 if (c
->argc
== 5 || c
->argc
== 8) {
5862 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5867 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5871 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5876 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5877 addReply(c
,shared
.syntaxerr
);
5879 } else if (c
->argc
== (7 + withscores
)) {
5880 offset
= atoi(c
->argv
[5]->ptr
);
5881 limit
= atoi(c
->argv
[6]->ptr
);
5882 if (offset
< 0) offset
= 0;
5885 /* Ok, lookup the key and get the range */
5886 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5888 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5890 if (o
->type
!= REDIS_ZSET
) {
5891 addReply(c
,shared
.wrongtypeerr
);
5893 zset
*zsetobj
= o
->ptr
;
5894 zskiplist
*zsl
= zsetobj
->zsl
;
5896 robj
*ele
, *lenobj
= NULL
;
5897 unsigned long rangelen
= 0;
5899 /* Get the first node with the score >= min, or with
5900 * score > min if 'minex' is true. */
5901 ln
= zslFirstWithScore(zsl
,min
);
5902 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5905 /* No element matching the specified interval */
5906 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5910 /* We don't know in advance how many matching elements there
5911 * are in the list, so we push this object that will represent
5912 * the multi-bulk length in the output buffer, and will "fix"
5915 lenobj
= createObject(REDIS_STRING
,NULL
);
5917 decrRefCount(lenobj
);
5920 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5923 ln
= ln
->forward
[0];
5926 if (limit
== 0) break;
5929 addReplyBulk(c
,ele
);
5931 addReplyDouble(c
,ln
->score
);
5933 ln
= ln
->forward
[0];
5935 if (limit
> 0) limit
--;
5938 addReplyLong(c
,(long)rangelen
);
5940 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5941 withscores
? (rangelen
*2) : rangelen
);
/* Calls genericZrangebyscoreCommand() with justcount == 0. */
5947 static void zrangebyscoreCommand(redisClient
*c
) {
5948 genericZrangebyscoreCommand(c
,0);
/* Calls genericZrangebyscoreCommand() with justcount == 1. */
5951 static void zcountCommand(redisClient
*c
) {
5952 genericZrangebyscoreCommand(c
,1);
/* Replies with the skiplist length of the zset stored at c->argv[1].
 * shared.czero is the reply supplied to the lookup helper for a missing
 * key; a non-REDIS_ZSET value makes checkType() stop the command.
 * NOTE(review): the declarations and the assignment of 'zs' are not
 * visible in this listing. */
5955 static void zcardCommand(redisClient
*c
) {
5959 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5960 checkType(c
,o
,REDIS_ZSET
)) return;
5963 addReplyUlong(c
,zs
->zsl
->length
);
/* Looks up member c->argv[2] in the dict of the zset at c->argv[1] and
 * replies with its score as a double. shared.nullbulk is used both as the
 * missing-key reply and in a branch after dictFind().
 * NOTE(review): the condition guarding that second shared.nullbulk reply
 * is not visible here; presumably it is the "member not found" case. */
5966 static void zscoreCommand(redisClient
*c
) {
5971 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5972 checkType(c
,o
,REDIS_ZSET
)) return;
5975 de
= dictFind(zs
->dict
,c
->argv
[2]);
5977 addReply(c
,shared
.nullbulk
);
5979 double *score
= dictGetEntryVal(de
);
5981 addReplyDouble(c
,*score
);
/* Shared body of zrankCommand() and zrevrankCommand(). Finds the score of
 * member c->argv[2] through the zset dict, asks zslGetRank() for its
 * 1-based rank and replies with either zsl->length - rank or rank-1.
 * NOTE(review): the lines selecting between the two replies and guarding
 * the shared.nullbulk replies are not visible; presumably 'reverse' picks
 * the first form - confirm against the full source. */
5985 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5993 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5994 checkType(c
,o
,REDIS_ZSET
)) return;
5998 de
= dictFind(zs
->dict
,c
->argv
[2]);
6000 addReply(c
,shared
.nullbulk
);
6004 score
= dictGetEntryVal(de
);
6005 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6008 addReplyLong(c
, zsl
->length
- rank
);
6010 addReplyLong(c
, rank
-1);
6013 addReply(c
,shared
.nullbulk
);
/* Calls zrankGenericCommand() with reverse == 0. */
6017 static void zrankCommand(redisClient
*c
) {
6018 zrankGenericCommand(c
, 0);
/* Calls zrankGenericCommand() with reverse == 1. */
6021 static void zrevrankCommand(redisClient
*c
) {
6022 zrankGenericCommand(c
, 1);
6025 /* =================================== Hashes =============================== */
/* Sets field c->argv[2] to value c->argv[3] in the hash at c->argv[1].
 * A new hash object is created and registered in c->db->dict on the
 * creation path; a value of another type gets shared.wrongtypeerr. The
 * hash is converted with convertToRealHash() when a raw-encoded field or
 * value is longer than server.hash_max_zipmap_value, or when the zipmap
 * grows past server.hash_max_zipmap_entries. The reply is ":1" when
 * 'update' is 0 and ":0" otherwise.
 * NOTE(review): the declaration of 'update' and several if/else lines are
 * missing from this listing. */
6026 static void hsetCommand(redisClient
*c
) {
6028 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6031 o
= createHashObject();
6032 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6033 incrRefCount(c
->argv
[1]);
6035 if (o
->type
!= REDIS_HASH
) {
6036 addReply(c
,shared
.wrongtypeerr
);
6040 /* We want to convert the zipmap into a hash table right now if the
6041 * entry to be added is too big. Note that we check if the object
6042 * is integer encoded before to try fetching the length in the test below.
6043 * This is because integers are small, but currently stringObjectLen()
6044 * performs a slow conversion: not worth it. */
6045 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
6046 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
6047 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
6048 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
6049 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
6051 convertToRealHash(o
);
6054 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6055 unsigned char *zm
= o
->ptr
;
/* The value is decoded for the zipmap call and released right after. */
6056 robj
*valobj
= getDecodedObject(c
->argv
[3]);
6058 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6059 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
6060 decrRefCount(valobj
);
6063 /* And here there is the second check for hash conversion. */
6064 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6065 convertToRealHash(o
);
6067 c
->argv
[2] = tryObjectEncoding(c
->argv
[2]);
6068 /* note that c->argv[3] is already encoded, as the latest arg
6069 * of a bulk command is always integer encoded if possible. */
6070 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
6071 incrRefCount(c
->argv
[2]);
6075 incrRefCount(c
->argv
[3]);
6078 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
/* Sets several field/value pairs (c->argv[2], c->argv[3], c->argv[4], ...)
 * in the hash at c->argv[1]. An odd c->argc is rejected with the "wrong
 * number of arguments for HMSET" error. The hash is created when the
 * lookup returns NULL and type-checked against REDIS_HASH. Conversion to a
 * real hash table follows the same two size checks as hsetCommand().
 * Replies shared.ok.
 * NOTE(review): the declaration of 'i', the release of the decoded
 * key/val objects and the refcount handling after dictReplace() are not
 * visible in this listing. */
6081 static void hmsetCommand(redisClient
*c
) {
6083 robj
*o
, *key
, *val
;
6085 if ((c
->argc
% 2) == 1) {
6086 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6090 if ((o
= lookupKeyWrite(c
->db
,c
->argv
[1])) == NULL
) {
6091 o
= createHashObject();
6092 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6093 incrRefCount(c
->argv
[1]);
6095 if (o
->type
!= REDIS_HASH
) {
6096 addReply(c
,shared
.wrongtypeerr
);
6101 /* We want to convert the zipmap into a hash table right now if the
6102 * entry to be added is too big. */
6103 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6104 for (i
= 2; i
< c
->argc
; i
+=2) {
6105 if ((c
->argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6106 sdslen(c
->argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) ||
6107 (c
->argv
[i
+1]->encoding
== REDIS_ENCODING_RAW
&&
6108 sdslen(c
->argv
[i
+1]->ptr
) > server
.hash_max_zipmap_value
)) {
6109 convertToRealHash(o
);
6115 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6116 unsigned char *zm
= o
->ptr
;
6118 for (i
= 2; i
< c
->argc
; i
+=2) {
6119 key
= getDecodedObject(c
->argv
[i
]);
6120 val
= getDecodedObject(c
->argv
[i
+1]);
6121 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
6122 val
->ptr
,sdslen(val
->ptr
),NULL
);
6128 /* And here there is the second check for hash conversion. */
6129 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6130 convertToRealHash(o
);
6132 for (i
= 2; i
< c
->argc
; i
+=2) {
6133 key
= tryObjectEncoding(c
->argv
[i
]);
6134 val
= tryObjectEncoding(c
->argv
[i
+1]);
6135 if (dictReplace(o
->ptr
,key
,val
)) {
6142 addReply(c
, shared
.ok
);
/* Increments the integer stored in field c->argv[2] of the hash at
 * c->argv[1] by the amount parsed from c->argv[3], and replies with the
 * resulting value. 'value' starts at 0, so a field that is not found keeps
 * that starting point. Both encodings are handled: for a zipmap the
 * current value is copied into an sds before strtoll(); for a hash table
 * the stored object is read either with strtoll() (raw) or by casting its
 * pointer to long (integer encoded).
 * NOTE(review): the line that adds 'incr' to 'value', the sds frees and
 * several if/else lines are missing from this listing. */
6145 static void hincrbyCommand(redisClient
*c
) {
6146 long long value
= 0, incr
= 0;
6147 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6150 o
= createHashObject();
6151 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6152 incrRefCount(c
->argv
[1]);
6154 if (o
->type
!= REDIS_HASH
) {
6155 addReply(c
,shared
.wrongtypeerr
);
6160 if (getLongLongFromObject(c
, c
->argv
[3], &incr
) != REDIS_OK
) return;
6162 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6163 unsigned char *zm
= o
->ptr
;
6164 unsigned char *zval
;
6167 /* Find value if already present in hash */
6168 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6170 /* strtoll needs the char* to have a trailing \0, but
6171 * the zipmap doesn't include them. */
6172 sds szval
= sdsnewlen(zval
, zvlen
);
6173 value
= strtoll(szval
,NULL
,10);
6178 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6179 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6180 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6184 /* Check if the zipmap needs to be converted. */
6185 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6186 convertToRealHash(o
);
6191 /* Find value if already present in hash */
6192 de
= dictFind(o
->ptr
,c
->argv
[2]);
6194 hval
= dictGetEntryVal(de
);
6195 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6196 value
= strtoll(hval
->ptr
,NULL
,10);
6197 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6198 value
= (long)hval
->ptr
;
6200 redisAssert(1 != 1);
/* Store the new value as a string object, encoded when possible. */
6204 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6205 hval
= tryObjectEncoding(hval
);
6206 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6207 incrRefCount(c
->argv
[2]);
6212 addReplyLongLong(c
, value
);
/* Replies with the value of field c->argv[2] in the hash at c->argv[1].
 * For a zipmap the field is decoded, looked up with zipmapGet() and the
 * value is emitted as a "$<len>" header, the bytes and shared.crlf; the
 * decoded field is released on both the hit and the miss path. For a hash
 * table the entry is fetched with dictFind(). shared.nullbulk is the reply
 * for the miss paths visible here.
 * NOTE(review): the declarations, the else lines and the reply that sends
 * the hash-table value are not visible in this listing. */
6215 static void hgetCommand(redisClient
*c
) {
6218 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6219 checkType(c
,o
,REDIS_HASH
)) return;
6221 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6222 unsigned char *zm
= o
->ptr
;
6227 field
= getDecodedObject(c
->argv
[2]);
6228 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6229 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6230 addReplySds(c
,sdsnewlen(val
,vlen
));
6231 addReply(c
,shared
.crlf
);
6232 decrRefCount(field
);
6235 addReply(c
,shared
.nullbulk
);
6236 decrRefCount(field
);
6240 struct dictEntry
*de
;
6242 de
= dictFind(o
->ptr
,c
->argv
[2]);
6244 addReply(c
,shared
.nullbulk
);
6246 robj
*e
= dictGetEntryVal(de
);
/* Replies with a multi-bulk of c->argc-2 items, one per requested field
 * (c->argv[2] onwards) of the hash at c->argv[1]. The first visible path
 * emits the header followed by shared.nullbulk for every field; presumably
 * this is the missing-key case - the guarding test is not visible here.
 * A value of another type gets shared.wrongtypeerr. Otherwise each field
 * is looked up in the zipmap or the hash table, and shared.nullbulk stands
 * in for fields that are not found. */
6253 static void hmgetCommand(redisClient
*c
) {
6256 robj
*o
= lookupKeyRead(c
->db
, c
->argv
[1]);
6258 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6259 for (i
= 2; i
< c
->argc
; i
++) {
6260 addReply(c
,shared
.nullbulk
);
6264 if (o
->type
!= REDIS_HASH
) {
6265 addReply(c
,shared
.wrongtypeerr
);
6270 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6271 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6272 unsigned char *zm
= o
->ptr
;
6277 for (i
= 2; i
< c
->argc
; i
++) {
6278 field
= getDecodedObject(c
->argv
[i
]);
6279 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
),&v
,&vlen
)) {
6280 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6281 addReplySds(c
,sdsnewlen(v
,vlen
));
6282 addReply(c
,shared
.crlf
);
6284 addReply(c
,shared
.nullbulk
);
6286 decrRefCount(field
);
6291 for (i
= 2; i
< c
->argc
; i
++) {
6292 de
= dictFind(o
->ptr
,c
->argv
[i
]);
6294 addReplyBulk(c
,(robj
*)dictGetEntryVal(de
));
6296 addReply(c
,shared
.nullbulk
);
/* Deletes field c->argv[2] from the hash at c->argv[1]. For a zipmap the
 * field is decoded and removed with zipmapDel(), whose result replaces
 * o->ptr; for a hash table dictDelete() is used and the table is resized
 * when htNeedsResize() says so. In both encodings the key is deleted once
 * the hash is empty. server.dirty is incremented only when something was
 * removed, and the reply is shared.cone or shared.czero accordingly. */
6302 static void hdelCommand(redisClient
*c
) {
6306 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6307 checkType(c
,o
,REDIS_HASH
)) return;
6309 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6310 robj
*field
= getDecodedObject(c
->argv
[2]);
6312 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6313 (unsigned char*) field
->ptr
,
6314 sdslen(field
->ptr
), &deleted
);
6315 decrRefCount(field
);
6316 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6317 deleteKey(c
->db
,c
->argv
[1]);
6319 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6320 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6321 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6323 if (deleted
) server
.dirty
++;
6324 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
/* Replies with the number of fields in the hash at c->argv[1]:
 * zipmapLen() for a zipmap-encoded hash, dictSize() otherwise.
 * shared.czero is the reply supplied to the lookup helper for a missing
 * key. */
6327 static void hlenCommand(redisClient
*c
) {
6331 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6332 checkType(c
,o
,REDIS_HASH
)) return;
6334 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6335 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6336 addReplyUlong(c
,len
);
/* Bit flags selecting what genericHgetallCommand() emits. */
6339 #define REDIS_GETALL_KEYS 1
6340 #define REDIS_GETALL_VALS 2
/* Shared body of hkeysCommand(), hvalsCommand() and hgetallCommand().
 * Iterates over the hash at c->argv[1] (zipmap or hash table) and sends
 * each field when REDIS_GETALL_KEYS is set in 'flags' and each value when
 * REDIS_GETALL_VALS is set. The multi-bulk header is held in 'lenobj',
 * whose ptr is filled in at the end from 'count'.
 * NOTE(review): the lines that queue 'lenobj' on the client, release the
 * temporary 'aux' objects and increment 'count' are not visible in this
 * listing. */
6341 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6343 unsigned long count
= 0;
6345 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6346 || checkType(c
,o
,REDIS_HASH
)) return;
6348 lenobj
= createObject(REDIS_STRING
,NULL
);
6350 decrRefCount(lenobj
);
6352 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6353 unsigned char *p
= zipmapRewind(o
->ptr
);
6354 unsigned char *field
, *val
;
6355 unsigned int flen
, vlen
;
6357 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6360 if (flags
& REDIS_GETALL_KEYS
) {
6361 aux
= createStringObject((char*)field
,flen
);
6362 addReplyBulk(c
,aux
);
6366 if (flags
& REDIS_GETALL_VALS
) {
6367 aux
= createStringObject((char*)val
,vlen
);
6368 addReplyBulk(c
,aux
);
6374 dictIterator
*di
= dictGetIterator(o
->ptr
);
6377 while((de
= dictNext(di
)) != NULL
) {
6378 robj
*fieldobj
= dictGetEntryKey(de
);
6379 robj
*valobj
= dictGetEntryVal(de
);
6381 if (flags
& REDIS_GETALL_KEYS
) {
6382 addReplyBulk(c
,fieldobj
);
6385 if (flags
& REDIS_GETALL_VALS
) {
6386 addReplyBulk(c
,valobj
);
6390 dictReleaseIterator(di
);
6392 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
/* Calls genericHgetallCommand() asking for field names only. */
6395 static void hkeysCommand(redisClient
*c
) {
6396 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
/* Calls genericHgetallCommand() asking for values only. */
6399 static void hvalsCommand(redisClient
*c
) {
6400 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
/* Calls genericHgetallCommand() asking for both field names and values. */
6403 static void hgetallCommand(redisClient
*c
) {
6404 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
/* Replies shared.cone when field c->argv[2] exists in the hash at
 * c->argv[1] and shared.czero otherwise. The test uses zipmapExists() on
 * the decoded field for zipmap-encoded hashes and dictFind() for hash
 * tables. */
6407 static void hexistsCommand(redisClient
*c
) {
6411 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6412 checkType(c
,o
,REDIS_HASH
)) return;
6414 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6416 unsigned char *zm
= o
->ptr
;
6418 field
= getDecodedObject(c
->argv
[2]);
6419 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6420 decrRefCount(field
);
6422 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6424 addReply(c
,exists
? shared
.cone
: shared
.czero
);
/* Converts the hash object 'o' from its zipmap representation to a dict
 * created with hashDictType. The object must be a REDIS_HASH that is not
 * already REDIS_ENCODING_HT (checked with assert). Every zipmap entry is
 * copied into a pair of string objects, passed through
 * tryObjectEncoding(), and added to the new dict; finally the encoding is
 * switched to REDIS_ENCODING_HT.
 * NOTE(review): the lines that install the new dict in o->ptr and release
 * the old zipmap are not visible in this listing. */
6427 static void convertToRealHash(robj
*o
) {
6428 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6429 unsigned int klen
, vlen
;
6430 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6432 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
6433 p
= zipmapRewind(zm
);
6434 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6435 robj
*keyobj
, *valobj
;
6437 keyobj
= createStringObject((char*)key
,klen
);
6438 valobj
= createStringObject((char*)val
,vlen
);
6439 keyobj
= tryObjectEncoding(keyobj
);
6440 valobj
= tryObjectEncoding(valobj
);
6441 dictAdd(dict
,keyobj
,valobj
);
6443 o
->encoding
= REDIS_ENCODING_HT
;
6448 /* ========================= Non type-specific commands ==================== */
/* Empties the client's current database: server.dirty grows by the number
 * of keys in c->db->dict, then both c->db->dict and c->db->expires are
 * emptied and shared.ok is sent. */
6450 static void flushdbCommand(redisClient
*c
) {
6451 server
.dirty
+= dictSize(c
->db
->dict
);
6452 dictEmpty(c
->db
->dict
);
6453 dictEmpty(c
->db
->expires
);
6454 addReply(c
,shared
.ok
);
/* Empties every database via emptyDb() (its return value is added to
 * server.dirty) and replies shared.ok. If a background save child is
 * running (server.bgsavechildpid != -1) it is killed with SIGKILL and its
 * temp file removed; then rdbSave() is called on server.dbfilename.
 * NOTE(review): the return value of rdbSave() is not used by the visible
 * code. */
6457 static void flushallCommand(redisClient
*c
) {
6458 server
.dirty
+= emptyDb();
6459 addReply(c
,shared
.ok
);
6460 if (server
.bgsavechildpid
!= -1) {
6461 kill(server
.bgsavechildpid
,SIGKILL
);
6462 rdbRemoveTempFile(server
.bgsavechildpid
);
6464 rdbSave(server
.dbfilename
);
/* Allocates a redisSortOperation with zmalloc() and stores 'pattern' in
 * it. The pattern pointer is stored as-is; no reference count change is
 * visible here.
 * NOTE(review): the lines that use 'type' and return the new object are
 * not visible in this listing. */
6468 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6469 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6471 so
->pattern
= pattern
;
6475 /* Return the value associated to the key with a name obtained
6476 * substituting the first occurence of '*' in 'pattern' with 'subst' */
6477 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6481 int prefixlen
, sublen
, postfixlen
;
6482 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6486 char buf
[REDIS_SORTKEY_MAX
+1];
6489 /* If the pattern is "#" return the substitution object itself in order
6490 * to implement the "SORT ... GET #" feature. */
6491 spat
= pattern
->ptr
;
6492 if (spat
[0] == '#' && spat
[1] == '\0') {
6496 /* The substitution object may be specially encoded. If so we create
6497 * a decoded object on the fly. Otherwise getDecodedObject will just
6498 * increment the ref count, that we'll decrement later. */
6499 subst
= getDecodedObject(subst
);
6502 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6503 p
= strchr(spat
,'*');
6505 decrRefCount(subst
);
6510 sublen
= sdslen(ssub
);
6511 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6512 memcpy(keyname
.buf
,spat
,prefixlen
);
6513 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6514 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6515 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6516 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6518 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6519 decrRefCount(subst
);
6521 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6522 return lookupKeyRead(db
,&keyobj
);
6525 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6526 * the additional parameter is not standard but a BSD-specific we have to
6527 * pass sorting parameters via the global 'server' structure */
/* s1 and s2 point to redisSortObject elements. With server.sort_alpha
 * unset the precomputed u.score values are compared; otherwise the
 * comparison uses strcoll(), either on the u.cmpobj strings (when
 * server.sort_bypattern is set, with NULL compare objects handled first)
 * or on the decoded element objects. The result is negated when
 * server.sort_desc is set.
 * NOTE(review): the lines assigning 'cmp' in the numeric and NULL branches
 * and the release of dec1/dec2 are not visible in this listing. */
6528 static int sortCompare(const void *s1
, const void *s2
) {
6529 const redisSortObject
*so1
= s1
, *so2
= s2
;
6532 if (!server
.sort_alpha
) {
6533 /* Numeric sorting. Here it's trivial as we precomputed scores */
6534 if (so1
->u
.score
> so2
->u
.score
) {
6536 } else if (so1
->u
.score
< so2
->u
.score
) {
6542 /* Alphanumeric sorting */
6543 if (server
.sort_bypattern
) {
6544 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6545 /* At least one compare object is NULL */
6546 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6548 else if (so1
->u
.cmpobj
== NULL
)
6553 /* We have both the objects, use strcoll */
6554 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6557 /* Compare elements directly */
6560 dec1
= getDecodedObject(so1
->obj
);
6561 dec2
= getDecodedObject(so2
->obj
);
6562 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6567 return server
.sort_desc
? -cmp
: cmp
;
6570 /* The SORT command is the most complex command in Redis. Warning: this code
6571 * is optimized for speed and a bit less for readability */
6572 static void sortCommand(redisClient
*c
) {
6575 int desc
= 0, alpha
= 0;
6576 int limit_start
= 0, limit_count
= -1, start
, end
;
6577 int j
, dontsort
= 0, vectorlen
;
6578 int getop
= 0; /* GET operation counter */
6579 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6580 redisSortObject
*vector
; /* Resulting vector to sort */
6582 /* Lookup the key to sort. It must be of the right types */
6583 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6584 if (sortval
== NULL
) {
6585 addReply(c
,shared
.emptymultibulk
);
6588 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6589 sortval
->type
!= REDIS_ZSET
)
6591 addReply(c
,shared
.wrongtypeerr
);
6595 /* Create a list of operations to perform for every sorted element.
6596 * Operations can be GET/DEL/INCR/DECR */
6597 operations
= listCreate();
6598 listSetFreeMethod(operations
,zfree
);
6601 /* Now we need to protect sortval incrementing its count, in the future
6602 * SORT may have options able to overwrite/delete keys during the sorting
6603 * and the sorted key itself may get destroyed */
6604 incrRefCount(sortval
);
6606 /* The SORT command has an SQL-alike syntax, parse it */
6607 while(j
< c
->argc
) {
6608 int leftargs
= c
->argc
-j
-1;
6609 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6611 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6613 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6615 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6616 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6617 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6619 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6620 storekey
= c
->argv
[j
+1];
6622 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6623 sortby
= c
->argv
[j
+1];
6624 /* If the BY pattern does not contain '*', i.e. it is constant,
6625 * we don't need to sort nor to lookup the weight keys. */
6626 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6628 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6629 listAddNodeTail(operations
,createSortOperation(
6630 REDIS_SORT_GET
,c
->argv
[j
+1]));
6634 decrRefCount(sortval
);
6635 listRelease(operations
);
6636 addReply(c
,shared
.syntaxerr
);
6642 /* Load the sorting vector with all the objects to sort */
6643 switch(sortval
->type
) {
6644 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6645 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6646 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6647 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6649 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6652 if (sortval
->type
== REDIS_LIST
) {
6653 list
*list
= sortval
->ptr
;
6657 listRewind(list
,&li
);
6658 while((ln
= listNext(&li
))) {
6659 robj
*ele
= ln
->value
;
6660 vector
[j
].obj
= ele
;
6661 vector
[j
].u
.score
= 0;
6662 vector
[j
].u
.cmpobj
= NULL
;
6670 if (sortval
->type
== REDIS_SET
) {
6673 zset
*zs
= sortval
->ptr
;
6677 di
= dictGetIterator(set
);
6678 while((setele
= dictNext(di
)) != NULL
) {
6679 vector
[j
].obj
= dictGetEntryKey(setele
);
6680 vector
[j
].u
.score
= 0;
6681 vector
[j
].u
.cmpobj
= NULL
;
6684 dictReleaseIterator(di
);
6686 redisAssert(j
== vectorlen
);
6688 /* Now it's time to load the right scores in the sorting vector */
6689 if (dontsort
== 0) {
6690 for (j
= 0; j
< vectorlen
; j
++) {
6694 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6695 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6697 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6699 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6700 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6702 /* Don't need to decode the object if it's
6703 * integer-encoded (the only encoding supported) so
6704 * far. We can just cast it */
6705 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6706 vector
[j
].u
.score
= (long)byval
->ptr
;
6708 redisAssert(1 != 1);
6713 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6714 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6716 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6717 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6719 redisAssert(1 != 1);
6726 /* We are ready to sort the vector... perform a bit of sanity check
6727 * on the LIMIT option too. We'll use a partial version of quicksort. */
6728 start
= (limit_start
< 0) ? 0 : limit_start
;
6729 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6730 if (start
>= vectorlen
) {
6731 start
= vectorlen
-1;
6734 if (end
>= vectorlen
) end
= vectorlen
-1;
6736 if (dontsort
== 0) {
6737 server
.sort_desc
= desc
;
6738 server
.sort_alpha
= alpha
;
6739 server
.sort_bypattern
= sortby
? 1 : 0;
6740 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6741 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6743 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6746 /* Send command output to the output buffer, performing the specified
6747 * GET/DEL/INCR/DECR operations if any. */
6748 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6749 if (storekey
== NULL
) {
6750 /* STORE option not specified, sent the sorting result to client */
6751 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6752 for (j
= start
; j
<= end
; j
++) {
6756 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6757 listRewind(operations
,&li
);
6758 while((ln
= listNext(&li
))) {
6759 redisSortOperation
*sop
= ln
->value
;
6760 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6763 if (sop
->type
== REDIS_SORT_GET
) {
6764 if (!val
|| val
->type
!= REDIS_STRING
) {
6765 addReply(c
,shared
.nullbulk
);
6767 addReplyBulk(c
,val
);
6770 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6775 robj
*listObject
= createListObject();
6776 list
*listPtr
= (list
*) listObject
->ptr
;
6778 /* STORE option specified, set the sorting result as a List object */
6779 for (j
= start
; j
<= end
; j
++) {
6784 listAddNodeTail(listPtr
,vector
[j
].obj
);
6785 incrRefCount(vector
[j
].obj
);
6787 listRewind(operations
,&li
);
6788 while((ln
= listNext(&li
))) {
6789 redisSortOperation
*sop
= ln
->value
;
6790 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6793 if (sop
->type
== REDIS_SORT_GET
) {
6794 if (!val
|| val
->type
!= REDIS_STRING
) {
6795 listAddNodeTail(listPtr
,createStringObject("",0));
6797 listAddNodeTail(listPtr
,val
);
6801 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6805 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6806 incrRefCount(storekey
);
6808 /* Note: we add 1 because the DB is dirty anyway since even if the
6809 * SORT result is empty a new key is set and maybe the old content
6811 server
.dirty
+= 1+outputlen
;
6812 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6816 decrRefCount(sortval
);
6817 listRelease(operations
);
6818 for (j
= 0; j
< vectorlen
; j
++) {
6819 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6820 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the destination buffer and must be large enough for the result
 * plus the nul terminator (the longest output, for the maximum
 * unsigned long long value, is "16777216.00T").
 *
 * Values below 1024 are printed as an exact byte count; larger values are
 * printed with two decimals in K, M, G or T. Everything from one terabyte
 * upward is reported in terabytes, so 's' is always written. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else {
        /* Without this branch 's' was left untouched for huge values. */
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    }
}
6846 /* Create the string returned by the INFO command. This is decoupled
6847 * by the INFO command itself as we need to report the same information
6848 * on memory corruption problems. */
6849 static sds
genRedisInfoString(void) {
6851 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6855 bytesToHuman(hmem
,zmalloc_used_memory());
6856 info
= sdscatprintf(sdsempty(),
6857 "redis_version:%s\r\n"
6859 "multiplexing_api:%s\r\n"
6860 "process_id:%ld\r\n"
6861 "uptime_in_seconds:%ld\r\n"
6862 "uptime_in_days:%ld\r\n"
6863 "connected_clients:%d\r\n"
6864 "connected_slaves:%d\r\n"
6865 "blocked_clients:%d\r\n"
6866 "used_memory:%zu\r\n"
6867 "used_memory_human:%s\r\n"
6868 "changes_since_last_save:%lld\r\n"
6869 "bgsave_in_progress:%d\r\n"
6870 "last_save_time:%ld\r\n"
6871 "bgrewriteaof_in_progress:%d\r\n"
6872 "total_connections_received:%lld\r\n"
6873 "total_commands_processed:%lld\r\n"
6874 "expired_keys:%lld\r\n"
6875 "hash_max_zipmap_entries:%ld\r\n"
6876 "hash_max_zipmap_value:%ld\r\n"
6877 "pubsub_channels:%ld\r\n"
6878 "pubsub_patterns:%u\r\n"
6882 (sizeof(long) == 8) ? "64" : "32",
6887 listLength(server
.clients
)-listLength(server
.slaves
),
6888 listLength(server
.slaves
),
6889 server
.blpop_blocked_clients
,
6890 zmalloc_used_memory(),
6893 server
.bgsavechildpid
!= -1,
6895 server
.bgrewritechildpid
!= -1,
6896 server
.stat_numconnections
,
6897 server
.stat_numcommands
,
6898 server
.stat_expiredkeys
,
6899 server
.hash_max_zipmap_entries
,
6900 server
.hash_max_zipmap_value
,
6901 dictSize(server
.pubsub_channels
),
6902 listLength(server
.pubsub_patterns
),
6903 server
.vm_enabled
!= 0,
6904 server
.masterhost
== NULL
? "master" : "slave"
6906 if (server
.masterhost
) {
6907 info
= sdscatprintf(info
,
6908 "master_host:%s\r\n"
6909 "master_port:%d\r\n"
6910 "master_link_status:%s\r\n"
6911 "master_last_io_seconds_ago:%d\r\n"
6914 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6916 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6919 if (server
.vm_enabled
) {
6921 info
= sdscatprintf(info
,
6922 "vm_conf_max_memory:%llu\r\n"
6923 "vm_conf_page_size:%llu\r\n"
6924 "vm_conf_pages:%llu\r\n"
6925 "vm_stats_used_pages:%llu\r\n"
6926 "vm_stats_swapped_objects:%llu\r\n"
6927 "vm_stats_swappin_count:%llu\r\n"
6928 "vm_stats_swappout_count:%llu\r\n"
6929 "vm_stats_io_newjobs_len:%lu\r\n"
6930 "vm_stats_io_processing_len:%lu\r\n"
6931 "vm_stats_io_processed_len:%lu\r\n"
6932 "vm_stats_io_active_threads:%lu\r\n"
6933 "vm_stats_blocked_clients:%lu\r\n"
6934 ,(unsigned long long) server
.vm_max_memory
,
6935 (unsigned long long) server
.vm_page_size
,
6936 (unsigned long long) server
.vm_pages
,
6937 (unsigned long long) server
.vm_stats_used_pages
,
6938 (unsigned long long) server
.vm_stats_swapped_objects
,
6939 (unsigned long long) server
.vm_stats_swapins
,
6940 (unsigned long long) server
.vm_stats_swapouts
,
6941 (unsigned long) listLength(server
.io_newjobs
),
6942 (unsigned long) listLength(server
.io_processing
),
6943 (unsigned long) listLength(server
.io_processed
),
6944 (unsigned long) server
.io_active_threads
,
6945 (unsigned long) server
.vm_blocked_clients
6949 for (j
= 0; j
< server
.dbnum
; j
++) {
6950 long long keys
, vkeys
;
6952 keys
= dictSize(server
.db
[j
].dict
);
6953 vkeys
= dictSize(server
.db
[j
].expires
);
6954 if (keys
|| vkeys
) {
6955 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6962 static void infoCommand(redisClient
*c
) {
6963 sds info
= genRedisInfoString();
6964 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6965 (unsigned long)sdslen(info
)));
6966 addReplySds(c
,info
);
6967 addReply(c
,shared
.crlf
);
6970 static void monitorCommand(redisClient
*c
) {
6971 /* ignore MONITOR if aleady slave or in monitor mode */
6972 if (c
->flags
& REDIS_SLAVE
) return;
6974 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6976 listAddNodeTail(server
.monitors
,c
);
6977 addReply(c
,shared
.ok
);
6980 /* ================================= Expire ================================= */
6981 static int removeExpire(redisDb
*db
, robj
*key
) {
6982 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6989 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6990 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
6998 /* Return the expire time of the specified key, or -1 if no expire
6999 * is associated with this key (i.e. the key is non volatile) */
7000 static time_t getExpire(redisDb
*db
, robj
*key
) {
7003 /* No expire? return ASAP */
7004 if (dictSize(db
->expires
) == 0 ||
7005 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7007 return (time_t) dictGetEntryVal(de
);
7010 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7014 /* No expire? return ASAP */
7015 if (dictSize(db
->expires
) == 0 ||
7016 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7018 /* Lookup the expire */
7019 when
= (time_t) dictGetEntryVal(de
);
7020 if (time(NULL
) <= when
) return 0;
7022 /* Delete the key */
7023 dictDelete(db
->expires
,key
);
7024 server
.stat_expiredkeys
++;
7025 return dictDelete(db
->dict
,key
) == DICT_OK
;
7028 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7031 /* No expire? return ASAP */
7032 if (dictSize(db
->expires
) == 0 ||
7033 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7035 /* Delete the key */
7037 server
.stat_expiredkeys
++;
7038 dictDelete(db
->expires
,key
);
7039 return dictDelete(db
->dict
,key
) == DICT_OK
;
7042 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7046 if (getLongFromObject(c
, param
, &seconds
) != REDIS_OK
) return;
7050 de
= dictFind(c
->db
->dict
,key
);
7052 addReply(c
,shared
.czero
);
7056 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7057 addReply(c
, shared
.cone
);
7060 time_t when
= time(NULL
)+seconds
;
7061 if (setExpire(c
->db
,key
,when
)) {
7062 addReply(c
,shared
.cone
);
7065 addReply(c
,shared
.czero
);
7071 static void expireCommand(redisClient
*c
) {
7072 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7075 static void expireatCommand(redisClient
*c
) {
7076 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7079 static void ttlCommand(redisClient
*c
) {
7083 expire
= getExpire(c
->db
,c
->argv
[1]);
7085 ttl
= (int) (expire
-time(NULL
));
7086 if (ttl
< 0) ttl
= -1;
7088 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7091 /* ================================ MULTI/EXEC ============================== */
7093 /* Client state initialization for MULTI/EXEC */
7094 static void initClientMultiState(redisClient
*c
) {
7095 c
->mstate
.commands
= NULL
;
7096 c
->mstate
.count
= 0;
7099 /* Release all the resources associated with MULTI/EXEC state */
7100 static void freeClientMultiState(redisClient
*c
) {
7103 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7105 multiCmd
*mc
= c
->mstate
.commands
+j
;
7107 for (i
= 0; i
< mc
->argc
; i
++)
7108 decrRefCount(mc
->argv
[i
]);
7111 zfree(c
->mstate
.commands
);
7114 /* Add a new command into the MULTI commands queue */
7115 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7119 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7120 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7121 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7124 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7125 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7126 for (j
= 0; j
< c
->argc
; j
++)
7127 incrRefCount(mc
->argv
[j
]);
7131 static void multiCommand(redisClient
*c
) {
7132 c
->flags
|= REDIS_MULTI
;
7133 addReply(c
,shared
.ok
);
7136 static void discardCommand(redisClient
*c
) {
7137 if (!(c
->flags
& REDIS_MULTI
)) {
7138 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7142 freeClientMultiState(c
);
7143 initClientMultiState(c
);
7144 c
->flags
&= (~REDIS_MULTI
);
7145 addReply(c
,shared
.ok
);
7148 static void execCommand(redisClient
*c
) {
7153 if (!(c
->flags
& REDIS_MULTI
)) {
7154 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7158 orig_argv
= c
->argv
;
7159 orig_argc
= c
->argc
;
7160 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7161 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7162 c
->argc
= c
->mstate
.commands
[j
].argc
;
7163 c
->argv
= c
->mstate
.commands
[j
].argv
;
7164 call(c
,c
->mstate
.commands
[j
].cmd
);
7166 c
->argv
= orig_argv
;
7167 c
->argc
= orig_argc
;
7168 freeClientMultiState(c
);
7169 initClientMultiState(c
);
7170 c
->flags
&= (~REDIS_MULTI
);
7173 /* =========================== Blocking Operations ========================= */
7175 /* Currently Redis blocking operations support is limited to list POP ops,
7176 * so the current implementation is not fully generic, but it is also not
7177 * completely specific so it will not require a rewrite to support new
7178 * kind of blocking operations in the future.
7180 * Still it's important to note that list blocking operations can be already
7181 * used as a notification mechanism in order to implement other blocking
7182 * operations at application level, so there must be a very strong evidence
7183 * of usefulness and generality before new blocking operations are implemented.
7185 * This is how the current blocking POP works, we use BLPOP as example:
7186 * - If the user calls BLPOP and the key exists and contains a non empty list
7187 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
7190 * empty we need to block. In order to do so we remove the notification for
7191 * new data to read in the client socket (so that we'll not serve new
7192 * requests if the blocking request is not served). Also we put the client
7193 * in a dictionary (db->blockingkeys) mapping keys to a list of clients
7194 * blocking for this keys.
7195 * - If a PUSH operation against a key with blocked clients waiting is
7196 * performed, we serve the first in the list: basically instead to push
7197 * the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
7200 * The above comment and the source code should be enough in order to understand
7201 * the implementation and modify / fix it later.
7204 /* Set a client in blocking mode for the specified key, with the specified
7206 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7211 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7212 c
->blockingkeysnum
= numkeys
;
7213 c
->blockingto
= timeout
;
7214 for (j
= 0; j
< numkeys
; j
++) {
7215 /* Add the key in the client structure, to map clients -> keys */
7216 c
->blockingkeys
[j
] = keys
[j
];
7217 incrRefCount(keys
[j
]);
7219 /* And in the other "side", to map keys -> clients */
7220 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7224 /* For every key we take a list of clients blocked for it */
7226 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7227 incrRefCount(keys
[j
]);
7228 assert(retval
== DICT_OK
);
7230 l
= dictGetEntryVal(de
);
7232 listAddNodeTail(l
,c
);
7234 /* Mark the client as a blocked client */
7235 c
->flags
|= REDIS_BLOCKED
;
7236 server
.blpop_blocked_clients
++;
7239 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7240 static void unblockClientWaitingData(redisClient
*c
) {
7245 assert(c
->blockingkeys
!= NULL
);
7246 /* The client may wait for multiple keys, so unblock it for every key. */
7247 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7248 /* Remove this client from the list of clients waiting for this key. */
7249 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7251 l
= dictGetEntryVal(de
);
7252 listDelNode(l
,listSearchKey(l
,c
));
7253 /* If the list is empty we need to remove it to avoid wasting memory */
7254 if (listLength(l
) == 0)
7255 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7256 decrRefCount(c
->blockingkeys
[j
]);
7258 /* Cleanup the client structure */
7259 zfree(c
->blockingkeys
);
7260 c
->blockingkeys
= NULL
;
7261 c
->flags
&= (~REDIS_BLOCKED
);
7262 server
.blpop_blocked_clients
--;
7263 /* We want to process data if there is some command waiting
7264 * in the input buffer. Note that this is safe even if
7265 * unblockClientWaitingData() gets called from freeClient() because
7266 * freeClient() will be smart enough to call this function
7267 * *after* c->querybuf was set to NULL. */
7268 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7271 /* This should be called from any function PUSHing into lists.
7272 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7273 * 'ele' is the element pushed.
7275 * If the function returns 0 there was no client waiting for a list push
7278 * If the function returns 1 there was a client waiting for a list push
7279 * against this key, the element was passed to this client thus it's not
7280 * needed to actually add it to the list and the caller should return asap. */
7281 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7282 struct dictEntry
*de
;
7283 redisClient
*receiver
;
7287 de
= dictFind(c
->db
->blockingkeys
,key
);
7288 if (de
== NULL
) return 0;
7289 l
= dictGetEntryVal(de
);
7292 receiver
= ln
->value
;
7294 addReplySds(receiver
,sdsnew("*2\r\n"));
7295 addReplyBulk(receiver
,key
);
7296 addReplyBulk(receiver
,ele
);
7297 unblockClientWaitingData(receiver
);
7301 /* Blocking RPOP/LPOP */
7302 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7307 for (j
= 1; j
< c
->argc
-1; j
++) {
7308 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7310 if (o
->type
!= REDIS_LIST
) {
7311 addReply(c
,shared
.wrongtypeerr
);
7314 list
*list
= o
->ptr
;
7315 if (listLength(list
) != 0) {
7316 /* If the list contains elements fall back to the usual
7317 * non-blocking POP operation */
7318 robj
*argv
[2], **orig_argv
;
7321 /* We need to alter the command arguments before to call
7322 * popGenericCommand() as the command takes a single key. */
7323 orig_argv
= c
->argv
;
7324 orig_argc
= c
->argc
;
7325 argv
[1] = c
->argv
[j
];
7329 /* Also the return value is different, we need to output
7330 * the multi bulk reply header and the key name. The
7331 * "real" command will add the last element (the value)
7332 * for us. If this souds like an hack to you it's just
7333 * because it is... */
7334 addReplySds(c
,sdsnew("*2\r\n"));
7335 addReplyBulk(c
,argv
[1]);
7336 popGenericCommand(c
,where
);
7338 /* Fix the client structure with the original stuff */
7339 c
->argv
= orig_argv
;
7340 c
->argc
= orig_argc
;
7346 /* If the list is empty or the key does not exists we must block */
7347 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7348 if (timeout
> 0) timeout
+= time(NULL
);
7349 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7352 static void blpopCommand(redisClient
*c
) {
7353 blockingPopGenericCommand(c
,REDIS_HEAD
);
7356 static void brpopCommand(redisClient
*c
) {
7357 blockingPopGenericCommand(c
,REDIS_TAIL
);
7360 /* =============================== Replication ============================= */
7362 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7363 ssize_t nwritten
, ret
= size
;
7364 time_t start
= time(NULL
);
7368 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7369 nwritten
= write(fd
,ptr
,size
);
7370 if (nwritten
== -1) return -1;
7374 if ((time(NULL
)-start
) > timeout
) {
7382 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7383 ssize_t nread
, totread
= 0;
7384 time_t start
= time(NULL
);
7388 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7389 nread
= read(fd
,ptr
,size
);
7390 if (nread
== -1) return -1;
7395 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time into 'ptr', a buffer of
 * 'size' bytes. 'timeout' is passed to syncRead() for every byte.
 *
 * The line terminator ("\n", optionally preceded by "\r") is not stored
 * and the buffer is always nul terminated. At most 'size'-1 characters
 * are stored: the remaining room is decremented for every character, so
 * an overlong line can no longer be written past the end of the buffer.
 *
 * Returns the number of characters stored, or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve room for the nul terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        size--;
    }
    return nread;
}
7424 static void syncCommand(redisClient
*c
) {
7425 /* ignore SYNC if aleady slave or in monitor mode */
7426 if (c
->flags
& REDIS_SLAVE
) return;
7428 /* SYNC can't be issued when the server has pending data to send to
7429 * the client about already issued commands. We need a fresh reply
7430 * buffer registering the differences between the BGSAVE and the current
7431 * dataset, so that we can copy to other slaves if needed. */
7432 if (listLength(c
->reply
) != 0) {
7433 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7437 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7438 /* Here we need to check if there is a background saving operation
7439 * in progress, or if it is required to start one */
7440 if (server
.bgsavechildpid
!= -1) {
7441 /* Ok a background save is in progress. Let's check if it is a good
7442 * one for replication, i.e. if there is another slave that is
7443 * registering differences since the server forked to save */
7448 listRewind(server
.slaves
,&li
);
7449 while((ln
= listNext(&li
))) {
7451 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7454 /* Perfect, the server is already registering differences for
7455 * another slave. Set the right state, and copy the buffer. */
7456 listRelease(c
->reply
);
7457 c
->reply
= listDup(slave
->reply
);
7458 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7459 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7461 /* No way, we need to wait for the next BGSAVE in order to
7462 * register differences */
7463 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7464 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7467 /* Ok we don't have a BGSAVE in progress, let's start one */
7468 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7469 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7470 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7471 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7474 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7477 c
->flags
|= REDIS_SLAVE
;
7479 listAddNodeTail(server
.slaves
,c
);
7483 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7484 redisClient
*slave
= privdata
;
7486 REDIS_NOTUSED(mask
);
7487 char buf
[REDIS_IOBUF_LEN
];
7488 ssize_t nwritten
, buflen
;
7490 if (slave
->repldboff
== 0) {
7491 /* Write the bulk write count before to transfer the DB. In theory here
7492 * we don't know how much room there is in the output buffer of the
7493 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7494 * operations) will never be smaller than the few bytes we need. */
7497 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7499 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7507 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7508 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7510 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7511 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7515 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7516 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7521 slave
->repldboff
+= nwritten
;
7522 if (slave
->repldboff
== slave
->repldbsize
) {
7523 close(slave
->repldbfd
);
7524 slave
->repldbfd
= -1;
7525 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7526 slave
->replstate
= REDIS_REPL_ONLINE
;
7527 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7528 sendReplyToClient
, slave
) == AE_ERR
) {
7532 addReplySds(slave
,sdsempty());
7533 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7537 /* This function is called at the end of every backgrond saving.
7538 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7539 * otherwise REDIS_ERR is passed to the function.
7541 * The goal of this function is to handle slaves waiting for a successful
7542 * background saving in order to perform non-blocking synchronization. */
7543 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7545 int startbgsave
= 0;
7548 listRewind(server
.slaves
,&li
);
7549 while((ln
= listNext(&li
))) {
7550 redisClient
*slave
= ln
->value
;
7552 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7554 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7555 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7556 struct redis_stat buf
;
7558 if (bgsaveerr
!= REDIS_OK
) {
7560 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7563 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7564 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7566 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7569 slave
->repldboff
= 0;
7570 slave
->repldbsize
= buf
.st_size
;
7571 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7572 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7573 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7580 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7583 listRewind(server
.slaves
,&li
);
7584 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7585 while((ln
= listNext(&li
))) {
7586 redisClient
*slave
= ln
->value
;
7588 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7595 static int syncWithMaster(void) {
7596 char buf
[1024], tmpfile
[256], authcmd
[1024];
7598 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7599 int dfd
, maxtries
= 5;
7602 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7607 /* AUTH with the master if required. */
7608 if(server
.masterauth
) {
7609 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7610 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7612 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7616 /* Read the AUTH result. */
7617 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7619 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7623 if (buf
[0] != '+') {
7625 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7630 /* Issue the SYNC command */
7631 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7633 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7637 /* Read the bulk write count */
7638 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7640 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7644 if (buf
[0] != '$') {
7646 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7649 dumpsize
= strtol(buf
+1,NULL
,10);
7650 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7651 /* Read the bulk write data on a temp file */
7653 snprintf(tmpfile
,256,
7654 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7655 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7656 if (dfd
!= -1) break;
7661 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7665 int nread
, nwritten
;
7667 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7669 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7675 nwritten
= write(dfd
,buf
,nread
);
7676 if (nwritten
== -1) {
7677 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7685 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7686 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7692 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7693 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7697 server
.master
= createClient(fd
);
7698 server
.master
->flags
|= REDIS_MASTER
;
7699 server
.master
->authenticated
= 1;
7700 server
.replstate
= REDIS_REPL_CONNECTED
;
7704 static void slaveofCommand(redisClient
*c
) {
7705 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7706 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7707 if (server
.masterhost
) {
7708 sdsfree(server
.masterhost
);
7709 server
.masterhost
= NULL
;
7710 if (server
.master
) freeClient(server
.master
);
7711 server
.replstate
= REDIS_REPL_NONE
;
7712 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7715 sdsfree(server
.masterhost
);
7716 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7717 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7718 if (server
.master
) freeClient(server
.master
);
7719 server
.replstate
= REDIS_REPL_CONNECT
;
7720 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7721 server
.masterhost
, server
.masterport
);
7723 addReply(c
,shared
.ok
);
7726 /* ============================ Maxmemory directive ======================== */
7728 /* Try to free one object form the pre-allocated objects free list.
7729 * This is useful under low mem conditions as by default we take 1 million
7730 * free objects allocated. On success REDIS_OK is returned, otherwise
7732 static int tryFreeOneObjectFromFreelist(void) {
7735 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7736 if (listLength(server
.objfreelist
)) {
7737 listNode
*head
= listFirst(server
.objfreelist
);
7738 o
= listNodeValue(head
);
7739 listDelNode(server
.objfreelist
,head
);
7740 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7744 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7749 /* This function gets called when 'maxmemory' is set on the config file to limit
7750 * the max memory used by the server, and we are out of memory.
7751 * This function will try to, in order:
7753 * - Free objects from the free list
7754 * - Try to remove keys with an EXPIRE set
7756 * It is not possible to free enough memory to reach used-memory < maxmemory
7757 * the server will start refusing commands that will enlarge even more the
7760 static void freeMemoryIfNeeded(void) {
7761 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7762 int j
, k
, freed
= 0;
7764 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7765 for (j
= 0; j
< server
.dbnum
; j
++) {
7767 robj
*minkey
= NULL
;
7768 struct dictEntry
*de
;
7770 if (dictSize(server
.db
[j
].expires
)) {
7772 /* From a sample of three keys drop the one nearest to
7773 * the natural expire */
7774 for (k
= 0; k
< 3; k
++) {
7777 de
= dictGetRandomKey(server
.db
[j
].expires
);
7778 t
= (time_t) dictGetEntryVal(de
);
7779 if (minttl
== -1 || t
< minttl
) {
7780 minkey
= dictGetEntryKey(de
);
7784 deleteKey(server
.db
+j
,minkey
);
7787 if (!freed
) return; /* nothing to free... */
7791 /* ============================== Append Only file ========================== */
7793 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7794 sds buf
= sdsempty();
7800 /* The DB this command was targetting is not the same as the last command
7801 * we appendend. To issue a SELECT command is needed. */
7802 if (dictid
!= server
.appendseldb
) {
7805 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7806 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7807 (unsigned long)strlen(seldb
),seldb
);
7808 server
.appendseldb
= dictid
;
7811 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7812 * EXPIREs into EXPIREATs calls */
7813 if (cmd
->proc
== expireCommand
) {
7816 tmpargv
[0] = createStringObject("EXPIREAT",8);
7817 tmpargv
[1] = argv
[1];
7818 incrRefCount(argv
[1]);
7819 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7820 tmpargv
[2] = createObject(REDIS_STRING
,
7821 sdscatprintf(sdsempty(),"%ld",when
));
7825 /* Append the actual command */
7826 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7827 for (j
= 0; j
< argc
; j
++) {
7830 o
= getDecodedObject(o
);
7831 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7832 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7833 buf
= sdscatlen(buf
,"\r\n",2);
7837 /* Free the objects from the modified argv for EXPIREAT */
7838 if (cmd
->proc
== expireCommand
) {
7839 for (j
= 0; j
< 3; j
++)
7840 decrRefCount(argv
[j
]);
7843 /* We want to perform a single write. This should be guaranteed atomic
7844 * at least if the filesystem we are writing is a real physical one.
7845 * While this will save us against the server being killed I don't think
7846 * there is much to do about the whole server stopping for power problems
7848 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7849 if (nwritten
!= (signed)sdslen(buf
)) {
7850 /* Ooops, we are in troubles. The best thing to do for now is
7851 * to simply exit instead to give the illusion that everything is
7852 * working as expected. */
7853 if (nwritten
== -1) {
7854 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7856 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7860 /* If a background append only file rewriting is in progress we want to
7861 * accumulate the differences between the child DB and the current one
7862 * in a buffer, so that when the child process will do its work we
7863 * can append the differences to the new append only file. */
7864 if (server
.bgrewritechildpid
!= -1)
7865 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7869 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7870 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7871 now
-server
.lastfsync
> 1))
7873 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7874 server
.lastfsync
= now
;
7878 /* In Redis commands are always executed in the context of a client, so in
7879 * order to load the append only file we need to create a fake client. */
7880 static struct redisClient
*createFakeClient(void) {
7881 struct redisClient
*c
= zmalloc(sizeof(*c
));
7885 c
->querybuf
= sdsempty();
7889 /* We set the fake client as a slave waiting for the synchronization
7890 * so that Redis will not try to send replies to this client. */
7891 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7892 c
->reply
= listCreate();
7893 listSetFreeMethod(c
->reply
,decrRefCount
);
7894 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7898 static void freeFakeClient(struct redisClient
*c
) {
7899 sdsfree(c
->querybuf
);
7900 listRelease(c
->reply
);
7904 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
7905 * error (the append only file is zero-length) REDIS_ERR is returned. On
7906 * fatal error an error message is logged and the program exists. */
7907 int loadAppendOnlyFile(char *filename
) {
7908 struct redisClient
*fakeClient
;
7909 FILE *fp
= fopen(filename
,"r");
7910 struct redis_stat sb
;
7911 unsigned long long loadedkeys
= 0;
7913 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7917 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7921 fakeClient
= createFakeClient();
7928 struct redisCommand
*cmd
;
7930 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7936 if (buf
[0] != '*') goto fmterr
;
7938 argv
= zmalloc(sizeof(robj
*)*argc
);
7939 for (j
= 0; j
< argc
; j
++) {
7940 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7941 if (buf
[0] != '$') goto fmterr
;
7942 len
= strtol(buf
+1,NULL
,10);
7943 argsds
= sdsnewlen(NULL
,len
);
7944 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7945 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7946 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7949 /* Command lookup */
7950 cmd
= lookupCommand(argv
[0]->ptr
);
7952 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7955 /* Try object encoding */
7956 if (cmd
->flags
& REDIS_CMD_BULK
)
7957 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
7958 /* Run the command in the context of a fake client */
7959 fakeClient
->argc
= argc
;
7960 fakeClient
->argv
= argv
;
7961 cmd
->proc(fakeClient
);
7962 /* Discard the reply objects list from the fake client */
7963 while(listLength(fakeClient
->reply
))
7964 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7965 /* Clean up, ready for the next command */
7966 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7968 /* Handle swapping while loading big datasets when VM is on */
7970 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7971 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7972 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7977 freeFakeClient(fakeClient
);
7982 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7984 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7988 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7992 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7993 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
7997 /* Avoid the incr/decr ref count business if possible to help
7998 * copy-on-write (we are often in a child process when this function
8000 * Also makes sure that key objects don't get incrRefCount-ed when VM
8002 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8003 obj
= getDecodedObject(obj
);
8006 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8007 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8008 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8010 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8011 if (decrrc
) decrRefCount(obj
);
8014 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (it is not read when len is 0).
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* The length is unsigned: it must be printed with %lu, not %ld. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    /* fwrite() of zero bytes returns 0, so skip empty payloads. */
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the value printed with 17 significant digits ("%.17g").
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);

    if (fwrite(header,strlen(header),1,fp) != 1) return 0;
    if (fwrite(payload,paylen,1,fp) != 1) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char digits[128], header[128];
    size_t diglen;

    /* The digits buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(digits,sizeof(digits),"%ld\r\n",l);
    diglen = strlen(digits);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)diglen-2);

    if (fwrite(header,strlen(header),1,fp) != 1) return 0;
    if (fwrite(digits,diglen,1,fp) != 1) return 0;
    return 1;
}
8052 /* Write a sequence of commands able to fully rebuild the dataset into
8053 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8054 static int rewriteAppendOnlyFile(char *filename
) {
8055 dictIterator
*di
= NULL
;
8060 time_t now
= time(NULL
);
8062 /* Note that we have to use a different temp name here compared to the
8063 * one used by rewriteAppendOnlyFileBackground() function. */
8064 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8065 fp
= fopen(tmpfile
,"w");
8067 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8070 for (j
= 0; j
< server
.dbnum
; j
++) {
8071 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8072 redisDb
*db
= server
.db
+j
;
8074 if (dictSize(d
) == 0) continue;
8075 di
= dictGetIterator(d
);
8081 /* SELECT the new DB */
8082 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8083 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8085 /* Iterate this DB writing every entry */
8086 while((de
= dictNext(di
)) != NULL
) {
8091 key
= dictGetEntryKey(de
);
8092 /* If the value for this key is swapped, load a preview in memory.
8093 * We use a "swapped" flag to remember if we need to free the
8094 * value object instead to just increment the ref count anyway
8095 * in order to avoid copy-on-write of pages if we are forked() */
8096 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8097 key
->storage
== REDIS_VM_SWAPPING
) {
8098 o
= dictGetEntryVal(de
);
8101 o
= vmPreviewObject(key
);
8104 expiretime
= getExpire(db
,key
);
8106 /* Save the key and associated value */
8107 if (o
->type
== REDIS_STRING
) {
8108 /* Emit a SET command */
8109 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8110 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8112 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8113 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8114 } else if (o
->type
== REDIS_LIST
) {
8115 /* Emit the RPUSHes needed to rebuild the list */
8116 list
*list
= o
->ptr
;
8120 listRewind(list
,&li
);
8121 while((ln
= listNext(&li
))) {
8122 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8123 robj
*eleobj
= listNodeValue(ln
);
8125 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8126 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8127 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8129 } else if (o
->type
== REDIS_SET
) {
8130 /* Emit the SADDs needed to rebuild the set */
8132 dictIterator
*di
= dictGetIterator(set
);
8135 while((de
= dictNext(di
)) != NULL
) {
8136 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8137 robj
*eleobj
= dictGetEntryKey(de
);
8139 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8140 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8141 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8143 dictReleaseIterator(di
);
8144 } else if (o
->type
== REDIS_ZSET
) {
8145 /* Emit the ZADDs needed to rebuild the sorted set */
8147 dictIterator
*di
= dictGetIterator(zs
->dict
);
8150 while((de
= dictNext(di
)) != NULL
) {
8151 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8152 robj
*eleobj
= dictGetEntryKey(de
);
8153 double *score
= dictGetEntryVal(de
);
8155 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8156 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8157 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8158 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8160 dictReleaseIterator(di
);
8161 } else if (o
->type
== REDIS_HASH
) {
8162 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8164 /* Emit the HSETs needed to rebuild the hash */
8165 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8166 unsigned char *p
= zipmapRewind(o
->ptr
);
8167 unsigned char *field
, *val
;
8168 unsigned int flen
, vlen
;
8170 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8171 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8172 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8173 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8175 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8179 dictIterator
*di
= dictGetIterator(o
->ptr
);
8182 while((de
= dictNext(di
)) != NULL
) {
8183 robj
*field
= dictGetEntryKey(de
);
8184 robj
*val
= dictGetEntryVal(de
);
8186 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8187 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8188 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8189 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8191 dictReleaseIterator(di
);
8196 /* Save the expire time */
8197 if (expiretime
!= -1) {
8198 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8199 /* If this key is already expired skip it */
8200 if (expiretime
< now
) continue;
8201 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8202 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8203 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8205 if (swapped
) decrRefCount(o
);
8207 dictReleaseIterator(di
);
8210 /* Make sure data will not remain on the OS's output buffers */
8215 /* Use RENAME to make sure the DB file is changed atomically only
8216 * if the generate DB file is ok. */
8217 if (rename(tmpfile
,filename
) == -1) {
8218 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8222 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8228 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8229 if (di
) dictReleaseIterator(di
);
8233 /* This is how rewriting of the append only file in background works:
8235 * 1) The user calls BGREWRITEAOF
8236 * 2) Redis calls this function, that forks():
8237 * 2a) the child rewrite the append only file in a temp file.
8238 * 2b) the parent accumulates differences in server.bgrewritebuf.
8239 * 3) When the child finished '2a' exists.
8240 * 4) The parent will trap the exit code, if it's OK, will append the
8241 * data accumulated into server.bgrewritebuf into the temp file, and
8242 * finally will rename(2) the temp file in the actual file name.
8243 * The the new file is reopened as the new append only file. Profit!
8245 static int rewriteAppendOnlyFileBackground(void) {
8248 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8249 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8250 if ((childpid
= fork()) == 0) {
8254 if (server
.vm_enabled
) vmReopenSwapFile();
8256 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8257 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8264 if (childpid
== -1) {
8265 redisLog(REDIS_WARNING
,
8266 "Can't rewrite append only file in background: fork: %s",
8270 redisLog(REDIS_NOTICE
,
8271 "Background append only file rewriting started by pid %d",childpid
);
8272 server
.bgrewritechildpid
= childpid
;
8273 updateDictResizePolicy();
8274 /* We set appendseldb to -1 in order to force the next call to the
8275 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8276 * accumulated by the parent into server.bgrewritebuf will start
8277 * with a SELECT statement and it will be safe to merge. */
8278 server
.appendseldb
= -1;
8281 return REDIS_OK
; /* unreached */
8284 static void bgrewriteaofCommand(redisClient
*c
) {
8285 if (server
.bgrewritechildpid
!= -1) {
8286 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8289 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8290 char *status
= "+Background append only file rewriting started\r\n";
8291 addReplySds(c
,sdsnew(status
));
8293 addReply(c
,shared
.err
);
/* Remove the temp file used by the background append only file rewrite
 * performed by the child with pid 'childpid'. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
/* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
 * - Threaded Virtual Memory I/O
 * The two parts are not fully decoupled, but functions are split among two
 * different sections of the source code (delimited by comments) in order to
 * make more clear what functionality is about the blocking VM and what about
 * the threaded (not blocking) VM.
 *
 * Redis VM is a blocking VM (one that blocks reading swapped values from
 * disk into memory when a value swapped out is needed in memory) that is made
 * unblocking by trying to examine the command argument vector in order to
 * load in background values that will likely be needed in order to exec
 * the command. The command is executed only once all the relevant keys
 * are loaded into memory.
 *
 * This basically is almost as simple as a blocking VM, but almost as
 * parallel as a fully non-blocking VM.
 */
8325 /* =================== Virtual Memory - Blocking Side ====================== */
8327 /* substitute the first occurrence of '%p' with the process pid in the
8328 * swap file name. */
8329 static void expandVmSwapFilename(void) {
8330 char *p
= strstr(server
.vm_swap_file
,"%p");
8336 new = sdscat(new,server
.vm_swap_file
);
8337 new = sdscatprintf(new,"%ld",(long) getpid());
8338 new = sdscat(new,p
+2);
8339 zfree(server
.vm_swap_file
);
8340 server
.vm_swap_file
= new;
8343 static void vmInit(void) {
8348 if (server
.vm_max_threads
!= 0)
8349 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8351 expandVmSwapFilename();
8352 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8353 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8354 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8356 if (server
.vm_fp
== NULL
) {
8357 redisLog(REDIS_WARNING
,
8358 "Impossible to open the swap file: %s. Exiting.",
8362 server
.vm_fd
= fileno(server
.vm_fp
);
8363 server
.vm_next_page
= 0;
8364 server
.vm_near_pages
= 0;
8365 server
.vm_stats_used_pages
= 0;
8366 server
.vm_stats_swapped_objects
= 0;
8367 server
.vm_stats_swapouts
= 0;
8368 server
.vm_stats_swapins
= 0;
8369 totsize
= server
.vm_pages
*server
.vm_page_size
;
8370 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8371 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8372 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8376 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8378 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8379 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8380 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8381 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8383 /* Initialize threaded I/O (used by Virtual Memory) */
8384 server
.io_newjobs
= listCreate();
8385 server
.io_processing
= listCreate();
8386 server
.io_processed
= listCreate();
8387 server
.io_ready_clients
= listCreate();
8388 pthread_mutex_init(&server
.io_mutex
,NULL
);
8389 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8390 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8391 server
.io_active_threads
= 0;
8392 if (pipe(pipefds
) == -1) {
8393 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8397 server
.io_ready_pipe_read
= pipefds
[0];
8398 server
.io_ready_pipe_write
= pipefds
[1];
8399 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8400 /* LZF requires a lot of stack */
8401 pthread_attr_init(&server
.io_threads_attr
);
8402 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8403 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8404 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8405 /* Listen for events in the threaded I/O pipe */
8406 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8407 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8408 oom("creating file event");
8411 /* Mark the page as used */
8412 static void vmMarkPageUsed(off_t page
) {
8413 off_t byte
= page
/8;
8415 redisAssert(vmFreePage(page
) == 1);
8416 server
.vm_bitmap
[byte
] |= 1<<bit
;
8419 /* Mark N contiguous pages as used, with 'page' being the first. */
8420 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8423 for (j
= 0; j
< count
; j
++)
8424 vmMarkPageUsed(page
+j
);
8425 server
.vm_stats_used_pages
+= count
;
8426 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8427 (long long)count
, (long long)page
);
8430 /* Mark the page as free */
8431 static void vmMarkPageFree(off_t page
) {
8432 off_t byte
= page
/8;
8434 redisAssert(vmFreePage(page
) == 0);
8435 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8438 /* Mark N contiguous pages as free, with 'page' being the first. */
8439 static void vmMarkPagesFree(off_t page
, off_t count
) {
8442 for (j
= 0; j
< count
; j
++)
8443 vmMarkPageFree(page
+j
);
8444 server
.vm_stats_used_pages
-= count
;
8445 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8446 (long long)count
, (long long)page
);
8449 /* Test if the page is free */
8450 static int vmFreePage(off_t page
) {
8451 off_t byte
= page
/8;
8453 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8456 /* Find N contiguous free pages storing the first page of the cluster in *first.
8457 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8458 * REDIS_ERR is returned.
8460 * This function uses a simple algorithm: we try to allocate
8461 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8462 * again from the start of the swap file searching for free spaces.
8464 * If it looks pretty clear that there are no free pages near our offset
8465 * we try to find less populated places doing a forward jump of
8466 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8467 * without hurry, and then we jump again and so forth...
8469 * This function can be improved using a free list to avoid to guess
8470 * too much, since we could collect data about freed pages.
8472 * note: I implemented this function just after watching an episode of
8473 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8475 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8476 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8478 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8479 server
.vm_near_pages
= 0;
8480 server
.vm_next_page
= 0;
8482 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8483 base
= server
.vm_next_page
;
8485 while(offset
< server
.vm_pages
) {
8486 off_t
this = base
+offset
;
8488 /* If we overflow, restart from page zero */
8489 if (this >= server
.vm_pages
) {
8490 this -= server
.vm_pages
;
8492 /* Just overflowed, what we found on tail is no longer
8493 * interesting, as it's no longer contiguous. */
8497 if (vmFreePage(this)) {
8498 /* This is a free page */
8500 /* Already got N free pages? Return to the caller, with success */
8502 *first
= this-(n
-1);
8503 server
.vm_next_page
= this+1;
8504 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8508 /* The current one is not a free page */
8512 /* Fast-forward if the current page is not free and we already
8513 * searched enough near this place. */
8515 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8516 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8518 /* Note that even if we rewind after the jump, we are don't need
8519 * to make sure numfree is set to zero as we only jump *if* it
8520 * is set to zero. */
8522 /* Otherwise just check the next page */
8529 /* Write the specified object at the specified page of the swap file */
8530 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8531 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8532 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8533 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8534 redisLog(REDIS_WARNING
,
8535 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8539 rdbSaveObject(server
.vm_fp
,o
);
8540 fflush(server
.vm_fp
);
8541 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8545 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8546 * needed to later retrieve the object into the key object.
8547 * If we can't find enough contiguous empty pages to swap the object on disk
8548 * REDIS_ERR is returned. */
8549 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8550 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8553 assert(key
->storage
== REDIS_VM_MEMORY
);
8554 assert(key
->refcount
== 1);
8555 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8556 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8557 key
->vm
.page
= page
;
8558 key
->vm
.usedpages
= pages
;
8559 key
->storage
= REDIS_VM_SWAPPED
;
8560 key
->vtype
= val
->type
;
8561 decrRefCount(val
); /* Deallocate the object from memory. */
8562 vmMarkPagesUsed(page
,pages
);
8563 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8564 (unsigned char*) key
->ptr
,
8565 (unsigned long long) page
, (unsigned long long) pages
);
8566 server
.vm_stats_swapped_objects
++;
8567 server
.vm_stats_swapouts
++;
8571 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8574 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8575 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8576 redisLog(REDIS_WARNING
,
8577 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8581 o
= rdbLoadObject(type
,server
.vm_fp
);
8583 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8586 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8590 /* Load the value object relative to the 'key' object from swap to memory.
8591 * The newly allocated object is returned.
8593 * If preview is true the unserialized object is returned to the caller but
8594 * no changes are made to the key object, nor the pages are marked as freed */
8595 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8598 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8599 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8601 key
->storage
= REDIS_VM_MEMORY
;
8602 key
->vm
.atime
= server
.unixtime
;
8603 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8604 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8605 (unsigned char*) key
->ptr
);
8606 server
.vm_stats_swapped_objects
--;
8608 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8609 (unsigned char*) key
->ptr
);
8611 server
.vm_stats_swapins
++;
8615 /* Plain object loading, from swap to memory */
8616 static robj
*vmLoadObject(robj
*key
) {
8617 /* If we are loading the object in background, stop it, we
8618 * need to load this object synchronously ASAP. */
8619 if (key
->storage
== REDIS_VM_LOADING
)
8620 vmCancelThreadedIOJob(key
);
8621 return vmGenericLoadObject(key
,0);
8624 /* Just load the value on disk, without to modify the key.
8625 * This is useful when we want to perform some operation on the value
8626 * without to really bring it from swap to memory, like while saving the
8627 * dataset or rewriting the append only log. */
8628 static robj
*vmPreviewObject(robj
*key
) {
8629 return vmGenericLoadObject(key
,1);
8632 /* How a good candidate is this object for swapping?
8633 * The better candidate it is, the greater the returned value.
8635 * Currently we try to perform a fast estimation of the object size in
8636 * memory, and combine it with aging informations.
8638 * Basically swappability = idle-time * log(estimated size)
8640 * Bigger objects are preferred over smaller objects, but not
8641 * proportionally, this is why we use the logarithm. This algorithm is
8642 * just a first try and will probably be tuned later. */
8643 static double computeObjectSwappability(robj
*o
) {
8644 time_t age
= server
.unixtime
- o
->vm
.atime
;
8648 struct dictEntry
*de
;
8651 if (age
<= 0) return 0;
8654 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8657 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8662 listNode
*ln
= listFirst(l
);
8664 asize
= sizeof(list
);
8666 robj
*ele
= ln
->value
;
8669 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8670 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8672 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8677 z
= (o
->type
== REDIS_ZSET
);
8678 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8680 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8681 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8686 de
= dictGetRandomKey(d
);
8687 ele
= dictGetEntryKey(de
);
8688 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8689 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8691 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8692 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8696 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8697 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8698 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8699 unsigned int klen
, vlen
;
8700 unsigned char *key
, *val
;
8702 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8706 asize
= len
*(klen
+vlen
+3);
8707 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8709 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8714 de
= dictGetRandomKey(d
);
8715 ele
= dictGetEntryKey(de
);
8716 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8717 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8719 ele
= dictGetEntryVal(de
);
8720 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8721 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8723 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8728 return (double)age
*log(1+asize
);
8731 /* Try to swap an object that's a good candidate for swapping.
8732 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8733 * to swap any object at all.
8735 * If 'usethreaded' is true, Redis will try to swap the object in background
8736 * using I/O threads. */
8737 static int vmSwapOneObject(int usethreads
) {
8739 struct dictEntry
*best
= NULL
;
8740 double best_swappability
= 0;
8741 redisDb
*best_db
= NULL
;
8744 for (j
= 0; j
< server
.dbnum
; j
++) {
8745 redisDb
*db
= server
.db
+j
;
8746 /* Why maxtries is set to 100?
8747 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8748 * are swappable objects */
8751 if (dictSize(db
->dict
) == 0) continue;
8752 for (i
= 0; i
< 5; i
++) {
8754 double swappability
;
8756 if (maxtries
) maxtries
--;
8757 de
= dictGetRandomKey(db
->dict
);
8758 key
= dictGetEntryKey(de
);
8759 val
= dictGetEntryVal(de
);
8760 /* Only swap objects that are currently in memory.
8762 * Also don't swap shared objects if threaded VM is on, as we
8763 * try to ensure that the main thread does not touch the
8764 * object while the I/O thread is using it, but we can't
8765 * control other keys without adding additional mutex. */
8766 if (key
->storage
!= REDIS_VM_MEMORY
||
8767 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8768 if (maxtries
) i
--; /* don't count this try */
8771 swappability
= computeObjectSwappability(val
);
8772 if (!best
|| swappability
> best_swappability
) {
8774 best_swappability
= swappability
;
8779 if (best
== NULL
) return REDIS_ERR
;
8780 key
= dictGetEntryKey(best
);
8781 val
= dictGetEntryVal(best
);
8783 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8784 key
->ptr
, best_swappability
);
8786 /* Unshare the key if needed */
8787 if (key
->refcount
> 1) {
8788 robj
*newkey
= dupStringObject(key
);
8790 key
= dictGetEntryKey(best
) = newkey
;
8794 vmSwapObjectThreaded(key
,val
,best_db
);
8797 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8798 dictGetEntryVal(best
) = NULL
;
/* Swap out one object without using the I/O threads.
 *
 * Thin wrapper that forwards to vmSwapOneObject() with 'usethreads' set to 0
 * and returns whatever that call returns (REDIS_OK if an object was swapped,
 * REDIS_ERR if no object could be swapped). */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap out one object asking for the work to be done by the I/O threads.
 *
 * Thin wrapper that forwards to vmSwapOneObject() with 'usethreads' set to 1
 * and returns whatever that call returns (REDIS_OK if an object was selected
 * for swapping, REDIS_ERR if no object could be swapped). */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8814 /* Return true if it's safe to swap out objects in a given moment.
8815 * Basically we don't want to swap objects out while there is a BGSAVE
8816 * or a BGAEOREWRITE running in backgroud. */
8817 static int vmCanSwapOut(void) {
8818 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8821 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8822 * and was deleted. Otherwise 0 is returned. */
8823 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8827 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8828 foundkey
= dictGetEntryKey(de
);
8829 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8834 /* =================== Virtual Memory - Threaded I/O ======================= */
8836 static void freeIOJob(iojob
*j
) {
8837 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8838 j
->type
== REDIS_IOJOB_DO_SWAP
||
8839 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8840 decrRefCount(j
->val
);
8841 /* We don't decrRefCount the j->key field as we didn't increment
8842 * the count when creating IO Jobs. This is because the key field here is
8843 * just used as an identifier and if a key is removed the Job should
8844 * never be touched again. */
8848 /* Every time a thread finished a Job, it writes a byte into the write side
8849 * of an unix pipe in order to "awake" the main thread, and this function
8851 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8855 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8857 REDIS_NOTUSED(mask
);
8858 REDIS_NOTUSED(privdata
);
8860 /* For every byte we read in the read side of the pipe, there is one
8861 * I/O job completed to process. */
8862 while((retval
= read(fd
,buf
,1)) == 1) {
8866 struct dictEntry
*de
;
8868 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8870 /* Get the processed element (the oldest one) */
8872 assert(listLength(server
.io_processed
) != 0);
8873 if (toprocess
== -1) {
8874 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8875 if (toprocess
<= 0) toprocess
= 1;
8877 ln
= listFirst(server
.io_processed
);
8879 listDelNode(server
.io_processed
,ln
);
8881 /* If this job is marked as canceled, just ignore it */
8886 /* Post process it in the main thread, as there are things we
8887 * can do just here to avoid race conditions and/or invasive locks */
8888 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8889 de
= dictFind(j
->db
->dict
,j
->key
);
8891 key
= dictGetEntryKey(de
);
8892 if (j
->type
== REDIS_IOJOB_LOAD
) {
8895 /* Key loaded, bring it at home */
8896 key
->storage
= REDIS_VM_MEMORY
;
8897 key
->vm
.atime
= server
.unixtime
;
8898 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8899 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8900 (unsigned char*) key
->ptr
);
8901 server
.vm_stats_swapped_objects
--;
8902 server
.vm_stats_swapins
++;
8903 dictGetEntryVal(de
) = j
->val
;
8904 incrRefCount(j
->val
);
8907 /* Handle clients waiting for this key to be loaded. */
8908 handleClientsBlockedOnSwappedKey(db
,key
);
8909 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8910 /* Now we know the amount of pages required to swap this object.
8911 * Let's find some space for it, and queue this task again
8912 * rebranded as REDIS_IOJOB_DO_SWAP. */
8913 if (!vmCanSwapOut() ||
8914 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8916 /* Ooops... no space or we can't swap as there is
8917 * a fork()ed Redis trying to save stuff on disk. */
8919 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8921 /* Note that we need to mark this pages as used now,
8922 * if the job will be canceled, we'll mark them as freed
8924 vmMarkPagesUsed(j
->page
,j
->pages
);
8925 j
->type
= REDIS_IOJOB_DO_SWAP
;
8930 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8933 /* Key swapped. We can finally free some memory. */
8934 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8935 printf("key->storage: %d\n",key
->storage
);
8936 printf("key->name: %s\n",(char*)key
->ptr
);
8937 printf("key->refcount: %d\n",key
->refcount
);
8938 printf("val: %p\n",(void*)j
->val
);
8939 printf("val->type: %d\n",j
->val
->type
);
8940 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8942 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8943 val
= dictGetEntryVal(de
);
8944 key
->vm
.page
= j
->page
;
8945 key
->vm
.usedpages
= j
->pages
;
8946 key
->storage
= REDIS_VM_SWAPPED
;
8947 key
->vtype
= j
->val
->type
;
8948 decrRefCount(val
); /* Deallocate the object from memory. */
8949 dictGetEntryVal(de
) = NULL
;
8950 redisLog(REDIS_DEBUG
,
8951 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8952 (unsigned char*) key
->ptr
,
8953 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8954 server
.vm_stats_swapped_objects
++;
8955 server
.vm_stats_swapouts
++;
8957 /* Put a few more swap requests in queue if we are still
8959 if (trytoswap
&& vmCanSwapOut() &&
8960 zmalloc_used_memory() > server
.vm_max_memory
)
8965 more
= listLength(server
.io_newjobs
) <
8966 (unsigned) server
.vm_max_threads
;
8968 /* Don't waste CPU time if swappable objects are rare. */
8969 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8977 if (processed
== toprocess
) return;
8979 if (retval
< 0 && errno
!= EAGAIN
) {
8980 redisLog(REDIS_WARNING
,
8981 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8986 static void lockThreadedIO(void) {
8987 pthread_mutex_lock(&server
.io_mutex
);
8990 static void unlockThreadedIO(void) {
8991 pthread_mutex_unlock(&server
.io_mutex
);
8994 /* Remove the specified object from the threaded I/O queue if still not
8995 * processed, otherwise make sure to flag it as canceled. */
8996 static void vmCancelThreadedIOJob(robj
*o
) {
8998 server
.io_newjobs
, /* 0 */
8999 server
.io_processing
, /* 1 */
9000 server
.io_processed
/* 2 */
9004 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9007 /* Search for a matching key in one of the queues */
9008 for (i
= 0; i
< 3; i
++) {
9012 listRewind(lists
[i
],&li
);
9013 while ((ln
= listNext(&li
)) != NULL
) {
9014 iojob
*job
= ln
->value
;
9016 if (job
->canceled
) continue; /* Skip this, already canceled. */
9017 if (job
->key
== o
) {
9018 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9019 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9020 /* Mark the pages as free since the swap didn't happen
9021 * or happened but is now discarded. */
9022 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9023 vmMarkPagesFree(job
->page
,job
->pages
);
9024 /* Cancel the job. It depends on the list the job is
9027 case 0: /* io_newjobs */
9028 /* If the job was yet not processed the best thing to do
9029 * is to remove it from the queue at all */
9031 listDelNode(lists
[i
],ln
);
9033 case 1: /* io_processing */
9034 /* Oh Shi- the thread is messing with the Job:
9036 * Probably it's accessing the object if this is a
9037 * PREPARE_SWAP or DO_SWAP job.
9038 * If it's a LOAD job it may be reading from disk and
9039 * if we don't wait for the job to terminate before to
9040 * cancel it, maybe in a few microseconds data can be
9041 * corrupted in this pages. So the short story is:
9043 * Better to wait for the job to move into the
9044 * next queue (processed)... */
9046 /* We try again and again until the job is completed. */
9048 /* But let's wait some time for the I/O thread
9049 * to finish with this job. After all this condition
9050 * should be very rare. */
9053 case 2: /* io_processed */
9054 /* The job was already processed, that's easy...
9055 * just mark it as canceled so that we'll ignore it
9056 * when processing completed jobs. */
9060 /* Finally we have to adjust the storage type of the object
9061 * in order to "UNDO" the operation. */
9062 if (o
->storage
== REDIS_VM_LOADING
)
9063 o
->storage
= REDIS_VM_SWAPPED
;
9064 else if (o
->storage
== REDIS_VM_SWAPPING
)
9065 o
->storage
= REDIS_VM_MEMORY
;
9072 assert(1 != 1); /* We should never reach this */
9075 static void *IOThreadEntryPoint(void *arg
) {
9080 pthread_detach(pthread_self());
9082 /* Get a new job to process */
9084 if (listLength(server
.io_newjobs
) == 0) {
9085 /* No new jobs in queue, exit. */
9086 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9087 (long) pthread_self());
9088 server
.io_active_threads
--;
9092 ln
= listFirst(server
.io_newjobs
);
9094 listDelNode(server
.io_newjobs
,ln
);
9095 /* Add the job in the processing queue */
9096 j
->thread
= pthread_self();
9097 listAddNodeTail(server
.io_processing
,j
);
9098 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9100 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9101 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9103 /* Process the Job */
9104 if (j
->type
== REDIS_IOJOB_LOAD
) {
9105 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9106 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9107 FILE *fp
= fopen("/dev/null","w+");
9108 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9110 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9111 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9115 /* Done: insert the job into the processed queue */
9116 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9117 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9119 listDelNode(server
.io_processing
,ln
);
9120 listAddNodeTail(server
.io_processed
,j
);
9123 /* Signal the main thread there is new stuff to process */
9124 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9126 return NULL
; /* never reached */
/* Start one more I/O thread running IOThreadEntryPoint() and account for it
 * in server.io_active_threads.
 *
 * While the thread is being created the calling thread's signal mask is
 * replaced with 'mask' (which gets SIGCHLD, SIGHUP and SIGPIPE added to it)
 * and the previous mask, saved in 'omask', is restored afterwards.
 * NOTE(review): presumably this is done so that the new thread, which
 * inherits its creator's signal mask, does not receive those signals --
 * confirm. */
9129 static void spawnIOThread(void) {
9131 sigset_t mask
, omask
;
9135 sigaddset(&mask
,SIGCHLD
);
9136 sigaddset(&mask
,SIGHUP
);
9137 sigaddset(&mask
,SIGPIPE
);
/* Install 'mask', remembering the mask that was in force in 'omask'. */
9138 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
/* pthread_create() is retried for as long as it returns an error; every
 * failed attempt is reported with a warning in the log. */
9139 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9140 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
/* Put back the signal mask saved above. */
9144 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
/* One more thread is now running. */
9145 server
.io_active_threads
++;
9148 /* We need to wait for the last thread to exit before we are able to
9149 * fork() in order to BGSAVE or BGREWRITEAOF. */
9150 static void waitEmptyIOJobsQueue(void) {
9152 int io_processed_len
;
9155 if (listLength(server
.io_newjobs
) == 0 &&
9156 listLength(server
.io_processing
) == 0 &&
9157 server
.io_active_threads
== 0)
9162 /* While waiting for empty jobs queue condition we post-process some
9163 * finished jobs, as I/O threads may be hanging trying to write against
9164 * the io_ready_pipe_write FD but there are so many pending jobs that
9166 io_processed_len
= listLength(server
.io_processed
);
9168 if (io_processed_len
) {
9169 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9170 usleep(1000); /* 1 millisecond */
9172 usleep(10000); /* 10 milliseconds */
/* Open the swap file named by server.vm_swap_file again, in "r+b" mode, and
 * make server.vm_fp and server.vm_fd refer to the newly opened stream and to
 * its file descriptor.
 *
 * If the file can't be opened a warning is logged. The message announces
 * that the process is exiting; the statement that actually terminates it is
 * not visible in this excerpt -- TODO confirm. */
9177 static void vmReopenSwapFile(void) {
9178 /* Note: we don't close the old one as we are in the child process
9179 * and don't want to mess at all with the original file object. */
9180 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9181 if (server
.vm_fp
== NULL
) {
9182 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9183 server
.vm_swap_file
);
/* Keep the raw descriptor in sync with the new stream. */
9186 server
.vm_fd
= fileno(server
.vm_fp
);
9189 /* This function must be called while with threaded IO locked */
9190 static void queueIOJob(iojob
*j
) {
9191 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9192 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9193 listAddNodeTail(server
.io_newjobs
,j
);
9194 if (server
.io_active_threads
< server
.vm_max_threads
)
9198 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9201 assert(key
->storage
== REDIS_VM_MEMORY
);
9202 assert(key
->refcount
== 1);
9204 j
= zmalloc(sizeof(*j
));
9205 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9211 j
->thread
= (pthread_t
) -1;
9212 key
->storage
= REDIS_VM_SWAPPING
;
9220 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9222 /* This function makes the client 'c' wait for the key 'key' to be loaded.
9223 * If there is not already a job loading the key, it is created.
9224 * The key is added to the io_keys list in the client structure, and also
9225 * in the hash table mapping swapped keys to waiting clients, that is,
9226 * server.io_waited_keys. */
9227 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9228 struct dictEntry
*de
;
9232 /* If the key does not exist or is already in RAM we don't need to
9233 * block the client at all. */
9234 de
= dictFind(c
->db
->dict
,key
);
9235 if (de
== NULL
) return 0;
9236 o
= dictGetEntryKey(de
);
9237 if (o
->storage
== REDIS_VM_MEMORY
) {
9239 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9240 /* We were swapping the key, undo it! */
9241 vmCancelThreadedIOJob(o
);
9245 /* OK: the key is either swapped, or being loaded just now. */
9247 /* Add the key to the list of keys this client is waiting for.
9248 * This maps clients to keys they are waiting for. */
9249 listAddNodeTail(c
->io_keys
,key
);
9252 /* Add the client to the swapped keys => clients waiting map. */
9253 de
= dictFind(c
->db
->io_keys
,key
);
9257 /* For every key we take a list of clients blocked for it */
9259 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9261 assert(retval
== DICT_OK
);
9263 l
= dictGetEntryVal(de
);
9265 listAddNodeTail(l
,c
);
9267 /* Are we already loading the key from disk? If not create a job */
9268 if (o
->storage
== REDIS_VM_SWAPPED
) {
9271 o
->storage
= REDIS_VM_LOADING
;
9272 j
= zmalloc(sizeof(*j
));
9273 j
->type
= REDIS_IOJOB_LOAD
;
9276 j
->key
->vtype
= o
->vtype
;
9277 j
->page
= o
->vm
.page
;
9280 j
->thread
= (pthread_t
) -1;
9288 /* Preload keys needed for the ZUNION and ZINTER commands. */
9289 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9291 num
= atoi(c
->argv
[2]->ptr
);
9292 for (i
= 0; i
< num
; i
++) {
9293 waitForSwappedKey(c
,c
->argv
[3+i
]);
9297 /* Is this client attempting to run a command against swapped keys?
9298 * If so, block it ASAP, load the keys in background, then resume it.
9300 * The important idea about this function is that it can fail! If keys will
9301 * still be swapped when the client is resumed, this key lookups will
9302 * just block loading keys from disk. In practical terms this should only
9303 * happen with SORT BY command or if there is a bug in this function.
9305 * Return 1 if the client is marked as blocked, 0 if the client can
9306 * continue as the keys it is going to access appear to be in memory. */
9307 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9310 if (cmd
->vm_preload_proc
!= NULL
) {
9311 cmd
->vm_preload_proc(c
);
9313 if (cmd
->vm_firstkey
== 0) return 0;
9314 last
= cmd
->vm_lastkey
;
9315 if (last
< 0) last
= c
->argc
+last
;
9316 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9317 waitForSwappedKey(c
,c
->argv
[j
]);
9320 /* If the client was blocked for at least one key, mark it as blocked. */
9321 if (listLength(c
->io_keys
)) {
9322 c
->flags
|= REDIS_IO_WAIT
;
9323 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9324 server
.vm_blocked_clients
++;
9331 /* Remove the 'key' from the list of blocked keys for a given client.
9333 * The function returns 1 when there are no longer blocking keys after
9334 * the current one was removed (and the client can be unblocked). */
9335 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9339 struct dictEntry
*de
;
9341 /* Remove the key from the list of keys this client is waiting for. */
9342 listRewind(c
->io_keys
,&li
);
9343 while ((ln
= listNext(&li
)) != NULL
) {
9344 if (compareStringObjects(ln
->value
,key
) == 0) {
9345 listDelNode(c
->io_keys
,ln
);
9351 /* Remove the client from the key => waiting clients map. */
9352 de
= dictFind(c
->db
->io_keys
,key
);
9354 l
= dictGetEntryVal(de
);
9355 ln
= listSearchKey(l
,c
);
9358 if (listLength(l
) == 0)
9359 dictDelete(c
->db
->io_keys
,key
);
9361 return listLength(c
->io_keys
) == 0;
9364 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9365 struct dictEntry
*de
;
9370 de
= dictFind(db
->io_keys
,key
);
9373 l
= dictGetEntryVal(de
);
9374 len
= listLength(l
);
9375 /* Note: we can't use something like while(listLength(l)) as the list
9376 * can be freed by the calling function when we remove the last element. */
9379 redisClient
*c
= ln
->value
;
9381 if (dontWaitForSwappedKey(c
,key
)) {
9382 /* Put the client in the list of clients ready to go as we
9383 * loaded all the keys about it. */
9384 listAddNodeTail(server
.io_ready_clients
,c
);
9389 /* =========================== Remote Configuration ========================= */
9391 static void configSetCommand(redisClient
*c
) {
9392 robj
*o
= getDecodedObject(c
->argv
[3]);
9393 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9394 zfree(server
.dbfilename
);
9395 server
.dbfilename
= zstrdup(o
->ptr
);
9396 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9397 zfree(server
.requirepass
);
9398 server
.requirepass
= zstrdup(o
->ptr
);
9399 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9400 zfree(server
.masterauth
);
9401 server
.masterauth
= zstrdup(o
->ptr
);
9402 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9403 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9405 addReplySds(c
,sdscatprintf(sdsempty(),
9406 "-ERR not supported CONFIG parameter %s\r\n",
9407 (char*)c
->argv
[2]->ptr
));
9412 addReply(c
,shared
.ok
);
9415 static void configGetCommand(redisClient
*c
) {
9416 robj
*o
= getDecodedObject(c
->argv
[2]);
9417 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9418 char *pattern
= o
->ptr
;
9422 decrRefCount(lenobj
);
9424 if (stringmatch(pattern
,"dbfilename",0)) {
9425 addReplyBulkCString(c
,"dbfilename");
9426 addReplyBulkCString(c
,server
.dbfilename
);
9429 if (stringmatch(pattern
,"requirepass",0)) {
9430 addReplyBulkCString(c
,"requirepass");
9431 addReplyBulkCString(c
,server
.requirepass
);
9434 if (stringmatch(pattern
,"masterauth",0)) {
9435 addReplyBulkCString(c
,"masterauth");
9436 addReplyBulkCString(c
,server
.masterauth
);
9439 if (stringmatch(pattern
,"maxmemory",0)) {
9442 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9443 addReplyBulkCString(c
,"maxmemory");
9444 addReplyBulkCString(c
,buf
);
9448 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9451 static void configCommand(redisClient
*c
) {
9452 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9453 if (c
->argc
!= 4) goto badarity
;
9454 configSetCommand(c
);
9455 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9456 if (c
->argc
!= 3) goto badarity
;
9457 configGetCommand(c
);
9458 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9459 if (c
->argc
!= 2) goto badarity
;
9460 server
.stat_numcommands
= 0;
9461 server
.stat_numconnections
= 0;
9462 server
.stat_expiredkeys
= 0;
9463 server
.stat_starttime
= time(NULL
);
9464 addReply(c
,shared
.ok
);
9466 addReplySds(c
,sdscatprintf(sdsempty(),
9467 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9472 addReplySds(c
,sdscatprintf(sdsempty(),
9473 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9474 (char*) c
->argv
[1]->ptr
));
9477 /* =========================== Pubsub implementation ======================== */
9479 static void freePubsubPattern(void *p
) {
9480 pubsubPattern
*pat
= p
;
9482 decrRefCount(pat
->pattern
);
9486 static int listMatchPubsubPattern(void *a
, void *b
) {
9487 pubsubPattern
*pa
= a
, *pb
= b
;
9489 return (pa
->client
== pb
->client
) &&
9490 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9493 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9494 * 0 if the client was already subscribed to that channel. */
9495 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9496 struct dictEntry
*de
;
9497 list
*clients
= NULL
;
9500 /* Add the channel to the client -> channels hash table */
9501 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9503 incrRefCount(channel
);
9504 /* Add the client to the channel -> list of clients hash table */
9505 de
= dictFind(server
.pubsub_channels
,channel
);
9507 clients
= listCreate();
9508 dictAdd(server
.pubsub_channels
,channel
,clients
);
9509 incrRefCount(channel
);
9511 clients
= dictGetEntryVal(de
);
9513 listAddNodeTail(clients
,c
);
9515 /* Notify the client */
9516 addReply(c
,shared
.mbulk3
);
9517 addReply(c
,shared
.subscribebulk
);
9518 addReplyBulk(c
,channel
);
9519 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9523 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9524 * 0 if the client was not subscribed to the specified channel. */
9525 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9526 struct dictEntry
*de
;
9531 /* Remove the channel from the client -> channels hash table */
9532 incrRefCount(channel
); /* channel may be just a pointer to the same object
9533 we have in the hash tables. Protect it... */
9534 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9536 /* Remove the client from the channel -> clients list hash table */
9537 de
= dictFind(server
.pubsub_channels
,channel
);
9539 clients
= dictGetEntryVal(de
);
9540 ln
= listSearchKey(clients
,c
);
9542 listDelNode(clients
,ln
);
9543 if (listLength(clients
) == 0) {
9544 /* Free the list and associated hash entry at all if this was
9545 * the last client, so that it will not be possible to abuse
9546 * Redis PUBSUB creating millions of channels. */
9547 dictDelete(server
.pubsub_channels
,channel
);
9550 /* Notify the client */
9552 addReply(c
,shared
.mbulk3
);
9553 addReply(c
,shared
.unsubscribebulk
);
9554 addReplyBulk(c
,channel
);
9555 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9556 listLength(c
->pubsub_patterns
));
9559 decrRefCount(channel
); /* it is finally safe to release it */
9563 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
9564 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9567 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9570 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9571 incrRefCount(pattern
);
9572 pat
= zmalloc(sizeof(*pat
));
9573 pat
->pattern
= getDecodedObject(pattern
);
9575 listAddNodeTail(server
.pubsub_patterns
,pat
);
9577 /* Notify the client */
9578 addReply(c
,shared
.mbulk3
);
9579 addReply(c
,shared
.psubscribebulk
);
9580 addReplyBulk(c
,pattern
);
9581 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9585 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
9586 * 0 if the client was not subscribed to the specified pattern. */
9587 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9592 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9593 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9595 listDelNode(c
->pubsub_patterns
,ln
);
9597 pat
.pattern
= pattern
;
9598 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9599 listDelNode(server
.pubsub_patterns
,ln
);
9601 /* Notify the client */
9603 addReply(c
,shared
.mbulk3
);
9604 addReply(c
,shared
.punsubscribebulk
);
9605 addReplyBulk(c
,pattern
);
9606 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9607 listLength(c
->pubsub_patterns
));
9609 decrRefCount(pattern
);
9613 /* Unsubscribe from all the channels. Return the number of channels the
9614 * client was subscribed to. */
9615 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9616 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9620 while((de
= dictNext(di
)) != NULL
) {
9621 robj
*channel
= dictGetEntryKey(de
);
9623 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9625 dictReleaseIterator(di
);
9629 /* Unsubscribe from all the patterns. Return the number of patterns the
9630 * client was subscribed to. */
9631 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9636 listRewind(c
->pubsub_patterns
,&li
);
9637 while ((ln
= listNext(&li
)) != NULL
) {
9638 robj
*pattern
= ln
->value
;
9640 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9645 /* Publish a message */
9646 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9648 struct dictEntry
*de
;
9652 /* Send to clients listening for that channel */
9653 de
= dictFind(server
.pubsub_channels
,channel
);
9655 list
*list
= dictGetEntryVal(de
);
9659 listRewind(list
,&li
);
9660 while ((ln
= listNext(&li
)) != NULL
) {
9661 redisClient
*c
= ln
->value
;
9663 addReply(c
,shared
.mbulk3
);
9664 addReply(c
,shared
.messagebulk
);
9665 addReplyBulk(c
,channel
);
9666 addReplyBulk(c
,message
);
9670 /* Send to clients listening to matching channels */
9671 if (listLength(server
.pubsub_patterns
)) {
9672 listRewind(server
.pubsub_patterns
,&li
);
9673 channel
= getDecodedObject(channel
);
9674 while ((ln
= listNext(&li
)) != NULL
) {
9675 pubsubPattern
*pat
= ln
->value
;
9677 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9678 sdslen(pat
->pattern
->ptr
),
9679 (char*)channel
->ptr
,
9680 sdslen(channel
->ptr
),0)) {
9681 addReply(pat
->client
,shared
.mbulk3
);
9682 addReply(pat
->client
,shared
.messagebulk
);
9683 addReplyBulk(pat
->client
,channel
);
9684 addReplyBulk(pat
->client
,message
);
9688 decrRefCount(channel
);
9693 static void subscribeCommand(redisClient
*c
) {
9696 for (j
= 1; j
< c
->argc
; j
++)
9697 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9700 static void unsubscribeCommand(redisClient
*c
) {
9702 pubsubUnsubscribeAllChannels(c
,1);
9707 for (j
= 1; j
< c
->argc
; j
++)
9708 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9712 static void psubscribeCommand(redisClient
*c
) {
9715 for (j
= 1; j
< c
->argc
; j
++)
9716 pubsubSubscribePattern(c
,c
->argv
[j
]);
9719 static void punsubscribeCommand(redisClient
*c
) {
9721 pubsubUnsubscribeAllPatterns(c
,1);
9726 for (j
= 1; j
< c
->argc
; j
++)
9727 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9731 static void publishCommand(redisClient
*c
) {
9732 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9733 addReplyLong(c
,receivers
);
9736 /* ================================= Debugging ============================== */
9738 static void debugCommand(redisClient
*c
) {
9739 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9741 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9742 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9743 addReply(c
,shared
.err
);
9747 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9748 addReply(c
,shared
.err
);
9751 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9752 addReply(c
,shared
.ok
);
9753 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9755 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9756 addReply(c
,shared
.err
);
9759 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9760 addReply(c
,shared
.ok
);
9761 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9762 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9766 addReply(c
,shared
.nokeyerr
);
9769 key
= dictGetEntryKey(de
);
9770 val
= dictGetEntryVal(de
);
9771 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9772 key
->storage
== REDIS_VM_SWAPPING
)) {
9776 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9777 strenc
= strencoding
[val
->encoding
];
9779 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9782 addReplySds(c
,sdscatprintf(sdsempty(),
9783 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9784 "encoding:%s serializedlength:%lld\r\n",
9785 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9786 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9788 addReplySds(c
,sdscatprintf(sdsempty(),
9789 "+Key at:%p refcount:%d, value swapped at: page %llu "
9790 "using %llu pages\r\n",
9791 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9792 (unsigned long long) key
->vm
.usedpages
));
9794 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9795 lookupKeyRead(c
->db
,c
->argv
[2]);
9796 addReply(c
,shared
.ok
);
9797 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9798 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9801 if (!server
.vm_enabled
) {
9802 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9806 addReply(c
,shared
.nokeyerr
);
9809 key
= dictGetEntryKey(de
);
9810 val
= dictGetEntryVal(de
);
9811 /* If the key is shared we want to create a copy */
9812 if (key
->refcount
> 1) {
9813 robj
*newkey
= dupStringObject(key
);
9815 key
= dictGetEntryKey(de
) = newkey
;
9818 if (key
->storage
!= REDIS_VM_MEMORY
) {
9819 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9820 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9821 dictGetEntryVal(de
) = NULL
;
9822 addReply(c
,shared
.ok
);
9824 addReply(c
,shared
.err
);
9827 addReplySds(c
,sdsnew(
9828 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9832 static void _redisAssert(char *estr
, char *file
, int line
) {
9833 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9834 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9835 #ifdef HAVE_BACKTRACE
9836 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9841 /* =================================== Main! ================================ */
9844 int linuxOvercommitMemoryValue(void) {
9845 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9849 if (fgets(buf
,64,fp
) == NULL
) {
9858 void linuxOvercommitMemoryWarning(void) {
9859 if (linuxOvercommitMemoryValue() == 0) {
9860 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9863 #endif /* __linux__ */
9865 static void daemonize(void) {
9869 if (fork() != 0) exit(0); /* parent exits */
9870 setsid(); /* create a new session */
9872 /* Every output goes to /dev/null. If Redis is daemonized but
9873 * the 'logfile' is set to 'stdout' in the configuration file
9874 * it will not log at all. */
9875 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9876 dup2(fd
, STDIN_FILENO
);
9877 dup2(fd
, STDOUT_FILENO
);
9878 dup2(fd
, STDERR_FILENO
);
9879 if (fd
> STDERR_FILENO
) close(fd
);
9881 /* Try to write the pid file */
9882 fp
= fopen(server
.pidfile
,"w");
9884 fprintf(fp
,"%d\n",getpid());
9889 static void version() {
9890 printf("Redis server version %s\n", REDIS_VERSION
);
9894 static void usage() {
9895 fprintf(stderr
,"Usage: ./redis-server [/path/to/redis.conf]\n");
9896 fprintf(stderr
," ./redis-server - (read config from stdin)\n");
9900 int main(int argc
, char **argv
) {
9905 if (strcmp(argv
[1], "-v") == 0 ||
9906 strcmp(argv
[1], "--version") == 0) version();
9907 if (strcmp(argv
[1], "--help") == 0) usage();
9908 resetServerSaveParams();
9909 loadServerConfig(argv
[1]);
9910 } else if ((argc
> 2)) {
9913 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9915 if (server
.daemonize
) daemonize();
9917 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9919 linuxOvercommitMemoryWarning();
9922 if (server
.appendonly
) {
9923 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9924 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
9926 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9927 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9929 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9930 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9932 aeDeleteEventLoop(server
.el
);
9936 /* ============================= Backtrace support ========================= */
9938 #ifdef HAVE_BACKTRACE
9939 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, one register value saved in the signal
 * context 'uc'. segvHandler() stores the result in its backtrace array as
 * the "caller's address". Which field is read is selected at compile time
 * by platform macro: mc_eip (FreeBSD), eip (dietlibc), __ss.__rip or
 * __ss.__eip (Apple, before and from 10.6), gregs[REG_EIP] (i386 / x86-64)
 * and sc_ip (IA64).
 * NOTE(review): the nested #if/#else/#endif lines that choose between the
 * __rip and __eip variants, and the final fallback branch, are not visible
 * in this part of the file -- confirm against the full source. */
9941 static void *getMcontextEip(ucontext_t
*uc
) {
9942 #if defined(__FreeBSD__)
9943 return (void*) uc
->uc_mcontext
.mc_eip
;
9944 #elif defined(__dietlibc__)
9945 return (void*) uc
->uc_mcontext
.eip
;
/* Apple builds where MAC_OS_X_VERSION_10_6 is not defined. */
9946 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9948 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9950 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
/* Apple builds where MAC_OS_X_VERSION_10_6 is defined: __rip is used only
 * when the 64 bit thread state struct exists and the target is not i386. */
9952 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9953 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9954 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9956 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
/* NOTE(review): this branch also matches x86-64 yet indexes gregs with
 * REG_EIP -- confirm that constant is available on 64 bit builds. */
9958 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9959 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9960 #elif defined(__ia64__) /* Linux IA64 */
9961 return (void*) uc
->uc_mcontext
.sc_ip
;
9967 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9969 char **messages
= NULL
;
9970 int i
, trace_size
= 0;
9971 unsigned long offset
=0;
9972 ucontext_t
*uc
= (ucontext_t
*) secret
;
9974 REDIS_NOTUSED(info
);
9976 redisLog(REDIS_WARNING
,
9977 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9978 infostring
= genRedisInfoString();
9979 redisLog(REDIS_WARNING
, "%s",infostring
);
9980 /* It's not safe to sdsfree() the returned string under memory
9981 * corruption conditions. Let it leak as we are going to abort */
9983 trace_size
= backtrace(trace
, 100);
9984 /* overwrite sigaction with caller's address */
9985 if (getMcontextEip(uc
) != NULL
) {
9986 trace
[1] = getMcontextEip(uc
);
9988 messages
= backtrace_symbols(trace
, trace_size
);
9990 for (i
=1; i
<trace_size
; ++i
) {
9991 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
9993 p
= strchr(messages
[i
],'+');
9994 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
9995 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
9997 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10000 /* free(messages); Don't call free() with possibly corrupted memory. */
10004 static void setupSigSegvAction(void) {
10005 struct sigaction act
;
10007 sigemptyset (&act
.sa_mask
);
10008 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10009 * is used. Otherwise, sa_handler is used */
10010 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10011 act
.sa_sigaction
= segvHandler
;
10012 sigaction (SIGSEGV
, &act
, NULL
);
10013 sigaction (SIGBUS
, &act
, NULL
);
10014 sigaction (SIGFPE
, &act
, NULL
);
10015 sigaction (SIGILL
, &act
, NULL
);
10016 sigaction (SIGBUS
, &act
, NULL
);
10020 #include "staticsymbols.h"
10021 /* This function try to convert a pointer into a function name. It's used in
10022 * oreder to provide a backtrace under segmentation fault that's able to
10023 * display functions declared as static (otherwise the backtrace is useless). */
10024 static char *findFuncName(void *pointer
, unsigned long *offset
){
10026 unsigned long off
, minoff
= 0;
10028 /* Try to match against the Symbol with the smallest offset */
10029 for (i
=0; symsTable
[i
].pointer
; i
++) {
10030 unsigned long lp
= (unsigned long) pointer
;
10032 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10033 off
=lp
-symsTable
[i
].pointer
;
10034 if (ret
< 0 || off
< minoff
) {
10040 if (ret
== -1) return NULL
;
10042 return symsTable
[ret
].name
;
10044 #else /* HAVE_BACKTRACE */
/* Variant of setupSigSegvAction() compiled when HAVE_BACKTRACE is not
 * defined, so callers can invoke it without an #ifdef of their own.
 * NOTE(review): the body is not visible here; presumably it installs no
 * handler at all -- confirm. */
10045 static void setupSigSegvAction(void) {
10047 #endif /* HAVE_BACKTRACE */