2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "1.3.8"
40 #define __USE_POSIX199309
47 #endif /* HAVE_BACKTRACE */
55 #include <arpa/inet.h>
59 #include <sys/resource.h>
66 #include "solarisfixes.h"
70 #include "ae.h" /* Event driven programming library */
71 #include "sds.h" /* Dynamic safe strings */
72 #include "anet.h" /* Networking the easy way */
73 #include "dict.h" /* Hash tables */
74 #include "adlist.h" /* Linked lists */
75 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
76 #include "lzf.h" /* LZF compression library */
77 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
84 /* Static server configuration */
85 #define REDIS_SERVERPORT 6379 /* TCP port */
86 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
87 #define REDIS_IOBUF_LEN 1024
88 #define REDIS_LOADBUF_LEN 1024
89 #define REDIS_STATIC_ARGS 8
90 #define REDIS_DEFAULT_DBNUM 16
91 #define REDIS_CONFIGLINE_MAX 1024
92 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
93 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* try to expire 10 keys/loop */
95 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
96 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
98 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
99 #define REDIS_WRITEV_THRESHOLD 3
100 /* Max number of iovecs used for each writev call */
101 #define REDIS_WRITEV_IOVEC_COUNT 256
103 /* Hash table parameters */
104 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
107 #define REDIS_CMD_BULK 1 /* Bulk write command */
108 #define REDIS_CMD_INLINE 2 /* Inline command */
109 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
110 this flag will return an error when the 'maxmemory' option is set in the
111 config file and the server is using more than maxmemory bytes of memory.
112 In short these commands are denied on low memory conditions. */
113 #define REDIS_CMD_DENYOOM 4
114 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
117 #define REDIS_STRING 0
123 /* Objects encoding. Some kinds of objects like Strings and Hashes can be
124 * internally represented in multiple ways. The 'encoding' field of the object
125 * is set to one of these values for this object. */
126 #define REDIS_ENCODING_RAW 0 /* Raw representation */
127 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
128 #define REDIS_ENCODING_ZIPMAP 2 /* Encoded as zipmap */
129 #define REDIS_ENCODING_HT 3 /* Encoded as a hash table */
131 static char* strencoding
[] = {
132 "raw", "int", "zipmap", "hashtable"
135 /* Object types only used for dumping to disk */
136 #define REDIS_EXPIRETIME 253
137 #define REDIS_SELECTDB 254
138 #define REDIS_EOF 255
140 /* Defines related to the dump file format. To store 32 bits lengths for short
141 * keys requires a lot of space, so we check the most significant 2 bits of
142 * the first byte to interpret the length:
144 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
145 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
146 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
147 * 11|000000 this means: specially encoded object will follow. The six bits
148 * number specify the kind of object that follows.
149 * See the REDIS_RDB_ENC_* defines.
151 * Lengths up to 63 are stored using a single byte, most DB keys, and many
152 * values, will fit inside. */
153 #define REDIS_RDB_6BITLEN 0
154 #define REDIS_RDB_14BITLEN 1
155 #define REDIS_RDB_32BITLEN 2
156 #define REDIS_RDB_ENCVAL 3
157 #define REDIS_RDB_LENERR UINT_MAX
159 /* When a length of a string object stored on disk has the first two bits
160 * set, the remaining six bits specify a special encoding for the object
161 * according to the following defines: */
162 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
163 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
164 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
165 #define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF */
167 /* Virtual memory object->where field. */
168 #define REDIS_VM_MEMORY 0 /* The object is on memory */
169 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
170 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
171 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
173 /* Virtual memory static configuration stuff.
174 * Check vmFindContiguousPages() to know more about these magic numbers. */
175 #define REDIS_VM_MAX_NEAR_PAGES 65536
176 #define REDIS_VM_MAX_RANDOM_JUMP 4096
177 #define REDIS_VM_MAX_THREADS 32
178 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
179 /* The following is the *percentage* of completed I/O jobs to process when the
180 * handler is called. While Virtual Memory I/O operations are performed by
181 * threads, these operations must be processed by the main thread when completed
182 * in order to take effect. */
183 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
186 #define REDIS_SLAVE 1 /* This client is a slave server */
187 #define REDIS_MASTER 2 /* This client is a master server */
188 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
189 #define REDIS_MULTI 8 /* This client is in a MULTI context */
190 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
191 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
193 /* Slave replication state - slave side */
194 #define REDIS_REPL_NONE 0 /* No active replication */
195 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
196 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
198 /* Slave replication state - from the point of view of master
199 * Note that in SEND_BULK and ONLINE state the slave receives new updates
200 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
201 * to start the next background saving in order to send updates to it. */
202 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
203 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
204 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
205 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
207 /* List related stuff */
211 /* Sort operations */
212 #define REDIS_SORT_GET 0
213 #define REDIS_SORT_ASC 1
214 #define REDIS_SORT_DESC 2
215 #define REDIS_SORTKEY_MAX 1024
218 #define REDIS_DEBUG 0
219 #define REDIS_VERBOSE 1
220 #define REDIS_NOTICE 2
221 #define REDIS_WARNING 3
223 /* Anti-warning macro... */
224 #define REDIS_NOTUSED(V) ((void) V)
226 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
227 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
229 /* Append only defines */
230 #define APPENDFSYNC_NO 0
231 #define APPENDFSYNC_ALWAYS 1
232 #define APPENDFSYNC_EVERYSEC 2
234 /* Hashes related defaults */
235 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
236 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
238 /* We can print the stacktrace, so our assert is defined this way:
 * when the expression _e is false, _redisAssert() is called with the
 * stringified expression, __FILE__ and __LINE__, and the process is then
 * terminated with _exit(1). When _e is true the macro evaluates to (void)0. */
239 #define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
/* Failure handler used by redisAssert(): receives the text of the failed
 * expression, the source file name and the line number. */
240 static void _redisAssert(char *estr
, char *file
, int line
);
242 /*================================= Data types ============================== */
244 /* A redis object, that is a type able to hold a string / list / set */
246 /* The VM object structure */
247 struct redisObjectVM
{
248 off_t page
; /* the page at which the object is stored on disk */
249 off_t usedpages
; /* number of pages used on disk */
250 time_t atime
; /* Last access time */
253 /* The actual Redis Object */
254 typedef struct redisObject
{
257 unsigned char encoding
;
258 unsigned char storage
; /* If this object is a key, where is the value?
259 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
260 unsigned char vtype
; /* If this object is a key, and value is swapped out,
261 * this is the type of the swapped out object. */
263 /* VM fields, these are only allocated if VM is active, otherwise the
264 * object allocation function will just allocate
265 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
266 * Redis without VM active will not have any overhead. */
267 struct redisObjectVM vm
;
270 /* Macro used to initialize a Redis object allocated on the stack.
271 * Note that this macro is kept near the structure definition to make sure
272 * we'll update it when the structure is changed, to avoid bugs like
273 * bug #85 introduced exactly in this way. */
274 #define initStaticStringObject(_var,_ptr) do { \
276 _var.type = REDIS_STRING; \
277 _var.encoding = REDIS_ENCODING_RAW; \
279 if (server.vm_enabled) _var.storage = REDIS_VM_MEMORY; \
282 typedef struct redisDb
{
283 dict
*dict
; /* The keyspace for this DB */
284 dict
*expires
; /* Timeout of keys with a timeout set */
285 dict
*blockingkeys
; /* Keys with clients waiting for data (BLPOP) */
286 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
290 /* Client MULTI/EXEC state */
291 typedef struct multiCmd
{
294 struct redisCommand
*cmd
;
297 typedef struct multiState
{
298 multiCmd
*commands
; /* Array of MULTI commands */
299 int count
; /* Total number of MULTI commands */
302 /* With multiplexing we need to keep per-client state.
303 * Clients are kept in a linked list. */
304 typedef struct redisClient
{
309 robj
**argv
, **mbargv
;
311 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
312 int multibulk
; /* multi bulk command format active */
315 time_t lastinteraction
; /* time of the last interaction, used for timeout */
316 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
317 int slaveseldb
; /* slave selected db, if this client is a slave */
318 int authenticated
; /* when requirepass is non-NULL */
319 int replstate
; /* replication state if this is a slave */
320 int repldbfd
; /* replication DB file descriptor */
321 long repldboff
; /* replication DB file offset */
322 off_t repldbsize
; /* replication DB file size */
323 multiState mstate
; /* MULTI/EXEC state */
324 robj
**blockingkeys
; /* The key we are waiting to terminate a blocking
325 * operation such as BLPOP. Otherwise NULL. */
326 int blockingkeysnum
; /* Number of blocking keys */
327 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
328 * is >= blockingto then the operation timed out. */
329 list
*io_keys
; /* Keys this client is waiting to be loaded from the
330 * swap file in order to continue. */
331 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
332 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
340 /* Global server state structure */
345 long long dirty
; /* changes to DB from the last save */
347 list
*slaves
, *monitors
;
348 char neterr
[ANET_ERR_LEN
];
350 int cronloops
; /* number of times the cron function run */
351 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
352 time_t lastsave
; /* Unix time of the last successful save */
353 /* Fields used only for stats */
354 time_t stat_starttime
; /* server start time */
355 long long stat_numcommands
; /* number of processed commands */
356 long long stat_numconnections
; /* number of connections received */
357 long long stat_expiredkeys
; /* number of expired keys */
370 pid_t bgsavechildpid
;
371 pid_t bgrewritechildpid
;
372 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
373 struct saveparam
*saveparams
;
378 char *appendfilename
;
382 /* Replication related */
387 redisClient
*master
; /* client that is master for this slave */
389 unsigned int maxclients
;
390 unsigned long long maxmemory
;
391 unsigned int blpop_blocked_clients
;
392 unsigned int vm_blocked_clients
;
393 /* Sort parameters - qsort_r() is only available under BSD so we
394 * have to keep this state global, in order to pass it to sortCompare() */
398 /* Virtual memory configuration */
403 unsigned long long vm_max_memory
;
405 size_t hash_max_zipmap_entries
;
406 size_t hash_max_zipmap_value
;
407 /* Virtual memory state */
410 off_t vm_next_page
; /* Next probably empty page */
411 off_t vm_near_pages
; /* Number of pages allocated sequentially */
412 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
413 time_t unixtime
; /* Unix time sampled every second. */
414 /* Virtual memory I/O threads stuff */
415 /* An I/O thread processes an element taken from the io_newjobs queue and
416 * puts the result of the operation in the io_processed list. While the
417 * job is being processed, it's put on the io_processing queue. */
418 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
419 list
*io_processing
; /* List of VM I/O jobs being processed */
420 list
*io_processed
; /* List of VM I/O jobs already processed */
421 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
422 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
423 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
424 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
425 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
426 int io_active_threads
; /* Number of running I/O threads */
427 int vm_max_threads
; /* Max number of I/O threads running at the same time */
428 /* Our main thread is blocked on the event loop, looking for sockets ready
429 * to be read or written, so when a threaded I/O operation is ready to be
430 * processed by the main thread, the I/O thread will use a unix pipe to
431 * awake the main thread. The following are the two pipe FDs. */
432 int io_ready_pipe_read
;
433 int io_ready_pipe_write
;
434 /* Virtual memory stats */
435 unsigned long long vm_stats_used_pages
;
436 unsigned long long vm_stats_swapped_objects
;
437 unsigned long long vm_stats_swapouts
;
438 unsigned long long vm_stats_swapins
;
440 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
441 list
*pubsub_patterns
; /* A list of pubsub_patterns */
446 typedef struct pubsubPattern
{
451 typedef void redisCommandProc(redisClient
*c
);
452 struct redisCommand
{
454 redisCommandProc
*proc
;
457 /* Use a function to determine which keys need to be loaded
458 * in the background prior to executing this command. Takes precedence
459 * over vm_firstkey and others, ignored when NULL */
460 redisCommandProc
*vm_preload_proc
;
461 /* What keys should be loaded in background when calling this command? */
462 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
463 int vm_lastkey
; /* The last argument that's a key */
464 int vm_keystep
; /* The step between first and last key */
467 struct redisFunctionSym
{
469 unsigned long pointer
;
472 typedef struct _redisSortObject
{
480 typedef struct _redisSortOperation
{
483 } redisSortOperation
;
485 /* ZSETs use a specialized version of Skiplists */
487 typedef struct zskiplistNode
{
488 struct zskiplistNode
**forward
;
489 struct zskiplistNode
*backward
;
495 typedef struct zskiplist
{
496 struct zskiplistNode
*header
, *tail
;
497 unsigned long length
;
501 typedef struct zset
{
506 /* Our shared "common" objects */
508 #define REDIS_SHARED_INTEGERS 10000
509 struct sharedObjectsStruct
{
510 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
511 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
512 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
513 *outofrangeerr
, *plus
,
514 *select0
, *select1
, *select2
, *select3
, *select4
,
515 *select5
, *select6
, *select7
, *select8
, *select9
,
516 *messagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
517 *psubscribebulk
, *punsubscribebulk
, *integers
[REDIS_SHARED_INTEGERS
];
520 /* Global vars that are actually used as constants. The following double
521 * values are used for double on-disk serialization, and are initialized
522 * at runtime to avoid strange compiler optimizations. */
524 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
526 /* VM threaded I/O request message */
527 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
528 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
529 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
530 typedef struct iojob
{
531 int type
; /* Request type, REDIS_IOJOB_* */
532 redisDb
*db
;/* Redis database */
533 robj
*key
; /* This I/O request is about swapping this key */
534 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
535 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
536 off_t page
; /* Swap page where to read/write the object */
537 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
538 int canceled
; /* True if this command was canceled by blocking side of VM */
539 pthread_t thread
; /* ID of the thread processing this entry */
542 /*================================ Prototypes =============================== */
544 static void freeStringObject(robj
*o
);
545 static void freeListObject(robj
*o
);
546 static void freeSetObject(robj
*o
);
547 static void decrRefCount(void *o
);
548 static robj
*createObject(int type
, void *ptr
);
549 static void freeClient(redisClient
*c
);
550 static int rdbLoad(char *filename
);
551 static void addReply(redisClient
*c
, robj
*obj
);
552 static void addReplySds(redisClient
*c
, sds s
);
553 static void incrRefCount(robj
*o
);
554 static int rdbSaveBackground(char *filename
);
555 static robj
*createStringObject(char *ptr
, size_t len
);
556 static robj
*dupStringObject(robj
*o
);
557 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
558 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
559 static int syncWithMaster(void);
560 static robj
*tryObjectEncoding(robj
*o
);
561 static robj
*getDecodedObject(robj
*o
);
562 static int removeExpire(redisDb
*db
, robj
*key
);
563 static int expireIfNeeded(redisDb
*db
, robj
*key
);
564 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
565 static int deleteIfSwapped(redisDb
*db
, robj
*key
);
566 static int deleteKey(redisDb
*db
, robj
*key
);
567 static time_t getExpire(redisDb
*db
, robj
*key
);
568 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
569 static void updateSlavesWaitingBgsave(int bgsaveerr
);
570 static void freeMemoryIfNeeded(void);
571 static int processCommand(redisClient
*c
);
572 static void setupSigSegvAction(void);
573 static void rdbRemoveTempFile(pid_t childpid
);
574 static void aofRemoveTempFile(pid_t childpid
);
575 static size_t stringObjectLen(robj
*o
);
576 static void processInputBuffer(redisClient
*c
);
577 static zskiplist
*zslCreate(void);
578 static void zslFree(zskiplist
*zsl
);
579 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
580 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
581 static void initClientMultiState(redisClient
*c
);
582 static void freeClientMultiState(redisClient
*c
);
583 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
584 static void unblockClientWaitingData(redisClient
*c
);
585 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
586 static void vmInit(void);
587 static void vmMarkPagesFree(off_t page
, off_t count
);
588 static robj
*vmLoadObject(robj
*key
);
589 static robj
*vmPreviewObject(robj
*key
);
590 static int vmSwapOneObjectBlocking(void);
591 static int vmSwapOneObjectThreaded(void);
592 static int vmCanSwapOut(void);
593 static int tryFreeOneObjectFromFreelist(void);
594 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
595 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
596 static void vmCancelThreadedIOJob(robj
*o
);
597 static void lockThreadedIO(void);
598 static void unlockThreadedIO(void);
599 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
600 static void freeIOJob(iojob
*j
);
601 static void queueIOJob(iojob
*j
);
602 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
603 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
604 static void waitEmptyIOJobsQueue(void);
605 static void vmReopenSwapFile(void);
606 static int vmFreePage(off_t page
);
607 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
);
608 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
);
609 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
610 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
611 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
612 static struct redisCommand
*lookupCommand(char *name
);
613 static void call(redisClient
*c
, struct redisCommand
*cmd
);
614 static void resetClient(redisClient
*c
);
615 static void convertToRealHash(robj
*o
);
616 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
617 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
618 static void freePubsubPattern(void *p
);
619 static int listMatchPubsubPattern(void *a
, void *b
);
620 static int compareStringObjects(robj
*a
, robj
*b
);
623 static void authCommand(redisClient
*c
);
624 static void pingCommand(redisClient
*c
);
625 static void echoCommand(redisClient
*c
);
626 static void setCommand(redisClient
*c
);
627 static void setnxCommand(redisClient
*c
);
628 static void getCommand(redisClient
*c
);
629 static void delCommand(redisClient
*c
);
630 static void existsCommand(redisClient
*c
);
631 static void incrCommand(redisClient
*c
);
632 static void decrCommand(redisClient
*c
);
633 static void incrbyCommand(redisClient
*c
);
634 static void decrbyCommand(redisClient
*c
);
635 static void selectCommand(redisClient
*c
);
636 static void randomkeyCommand(redisClient
*c
);
637 static void keysCommand(redisClient
*c
);
638 static void dbsizeCommand(redisClient
*c
);
639 static void lastsaveCommand(redisClient
*c
);
640 static void saveCommand(redisClient
*c
);
641 static void bgsaveCommand(redisClient
*c
);
642 static void bgrewriteaofCommand(redisClient
*c
);
643 static void shutdownCommand(redisClient
*c
);
644 static void moveCommand(redisClient
*c
);
645 static void renameCommand(redisClient
*c
);
646 static void renamenxCommand(redisClient
*c
);
647 static void lpushCommand(redisClient
*c
);
648 static void rpushCommand(redisClient
*c
);
649 static void lpopCommand(redisClient
*c
);
650 static void rpopCommand(redisClient
*c
);
651 static void llenCommand(redisClient
*c
);
652 static void lindexCommand(redisClient
*c
);
653 static void lrangeCommand(redisClient
*c
);
654 static void ltrimCommand(redisClient
*c
);
655 static void typeCommand(redisClient
*c
);
656 static void lsetCommand(redisClient
*c
);
657 static void saddCommand(redisClient
*c
);
658 static void sremCommand(redisClient
*c
);
659 static void smoveCommand(redisClient
*c
);
660 static void sismemberCommand(redisClient
*c
);
661 static void scardCommand(redisClient
*c
);
662 static void spopCommand(redisClient
*c
);
663 static void srandmemberCommand(redisClient
*c
);
664 static void sinterCommand(redisClient
*c
);
665 static void sinterstoreCommand(redisClient
*c
);
666 static void sunionCommand(redisClient
*c
);
667 static void sunionstoreCommand(redisClient
*c
);
668 static void sdiffCommand(redisClient
*c
);
669 static void sdiffstoreCommand(redisClient
*c
);
670 static void syncCommand(redisClient
*c
);
671 static void flushdbCommand(redisClient
*c
);
672 static void flushallCommand(redisClient
*c
);
673 static void sortCommand(redisClient
*c
);
674 static void lremCommand(redisClient
*c
);
675 static void rpoplpushcommand(redisClient
*c
);
676 static void infoCommand(redisClient
*c
);
677 static void mgetCommand(redisClient
*c
);
678 static void monitorCommand(redisClient
*c
);
679 static void expireCommand(redisClient
*c
);
680 static void expireatCommand(redisClient
*c
);
681 static void getsetCommand(redisClient
*c
);
682 static void ttlCommand(redisClient
*c
);
683 static void slaveofCommand(redisClient
*c
);
684 static void debugCommand(redisClient
*c
);
685 static void msetCommand(redisClient
*c
);
686 static void msetnxCommand(redisClient
*c
);
687 static void zaddCommand(redisClient
*c
);
688 static void zincrbyCommand(redisClient
*c
);
689 static void zrangeCommand(redisClient
*c
);
690 static void zrangebyscoreCommand(redisClient
*c
);
691 static void zcountCommand(redisClient
*c
);
692 static void zrevrangeCommand(redisClient
*c
);
693 static void zcardCommand(redisClient
*c
);
694 static void zremCommand(redisClient
*c
);
695 static void zscoreCommand(redisClient
*c
);
696 static void zremrangebyscoreCommand(redisClient
*c
);
697 static void multiCommand(redisClient
*c
);
698 static void execCommand(redisClient
*c
);
699 static void discardCommand(redisClient
*c
);
700 static void blpopCommand(redisClient
*c
);
701 static void brpopCommand(redisClient
*c
);
702 static void appendCommand(redisClient
*c
);
703 static void substrCommand(redisClient
*c
);
704 static void zrankCommand(redisClient
*c
);
705 static void zrevrankCommand(redisClient
*c
);
706 static void hsetCommand(redisClient
*c
);
707 static void hgetCommand(redisClient
*c
);
708 static void hmsetCommand(redisClient
*c
);
709 static void hmgetCommand(redisClient
*c
);
710 static void hdelCommand(redisClient
*c
);
711 static void hlenCommand(redisClient
*c
);
712 static void zremrangebyrankCommand(redisClient
*c
);
713 static void zunionCommand(redisClient
*c
);
714 static void zinterCommand(redisClient
*c
);
715 static void hkeysCommand(redisClient
*c
);
716 static void hvalsCommand(redisClient
*c
);
717 static void hgetallCommand(redisClient
*c
);
718 static void hexistsCommand(redisClient
*c
);
719 static void configCommand(redisClient
*c
);
720 static void hincrbyCommand(redisClient
*c
);
721 static void subscribeCommand(redisClient
*c
);
722 static void unsubscribeCommand(redisClient
*c
);
723 static void psubscribeCommand(redisClient
*c
);
724 static void punsubscribeCommand(redisClient
*c
);
725 static void publishCommand(redisClient
*c
);
727 /*================================= Globals ================================= */
730 static struct redisServer server
; /* server global state */
731 static struct redisCommand cmdTable
[] = {
732 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
733 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
734 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
735 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
736 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
737 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
738 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
739 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
740 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
741 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
742 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
743 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
744 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
745 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
746 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
747 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
748 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
749 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
750 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
751 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
752 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
753 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
754 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
755 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
756 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
757 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
758 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
759 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
760 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
761 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
762 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
763 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
764 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
765 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
766 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
767 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
768 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
769 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
770 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
771 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
772 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
773 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
774 {"zunion",zunionCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
775 {"zinter",zinterCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
776 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
777 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
779 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
780 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
781 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
783 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
784 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
785 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
786 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
788 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
789 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
790 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
792 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
794 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
795 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
796 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
798 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
799 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
800 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
801 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
802 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
803 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
804 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
805 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
806 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
807 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
808 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
809 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
810 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
811 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
812 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
813 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
814 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
815 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
816 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
817 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
818 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
819 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
820 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
821 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
822 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
823 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
824 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
825 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
826 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
827 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
828 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
829 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
830 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
831 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
832 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
833 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
834 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
835 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
836 {NULL
,NULL
,0,0,NULL
,0,0,0}
839 /*============================ Utility functions ============================ */
841 /* Glob-style pattern matching. */
842 static int stringmatchlen(const char *pattern
, int patternLen
,
843 const char *string
, int stringLen
, int nocase
)
848 while (pattern
[1] == '*') {
853 return 1; /* match */
855 if (stringmatchlen(pattern
+1, patternLen
-1,
856 string
, stringLen
, nocase
))
857 return 1; /* match */
861 return 0; /* no match */
865 return 0; /* no match */
875 not = pattern
[0] == '^';
882 if (pattern
[0] == '\\') {
885 if (pattern
[0] == string
[0])
887 } else if (pattern
[0] == ']') {
889 } else if (patternLen
== 0) {
893 } else if (pattern
[1] == '-' && patternLen
>= 3) {
894 int start
= pattern
[0];
895 int end
= pattern
[2];
903 start
= tolower(start
);
909 if (c
>= start
&& c
<= end
)
913 if (pattern
[0] == string
[0])
916 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
926 return 0; /* no match */
932 if (patternLen
>= 2) {
939 if (pattern
[0] != string
[0])
940 return 0; /* no match */
942 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
943 return 0; /* no match */
951 if (stringLen
== 0) {
952 while(*pattern
== '*') {
959 if (patternLen
== 0 && stringLen
== 0)
/* Convenience wrapper around stringmatchlen() for NUL-terminated inputs.
 * The lengths of 'pattern' and 'string' are computed with strlen() and the
 * 'nocase' flag is forwarded unchanged; the result of stringmatchlen() is
 * returned as-is (1 on match, 0 on no match). */
964 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
965 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
968 static void redisLog(int level
, const char *fmt
, ...) {
972 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
976 if (level
>= server
.verbosity
) {
982 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
983 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
984 vfprintf(fp
, fmt
, ap
);
990 if (server
.logfile
) fclose(fp
);
993 /*====================== Hash table type implementation ==================== */
995 /* This is a hash table type that uses the SDS dynamic strings library as
996 * keys and redis objects as values (objects can hold SDS strings,
999 static void dictVanillaFree(void *privdata
, void *val
)
1001 DICT_NOTUSED(privdata
);
/* Dict value destructor for values that are adlist lists: the value is cast
 * to (list *) and released with listRelease(). 'privdata' is not used (it is
 * only passed to DICT_NOTUSED). */
1005 static void dictListDestructor(void *privdata
, void *val
)
1007 DICT_NOTUSED(privdata
);
1008 listRelease((list
*)val
);
1011 static int sdsDictKeyCompare(void *privdata
, const void *key1
,
1015 DICT_NOTUSED(privdata
);
1017 l1
= sdslen((sds
)key1
);
1018 l2
= sdslen((sds
)key2
);
1019 if (l1
!= l2
) return 0;
1020 return memcmp(key1
, key2
, l1
) == 0;
1023 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1025 DICT_NOTUSED(privdata
);
1027 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1031 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1034 const robj
*o1
= key1
, *o2
= key2
;
1035 return sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
/* Dict hash callback for keys that are robj pointers. The hash is computed
 * by dictGenHashFunction() over the sdslen() bytes of o->ptr.
 * Note: o->ptr is cast to sds unconditionally, so the key object must hold
 * an sds string. */
1038 static unsigned int dictObjHash(const void *key
) {
1039 const robj
*o
= key
;
1040 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1043 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1046 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1049 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1050 o2
->encoding
== REDIS_ENCODING_INT
&&
1051 o1
->ptr
== o2
->ptr
) return 1;
1053 o1
= getDecodedObject(o1
);
1054 o2
= getDecodedObject(o2
);
1055 cmp
= sdsDictKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1061 static unsigned int dictEncObjHash(const void *key
) {
1062 robj
*o
= (robj
*) key
;
1064 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1065 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1067 if (o
->encoding
== REDIS_ENCODING_INT
) {
1071 len
= snprintf(buf
,32,"%ld",(long)o
->ptr
);
1072 return dictGenHashFunction((unsigned char*)buf
, len
);
1076 o
= getDecodedObject(o
);
1077 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1084 /* Sets type and expires */
1085 static dictType setDictType
= {
1086 dictEncObjHash
, /* hash function */
1089 dictEncObjKeyCompare
, /* key compare */
1090 dictRedisObjectDestructor
, /* key destructor */
1091 NULL
/* val destructor */
1094 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1095 static dictType zsetDictType
= {
1096 dictEncObjHash
, /* hash function */
1099 dictEncObjKeyCompare
, /* key compare */
1100 dictRedisObjectDestructor
, /* key destructor */
1101 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1105 static dictType dbDictType
= {
1106 dictObjHash
, /* hash function */
1109 dictObjKeyCompare
, /* key compare */
1110 dictRedisObjectDestructor
, /* key destructor */
1111 dictRedisObjectDestructor
/* val destructor */
1115 static dictType keyptrDictType
= {
1116 dictObjHash
, /* hash function */
1119 dictObjKeyCompare
, /* key compare */
1120 dictRedisObjectDestructor
, /* key destructor */
1121 NULL
/* val destructor */
1124 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1125 static dictType hashDictType
= {
1126 dictEncObjHash
, /* hash function */
1129 dictEncObjKeyCompare
, /* key compare */
1130 dictRedisObjectDestructor
, /* key destructor */
1131 dictRedisObjectDestructor
/* val destructor */
1134 /* Keylist hash table type has unencoded redis objects as keys and
1135 * lists as values. It's used for blocking operations (BLPOP) and to
1136 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1137 static dictType keylistDictType
= {
1138 dictObjHash
, /* hash function */
1141 dictObjKeyCompare
, /* key compare */
1142 dictRedisObjectDestructor
, /* key destructor */
1143 dictListDestructor
/* val destructor */
1146 static void version();
1148 /* ========================= Random utility functions ======================= */
1150 /* Redis generally does not try to recover from out of memory conditions
1151 * when allocating objects or strings, it is not clear if it will be possible
1152 * to report this condition to the client since the networking layer itself
1153 * is based on heap allocation for send buffers, so we simply abort.
1154 * At least the code will be simpler to read... */
1155 static void oom(const char *msg
) {
1156 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1161 /* ====================== Redis server networking stuff ===================== */
1162 static void closeTimedoutClients(void) {
1165 time_t now
= time(NULL
);
1168 listRewind(server
.clients
,&li
);
1169 while ((ln
= listNext(&li
)) != NULL
) {
1170 c
= listNodeValue(ln
);
1171 if (server
.maxidletime
&&
1172 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1173 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1174 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1175 listLength(c
->pubsub_patterns
) == 0 &&
1176 (now
- c
->lastinteraction
> server
.maxidletime
))
1178 redisLog(REDIS_VERBOSE
,"Closing idle client");
1180 } else if (c
->flags
& REDIS_BLOCKED
) {
1181 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1182 addReply(c
,shared
.nullmultibulk
);
1183 unblockClientWaitingData(c
);
/* Returns non-zero when the given hash table is sparse enough to be worth
 * shrinking. All of the following must hold:
 *   - the table has at least one slot (size != 0) and one element (used != 0);
 *   - the number of slots is greater than DICT_HT_INITIAL_SIZE;
 *   - the fill percentage, used*100/size (integer division), is below
 *     REDIS_HT_MINFILL.
 * Returns 0 otherwise. */
1189 static int htNeedsResize(dict
*dict
) {
1190 long long size
, used
;
1192 size
= dictSlots(dict
);
1193 used
= dictSize(dict
);
1194 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1195 (used
*100/size
< REDIS_HT_MINFILL
));
1198 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1199 * we resize the hash table to save memory */
1200 static void tryResizeHashTables(void) {
1203 for (j
= 0; j
< server
.dbnum
; j
++) {
1204 if (htNeedsResize(server
.db
[j
].dict
)) {
1205 redisLog(REDIS_VERBOSE
,"The hash table %d is too sparse, resize it...",j
);
1206 dictResize(server
.db
[j
].dict
);
1207 redisLog(REDIS_VERBOSE
,"Hash table %d resized.",j
);
1209 if (htNeedsResize(server
.db
[j
].expires
))
1210 dictResize(server
.db
[j
].expires
);
1214 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1215 void backgroundSaveDoneHandler(int statloc
) {
1216 int exitcode
= WEXITSTATUS(statloc
);
1217 int bysignal
= WIFSIGNALED(statloc
);
1219 if (!bysignal
&& exitcode
== 0) {
1220 redisLog(REDIS_NOTICE
,
1221 "Background saving terminated with success");
1223 server
.lastsave
= time(NULL
);
1224 } else if (!bysignal
&& exitcode
!= 0) {
1225 redisLog(REDIS_WARNING
, "Background saving error");
1227 redisLog(REDIS_WARNING
,
1228 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1229 rdbRemoveTempFile(server
.bgsavechildpid
);
1231 server
.bgsavechildpid
= -1;
1232 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1233 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1234 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1237 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1239 void backgroundRewriteDoneHandler(int statloc
) {
1240 int exitcode
= WEXITSTATUS(statloc
);
1241 int bysignal
= WIFSIGNALED(statloc
);
1243 if (!bysignal
&& exitcode
== 0) {
1247 redisLog(REDIS_NOTICE
,
1248 "Background append only file rewriting terminated with success");
1249 /* Now it's time to flush the differences accumulated by the parent */
1250 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1251 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1253 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1256 /* Flush our data... */
1257 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1258 (signed) sdslen(server
.bgrewritebuf
)) {
1259 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1263 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1264 /* Now our work is to rename the temp file into the stable file. And
1265 * switch the file descriptor used by the server for append only. */
1266 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1267 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1271 /* Mission completed... almost */
1272 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1273 if (server
.appendfd
!= -1) {
1274 /* If append only is actually enabled... */
1275 close(server
.appendfd
);
1276 server
.appendfd
= fd
;
1278 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1279 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1281 /* If append only is disabled we just generate a dump in this
1282 * format. Why not? */
1285 } else if (!bysignal
&& exitcode
!= 0) {
1286 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1288 redisLog(REDIS_WARNING
,
1289 "Background append only file rewriting terminated by signal %d",
1293 sdsfree(server
.bgrewritebuf
);
1294 server
.bgrewritebuf
= sdsempty();
1295 aofRemoveTempFile(server
.bgrewritechildpid
);
1296 server
.bgrewritechildpid
= -1;
1299 /* This function is called once a background process of some kind terminates,
1300 * as we want to avoid resizing the hash tables when there is a child in order
1301 * to play well with copy-on-write (otherwise when a resize happens lots of
1302 * memory pages are copied). The goal of this function is to update the ability
1303 * for dict.c to resize the hash tables according to whether or not we have
1304 * running children. */
1305 static void updateDictResizePolicy(void) {
1306 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1309 dictDisableResize();
1312 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1313 int j
, loops
= server
.cronloops
++;
1314 REDIS_NOTUSED(eventLoop
);
1316 REDIS_NOTUSED(clientData
);
1318 /* We take a cached value of the unix time in the global state because
1319 * with virtual memory and aging there is the need to store the current time
1320 * in objects at every object access, and accuracy is not needed.
1321 * To access a global var is faster than calling time(NULL) */
1322 server
.unixtime
= time(NULL
);
1324 /* Show some info about non-empty databases */
1325 for (j
= 0; j
< server
.dbnum
; j
++) {
1326 long long size
, used
, vkeys
;
1328 size
= dictSlots(server
.db
[j
].dict
);
1329 used
= dictSize(server
.db
[j
].dict
);
1330 vkeys
= dictSize(server
.db
[j
].expires
);
1331 if (!(loops
% 50) && (used
|| vkeys
)) {
1332 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1333 /* dictPrintStats(server.dict); */
1337 /* We don't want to resize the hash tables while a background saving
1338 * is in progress: the saving child is created using fork() that is
1339 * implemented with a copy-on-write semantic in most modern systems, so
1340 * if we resize the HT while there is the saving child at work actually
1341 * a lot of memory movements in the parent will cause a lot of pages
1343 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1 &&
1346 tryResizeHashTables();
1349 /* Show information about connected clients */
1350 if (!(loops
% 50)) {
1351 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1352 listLength(server
.clients
)-listLength(server
.slaves
),
1353 listLength(server
.slaves
),
1354 zmalloc_used_memory());
1357 /* Close connections of timedout clients */
1358 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1359 closeTimedoutClients();
1361 /* Check if a background saving or AOF rewrite in progress terminated */
1362 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1366 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1367 if (pid
== server
.bgsavechildpid
) {
1368 backgroundSaveDoneHandler(statloc
);
1370 backgroundRewriteDoneHandler(statloc
);
1372 updateDictResizePolicy();
1375 /* If there is not a background saving in progress check if
1376 * we have to save now */
1377 time_t now
= time(NULL
);
1378 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1379 struct saveparam
*sp
= server
.saveparams
+j
;
1381 if (server
.dirty
>= sp
->changes
&&
1382 now
-server
.lastsave
> sp
->seconds
) {
1383 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1384 sp
->changes
, sp
->seconds
);
1385 rdbSaveBackground(server
.dbfilename
);
1391 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1392 * will use few CPU cycles if there are few expiring keys, otherwise
1393 * it will get more aggressive to avoid that too much memory is used by
1394 * keys that can be removed from the keyspace. */
1395 for (j
= 0; j
< server
.dbnum
; j
++) {
1397 redisDb
*db
= server
.db
+j
;
1399 /* Continue to expire if at the end of the cycle more than 25%
1400 * of the keys were expired. */
1402 long num
= dictSize(db
->expires
);
1403 time_t now
= time(NULL
);
1406 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1407 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1412 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1413 t
= (time_t) dictGetEntryVal(de
);
1415 deleteKey(db
,dictGetEntryKey(de
));
1417 server
.stat_expiredkeys
++;
1420 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1423 /* Swap a few keys on disk if we are over the memory limit and VM
1424 * is enabled. Try to free objects from the free list first. */
1425 if (vmCanSwapOut()) {
1426 while (server
.vm_enabled
&& zmalloc_used_memory() >
1427 server
.vm_max_memory
)
1431 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1432 retval
= (server
.vm_max_threads
== 0) ?
1433 vmSwapOneObjectBlocking() :
1434 vmSwapOneObjectThreaded();
1435 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1436 zmalloc_used_memory() >
1437 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1439 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1441 /* Note that when using threaded I/O we free just one object,
1442 * because anyway when the I/O thread in charge to swap this
1443 * object out will finish, the handler of completed jobs
1444 * will try to swap more objects if we are still out of memory. */
1445 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1449 /* Check if we should connect to a MASTER */
1450 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1451 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1452 if (syncWithMaster() == REDIS_OK
) {
1453 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1459 /* This function gets called every time Redis is entering the
1460 * main loop of the event driven library, that is, before sleeping
1461 * for ready file descriptors. */
1462 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1463 REDIS_NOTUSED(eventLoop
);
1465 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1469 listRewind(server
.io_ready_clients
,&li
);
1470 while((ln
= listNext(&li
))) {
1471 redisClient
*c
= ln
->value
;
1472 struct redisCommand
*cmd
;
1474 /* Resume the client. */
1475 listDelNode(server
.io_ready_clients
,ln
);
1476 c
->flags
&= (~REDIS_IO_WAIT
);
1477 server
.vm_blocked_clients
--;
1478 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1479 readQueryFromClient
, c
);
1480 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1481 assert(cmd
!= NULL
);
1484 /* There may be more data to process in the input buffer. */
1485 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1486 processInputBuffer(c
);
1491 static void createSharedObjects(void) {
1494 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1495 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1496 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1497 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1498 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1499 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1500 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1501 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1502 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1503 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1504 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1505 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1506 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1507 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1508 "-ERR no such key\r\n"));
1509 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1510 "-ERR syntax error\r\n"));
1511 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1512 "-ERR source and destination objects are the same\r\n"));
1513 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1514 "-ERR index out of range\r\n"));
1515 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1516 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1517 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1518 shared
.select0
= createStringObject("select 0\r\n",10);
1519 shared
.select1
= createStringObject("select 1\r\n",10);
1520 shared
.select2
= createStringObject("select 2\r\n",10);
1521 shared
.select3
= createStringObject("select 3\r\n",10);
1522 shared
.select4
= createStringObject("select 4\r\n",10);
1523 shared
.select5
= createStringObject("select 5\r\n",10);
1524 shared
.select6
= createStringObject("select 6\r\n",10);
1525 shared
.select7
= createStringObject("select 7\r\n",10);
1526 shared
.select8
= createStringObject("select 8\r\n",10);
1527 shared
.select9
= createStringObject("select 9\r\n",10);
1528 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1529 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1530 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1531 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1532 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1533 shared
.mbulk3
= createStringObject("*3\r\n",4);
1534 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1535 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1536 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
/* Appends one save point to the server configuration.
 *
 * The server.saveparams array is grown by one 'struct saveparam' with
 * zrealloc(), the new last slot is filled with 'seconds' and 'changes', and
 * server.saveparamslen is incremented.
 *
 * NOTE(review): the zrealloc() return value is used without a NULL check;
 * presumably allocation failure is handled elsewhere -- confirm against
 * zmalloc.c. */
1540 static void appendServerSaveParams(time_t seconds
, int changes
) {
1541 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1542 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1543 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1544 server
.saveparamslen
++;
/* Discards every configured save point: the server.saveparams array is
 * released with zfree(), then the pointer is reset to NULL and
 * server.saveparamslen to 0, leaving an empty list. */
1547 static void resetServerSaveParams() {
1548 zfree(server
.saveparams
);
1549 server
.saveparams
= NULL
;
1550 server
.saveparamslen
= 0;
1553 static void initServerConfig() {
1554 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1555 server
.port
= REDIS_SERVERPORT
;
1556 server
.verbosity
= REDIS_VERBOSE
;
1557 server
.maxidletime
= REDIS_MAXIDLETIME
;
1558 server
.saveparams
= NULL
;
1559 server
.logfile
= NULL
; /* NULL = log on standard output */
1560 server
.bindaddr
= NULL
;
1561 server
.glueoutputbuf
= 1;
1562 server
.daemonize
= 0;
1563 server
.appendonly
= 0;
1564 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1565 server
.lastfsync
= time(NULL
);
1566 server
.appendfd
= -1;
1567 server
.appendseldb
= -1; /* Make sure the first time will not match */
1568 server
.pidfile
= zstrdup("/var/run/redis.pid");
1569 server
.dbfilename
= zstrdup("dump.rdb");
1570 server
.appendfilename
= zstrdup("appendonly.aof");
1571 server
.requirepass
= NULL
;
1572 server
.shareobjects
= 0;
1573 server
.rdbcompression
= 1;
1574 server
.maxclients
= 0;
1575 server
.blpop_blocked_clients
= 0;
1576 server
.maxmemory
= 0;
1577 server
.vm_enabled
= 0;
1578 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1579 server
.vm_page_size
= 256; /* 256 bytes per page */
1580 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1581 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1582 server
.vm_max_threads
= 4;
1583 server
.vm_blocked_clients
= 0;
1584 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1585 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1587 resetServerSaveParams();
1589 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1590 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1591 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1592 /* Replication related */
1594 server
.masterauth
= NULL
;
1595 server
.masterhost
= NULL
;
1596 server
.masterport
= 6379;
1597 server
.master
= NULL
;
1598 server
.replstate
= REDIS_REPL_NONE
;
1600 /* Double constants initialization */
1602 R_PosInf
= 1.0/R_Zero
;
1603 R_NegInf
= -1.0/R_Zero
;
1604 R_Nan
= R_Zero
/R_Zero
;
1607 static void initServer() {
1610 signal(SIGHUP
, SIG_IGN
);
1611 signal(SIGPIPE
, SIG_IGN
);
1612 setupSigSegvAction();
1614 server
.devnull
= fopen("/dev/null","w");
1615 if (server
.devnull
== NULL
) {
1616 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1619 server
.clients
= listCreate();
1620 server
.slaves
= listCreate();
1621 server
.monitors
= listCreate();
1622 server
.objfreelist
= listCreate();
1623 createSharedObjects();
1624 server
.el
= aeCreateEventLoop();
1625 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
1626 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1627 if (server
.fd
== -1) {
1628 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
1631 for (j
= 0; j
< server
.dbnum
; j
++) {
1632 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1633 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1634 server
.db
[j
].blockingkeys
= dictCreate(&keylistDictType
,NULL
);
1635 if (server
.vm_enabled
)
1636 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1637 server
.db
[j
].id
= j
;
1639 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1640 server
.pubsub_patterns
= listCreate();
1641 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1642 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1643 server
.cronloops
= 0;
1644 server
.bgsavechildpid
= -1;
1645 server
.bgrewritechildpid
= -1;
1646 server
.bgrewritebuf
= sdsempty();
1647 server
.lastsave
= time(NULL
);
1649 server
.stat_numcommands
= 0;
1650 server
.stat_numconnections
= 0;
1651 server
.stat_expiredkeys
= 0;
1652 server
.stat_starttime
= time(NULL
);
1653 server
.unixtime
= time(NULL
);
1654 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1655 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1656 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
1658 if (server
.appendonly
) {
1659 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1660 if (server
.appendfd
== -1) {
1661 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1667 if (server
.vm_enabled
) vmInit();
1670 /* Empty the whole database */
1671 static long long emptyDb() {
1673 long long removed
= 0;
1675 for (j
= 0; j
< server
.dbnum
; j
++) {
1676 removed
+= dictSize(server
.db
[j
].dict
);
1677 dictEmpty(server
.db
[j
].dict
);
1678 dictEmpty(server
.db
[j
].expires
);
1683 static int yesnotoi(char *s
) {
1684 if (!strcasecmp(s
,"yes")) return 1;
1685 else if (!strcasecmp(s
,"no")) return 0;
1689 /* I agree, this is a very rudimentary way to load a configuration...
1690 will improve later if the config gets more complex */
1691 static void loadServerConfig(char *filename
) {
1693 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1696 char *errormsg
= "Fatal error, can't open config file '%s'";
1697 char *errorbuf
= zmalloc(sizeof(char)*(strlen(errormsg
)+strlen(filename
)));
1698 sprintf(errorbuf
, errormsg
, filename
);
1700 if (filename
[0] == '-' && filename
[1] == '\0')
1703 if ((fp
= fopen(filename
,"r")) == NULL
) {
1704 redisLog(REDIS_WARNING
, errorbuf
);
1709 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1715 line
= sdstrim(line
," \t\r\n");
1717 /* Skip comments and blank lines*/
1718 if (line
[0] == '#' || line
[0] == '\0') {
1723 /* Split into arguments */
1724 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1725 sdstolower(argv
[0]);
1727 /* Execute config directives */
1728 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1729 server
.maxidletime
= atoi(argv
[1]);
1730 if (server
.maxidletime
< 0) {
1731 err
= "Invalid timeout value"; goto loaderr
;
1733 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1734 server
.port
= atoi(argv
[1]);
1735 if (server
.port
< 1 || server
.port
> 65535) {
1736 err
= "Invalid port"; goto loaderr
;
1738 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1739 server
.bindaddr
= zstrdup(argv
[1]);
1740 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1741 int seconds
= atoi(argv
[1]);
1742 int changes
= atoi(argv
[2]);
1743 if (seconds
< 1 || changes
< 0) {
1744 err
= "Invalid save parameters"; goto loaderr
;
1746 appendServerSaveParams(seconds
,changes
);
1747 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1748 if (chdir(argv
[1]) == -1) {
1749 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1750 argv
[1], strerror(errno
));
1753 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1754 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1755 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1756 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1757 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
1759 err
= "Invalid log level. Must be one of debug, notice, warning";
1762 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
1765 server
.logfile
= zstrdup(argv
[1]);
1766 if (!strcasecmp(server
.logfile
,"stdout")) {
1767 zfree(server
.logfile
);
1768 server
.logfile
= NULL
;
1770 if (server
.logfile
) {
1771 /* Test if we are able to open the file. The server will not
1772 * be able to abort just for this problem later... */
1773 logfp
= fopen(server
.logfile
,"a");
1774 if (logfp
== NULL
) {
1775 err
= sdscatprintf(sdsempty(),
1776 "Can't open the log file: %s", strerror(errno
));
1781 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1782 server
.dbnum
= atoi(argv
[1]);
1783 if (server
.dbnum
< 1) {
1784 err
= "Invalid number of databases"; goto loaderr
;
1786 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1787 loadServerConfig(argv
[1]);
1788 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1789 server
.maxclients
= atoi(argv
[1]);
1790 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1791 server
.maxmemory
= strtoll(argv
[1], NULL
, 10);
1792 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1793 server
.masterhost
= sdsnew(argv
[1]);
1794 server
.masterport
= atoi(argv
[2]);
1795 server
.replstate
= REDIS_REPL_CONNECT
;
1796 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1797 server
.masterauth
= zstrdup(argv
[1]);
1798 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1799 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1800 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1802 } else if (!strcasecmp(argv
[0],"shareobjects") && argc
== 2) {
1803 if ((server
.shareobjects
= yesnotoi(argv
[1])) == -1) {
1804 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1806 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1807 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1808 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1810 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1811 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1812 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1814 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1815 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1816 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1818 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1819 if (!strcasecmp(argv
[1],"no")) {
1820 server
.appendfsync
= APPENDFSYNC_NO
;
1821 } else if (!strcasecmp(argv
[1],"always")) {
1822 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
1823 } else if (!strcasecmp(argv
[1],"everysec")) {
1824 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1826 err
= "argument must be 'no', 'always' or 'everysec'";
1829 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
1830 server
.requirepass
= zstrdup(argv
[1]);
1831 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
1832 zfree(server
.pidfile
);
1833 server
.pidfile
= zstrdup(argv
[1]);
1834 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
1835 zfree(server
.dbfilename
);
1836 server
.dbfilename
= zstrdup(argv
[1]);
1837 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
1838 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
1839 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1841 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
1842 zfree(server
.vm_swap_file
);
1843 server
.vm_swap_file
= zstrdup(argv
[1]);
1844 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
1845 server
.vm_max_memory
= strtoll(argv
[1], NULL
, 10);
1846 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
1847 server
.vm_page_size
= strtoll(argv
[1], NULL
, 10);
1848 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
1849 server
.vm_pages
= strtoll(argv
[1], NULL
, 10);
1850 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1851 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1852 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
1853 server
.hash_max_zipmap_entries
= strtol(argv
[1], NULL
, 10);
1854 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
1855 server
.hash_max_zipmap_value
= strtol(argv
[1], NULL
, 10);
1856 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
1857 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
1859 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
1861 for (j
= 0; j
< argc
; j
++)
1866 if (fp
!= stdin
) fclose(fp
);
1870 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
1871 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
1872 fprintf(stderr
, ">>> '%s'\n", line
);
1873 fprintf(stderr
, "%s\n", err
);
1877 static void freeClientArgv(redisClient
*c
) {
1880 for (j
= 0; j
< c
->argc
; j
++)
1881 decrRefCount(c
->argv
[j
]);
1882 for (j
= 0; j
< c
->mbargc
; j
++)
1883 decrRefCount(c
->mbargv
[j
]);
1888 static void freeClient(redisClient
*c
) {
1891 /* Note that if the client we are freeing is blocked into a blocking
1892 * call, we have to set querybuf to NULL *before* to call
1893 * unblockClientWaitingData() to avoid processInputBuffer() will get
1894 * called. Also it is important to remove the file events after
1895 * this, because this call adds the READABLE event. */
1896 sdsfree(c
->querybuf
);
1898 if (c
->flags
& REDIS_BLOCKED
)
1899 unblockClientWaitingData(c
);
1901 /* Unsubscribe from all the pubsub channels */
1902 pubsubUnsubscribeAllChannels(c
,0);
1903 pubsubUnsubscribeAllPatterns(c
,0);
1904 dictRelease(c
->pubsub_channels
);
1905 listRelease(c
->pubsub_patterns
);
1906 /* Obvious cleanup */
1907 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
1908 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
1909 listRelease(c
->reply
);
1912 /* Remove from the list of clients */
1913 ln
= listSearchKey(server
.clients
,c
);
1914 redisAssert(ln
!= NULL
);
1915 listDelNode(server
.clients
,ln
);
1916 /* Remove from the list of clients waiting for swapped keys */
1917 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
1918 ln
= listSearchKey(server
.io_ready_clients
,c
);
1920 listDelNode(server
.io_ready_clients
,ln
);
1921 server
.vm_blocked_clients
--;
1924 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
1925 ln
= listFirst(c
->io_keys
);
1926 dontWaitForSwappedKey(c
,ln
->value
);
1928 listRelease(c
->io_keys
);
1929 /* Master/slave cleanup */
1930 if (c
->flags
& REDIS_SLAVE
) {
1931 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
1933 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
1934 ln
= listSearchKey(l
,c
);
1935 redisAssert(ln
!= NULL
);
1938 if (c
->flags
& REDIS_MASTER
) {
1939 server
.master
= NULL
;
1940 server
.replstate
= REDIS_REPL_CONNECT
;
1942 /* Release memory */
1945 freeClientMultiState(c
);
1949 #define GLUEREPLY_UP_TO (1024)
1950 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
1952 char buf
[GLUEREPLY_UP_TO
];
1957 listRewind(c
->reply
,&li
);
1958 while((ln
= listNext(&li
))) {
1962 objlen
= sdslen(o
->ptr
);
1963 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
1964 memcpy(buf
+copylen
,o
->ptr
,objlen
);
1966 listDelNode(c
->reply
,ln
);
1968 if (copylen
== 0) return;
1972 /* Now the output buffer is empty, add the new single element */
1973 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
1974 listAddNodeHead(c
->reply
,o
);
1977 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
1978 redisClient
*c
= privdata
;
1979 int nwritten
= 0, totwritten
= 0, objlen
;
1982 REDIS_NOTUSED(mask
);
1984 /* Use writev() if we have enough buffers to send */
1985 if (!server
.glueoutputbuf
&&
1986 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
1987 !(c
->flags
& REDIS_MASTER
))
1989 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
1993 while(listLength(c
->reply
)) {
1994 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
1995 glueReplyBuffersIfNeeded(c
);
1997 o
= listNodeValue(listFirst(c
->reply
));
1998 objlen
= sdslen(o
->ptr
);
2001 listDelNode(c
->reply
,listFirst(c
->reply
));
2005 if (c
->flags
& REDIS_MASTER
) {
2006 /* Don't reply to a master */
2007 nwritten
= objlen
- c
->sentlen
;
2009 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2010 if (nwritten
<= 0) break;
2012 c
->sentlen
+= nwritten
;
2013 totwritten
+= nwritten
;
2014 /* If we fully sent the object on head go to the next one */
2015 if (c
->sentlen
== objlen
) {
2016 listDelNode(c
->reply
,listFirst(c
->reply
));
2019 /* Note that we avoid sending more than REDIS_MAX_WRITE_PER_EVENT
2020 * bytes, in a single threaded server it's a good idea to serve
2021 * other clients as well, even if a very large request comes from
2022 * super fast link that is always able to accept data (in real world
2023 * scenario think about 'KEYS *' against the loopback interface) */
2024 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
2026 if (nwritten
== -1) {
2027 if (errno
== EAGAIN
) {
2030 redisLog(REDIS_VERBOSE
,
2031 "Error writing to client: %s", strerror(errno
));
2036 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
2037 if (listLength(c
->reply
) == 0) {
2039 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2043 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2045 redisClient
*c
= privdata
;
2046 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2048 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2049 int offset
, ion
= 0;
2051 REDIS_NOTUSED(mask
);
2054 while (listLength(c
->reply
)) {
2055 offset
= c
->sentlen
;
2059 /* fill-in the iov[] array */
2060 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2061 o
= listNodeValue(node
);
2062 objlen
= sdslen(o
->ptr
);
2064 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2067 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2068 break; /* no more iovecs */
2070 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2071 iov
[ion
].iov_len
= objlen
- offset
;
2072 willwrite
+= objlen
- offset
;
2073 offset
= 0; /* just for the first item */
2080 /* write all collected blocks at once */
2081 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2082 if (errno
!= EAGAIN
) {
2083 redisLog(REDIS_VERBOSE
,
2084 "Error writing to client: %s", strerror(errno
));
2091 totwritten
+= nwritten
;
2092 offset
= c
->sentlen
;
2094 /* remove written robjs from c->reply */
2095 while (nwritten
&& listLength(c
->reply
)) {
2096 o
= listNodeValue(listFirst(c
->reply
));
2097 objlen
= sdslen(o
->ptr
);
2099 if(nwritten
>= objlen
- offset
) {
2100 listDelNode(c
->reply
, listFirst(c
->reply
));
2101 nwritten
-= objlen
- offset
;
2105 c
->sentlen
+= nwritten
;
2113 c
->lastinteraction
= time(NULL
);
2115 if (listLength(c
->reply
) == 0) {
2117 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2121 static struct redisCommand
*lookupCommand(char *name
) {
2123 while(cmdTable
[j
].name
!= NULL
) {
2124 if (!strcasecmp(name
,cmdTable
[j
].name
)) return &cmdTable
[j
];
2130 /* resetClient prepare the client to process the next command */
2131 static void resetClient(redisClient
*c
) {
2137 /* Call() is the core of Redis execution of a command */
2138 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2141 dirty
= server
.dirty
;
2143 dirty
= server
.dirty
-dirty
;
2145 if (server
.appendonly
&& dirty
)
2146 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2147 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2148 listLength(server
.slaves
))
2149 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2150 if (listLength(server
.monitors
))
2151 replicationFeedSlaves(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2152 server
.stat_numcommands
++;
2155 /* If this function gets called we already read a whole
2156 * command, arguments are in the client argv/argc fields.
2157 * processCommand() execute the command or prepare the
2158 * server for a bulk read from the client.
2160 * If 1 is returned the client is still alive and valid and
2161 * other operations can be performed by the caller. Otherwise
2162 * if 0 is returned the client was destroyed (i.e. after QUIT). */
2163 static int processCommand(redisClient
*c
) {
2164 struct redisCommand
*cmd
;
2166 /* Free some memory if needed (maxmemory setting) */
2167 if (server
.maxmemory
) freeMemoryIfNeeded();
2169 /* Handle the multi bulk command type. This is an alternative protocol
2170 * supported by Redis in order to receive commands that are composed of
2171 * multiple binary-safe "bulk" arguments. The latency of processing is
2172 * a bit higher but this allows things like multi-sets, so if this
2173 * protocol is used only for MSET and similar commands this is a big win. */
2174 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
2175 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2176 if (c
->multibulk
<= 0) {
2180 decrRefCount(c
->argv
[c
->argc
-1]);
2184 } else if (c
->multibulk
) {
2185 if (c
->bulklen
== -1) {
2186 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2187 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2191 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2192 decrRefCount(c
->argv
[0]);
2193 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2195 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2200 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2204 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2205 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2209 if (c
->multibulk
== 0) {
2213 /* Here we need to swap the multi-bulk argc/argv with the
2214 * normal argc/argv of the client structure. */
2216 c
->argv
= c
->mbargv
;
2217 c
->mbargv
= auxargv
;
2220 c
->argc
= c
->mbargc
;
2221 c
->mbargc
= auxargc
;
2223 /* We need to set bulklen to something different than -1
2224 * in order for the code below to process the command without
2225 * to try to read the last argument of a bulk command as
2226 * a special argument. */
2228 /* continue below and process the command */
2235 /* -- end of multi bulk commands processing -- */
2237 /* The QUIT command is handled as a special case. Normal command
2238 * procs are unable to close the client connection safely */
2239 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2244 /* Now lookup the command and check ASAP about trivial error conditions
2245 * such wrong arity, bad command name and so forth. */
2246 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2249 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2250 (char*)c
->argv
[0]->ptr
));
2253 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2254 (c
->argc
< -cmd
->arity
)) {
2256 sdscatprintf(sdsempty(),
2257 "-ERR wrong number of arguments for '%s' command\r\n",
2261 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2262 /* This is a bulk command, we have to read the last argument yet. */
2263 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2265 decrRefCount(c
->argv
[c
->argc
-1]);
2266 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2268 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2273 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2274 /* It is possible that the bulk read is already in the
2275 * buffer. Check this condition and handle it accordingly.
2276 * This is just a fast path, alternative to call processInputBuffer().
2277 * It's a good idea since the code is small and this condition
2278 * happens most of the times. */
2279 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
2280 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2282 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2284 /* Otherwise return... there is to read the last argument
2285 * from the socket. */
2289 /* Let's try to encode the bulk object to save space. */
2290 if (cmd
->flags
& REDIS_CMD_BULK
)
2291 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2293 /* Check if the user is authenticated */
2294 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2295 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2300 /* Handle the maxmemory directive */
2301 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2302 zmalloc_used_memory() > server
.maxmemory
)
2304 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2309 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2310 if (dictSize(c
->pubsub_channels
) > 0 &&
2311 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2312 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2313 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2318 /* Exec the command */
2319 if (c
->flags
& REDIS_MULTI
&& cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
) {
2320 queueMultiCommand(c
,cmd
);
2321 addReply(c
,shared
.queued
);
2323 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2324 blockClientOnSwappedKeys(cmd
,c
)) return 1;
2328 /* Prepare the client for the next command */
2333 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2338 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2339 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2340 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2341 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2344 if (argc
<= REDIS_STATIC_ARGS
) {
2347 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2350 lenobj
= createObject(REDIS_STRING
,
2351 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
2352 lenobj
->refcount
= 0;
2353 outv
[outc
++] = lenobj
;
2354 for (j
= 0; j
< argc
; j
++) {
2355 lenobj
= createObject(REDIS_STRING
,
2356 sdscatprintf(sdsempty(),"$%lu\r\n",
2357 (unsigned long) stringObjectLen(argv
[j
])));
2358 lenobj
->refcount
= 0;
2359 outv
[outc
++] = lenobj
;
2360 outv
[outc
++] = argv
[j
];
2361 outv
[outc
++] = shared
.crlf
;
2364 /* Increment all the refcounts at start and decrement at end in order to
2365 * be sure to free objects if there is no slave in a replication state
2366 * able to be feed with commands */
2367 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2368 listRewind(slaves
,&li
);
2369 while((ln
= listNext(&li
))) {
2370 redisClient
*slave
= ln
->value
;
2372 /* Don't feed slaves that are still waiting for BGSAVE to start */
2373 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2375 /* Feed all the other slaves, MONITORs and so on */
2376 if (slave
->slaveseldb
!= dictid
) {
2380 case 0: selectcmd
= shared
.select0
; break;
2381 case 1: selectcmd
= shared
.select1
; break;
2382 case 2: selectcmd
= shared
.select2
; break;
2383 case 3: selectcmd
= shared
.select3
; break;
2384 case 4: selectcmd
= shared
.select4
; break;
2385 case 5: selectcmd
= shared
.select5
; break;
2386 case 6: selectcmd
= shared
.select6
; break;
2387 case 7: selectcmd
= shared
.select7
; break;
2388 case 8: selectcmd
= shared
.select8
; break;
2389 case 9: selectcmd
= shared
.select9
; break;
2391 selectcmd
= createObject(REDIS_STRING
,
2392 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2393 selectcmd
->refcount
= 0;
2396 addReply(slave
,selectcmd
);
2397 slave
->slaveseldb
= dictid
;
2399 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2401 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
2402 if (outv
!= static_outv
) zfree(outv
);
2405 static void processInputBuffer(redisClient
*c
) {
2407 /* Before to process the input buffer, make sure the client is not
2408 * waiting for a blocking operation such as BLPOP. Note that the first
2409 * iteration the client is never blocked, otherwise the processInputBuffer
2410 * would not be called at all, but after the execution of the first commands
2411 * in the input buffer the client may be blocked, and the "goto again"
2412 * will try to reiterate. The following line will make it return asap. */
2413 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2414 if (c
->bulklen
== -1) {
2415 /* Read the first line of the query */
2416 char *p
= strchr(c
->querybuf
,'\n');
2423 query
= c
->querybuf
;
2424 c
->querybuf
= sdsempty();
2425 querylen
= 1+(p
-(query
));
2426 if (sdslen(query
) > querylen
) {
2427 /* leave data after the first line of the query in the buffer */
2428 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2430 *p
= '\0'; /* remove "\n" */
2431 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2432 sdsupdatelen(query
);
2434 /* Now we can split the query in arguments */
2435 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2438 if (c
->argv
) zfree(c
->argv
);
2439 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2441 for (j
= 0; j
< argc
; j
++) {
2442 if (sdslen(argv
[j
])) {
2443 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2451 /* Execute the command. If the client is still valid
2452 * after processCommand() return and there is something
2453 * on the query buffer try to process the next command. */
2454 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2456 /* Nothing to process, argc == 0. Just process the query
2457 * buffer if it's not empty or return to the caller */
2458 if (sdslen(c
->querybuf
)) goto again
;
2461 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2462 redisLog(REDIS_VERBOSE
, "Client protocol error");
2467 /* Bulk read handling. Note that if we are at this point
2468 the client already sent a command terminated with a newline,
2469 we are reading the bulk data that is actually the last
2470 argument of the command. */
2471 int qbl
= sdslen(c
->querybuf
);
2473 if (c
->bulklen
<= qbl
) {
2474 /* Copy everything but the final CRLF as final argument */
2475 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2477 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2478 /* Process the command. If the client is still valid after
2479 * the processing and there is more data in the buffer
2480 * try to parse it. */
2481 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2487 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2488 redisClient
*c
= (redisClient
*) privdata
;
2489 char buf
[REDIS_IOBUF_LEN
];
2492 REDIS_NOTUSED(mask
);
2494 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2496 if (errno
== EAGAIN
) {
2499 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2503 } else if (nread
== 0) {
2504 redisLog(REDIS_VERBOSE
, "Client closed connection");
2509 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2510 c
->lastinteraction
= time(NULL
);
2514 processInputBuffer(c
);
2517 static int selectDb(redisClient
*c
, int id
) {
2518 if (id
< 0 || id
>= server
.dbnum
)
2520 c
->db
= &server
.db
[id
];
2524 static void *dupClientReplyValue(void *o
) {
2525 incrRefCount((robj
*)o
);
/* List "match" callback: returns 1 when compareStringObjects() reports
 * the two objects as equal (result 0), and 0 otherwise. */
static int listMatchObjects(void *a, void *b) {
    return !compareStringObjects(a,b);
}
2533 static redisClient
*createClient(int fd
) {
2534 redisClient
*c
= zmalloc(sizeof(*c
));
2536 anetNonBlock(NULL
,fd
);
2537 anetTcpNoDelay(NULL
,fd
);
2538 if (!c
) return NULL
;
2541 c
->querybuf
= sdsempty();
2550 c
->lastinteraction
= time(NULL
);
2551 c
->authenticated
= 0;
2552 c
->replstate
= REDIS_REPL_NONE
;
2553 c
->reply
= listCreate();
2554 listSetFreeMethod(c
->reply
,decrRefCount
);
2555 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2556 c
->blockingkeys
= NULL
;
2557 c
->blockingkeysnum
= 0;
2558 c
->io_keys
= listCreate();
2559 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2560 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2561 c
->pubsub_patterns
= listCreate();
2562 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2563 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2564 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2565 readQueryFromClient
, c
) == AE_ERR
) {
2569 listAddNodeTail(server
.clients
,c
);
2570 initClientMultiState(c
);
2574 static void addReply(redisClient
*c
, robj
*obj
) {
2575 if (listLength(c
->reply
) == 0 &&
2576 (c
->replstate
== REDIS_REPL_NONE
||
2577 c
->replstate
== REDIS_REPL_ONLINE
) &&
2578 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2579 sendReplyToClient
, c
) == AE_ERR
) return;
2581 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2582 obj
= dupStringObject(obj
);
2583 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2585 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2588 static void addReplySds(redisClient
*c
, sds s
) {
2589 robj
*o
= createObject(REDIS_STRING
,s
);
2594 static void addReplyDouble(redisClient
*c
, double d
) {
2597 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2598 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2599 (unsigned long) strlen(buf
),buf
));
2602 static void addReplyLong(redisClient
*c
, long l
) {
2607 addReply(c
,shared
.czero
);
2609 } else if (l
== 1) {
2610 addReply(c
,shared
.cone
);
2613 len
= snprintf(buf
,sizeof(buf
),":%ld\r\n",l
);
2614 addReplySds(c
,sdsnewlen(buf
,len
));
2617 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2622 addReply(c
,shared
.czero
);
2624 } else if (ll
== 1) {
2625 addReply(c
,shared
.cone
);
2628 len
= snprintf(buf
,sizeof(buf
),":%lld\r\n",ll
);
2629 addReplySds(c
,sdsnewlen(buf
,len
));
2632 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2637 addReply(c
,shared
.czero
);
2639 } else if (ul
== 1) {
2640 addReply(c
,shared
.cone
);
2643 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2644 addReplySds(c
,sdsnewlen(buf
,len
));
2647 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2650 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2651 len
= sdslen(obj
->ptr
);
2653 long n
= (long)obj
->ptr
;
2655 /* Compute how many bytes will take this integer as a radix 10 string */
2661 while((n
= n
/10) != 0) {
2665 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len
));
2668 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2669 addReplyBulkLen(c
,obj
);
2671 addReply(c
,shared
.crlf
);
2674 /* In the CONFIG command we need to add vanilla C string as bulk replies */
2675 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2677 addReply(c
,shared
.nullbulk
);
2679 robj
*o
= createStringObject(s
,strlen(s
));
2685 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2690 REDIS_NOTUSED(mask
);
2691 REDIS_NOTUSED(privdata
);
2693 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2694 if (cfd
== AE_ERR
) {
2695 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2698 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2699 if ((c
= createClient(cfd
)) == NULL
) {
2700 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2701 close(cfd
); /* May be already closed, just ingore errors */
2704 /* If maxclient directive is set and this is one client more... close the
2705 * connection. Note that we create the client instead to check before
2706 * for this condition, since now the socket is already set in nonblocking
2707 * mode and we can send an error for free using the Kernel I/O */
2708 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2709 char *err
= "-ERR max number of clients reached\r\n";
2711 /* That's a best effort error message, don't check write errors */
2712 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2713 /* Nothing to do, Just to avoid the warning... */
2718 server
.stat_numconnections
++;
2721 /* ======================= Redis objects implementation ===================== */
2723 static robj
*createObject(int type
, void *ptr
) {
2726 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2727 if (listLength(server
.objfreelist
)) {
2728 listNode
*head
= listFirst(server
.objfreelist
);
2729 o
= listNodeValue(head
);
2730 listDelNode(server
.objfreelist
,head
);
2731 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2733 if (server
.vm_enabled
) {
2734 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2735 o
= zmalloc(sizeof(*o
));
2737 o
= zmalloc(sizeof(*o
)-sizeof(struct redisObjectVM
));
2741 o
->encoding
= REDIS_ENCODING_RAW
;
2744 if (server
.vm_enabled
) {
2745 /* Note that this code may run in the context of an I/O thread
2746 * and accessing to server.unixtime in theory is an error
2747 * (no locks). But in practice this is safe, and even if we read
2748 * garbage Redis will not fail, as it's just a statistical info */
2749 o
->vm
.atime
= server
.unixtime
;
2750 o
->storage
= REDIS_VM_MEMORY
;
2755 static robj
*createStringObject(char *ptr
, size_t len
) {
2756 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
2759 static robj
*dupStringObject(robj
*o
) {
2760 assert(o
->encoding
== REDIS_ENCODING_RAW
);
2761 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
2764 static robj
*createListObject(void) {
2765 list
*l
= listCreate();
2767 listSetFreeMethod(l
,decrRefCount
);
2768 return createObject(REDIS_LIST
,l
);
2771 static robj
*createSetObject(void) {
2772 dict
*d
= dictCreate(&setDictType
,NULL
);
2773 return createObject(REDIS_SET
,d
);
2776 static robj
*createHashObject(void) {
2777 /* All the Hashes start as zipmaps. Will be automatically converted
2778 * into hash tables if there are enough elements or big elements
2780 unsigned char *zm
= zipmapNew();
2781 robj
*o
= createObject(REDIS_HASH
,zm
);
2782 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
2786 static robj
*createZsetObject(void) {
2787 zset
*zs
= zmalloc(sizeof(*zs
));
2789 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
2790 zs
->zsl
= zslCreate();
2791 return createObject(REDIS_ZSET
,zs
);
2794 static void freeStringObject(robj
*o
) {
2795 if (o
->encoding
== REDIS_ENCODING_RAW
) {
2800 static void freeListObject(robj
*o
) {
2801 listRelease((list
*) o
->ptr
);
2804 static void freeSetObject(robj
*o
) {
2805 dictRelease((dict
*) o
->ptr
);
2808 static void freeZsetObject(robj
*o
) {
2811 dictRelease(zs
->dict
);
2816 static void freeHashObject(robj
*o
) {
2817 switch (o
->encoding
) {
2818 case REDIS_ENCODING_HT
:
2819 dictRelease((dict
*) o
->ptr
);
2821 case REDIS_ENCODING_ZIPMAP
:
2830 static void incrRefCount(robj
*o
) {
2834 static void decrRefCount(void *obj
) {
2837 /* Object is a key of a swapped out value, or in the process of being
2839 if (server
.vm_enabled
&&
2840 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
2842 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(obj
);
2843 redisAssert(o
->type
== REDIS_STRING
);
2844 freeStringObject(o
);
2845 vmMarkPagesFree(o
->vm
.page
,o
->vm
.usedpages
);
2846 pthread_mutex_lock(&server
.obj_freelist_mutex
);
2847 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2848 !listAddNodeHead(server
.objfreelist
,o
))
2850 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2851 server
.vm_stats_swapped_objects
--;
2854 /* Object is in memory, or in the process of being swapped out. */
2855 if (--(o
->refcount
) == 0) {
2856 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
2857 vmCancelThreadedIOJob(obj
);
2859 case REDIS_STRING
: freeStringObject(o
); break;
2860 case REDIS_LIST
: freeListObject(o
); break;
2861 case REDIS_SET
: freeSetObject(o
); break;
2862 case REDIS_ZSET
: freeZsetObject(o
); break;
2863 case REDIS_HASH
: freeHashObject(o
); break;
2864 default: redisAssert(0); break;
2866 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2867 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
2868 !listAddNodeHead(server
.objfreelist
,o
))
2870 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2874 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
2875 dictEntry
*de
= dictFind(db
->dict
,key
);
2877 robj
*key
= dictGetEntryKey(de
);
2878 robj
*val
= dictGetEntryVal(de
);
2880 if (server
.vm_enabled
) {
2881 if (key
->storage
== REDIS_VM_MEMORY
||
2882 key
->storage
== REDIS_VM_SWAPPING
)
2884 /* If we were swapping the object out, stop it, this key
2886 if (key
->storage
== REDIS_VM_SWAPPING
)
2887 vmCancelThreadedIOJob(key
);
2888 /* Update the access time of the key for the aging algorithm. */
2889 key
->vm
.atime
= server
.unixtime
;
2891 int notify
= (key
->storage
== REDIS_VM_LOADING
);
2893 /* Our value was swapped on disk. Bring it at home. */
2894 redisAssert(val
== NULL
);
2895 val
= vmLoadObject(key
);
2896 dictGetEntryVal(de
) = val
;
2898 /* Clients blocked by the VM subsystem may be waiting for
2900 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
2909 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
2910 expireIfNeeded(db
,key
);
2911 return lookupKey(db
,key
);
2914 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
2915 deleteIfVolatile(db
,key
);
2916 return lookupKey(db
,key
);
2919 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2920 robj
*o
= lookupKeyRead(c
->db
, key
);
2921 if (!o
) addReply(c
,reply
);
2925 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
2926 robj
*o
= lookupKeyWrite(c
->db
, key
);
2927 if (!o
) addReply(c
,reply
);
2931 static int checkType(redisClient
*c
, robj
*o
, int type
) {
2932 if (o
->type
!= type
) {
2933 addReply(c
,shared
.wrongtypeerr
);
2939 static int deleteKey(redisDb
*db
, robj
*key
) {
2942 /* We need to protect key from destruction: after the first dictDelete()
2943 * it may happen that 'key' is no longer valid if we don't increment
2944 * it's count. This may happen when we get the object reference directly
2945 * from the hash table with dictRandomKey() or dict iterators */
2947 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
);
2948 retval
= dictDelete(db
->dict
,key
);
2951 return retval
== DICT_OK
;
2954 /* Check if the nul-terminated string 's' can be represented by a long
2955 * (that is, is a number that fits into long without any other space or
2956 * character before or after the digits).
2958 * If so, the function returns REDIS_OK and *longval is set to the value
2959 * of the number. Otherwise REDIS_ERR is returned */
2960 static int isStringRepresentableAsLong(sds s
, long *longval
) {
2961 char buf
[32], *endptr
;
2965 value
= strtol(s
, &endptr
, 10);
2966 if (endptr
[0] != '\0') return REDIS_ERR
;
2967 slen
= snprintf(buf
,32,"%ld",value
);
2969 /* If the number converted back into a string is not identical
2970 * then it's not possible to encode the string as integer */
2971 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
2972 if (longval
) *longval
= value
;
2976 /* Try to encode a string object in order to save space */
2977 static robj
*tryObjectEncoding(robj
*o
) {
2981 if (o
->encoding
!= REDIS_ENCODING_RAW
)
2982 return o
; /* Already encoded */
2984 /* It's not safe to encode shared objects: shared objects can be shared
2985 * everywhere in the "object space" of Redis. Encoded objects can only
2986 * appear as "values" (and not, for instance, as keys) */
2987 if (o
->refcount
> 1) return o
;
2989 /* Currently we try to encode only strings */
2990 redisAssert(o
->type
== REDIS_STRING
);
2992 /* Check if we can represent this string as a long integer */
2993 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
2995 /* Ok, this object can be encoded */
2996 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
2998 incrRefCount(shared
.integers
[value
]);
2999 return shared
.integers
[value
];
3001 o
->encoding
= REDIS_ENCODING_INT
;
3003 o
->ptr
= (void*) value
;
3008 /* Get a decoded version of an encoded object (returned as a new object).
3009 * If the object is already raw-encoded just increment the ref count. */
3010 static robj
*getDecodedObject(robj
*o
) {
3013 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3017 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3020 snprintf(buf
,32,"%ld",(long)o
->ptr
);
3021 dec
= createStringObject(buf
,strlen(buf
));
3024 redisAssert(1 != 1);
3028 /* Compare two string objects via strcmp() or alike.
3029 * Note that the objects may be integer-encoded. In such a case we
3030 * use snprintf() to get a string representation of the numbers on the stack
3031 * and compare the strings, it's much faster than calling getDecodedObject().
3033 * Important note: if objects are not integer encoded, but binary-safe strings,
3034 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3036 static int compareStringObjects(robj
*a
, robj
*b
) {
3037 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3038 char bufa
[128], bufb
[128], *astr
, *bstr
;
3041 if (a
== b
) return 0;
3042 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3043 snprintf(bufa
,sizeof(bufa
),"%ld",(long) a
->ptr
);
3049 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3050 snprintf(bufb
,sizeof(bufb
),"%ld",(long) b
->ptr
);
3056 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3059 static size_t stringObjectLen(robj
*o
) {
3060 redisAssert(o
->type
== REDIS_STRING
);
3061 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3062 return sdslen(o
->ptr
);
3066 return snprintf(buf
,32,"%ld",(long)o
->ptr
);
3070 static int getDoubleFromObject(redisClient
*c
, robj
*o
, double *value
) {
3074 if (o
&& o
->type
!= REDIS_STRING
) {
3075 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3081 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3082 parsedValue
= strtod(o
->ptr
, &eptr
);
3083 else if (o
->encoding
== REDIS_ENCODING_INT
)
3084 parsedValue
= (long)o
->ptr
;
3086 redisAssert(1 != 1);
3088 if (eptr
!= NULL
&& *eptr
!= '\0') {
3089 addReplySds(c
,sdsnew("-ERR value is not a double\r\n"));
3093 *value
= parsedValue
;
3098 static int getLongLongFromObject(redisClient
*c
, robj
*o
, long long *value
) {
3099 long long parsedValue
;
3102 if (o
&& o
->type
!= REDIS_STRING
) {
3103 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3109 else if (o
->encoding
== REDIS_ENCODING_RAW
)
3110 parsedValue
= strtoll(o
->ptr
, &eptr
, 10);
3111 else if (o
->encoding
== REDIS_ENCODING_INT
)
3112 parsedValue
= (long)o
->ptr
;
3114 redisAssert(1 != 1);
3116 if (eptr
!= NULL
&& *eptr
!= '\0') {
3117 addReplySds(c
,sdsnew("-ERR value is not an integer\r\n"));
3121 *value
= parsedValue
;
3126 static int getLongFromObject(redisClient
*c
, robj
*o
, long *value
) {
3127 long long actualValue
;
3129 if (getLongLongFromObject(c
, o
, &actualValue
) != REDIS_OK
) return REDIS_ERR
;
3131 if (actualValue
< LONG_MIN
|| actualValue
> LONG_MAX
) {
3132 addReplySds(c
,sdsnew("-ERR value is out of range\r\n"));
3136 *value
= actualValue
;
3141 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' to 'fp'. Returns 0 on success, -1 on error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte signed integer in host byte
 * order. Returns 0 on success, -1 on error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;
    size_t written = fwrite(&seconds,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3154 /* check rdbLoadLen() comments for more info */
3155 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3156 unsigned char buf
[2];
3159 /* Save a 6 bit len */
3160 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3161 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3162 } else if (len
< (1<<14)) {
3163 /* Save a 14 bit len */
3164 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3166 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3168 /* Save a 32 bit len */
3169 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3170 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3172 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3177 /* String objects in the form "2391" "-100" without any space and with a
3178 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3179 * encoded as integers to save space */
3180 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3182 char *endptr
, buf
[32];
3184 /* Check if it's possible to encode this value as a number */
3185 value
= strtoll(s
, &endptr
, 10);
3186 if (endptr
[0] != '\0') return 0;
3187 snprintf(buf
,32,"%lld",value
);
3189 /* If the number converted back into a string is not identical
3190 * then it's not possible to encode the string as integer */
3191 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3193 /* Finally check if it fits in our ranges */
3194 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3195 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3196 enc
[1] = value
&0xFF;
3198 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3199 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3200 enc
[1] = value
&0xFF;
3201 enc
[2] = (value
>>8)&0xFF;
3203 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3204 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3205 enc
[1] = value
&0xFF;
3206 enc
[2] = (value
>>8)&0xFF;
3207 enc
[3] = (value
>>16)&0xFF;
3208 enc
[4] = (value
>>24)&0xFF;
3215 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3216 size_t comprlen
, outlen
;
3220 /* We require at least four bytes compression for this to be worth it */
3221 if (len
<= 4) return 0;
3223 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3224 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3225 if (comprlen
== 0) {
3229 /* Data compressed! Let's save it on disk */
3230 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3231 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3232 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3233 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3234 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3243 /* Save a string objet as [len][data] on disk. If the object is a string
3244 * representation of an integer value we try to safe it in a special form */
3245 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3248 /* Try integer encoding */
3250 unsigned char buf
[5];
3251 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3252 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3257 /* Try LZF compression - under 20 bytes it's unable to compress even
3258 * aaaaaaaaaaaaaaaaaa so skip it */
3259 if (server
.rdbcompression
&& len
> 20) {
3262 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3263 if (retval
== -1) return -1;
3264 if (retval
> 0) return 0;
3265 /* retval == 0 means data can't be compressed, save the old way */
3268 /* Store verbatim */
3269 if (rdbSaveLen(fp
,len
) == -1) return -1;
3270 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3274 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3275 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3278 /* Avoid incr/decr ref count business when possible.
3279 * This plays well with copy-on-write given that we are probably
3280 * in a child process (BGSAVE). Also this makes sure key objects
3281 * of swapped objects are not incRefCount-ed (an assert does not allow
3282 * this in order to avoid bugs) */
3283 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
3284 obj
= getDecodedObject(obj
);
3285 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3288 retval
= rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int outlen = 1;

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        /* Regular number: length byte followed by the "%.17g" text */
        snprintf((char*)out+1,sizeof(out)-1,"%.17g",val);
        out[0] = strlen((char*)out+1);
        outlen = out[0]+1;
    }
    return (fwrite(out,outlen,1,fp) == 0) ? -1 : 0;
}
3320 /* Save a Redis object. */
3321 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3322 if (o
->type
== REDIS_STRING
) {
3323 /* Save a string value */
3324 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3325 } else if (o
->type
== REDIS_LIST
) {
3326 /* Save a list value */
3327 list
*list
= o
->ptr
;
3331 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3332 listRewind(list
,&li
);
3333 while((ln
= listNext(&li
))) {
3334 robj
*eleobj
= listNodeValue(ln
);
3336 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3338 } else if (o
->type
== REDIS_SET
) {
3339 /* Save a set value */
3341 dictIterator
*di
= dictGetIterator(set
);
3344 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3345 while((de
= dictNext(di
)) != NULL
) {
3346 robj
*eleobj
= dictGetEntryKey(de
);
3348 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3350 dictReleaseIterator(di
);
3351 } else if (o
->type
== REDIS_ZSET
) {
3352 /* Save a set value */
3354 dictIterator
*di
= dictGetIterator(zs
->dict
);
3357 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3358 while((de
= dictNext(di
)) != NULL
) {
3359 robj
*eleobj
= dictGetEntryKey(de
);
3360 double *score
= dictGetEntryVal(de
);
3362 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3363 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3365 dictReleaseIterator(di
);
3366 } else if (o
->type
== REDIS_HASH
) {
3367 /* Save a hash value */
3368 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3369 unsigned char *p
= zipmapRewind(o
->ptr
);
3370 unsigned int count
= zipmapLen(o
->ptr
);
3371 unsigned char *key
, *val
;
3372 unsigned int klen
, vlen
;
3374 if (rdbSaveLen(fp
,count
) == -1) return -1;
3375 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3376 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3377 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3380 dictIterator
*di
= dictGetIterator(o
->ptr
);
3383 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3384 while((de
= dictNext(di
)) != NULL
) {
3385 robj
*key
= dictGetEntryKey(de
);
3386 robj
*val
= dictGetEntryVal(de
);
3388 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3389 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3391 dictReleaseIterator(di
);
3399 /* Return the length the object will have on disk if saved with
3400 * the rdbSaveObject() function. Currently we use a trick to get
3401 * this length with very little changes to the code. In the future
3402 * we could switch to a faster solution. */
3403 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3404 if (fp
== NULL
) fp
= server
.devnull
;
3406 assert(rdbSaveObject(fp
,o
) != 1);
3410 /* Return the number of pages required to save this object in the swap file */
3411 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3412 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3414 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3417 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3418 static int rdbSave(char *filename
) {
3419 dictIterator
*di
= NULL
;
3424 time_t now
= time(NULL
);
3426 /* Wait for I/O therads to terminate, just in case this is a
3427 * foreground-saving, to avoid seeking the swap file descriptor at the
3429 if (server
.vm_enabled
)
3430 waitEmptyIOJobsQueue();
3432 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3433 fp
= fopen(tmpfile
,"w");
3435 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3438 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3439 for (j
= 0; j
< server
.dbnum
; j
++) {
3440 redisDb
*db
= server
.db
+j
;
3442 if (dictSize(d
) == 0) continue;
3443 di
= dictGetIterator(d
);
3449 /* Write the SELECT DB opcode */
3450 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3451 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3453 /* Iterate this DB writing every entry */
3454 while((de
= dictNext(di
)) != NULL
) {
3455 robj
*key
= dictGetEntryKey(de
);
3456 robj
*o
= dictGetEntryVal(de
);
3457 time_t expiretime
= getExpire(db
,key
);
3459 /* Save the expire time */
3460 if (expiretime
!= -1) {
3461 /* If this key is already expired skip it */
3462 if (expiretime
< now
) continue;
3463 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3464 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3466 /* Save the key and associated value. This requires special
3467 * handling if the value is swapped out. */
3468 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
3469 key
->storage
== REDIS_VM_SWAPPING
) {
3470 /* Save type, key, value */
3471 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3472 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3473 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3475 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3477 /* Get a preview of the object in memory */
3478 po
= vmPreviewObject(key
);
3479 /* Save type, key, value */
3480 if (rdbSaveType(fp
,key
->vtype
) == -1) goto werr
;
3481 if (rdbSaveStringObject(fp
,key
) == -1) goto werr
;
3482 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3483 /* Remove the loaded object from memory */
3487 dictReleaseIterator(di
);
3490 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3492 /* Make sure data will not remain on the OS's output buffers */
3497 /* Use RENAME to make sure the DB file is changed atomically only
3498 * if the generate DB file is ok. */
3499 if (rename(tmpfile
,filename
) == -1) {
3500 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3504 redisLog(REDIS_NOTICE
,"DB saved on disk");
3506 server
.lastsave
= time(NULL
);
3512 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3513 if (di
) dictReleaseIterator(di
);
/* Start saving the dataset to 'filename' from a forked child process.
 * Returns REDIS_ERR when a background save child is already recorded or
 * when fork() fails, REDIS_OK once the child was started. */
3517 static int rdbSaveBackground(char *filename
) {
/* Only one background save at a time: -1 means no child is recorded. */
3520 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
/* With VM enabled, wait for the I/O jobs queue to be empty before forking. */
3521 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3522 if ((childpid
= fork()) == 0) {
/* Child side: reopen the swap file when VM is enabled, then run the save. */
3524 if (server
.vm_enabled
) vmReopenSwapFile();
3526 if (rdbSave(filename
) == REDIS_OK
) {
/* Parent side: fork() returning -1 means no child was created. */
3533 if (childpid
== -1) {
3534 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3538 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
/* Record the child pid; it is what the "already in progress" test reads. */
3539 server
.bgsavechildpid
= childpid
;
3540 updateDictResizePolicy();
3543 return REDIS_OK
; /* unreached */
/* Remove the temporary dump file named after the process id 'childpid'
 * ("temp-<pid>.rdb" in the current directory). Errors are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];
    int id = (int) childpid;

    snprintf(path,256,"temp-%d.rdb", id);
    unlink(path);
}
/* Read a one byte type from 'fp'. Returns the byte value (0-255), or -1
 * when nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
/* Read a time saved as a 4 byte signed integer in host byte order.
 * Returns the time, or -1 when the four bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t seconds;

    if (fread(&seconds,4,1,fp) == 0) return -1;
    return (time_t) seconds;
}
3565 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3566 * of this file for a description of how this are stored on disk.
3568 * isencoded is set to 1 if the readed length is not actually a length but
3569 * an "encoding type", check the above comments for more info */
3570 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
3571 unsigned char buf
[2];
3575 if (isencoded
) *isencoded
= 0;
3576 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3577 type
= (buf
[0]&0xC0)>>6;
3578 if (type
== REDIS_RDB_6BITLEN
) {
3579 /* Read a 6 bit len */
3581 } else if (type
== REDIS_RDB_ENCVAL
) {
3582 /* Read a 6 bit len encoding type */
3583 if (isencoded
) *isencoded
= 1;
3585 } else if (type
== REDIS_RDB_14BITLEN
) {
3586 /* Read a 14 bit len */
3587 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
3588 return ((buf
[0]&0x3F)<<8)|buf
[1];
3590 /* Read a 32 bit len */
3591 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
3596 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
) {
3597 unsigned char enc
[4];
3600 if (enctype
== REDIS_RDB_ENC_INT8
) {
3601 if (fread(enc
,1,1,fp
) == 0) return NULL
;
3602 val
= (signed char)enc
[0];
3603 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
3605 if (fread(enc
,2,1,fp
) == 0) return NULL
;
3606 v
= enc
[0]|(enc
[1]<<8);
3608 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
3610 if (fread(enc
,4,1,fp
) == 0) return NULL
;
3611 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
3614 val
= 0; /* anti-warning */
3617 return createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",val
));
3620 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
3621 unsigned int len
, clen
;
3622 unsigned char *c
= NULL
;
3625 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3626 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3627 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
3628 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
3629 if (fread(c
,clen
,1,fp
) == 0) goto err
;
3630 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
3632 return createObject(REDIS_STRING
,val
);
3639 static robj
*rdbLoadStringObject(FILE*fp
) {
3644 len
= rdbLoadLen(fp
,&isencoded
);
3647 case REDIS_RDB_ENC_INT8
:
3648 case REDIS_RDB_ENC_INT16
:
3649 case REDIS_RDB_ENC_INT32
:
3650 return rdbLoadIntegerObject(fp
,len
);
3651 case REDIS_RDB_ENC_LZF
:
3652 return rdbLoadLzfStringObject(fp
);
3658 if (len
== REDIS_RDB_LENERR
) return NULL
;
3659 val
= sdsnewlen(NULL
,len
);
3660 if (len
&& fread(val
,len
,1,fp
) == 0) {
3664 return createObject(REDIS_STRING
,val
);
3667 /* For information about double serialization check rdbSaveDoubleValue() */
3668 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
3672 if (fread(&len
,1,1,fp
) == 0) return -1;
3674 case 255: *val
= R_NegInf
; return 0;
3675 case 254: *val
= R_PosInf
; return 0;
3676 case 253: *val
= R_Nan
; return 0;
3678 if (fread(buf
,len
,1,fp
) == 0) return -1;
3680 sscanf(buf
, "%lg", val
);
3685 /* Load a Redis object of the specified type from the specified file.
3686 * On success a newly allocated object is returned, otherwise NULL. */
3687 static robj
*rdbLoadObject(int type
, FILE *fp
) {
3690 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
3691 if (type
== REDIS_STRING
) {
3692 /* Read string value */
3693 if ((o
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3694 o
= tryObjectEncoding(o
);
3695 } else if (type
== REDIS_LIST
|| type
== REDIS_SET
) {
3696 /* Read list/set value */
3699 if ((listlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3700 o
= (type
== REDIS_LIST
) ? createListObject() : createSetObject();
3701 /* It's faster to expand the dict to the right size asap in order
3702 * to avoid rehashing */
3703 if (type
== REDIS_SET
&& listlen
> DICT_HT_INITIAL_SIZE
)
3704 dictExpand(o
->ptr
,listlen
);
3705 /* Load every single element of the list/set */
3709 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3710 ele
= tryObjectEncoding(ele
);
3711 if (type
== REDIS_LIST
) {
3712 listAddNodeTail((list
*)o
->ptr
,ele
);
3714 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
3717 } else if (type
== REDIS_ZSET
) {
3718 /* Read list/set value */
3722 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3723 o
= createZsetObject();
3725 /* Load every single element of the list/set */
3728 double *score
= zmalloc(sizeof(double));
3730 if ((ele
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3731 ele
= tryObjectEncoding(ele
);
3732 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
3733 dictAdd(zs
->dict
,ele
,score
);
3734 zslInsert(zs
->zsl
,*score
,ele
);
3735 incrRefCount(ele
); /* added to skiplist */
3737 } else if (type
== REDIS_HASH
) {
3740 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
3741 o
= createHashObject();
3742 /* Too many entries? Use an hash table. */
3743 if (hashlen
> server
.hash_max_zipmap_entries
)
3744 convertToRealHash(o
);
3745 /* Load every key/value, then set it into the zipmap or hash
3746 * table, as needed. */
3750 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3751 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
3752 /* If we are using a zipmap and there are too big values
3753 * the object is converted to real hash table encoding. */
3754 if (o
->encoding
!= REDIS_ENCODING_HT
&&
3755 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
3756 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
3758 convertToRealHash(o
);
3761 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3762 unsigned char *zm
= o
->ptr
;
3764 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
3765 val
->ptr
,sdslen(val
->ptr
),NULL
);
3770 key
= tryObjectEncoding(key
);
3771 val
= tryObjectEncoding(val
);
3772 dictAdd((dict
*)o
->ptr
,key
,val
);
3781 static int rdbLoad(char *filename
) {
3783 robj
*keyobj
= NULL
;
3785 int type
, retval
, rdbver
;
3786 dict
*d
= server
.db
[0].dict
;
3787 redisDb
*db
= server
.db
+0;
3789 time_t expiretime
= -1, now
= time(NULL
);
3790 long long loadedkeys
= 0;
3792 fp
= fopen(filename
,"r");
3793 if (!fp
) return REDIS_ERR
;
3794 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
3796 if (memcmp(buf
,"REDIS",5) != 0) {
3798 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
3801 rdbver
= atoi(buf
+5);
3804 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
3811 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3812 if (type
== REDIS_EXPIRETIME
) {
3813 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
3814 /* We read the time so we need to read the object type again */
3815 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
3817 if (type
== REDIS_EOF
) break;
3818 /* Handle SELECT DB opcode as a special case */
3819 if (type
== REDIS_SELECTDB
) {
3820 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
3822 if (dbid
>= (unsigned)server
.dbnum
) {
3823 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
3826 db
= server
.db
+dbid
;
3831 if ((keyobj
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
3833 if ((o
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
3834 /* Add the new object in the hash table */
3835 retval
= dictAdd(d
,keyobj
,o
);
3836 if (retval
== DICT_ERR
) {
3837 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj
->ptr
);
3840 /* Set the expire time if needed */
3841 if (expiretime
!= -1) {
3842 setExpire(db
,keyobj
,expiretime
);
3843 /* Delete this key if already expired */
3844 if (expiretime
< now
) deleteKey(db
,keyobj
);
3848 /* Handle swapping while loading big datasets when VM is on */
3850 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
3851 while (zmalloc_used_memory() > server
.vm_max_memory
) {
3852 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
3859 eoferr
: /* unexpected end of file is handled here with a fatal exit */
3860 if (keyobj
) decrRefCount(keyobj
);
3861 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
3863 return REDIS_ERR
; /* Just to avoid warning */
3866 /*================================== Commands =============================== */
3868 static void authCommand(redisClient
*c
) {
3869 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
3870 c
->authenticated
= 1;
3871 addReply(c
,shared
.ok
);
3873 c
->authenticated
= 0;
3874 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
3878 static void pingCommand(redisClient
*c
) {
3879 addReply(c
,shared
.pong
);
3882 static void echoCommand(redisClient
*c
) {
3883 addReplyBulk(c
,c
->argv
[1]);
3886 /*=================================== Strings =============================== */
3888 static void setGenericCommand(redisClient
*c
, int nx
) {
3891 if (nx
) deleteIfVolatile(c
->db
,c
->argv
[1]);
3892 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3893 if (retval
== DICT_ERR
) {
3895 /* If the key is about a swapped value, we want a new key object
3896 * to overwrite the old. So we delete the old key in the database.
3897 * This will also make sure that swap pages about the old object
3898 * will be marked as free. */
3899 if (server
.vm_enabled
&& deleteIfSwapped(c
->db
,c
->argv
[1]))
3900 incrRefCount(c
->argv
[1]);
3901 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3902 incrRefCount(c
->argv
[2]);
3904 addReply(c
,shared
.czero
);
3908 incrRefCount(c
->argv
[1]);
3909 incrRefCount(c
->argv
[2]);
3912 removeExpire(c
->db
,c
->argv
[1]);
3913 addReply(c
, nx
? shared
.cone
: shared
.ok
);
3916 static void setCommand(redisClient
*c
) {
3917 setGenericCommand(c
,0);
3920 static void setnxCommand(redisClient
*c
) {
3921 setGenericCommand(c
,1);
3924 static int getGenericCommand(redisClient
*c
) {
3927 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
3930 if (o
->type
!= REDIS_STRING
) {
3931 addReply(c
,shared
.wrongtypeerr
);
3939 static void getCommand(redisClient
*c
) {
3940 getGenericCommand(c
);
3943 static void getsetCommand(redisClient
*c
) {
3944 if (getGenericCommand(c
) == REDIS_ERR
) return;
3945 if (dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]) == DICT_ERR
) {
3946 dictReplace(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
3948 incrRefCount(c
->argv
[1]);
3950 incrRefCount(c
->argv
[2]);
3952 removeExpire(c
->db
,c
->argv
[1]);
3955 static void mgetCommand(redisClient
*c
) {
3958 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
3959 for (j
= 1; j
< c
->argc
; j
++) {
3960 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
3962 addReply(c
,shared
.nullbulk
);
3964 if (o
->type
!= REDIS_STRING
) {
3965 addReply(c
,shared
.nullbulk
);
3973 static void msetGenericCommand(redisClient
*c
, int nx
) {
3974 int j
, busykeys
= 0;
3976 if ((c
->argc
% 2) == 0) {
3977 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
3980 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3981 * set nothing at all if at least one already key exists. */
3983 for (j
= 1; j
< c
->argc
; j
+= 2) {
3984 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
3990 addReply(c
, shared
.czero
);
3994 for (j
= 1; j
< c
->argc
; j
+= 2) {
3997 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
3998 retval
= dictAdd(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
3999 if (retval
== DICT_ERR
) {
4000 dictReplace(c
->db
->dict
,c
->argv
[j
],c
->argv
[j
+1]);
4001 incrRefCount(c
->argv
[j
+1]);
4003 incrRefCount(c
->argv
[j
]);
4004 incrRefCount(c
->argv
[j
+1]);
4006 removeExpire(c
->db
,c
->argv
[j
]);
4008 server
.dirty
+= (c
->argc
-1)/2;
4009 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4012 static void msetCommand(redisClient
*c
) {
4013 msetGenericCommand(c
,0);
4016 static void msetnxCommand(redisClient
*c
) {
4017 msetGenericCommand(c
,1);
4020 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4025 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4027 if (getLongLongFromObject(c
, o
, &value
) != REDIS_OK
) return;
4030 o
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
4031 o
= tryObjectEncoding(o
);
4032 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],o
);
4033 if (retval
== DICT_ERR
) {
4034 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4035 removeExpire(c
->db
,c
->argv
[1]);
4037 incrRefCount(c
->argv
[1]);
4040 addReply(c
,shared
.colon
);
4042 addReply(c
,shared
.crlf
);
4045 static void incrCommand(redisClient
*c
) {
4046 incrDecrCommand(c
,1);
4049 static void decrCommand(redisClient
*c
) {
4050 incrDecrCommand(c
,-1);
4053 static void incrbyCommand(redisClient
*c
) {
4056 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4058 incrDecrCommand(c
,incr
);
4061 static void decrbyCommand(redisClient
*c
) {
4064 if (getLongLongFromObject(c
, c
->argv
[2], &incr
) != REDIS_OK
) return;
4066 incrDecrCommand(c
,-incr
);
4069 static void appendCommand(redisClient
*c
) {
4074 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4076 /* Create the key */
4077 retval
= dictAdd(c
->db
->dict
,c
->argv
[1],c
->argv
[2]);
4078 incrRefCount(c
->argv
[1]);
4079 incrRefCount(c
->argv
[2]);
4080 totlen
= stringObjectLen(c
->argv
[2]);
4084 de
= dictFind(c
->db
->dict
,c
->argv
[1]);
4087 o
= dictGetEntryVal(de
);
4088 if (o
->type
!= REDIS_STRING
) {
4089 addReply(c
,shared
.wrongtypeerr
);
4092 /* If the object is specially encoded or shared we have to make
4094 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4095 robj
*decoded
= getDecodedObject(o
);
4097 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4098 decrRefCount(decoded
);
4099 dictReplace(c
->db
->dict
,c
->argv
[1],o
);
4102 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4103 o
->ptr
= sdscatlen(o
->ptr
,
4104 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4106 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4107 (unsigned long) c
->argv
[2]->ptr
);
4109 totlen
= sdslen(o
->ptr
);
4112 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4115 static void substrCommand(redisClient
*c
) {
4117 long start
= atoi(c
->argv
[2]->ptr
);
4118 long end
= atoi(c
->argv
[3]->ptr
);
4119 size_t rangelen
, strlen
;
4122 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4123 checkType(c
,o
,REDIS_STRING
)) return;
4125 o
= getDecodedObject(o
);
4126 strlen
= sdslen(o
->ptr
);
4128 /* convert negative indexes */
4129 if (start
< 0) start
= strlen
+start
;
4130 if (end
< 0) end
= strlen
+end
;
4131 if (start
< 0) start
= 0;
4132 if (end
< 0) end
= 0;
4134 /* indexes sanity checks */
4135 if (start
> end
|| (size_t)start
>= strlen
) {
4136 /* Out of range start or start > end result in null reply */
4137 addReply(c
,shared
.nullbulk
);
4141 if ((size_t)end
>= strlen
) end
= strlen
-1;
4142 rangelen
= (end
-start
)+1;
4144 /* Return the result */
4145 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4146 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4147 addReplySds(c
,range
);
4148 addReply(c
,shared
.crlf
);
4152 /* ========================= Type agnostic commands ========================= */
4154 static void delCommand(redisClient
*c
) {
4157 for (j
= 1; j
< c
->argc
; j
++) {
4158 if (deleteKey(c
->db
,c
->argv
[j
])) {
4163 addReplyLong(c
,deleted
);
4166 static void existsCommand(redisClient
*c
) {
4167 addReply(c
,lookupKeyRead(c
->db
,c
->argv
[1]) ? shared
.cone
: shared
.czero
);
4170 static void selectCommand(redisClient
*c
) {
4171 int id
= atoi(c
->argv
[1]->ptr
);
4173 if (selectDb(c
,id
) == REDIS_ERR
) {
4174 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4176 addReply(c
,shared
.ok
);
4180 static void randomkeyCommand(redisClient
*c
) {
4184 de
= dictGetRandomKey(c
->db
->dict
);
4185 if (!de
|| expireIfNeeded(c
->db
,dictGetEntryKey(de
)) == 0) break;
4188 addReply(c
,shared
.plus
);
4189 addReply(c
,shared
.crlf
);
4191 addReply(c
,shared
.plus
);
4192 addReply(c
,dictGetEntryKey(de
));
4193 addReply(c
,shared
.crlf
);
4197 static void keysCommand(redisClient
*c
) {
4200 sds pattern
= c
->argv
[1]->ptr
;
4201 int plen
= sdslen(pattern
);
4202 unsigned long numkeys
= 0;
4203 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4205 di
= dictGetIterator(c
->db
->dict
);
4207 decrRefCount(lenobj
);
4208 while((de
= dictNext(di
)) != NULL
) {
4209 robj
*keyobj
= dictGetEntryKey(de
);
4211 sds key
= keyobj
->ptr
;
4212 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4213 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4214 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4215 addReplyBulk(c
,keyobj
);
4220 dictReleaseIterator(di
);
4221 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4224 static void dbsizeCommand(redisClient
*c
) {
4226 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4229 static void lastsaveCommand(redisClient
*c
) {
4231 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4234 static void typeCommand(redisClient
*c
) {
4238 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4243 case REDIS_STRING
: type
= "+string"; break;
4244 case REDIS_LIST
: type
= "+list"; break;
4245 case REDIS_SET
: type
= "+set"; break;
4246 case REDIS_ZSET
: type
= "+zset"; break;
4247 case REDIS_HASH
: type
= "+hash"; break;
4248 default: type
= "+unknown"; break;
4251 addReplySds(c
,sdsnew(type
));
4252 addReply(c
,shared
.crlf
);
4255 static void saveCommand(redisClient
*c
) {
4256 if (server
.bgsavechildpid
!= -1) {
4257 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4260 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4261 addReply(c
,shared
.ok
);
4263 addReply(c
,shared
.err
);
4267 static void bgsaveCommand(redisClient
*c
) {
4268 if (server
.bgsavechildpid
!= -1) {
4269 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4272 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4273 char *status
= "+Background saving started\r\n";
4274 addReplySds(c
,sdsnew(status
));
4276 addReply(c
,shared
.err
);
4280 static void shutdownCommand(redisClient
*c
) {
4281 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4282 /* Kill the saving child if there is a background saving in progress.
4283 We want to avoid race conditions, for instance our saving child may
4284 overwrite the synchronous saving did by SHUTDOWN. */
4285 if (server
.bgsavechildpid
!= -1) {
4286 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4287 kill(server
.bgsavechildpid
,SIGKILL
);
4288 rdbRemoveTempFile(server
.bgsavechildpid
);
4290 if (server
.appendonly
) {
4291 /* Append only file: fsync() the AOF and exit */
4292 fsync(server
.appendfd
);
4293 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4296 /* Snapshotting. Perform a SYNC SAVE and exit */
4297 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4298 if (server
.daemonize
)
4299 unlink(server
.pidfile
);
4300 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4301 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4302 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4305 /* Ooops.. error saving! The best we can do is to continue
4306 * operating. Note that if there was a background saving process,
4307 * in the next cron() Redis will be notified that the background
4308 * saving aborted, handling special stuff like slaves pending for
4309 * synchronization... */
4310 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4312 sdsnew("-ERR can't quit, problems saving the DB\r\n"));
4317 static void renameGenericCommand(redisClient
*c
, int nx
) {
4320 /* To use the same key as src and dst is probably an error */
4321 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4322 addReply(c
,shared
.sameobjecterr
);
4326 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4330 deleteIfVolatile(c
->db
,c
->argv
[2]);
4331 if (dictAdd(c
->db
->dict
,c
->argv
[2],o
) == DICT_ERR
) {
4334 addReply(c
,shared
.czero
);
4337 dictReplace(c
->db
->dict
,c
->argv
[2],o
);
4339 incrRefCount(c
->argv
[2]);
4341 deleteKey(c
->db
,c
->argv
[1]);
4343 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4346 static void renameCommand(redisClient
*c
) {
4347 renameGenericCommand(c
,0);
4350 static void renamenxCommand(redisClient
*c
) {
4351 renameGenericCommand(c
,1);
4354 static void moveCommand(redisClient
*c
) {
4359 /* Obtain source and target DB pointers */
4362 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4363 addReply(c
,shared
.outofrangeerr
);
4367 selectDb(c
,srcid
); /* Back to the source DB */
4369 /* If the user is moving using as target the same
4370 * DB as the source DB it is probably an error. */
4372 addReply(c
,shared
.sameobjecterr
);
4376 /* Check if the element exists and get a reference */
4377 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4379 addReply(c
,shared
.czero
);
4383 /* Try to add the element to the target DB */
4384 deleteIfVolatile(dst
,c
->argv
[1]);
4385 if (dictAdd(dst
->dict
,c
->argv
[1],o
) == DICT_ERR
) {
4386 addReply(c
,shared
.czero
);
4389 incrRefCount(c
->argv
[1]);
4392 /* OK! key moved, free the entry in the source DB */
4393 deleteKey(src
,c
->argv
[1]);
4395 addReply(c
,shared
.cone
);
4398 /* =================================== Lists ================================ */
4399 static void pushGenericCommand(redisClient
*c
, int where
) {
4403 lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4405 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4406 addReply(c
,shared
.cone
);
4409 lobj
= createListObject();
4411 if (where
== REDIS_HEAD
) {
4412 listAddNodeHead(list
,c
->argv
[2]);
4414 listAddNodeTail(list
,c
->argv
[2]);
4416 dictAdd(c
->db
->dict
,c
->argv
[1],lobj
);
4417 incrRefCount(c
->argv
[1]);
4418 incrRefCount(c
->argv
[2]);
4420 if (lobj
->type
!= REDIS_LIST
) {
4421 addReply(c
,shared
.wrongtypeerr
);
4424 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
4425 addReply(c
,shared
.cone
);
4429 if (where
== REDIS_HEAD
) {
4430 listAddNodeHead(list
,c
->argv
[2]);
4432 listAddNodeTail(list
,c
->argv
[2]);
4434 incrRefCount(c
->argv
[2]);
4437 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",listLength(list
)));
4440 static void lpushCommand(redisClient
*c
) {
4441 pushGenericCommand(c
,REDIS_HEAD
);
4444 static void rpushCommand(redisClient
*c
) {
4445 pushGenericCommand(c
,REDIS_TAIL
);
4448 static void llenCommand(redisClient
*c
) {
4452 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4453 checkType(c
,o
,REDIS_LIST
)) return;
4456 addReplyUlong(c
,listLength(l
));
4459 static void lindexCommand(redisClient
*c
) {
4461 int index
= atoi(c
->argv
[2]->ptr
);
4465 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4466 checkType(c
,o
,REDIS_LIST
)) return;
4469 ln
= listIndex(list
, index
);
4471 addReply(c
,shared
.nullbulk
);
4473 robj
*ele
= listNodeValue(ln
);
4474 addReplyBulk(c
,ele
);
4478 static void lsetCommand(redisClient
*c
) {
4480 int index
= atoi(c
->argv
[2]->ptr
);
4484 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
||
4485 checkType(c
,o
,REDIS_LIST
)) return;
4488 ln
= listIndex(list
, index
);
4490 addReply(c
,shared
.outofrangeerr
);
4492 robj
*ele
= listNodeValue(ln
);
4495 listNodeValue(ln
) = c
->argv
[3];
4496 incrRefCount(c
->argv
[3]);
4497 addReply(c
,shared
.ok
);
4502 static void popGenericCommand(redisClient
*c
, int where
) {
4507 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4508 checkType(c
,o
,REDIS_LIST
)) return;
4511 if (where
== REDIS_HEAD
)
4512 ln
= listFirst(list
);
4514 ln
= listLast(list
);
4517 addReply(c
,shared
.nullbulk
);
4519 robj
*ele
= listNodeValue(ln
);
4520 addReplyBulk(c
,ele
);
4521 listDelNode(list
,ln
);
4522 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4527 static void lpopCommand(redisClient
*c
) {
4528 popGenericCommand(c
,REDIS_HEAD
);
4531 static void rpopCommand(redisClient
*c
) {
4532 popGenericCommand(c
,REDIS_TAIL
);
4535 static void lrangeCommand(redisClient
*c
) {
4537 int start
= atoi(c
->argv
[2]->ptr
);
4538 int end
= atoi(c
->argv
[3]->ptr
);
4545 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
4546 || checkType(c
,o
,REDIS_LIST
)) return;
4548 llen
= listLength(list
);
4550 /* convert negative indexes */
4551 if (start
< 0) start
= llen
+start
;
4552 if (end
< 0) end
= llen
+end
;
4553 if (start
< 0) start
= 0;
4554 if (end
< 0) end
= 0;
4556 /* indexes sanity checks */
4557 if (start
> end
|| start
>= llen
) {
4558 /* Out of range start or start > end result in empty list */
4559 addReply(c
,shared
.emptymultibulk
);
4562 if (end
>= llen
) end
= llen
-1;
4563 rangelen
= (end
-start
)+1;
4565 /* Return the result in form of a multi-bulk reply */
4566 ln
= listIndex(list
, start
);
4567 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
4568 for (j
= 0; j
< rangelen
; j
++) {
4569 ele
= listNodeValue(ln
);
4570 addReplyBulk(c
,ele
);
4575 static void ltrimCommand(redisClient
*c
) {
4577 int start
= atoi(c
->argv
[2]->ptr
);
4578 int end
= atoi(c
->argv
[3]->ptr
);
4580 int j
, ltrim
, rtrim
;
4584 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
4585 checkType(c
,o
,REDIS_LIST
)) return;
4587 llen
= listLength(list
);
4589 /* convert negative indexes */
4590 if (start
< 0) start
= llen
+start
;
4591 if (end
< 0) end
= llen
+end
;
4592 if (start
< 0) start
= 0;
4593 if (end
< 0) end
= 0;
4595 /* indexes sanity checks */
4596 if (start
> end
|| start
>= llen
) {
4597 /* Out of range start or start > end result in empty list */
4601 if (end
>= llen
) end
= llen
-1;
4606 /* Remove list elements to perform the trim */
4607 for (j
= 0; j
< ltrim
; j
++) {
4608 ln
= listFirst(list
);
4609 listDelNode(list
,ln
);
4611 for (j
= 0; j
< rtrim
; j
++) {
4612 ln
= listLast(list
);
4613 listDelNode(list
,ln
);
4615 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4617 addReply(c
,shared
.ok
);
4620 static void lremCommand(redisClient
*c
) {
4623 listNode
*ln
, *next
;
4624 int toremove
= atoi(c
->argv
[2]->ptr
);
4628 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4629 checkType(c
,o
,REDIS_LIST
)) return;
4633 toremove
= -toremove
;
4636 ln
= fromtail
? list
->tail
: list
->head
;
4638 robj
*ele
= listNodeValue(ln
);
4640 next
= fromtail
? ln
->prev
: ln
->next
;
4641 if (compareStringObjects(ele
,c
->argv
[3]) == 0) {
4642 listDelNode(list
,ln
);
4645 if (toremove
&& removed
== toremove
) break;
4649 if (listLength(list
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4650 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
4653 /* This is the semantic of this command:
4654 * RPOPLPUSH srclist dstlist:
4655 * IF LLEN(srclist) > 0
4656 * element = RPOP srclist
4657 * LPUSH dstlist element
4664 * The idea is to be able to get an element from a list in a reliable way
4665 * since the element is not just returned but pushed against another list
4666 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4668 static void rpoplpushcommand(redisClient
*c
) {
4673 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4674 checkType(c
,sobj
,REDIS_LIST
)) return;
4675 srclist
= sobj
->ptr
;
4676 ln
= listLast(srclist
);
4679 addReply(c
,shared
.nullbulk
);
4681 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4682 robj
*ele
= listNodeValue(ln
);
4685 if (dobj
&& dobj
->type
!= REDIS_LIST
) {
4686 addReply(c
,shared
.wrongtypeerr
);
4690 /* Add the element to the target list (unless it's directly
4691 * passed to some BLPOP-ing client */
4692 if (!handleClientsWaitingListPush(c
,c
->argv
[2],ele
)) {
4694 /* Create the list if the key does not exist */
4695 dobj
= createListObject();
4696 dictAdd(c
->db
->dict
,c
->argv
[2],dobj
);
4697 incrRefCount(c
->argv
[2]);
4699 dstlist
= dobj
->ptr
;
4700 listAddNodeHead(dstlist
,ele
);
4704 /* Send the element to the client as reply as well */
4705 addReplyBulk(c
,ele
);
4707 /* Finally remove the element from the source list */
4708 listDelNode(srclist
,ln
);
4709 if (listLength(srclist
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4714 /* ==================================== Sets ================================ */
4716 static void saddCommand(redisClient
*c
) {
4719 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4721 set
= createSetObject();
4722 dictAdd(c
->db
->dict
,c
->argv
[1],set
);
4723 incrRefCount(c
->argv
[1]);
4725 if (set
->type
!= REDIS_SET
) {
4726 addReply(c
,shared
.wrongtypeerr
);
4730 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
4731 incrRefCount(c
->argv
[2]);
4733 addReply(c
,shared
.cone
);
4735 addReply(c
,shared
.czero
);
4739 static void sremCommand(redisClient
*c
) {
4742 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4743 checkType(c
,set
,REDIS_SET
)) return;
4745 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
4747 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4748 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4749 addReply(c
,shared
.cone
);
4751 addReply(c
,shared
.czero
);
4755 static void smoveCommand(redisClient
*c
) {
4756 robj
*srcset
, *dstset
;
4758 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4759 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
4761 /* If the source key does not exist return 0, if it's of the wrong type
4763 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
4764 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
4767 /* Error if the destination key is not a set as well */
4768 if (dstset
&& dstset
->type
!= REDIS_SET
) {
4769 addReply(c
,shared
.wrongtypeerr
);
4772 /* Remove the element from the source set */
4773 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
4774 /* Key not found in the src set! return zero */
4775 addReply(c
,shared
.czero
);
4778 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
4779 deleteKey(c
->db
,c
->argv
[1]);
4781 /* Add the element to the destination set */
4783 dstset
= createSetObject();
4784 dictAdd(c
->db
->dict
,c
->argv
[2],dstset
);
4785 incrRefCount(c
->argv
[2]);
4787 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
4788 incrRefCount(c
->argv
[3]);
4789 addReply(c
,shared
.cone
);
4792 static void sismemberCommand(redisClient
*c
) {
4795 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4796 checkType(c
,set
,REDIS_SET
)) return;
4798 if (dictFind(set
->ptr
,c
->argv
[2]))
4799 addReply(c
,shared
.cone
);
4801 addReply(c
,shared
.czero
);
4804 static void scardCommand(redisClient
*c
) {
4808 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
4809 checkType(c
,o
,REDIS_SET
)) return;
4812 addReplyUlong(c
,dictSize(s
));
4815 static void spopCommand(redisClient
*c
) {
4819 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4820 checkType(c
,set
,REDIS_SET
)) return;
4822 de
= dictGetRandomKey(set
->ptr
);
4824 addReply(c
,shared
.nullbulk
);
4826 robj
*ele
= dictGetEntryKey(de
);
4828 addReplyBulk(c
,ele
);
4829 dictDelete(set
->ptr
,ele
);
4830 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
4831 if (dictSize((dict
*)set
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
4836 static void srandmemberCommand(redisClient
*c
) {
4840 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4841 checkType(c
,set
,REDIS_SET
)) return;
4843 de
= dictGetRandomKey(set
->ptr
);
4845 addReply(c
,shared
.nullbulk
);
4847 robj
*ele
= dictGetEntryKey(de
);
4849 addReplyBulk(c
,ele
);
4853 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
4854 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
4856 return dictSize(*d1
)-dictSize(*d2
);
4859 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
4860 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4863 robj
*lenobj
= NULL
, *dstset
= NULL
;
4864 unsigned long j
, cardinality
= 0;
4866 for (j
= 0; j
< setsnum
; j
++) {
4870 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4871 lookupKeyRead(c
->db
,setskeys
[j
]);
4875 if (deleteKey(c
->db
,dstkey
))
4877 addReply(c
,shared
.czero
);
4879 addReply(c
,shared
.emptymultibulk
);
4883 if (setobj
->type
!= REDIS_SET
) {
4885 addReply(c
,shared
.wrongtypeerr
);
4888 dv
[j
] = setobj
->ptr
;
4890 /* Sort sets from the smallest to largest, this will improve our
4891 * algorithm's performace */
4892 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
4894 /* The first thing we should output is the total number of elements...
4895 * since this is a multi-bulk write, but at this stage we don't know
4896 * the intersection set size, so we use a trick, append an empty object
4897 * to the output list and save the pointer to later modify it with the
4900 lenobj
= createObject(REDIS_STRING
,NULL
);
4902 decrRefCount(lenobj
);
4904 /* If we have a target key where to store the resulting set
4905 * create this key with an empty set inside */
4906 dstset
= createSetObject();
4909 /* Iterate all the elements of the first (smallest) set, and test
4910 * the element against all the other sets, if at least one set does
4911 * not include the element it is discarded */
4912 di
= dictGetIterator(dv
[0]);
4914 while((de
= dictNext(di
)) != NULL
) {
4917 for (j
= 1; j
< setsnum
; j
++)
4918 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
4920 continue; /* at least one set does not contain the member */
4921 ele
= dictGetEntryKey(de
);
4923 addReplyBulk(c
,ele
);
4926 dictAdd(dstset
->ptr
,ele
,NULL
);
4930 dictReleaseIterator(di
);
4933 /* Store the resulting set into the target, if the intersection
4934 * is not an empty set. */
4935 deleteKey(c
->db
,dstkey
);
4936 if (dictSize((dict
*)dstset
->ptr
) > 0) {
4937 dictAdd(c
->db
->dict
,dstkey
,dstset
);
4938 incrRefCount(dstkey
);
4939 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
4941 decrRefCount(dstset
);
4942 addReply(c
,shared
.czero
);
4946 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
4951 static void sinterCommand(redisClient
*c
) {
4952 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
4955 static void sinterstoreCommand(redisClient
*c
) {
4956 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
4959 #define REDIS_OP_UNION 0
4960 #define REDIS_OP_DIFF 1
4961 #define REDIS_OP_INTER 2
4963 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
4964 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
4967 robj
*dstset
= NULL
;
4968 int j
, cardinality
= 0;
4970 for (j
= 0; j
< setsnum
; j
++) {
4974 lookupKeyWrite(c
->db
,setskeys
[j
]) :
4975 lookupKeyRead(c
->db
,setskeys
[j
]);
4980 if (setobj
->type
!= REDIS_SET
) {
4982 addReply(c
,shared
.wrongtypeerr
);
4985 dv
[j
] = setobj
->ptr
;
4988 /* We need a temp set object to store our union. If the dstkey
4989 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4990 * this set object will be the resulting object to set into the target key*/
4991 dstset
= createSetObject();
4993 /* Iterate all the elements of all the sets, add every element a single
4994 * time to the result set */
4995 for (j
= 0; j
< setsnum
; j
++) {
4996 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
4997 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
4999 di
= dictGetIterator(dv
[j
]);
5001 while((de
= dictNext(di
)) != NULL
) {
5004 /* dictAdd will not add the same element multiple times */
5005 ele
= dictGetEntryKey(de
);
5006 if (op
== REDIS_OP_UNION
|| j
== 0) {
5007 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5011 } else if (op
== REDIS_OP_DIFF
) {
5012 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5017 dictReleaseIterator(di
);
5019 /* result set is empty? Exit asap. */
5020 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5023 /* Output the content of the resulting set, if not in STORE mode */
5025 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5026 di
= dictGetIterator(dstset
->ptr
);
5027 while((de
= dictNext(di
)) != NULL
) {
5030 ele
= dictGetEntryKey(de
);
5031 addReplyBulk(c
,ele
);
5033 dictReleaseIterator(di
);
5034 decrRefCount(dstset
);
5036 /* If we have a target key where to store the resulting set
5037 * create this key with the result set inside */
5038 deleteKey(c
->db
,dstkey
);
5039 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5040 dictAdd(c
->db
->dict
,dstkey
,dstset
);
5041 incrRefCount(dstkey
);
5042 addReplyLong(c
,dictSize((dict
*)dstset
->ptr
));
5044 decrRefCount(dstset
);
5045 addReply(c
,shared
.czero
);
5052 static void sunionCommand(redisClient
*c
) {
5053 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5056 static void sunionstoreCommand(redisClient
*c
) {
5057 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5060 static void sdiffCommand(redisClient
*c
) {
5061 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5064 static void sdiffstoreCommand(redisClient
*c
) {
5065 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5068 /* ==================================== ZSets =============================== */
5070 /* ZSETs are ordered sets using two data structures to hold the same elements
5071 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5074 * The elements are added to an hash table mapping Redis objects to scores.
5075 * At the same time the elements are added to a skip list mapping scores
5076 * to Redis objects (so objects are sorted by scores in this "view"). */
5078 /* This skiplist implementation is almost a C translation of the original
5079 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5080 * Alternative to Balanced Trees", modified in three ways:
5081 * a) this implementation allows for repeated values.
5082 * b) the comparison is not just by key (our 'score') but by satellite data.
5083 * c) there is a back pointer, so it's a doubly linked list with the back
5084 * pointers being only at "level 1". This allows to traverse the list
5085 * from tail to head, useful for ZREVRANGE. */
5087 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5088 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5090 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5092 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
5098 static zskiplist
*zslCreate(void) {
5102 zsl
= zmalloc(sizeof(*zsl
));
5105 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5106 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5107 zsl
->header
->forward
[j
] = NULL
;
5109 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5110 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5111 zsl
->header
->span
[j
] = 0;
5113 zsl
->header
->backward
= NULL
;
5118 static void zslFreeNode(zskiplistNode
*node
) {
5119 decrRefCount(node
->obj
);
5120 zfree(node
->forward
);
5125 static void zslFree(zskiplist
*zsl
) {
5126 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5128 zfree(zsl
->header
->forward
);
5129 zfree(zsl
->header
->span
);
5132 next
= node
->forward
[0];
5139 static int zslRandomLevel(void) {
5141 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5143 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
5146 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5147 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5148 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5152 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5153 /* store rank that is crossed to reach the insert position */
5154 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5156 while (x
->forward
[i
] &&
5157 (x
->forward
[i
]->score
< score
||
5158 (x
->forward
[i
]->score
== score
&&
5159 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
5160 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5165 /* we assume the key is not already inside, since we allow duplicated
5166 * scores, and the re-insertion of score and redis object should never
5167 * happpen since the caller of zslInsert() should test in the hash table
5168 * if the element is already inside or not. */
5169 level
= zslRandomLevel();
5170 if (level
> zsl
->level
) {
5171 for (i
= zsl
->level
; i
< level
; i
++) {
5173 update
[i
] = zsl
->header
;
5174 update
[i
]->span
[i
-1] = zsl
->length
;
5178 x
= zslCreateNode(level
,score
,obj
);
5179 for (i
= 0; i
< level
; i
++) {
5180 x
->forward
[i
] = update
[i
]->forward
[i
];
5181 update
[i
]->forward
[i
] = x
;
5183 /* update span covered by update[i] as x is inserted here */
5185 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5186 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5190 /* increment span for untouched levels */
5191 for (i
= level
; i
< zsl
->level
; i
++) {
5192 update
[i
]->span
[i
-1]++;
5195 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5197 x
->forward
[0]->backward
= x
;
5203 /* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5204 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5206 for (i
= 0; i
< zsl
->level
; i
++) {
5207 if (update
[i
]->forward
[i
] == x
) {
5209 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5211 update
[i
]->forward
[i
] = x
->forward
[i
];
5213 /* invariant: i > 0, because update[0]->forward[0]
5214 * is always equal to x */
5215 update
[i
]->span
[i
-1] -= 1;
5218 if (x
->forward
[0]) {
5219 x
->forward
[0]->backward
= x
->backward
;
5221 zsl
->tail
= x
->backward
;
5223 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5228 /* Delete an element with matching score/object from the skiplist. */
5229 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5230 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5234 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5235 while (x
->forward
[i
] &&
5236 (x
->forward
[i
]->score
< score
||
5237 (x
->forward
[i
]->score
== score
&&
5238 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5242 /* We may have multiple elements with the same score, what we need
5243 * is to find the element with both the right score and object. */
5245 if (x
&& score
== x
->score
&& compareStringObjects(x
->obj
,obj
) == 0) {
5246 zslDeleteNode(zsl
, x
, update
);
5250 return 0; /* not found */
5252 return 0; /* not found */
5255 /* Delete all the elements with score between min and max from the skiplist.
5256 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
5257 * Note that this function takes the reference to the hash table view of the
5258 * sorted set, in order to remove the elements from the hash table too. */
5259 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5260 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5261 unsigned long removed
= 0;
5265 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5266 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5270 /* We may have multiple elements with the same score, what we need
5271 * is to find the element with both the right score and object. */
5273 while (x
&& x
->score
<= max
) {
5274 zskiplistNode
*next
= x
->forward
[0];
5275 zslDeleteNode(zsl
, x
, update
);
5276 dictDelete(dict
,x
->obj
);
5281 return removed
; /* not found */
5284 /* Delete all the elements with rank between start and end from the skiplist.
5285 * Start and end are inclusive. Note that start and end need to be 1-based */
5286 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5287 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5288 unsigned long traversed
= 0, removed
= 0;
5292 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5293 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5294 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5302 while (x
&& traversed
<= end
) {
5303 zskiplistNode
*next
= x
->forward
[0];
5304 zslDeleteNode(zsl
, x
, update
);
5305 dictDelete(dict
,x
->obj
);
5314 /* Find the first node having a score equal or greater than the specified one.
5315 * Returns NULL if there is no match. */
5316 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5321 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5322 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
5325 /* We may have multiple elements with the same score, what we need
5326 * is to find the element with both the right score and object. */
5327 return x
->forward
[0];
5330 /* Find the rank for an element by both score and key.
5331 * Returns 0 when the element cannot be found, rank otherwise.
5332 * Note that the rank is 1-based due to the span of zsl->header to the
5334 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
5336 unsigned long rank
= 0;
5340 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5341 while (x
->forward
[i
] &&
5342 (x
->forward
[i
]->score
< score
||
5343 (x
->forward
[i
]->score
== score
&&
5344 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
5345 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
5349 /* x might be equal to zsl->header, so test if obj is non-NULL */
5350 if (x
->obj
&& compareStringObjects(x
->obj
,o
) == 0) {
5357 /* Finds an element by its rank. The rank argument needs to be 1-based. */
5358 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
5360 unsigned long traversed
= 0;
5364 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5365 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
5367 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5370 if (traversed
== rank
) {
5377 /* The actual Z-commands implementations */
5379 /* This generic command implements both ZADD and ZINCRBY.
5380 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
5381 * the increment if the operation is a ZINCRBY (doincrement == 1). */
5382 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
5387 zsetobj
= lookupKeyWrite(c
->db
,key
);
5388 if (zsetobj
== NULL
) {
5389 zsetobj
= createZsetObject();
5390 dictAdd(c
->db
->dict
,key
,zsetobj
);
5393 if (zsetobj
->type
!= REDIS_ZSET
) {
5394 addReply(c
,shared
.wrongtypeerr
);
5400 /* Ok now since we implement both ZADD and ZINCRBY here the code
5401 * needs to handle the two different conditions. It's all about setting
5402 * '*score', that is, the new score to set, to the right value. */
5403 score
= zmalloc(sizeof(double));
5407 /* Read the old score. If the element was not present starts from 0 */
5408 de
= dictFind(zs
->dict
,ele
);
5410 double *oldscore
= dictGetEntryVal(de
);
5411 *score
= *oldscore
+ scoreval
;
5419 /* What follows is a simple remove and re-insert operation that is common
5420 * to both ZADD and ZINCRBY... */
5421 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
5422 /* case 1: New element */
5423 incrRefCount(ele
); /* added to hash */
5424 zslInsert(zs
->zsl
,*score
,ele
);
5425 incrRefCount(ele
); /* added to skiplist */
5428 addReplyDouble(c
,*score
);
5430 addReply(c
,shared
.cone
);
5435 /* case 2: Score update operation */
5436 de
= dictFind(zs
->dict
,ele
);
5437 redisAssert(de
!= NULL
);
5438 oldscore
= dictGetEntryVal(de
);
5439 if (*score
!= *oldscore
) {
5442 /* Remove and insert the element in the skip list with new score */
5443 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
5444 redisAssert(deleted
!= 0);
5445 zslInsert(zs
->zsl
,*score
,ele
);
5447 /* Update the score in the hash table */
5448 dictReplace(zs
->dict
,ele
,score
);
5454 addReplyDouble(c
,*score
);
5456 addReply(c
,shared
.czero
);
/* ZADD entry point. argv[2] is parsed as the score with
 * getDoubleFromObject(); when that call does not return REDIS_OK the
 * command returns right away. Otherwise argv[1] (key) and argv[3] (element)
 * are passed to zaddGenericCommand() with doincrement == 0. */
5460 static void zaddCommand(redisClient
*c
) {
5463 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5465 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
/* ZINCRBY entry point. argv[2] is parsed as the increment with
 * getDoubleFromObject(); when that call does not return REDIS_OK the
 * command returns right away. Otherwise argv[1] (key) and argv[3] (element)
 * are passed to zaddGenericCommand() with doincrement == 1. */
5468 static void zincrbyCommand(redisClient
*c
) {
5471 if (getDoubleFromObject(c
, c
->argv
[2], &scoreval
) != REDIS_OK
) return;
5473 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
5476 static void zremCommand(redisClient
*c
) {
5483 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5484 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5487 de
= dictFind(zs
->dict
,c
->argv
[2]);
5489 addReply(c
,shared
.czero
);
5492 /* Delete from the skiplist */
5493 oldscore
= dictGetEntryVal(de
);
5494 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
5495 redisAssert(deleted
!= 0);
5497 /* Delete from the hash table */
5498 dictDelete(zs
->dict
,c
->argv
[2]);
5499 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5500 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5502 addReply(c
,shared
.cone
);
5505 static void zremrangebyscoreCommand(redisClient
*c
) {
5512 if ((getDoubleFromObject(c
, c
->argv
[2], &min
) != REDIS_OK
) ||
5513 (getDoubleFromObject(c
, c
->argv
[3], &max
) != REDIS_OK
)) return;
5515 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5516 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5519 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
5520 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5521 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5522 server
.dirty
+= deleted
;
5523 addReplyLong(c
,deleted
);
5526 static void zremrangebyrankCommand(redisClient
*c
) {
5534 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5535 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5537 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5538 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
5540 llen
= zs
->zsl
->length
;
5542 /* convert negative indexes */
5543 if (start
< 0) start
= llen
+start
;
5544 if (end
< 0) end
= llen
+end
;
5545 if (start
< 0) start
= 0;
5546 if (end
< 0) end
= 0;
5548 /* indexes sanity checks */
5549 if (start
> end
|| start
>= llen
) {
5550 addReply(c
,shared
.czero
);
5553 if (end
>= llen
) end
= llen
-1;
5555 /* increment start and end because zsl*Rank functions
5556 * use 1-based rank */
5557 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
5558 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
5559 if (dictSize(zs
->dict
) == 0) deleteKey(c
->db
,c
->argv
[1]);
5560 server
.dirty
+= deleted
;
5561 addReplyLong(c
, deleted
);
5569 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
5570 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
5571 unsigned long size1
, size2
;
5572 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
5573 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
5574 return size1
- size2
;
5577 #define REDIS_AGGR_SUM 1
5578 #define REDIS_AGGR_MIN 2
5579 #define REDIS_AGGR_MAX 3
/* Fold 'val' into '*target' according to 'aggregate':
 *   REDIS_AGGR_SUM: *target becomes *target + val.
 *   REDIS_AGGR_MIN: *target becomes the smaller of val and *target.
 *   REDIS_AGGR_MAX: *target becomes the larger of val and *target.
 *
 * target:    accumulator, updated in place.
 * val:       value to combine with the accumulator.
 * aggregate: one of the REDIS_AGGR_* constants. */
5581 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
5582 if (aggregate
== REDIS_AGGR_SUM
) {
5583 *target
= *target
+ val
;
5584 } else if (aggregate
== REDIS_AGGR_MIN
) {
5585 *target
= val
< *target
? val
: *target
;
5586 } else if (aggregate
== REDIS_AGGR_MAX
) {
5587 *target
= val
> *target
? val
: *target
;
/* This assertion can never hold. NOTE(review): presumably it is only reached
 * for an unknown 'aggregate' value; the enclosing branch is not visible in
 * this listing, confirm against the full source. */
5590 redisAssert(0 != 0);
5594 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
5596 int aggregate
= REDIS_AGGR_SUM
;
5603 /* expect zsetnum input keys to be given */
5604 zsetnum
= atoi(c
->argv
[2]->ptr
);
5606 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNION/ZINTER\r\n"));
5610 /* test if the expected number of keys would overflow */
5611 if (3+zsetnum
> c
->argc
) {
5612 addReply(c
,shared
.syntaxerr
);
5616 /* read keys to be used for input */
5617 src
= zmalloc(sizeof(zsetopsrc
) * zsetnum
);
5618 for (i
= 0, j
= 3; i
< zsetnum
; i
++, j
++) {
5619 robj
*zsetobj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
5623 if (zsetobj
->type
!= REDIS_ZSET
) {
5625 addReply(c
,shared
.wrongtypeerr
);
5628 src
[i
].dict
= ((zset
*)zsetobj
->ptr
)->dict
;
5631 /* default all weights to 1 */
5632 src
[i
].weight
= 1.0;
5635 /* parse optional extra arguments */
5637 int remaining
= c
->argc
- j
;
5640 if (remaining
>= (zsetnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
5642 for (i
= 0; i
< zsetnum
; i
++, j
++, remaining
--) {
5643 if (getDoubleFromObject(c
, c
->argv
[j
], &src
[i
].weight
) != REDIS_OK
)
5646 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
5648 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
5649 aggregate
= REDIS_AGGR_SUM
;
5650 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
5651 aggregate
= REDIS_AGGR_MIN
;
5652 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
5653 aggregate
= REDIS_AGGR_MAX
;
5656 addReply(c
,shared
.syntaxerr
);
5662 addReply(c
,shared
.syntaxerr
);
5668 /* sort sets from the smallest to largest, this will improve our
5669 * algorithm's performance */
5670 qsort(src
,zsetnum
,sizeof(zsetopsrc
), qsortCompareZsetopsrcByCardinality
);
5672 dstobj
= createZsetObject();
5673 dstzset
= dstobj
->ptr
;
5675 if (op
== REDIS_OP_INTER
) {
5676 /* skip going over all entries if the smallest zset is NULL or empty */
5677 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
5678 /* precondition: as src[0].dict is non-empty and the zsets are ordered
5679 * from small to large, all src[i > 0].dict are non-empty too */
5680 di
= dictGetIterator(src
[0].dict
);
5681 while((de
= dictNext(di
)) != NULL
) {
5682 double *score
= zmalloc(sizeof(double)), value
;
5683 *score
= src
[0].weight
* (*(double*)dictGetEntryVal(de
));
5685 for (j
= 1; j
< zsetnum
; j
++) {
5686 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5688 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5689 zunionInterAggregate(score
, value
, aggregate
);
5695 /* skip entry when not present in every source dict */
5699 robj
*o
= dictGetEntryKey(de
);
5700 dictAdd(dstzset
->dict
,o
,score
);
5701 incrRefCount(o
); /* added to dictionary */
5702 zslInsert(dstzset
->zsl
,*score
,o
);
5703 incrRefCount(o
); /* added to skiplist */
5706 dictReleaseIterator(di
);
5708 } else if (op
== REDIS_OP_UNION
) {
5709 for (i
= 0; i
< zsetnum
; i
++) {
5710 if (!src
[i
].dict
) continue;
5712 di
= dictGetIterator(src
[i
].dict
);
5713 while((de
= dictNext(di
)) != NULL
) {
5714 /* skip key when already processed */
5715 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
5717 double *score
= zmalloc(sizeof(double)), value
;
5718 *score
= src
[i
].weight
* (*(double*)dictGetEntryVal(de
));
5720 /* because the zsets are sorted by size, it's only possible
5721 * for sets at larger indices to hold this entry */
5722 for (j
= (i
+1); j
< zsetnum
; j
++) {
5723 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
5725 value
= src
[j
].weight
* (*(double*)dictGetEntryVal(other
));
5726 zunionInterAggregate(score
, value
, aggregate
);
5730 robj
*o
= dictGetEntryKey(de
);
5731 dictAdd(dstzset
->dict
,o
,score
);
5732 incrRefCount(o
); /* added to dictionary */
5733 zslInsert(dstzset
->zsl
,*score
,o
);
5734 incrRefCount(o
); /* added to skiplist */
5736 dictReleaseIterator(di
);
5739 /* unknown operator */
5740 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
5743 deleteKey(c
->db
,dstkey
);
5744 if (dstzset
->zsl
->length
) {
5745 dictAdd(c
->db
->dict
,dstkey
,dstobj
);
5746 incrRefCount(dstkey
);
5747 addReplyLong(c
, dstzset
->zsl
->length
);
5750 decrRefCount(dstobj
);
5751 addReply(c
, shared
.czero
);
/* ZUNION entry point: runs zunionInterGenericCommand() with argv[1] as the
 * destination key and REDIS_OP_UNION as the operation. */
5756 static void zunionCommand(redisClient
*c
) {
5757 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
/* ZINTER entry point: runs zunionInterGenericCommand() with argv[1] as the
 * destination key and REDIS_OP_INTER as the operation. */
5760 static void zinterCommand(redisClient
*c
) {
5761 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
5764 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
5776 if ((getLongFromObject(c
, c
->argv
[2], &start
) != REDIS_OK
) ||
5777 (getLongFromObject(c
, c
->argv
[3], &end
) != REDIS_OK
)) return;
5779 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
5781 } else if (c
->argc
>= 5) {
5782 addReply(c
,shared
.syntaxerr
);
5786 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5787 || checkType(c
,o
,REDIS_ZSET
)) return;
5792 /* convert negative indexes */
5793 if (start
< 0) start
= llen
+start
;
5794 if (end
< 0) end
= llen
+end
;
5795 if (start
< 0) start
= 0;
5796 if (end
< 0) end
= 0;
5798 /* indexes sanity checks */
5799 if (start
> end
|| start
>= llen
) {
5800 /* Out of range start or start > end result in empty list */
5801 addReply(c
,shared
.emptymultibulk
);
5804 if (end
>= llen
) end
= llen
-1;
5805 rangelen
= (end
-start
)+1;
5807 /* check if starting point is trivial, before searching
5808 * the element in log(N) time */
5810 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
5813 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
5816 /* Return the result in form of a multi-bulk reply */
5817 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
5818 withscores
? (rangelen
*2) : rangelen
));
5819 for (j
= 0; j
< rangelen
; j
++) {
5821 addReplyBulk(c
,ele
);
5823 addReplyDouble(c
,ln
->score
);
5824 ln
= reverse
? ln
->backward
: ln
->forward
[0];
/* ZRANGE entry point: zrangeGenericCommand() with reverse == 0. */
5828 static void zrangeCommand(redisClient
*c
) {
5829 zrangeGenericCommand(c
,0);
/* ZREVRANGE entry point: zrangeGenericCommand() with reverse == 1. */
5832 static void zrevrangeCommand(redisClient
*c
) {
5833 zrangeGenericCommand(c
,1);
5836 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
5837 * If justcount is non-zero, just the count is returned. */
5838 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
5841 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
5842 int offset
= 0, limit
= -1;
5846 /* Parse the min-max interval. If one of the values is prefixed
5847 * by the "(" character, it's considered "open". For instance
5848 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
5849 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
5850 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
5851 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
5854 min
= strtod(c
->argv
[2]->ptr
,NULL
);
5856 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
5857 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
5860 max
= strtod(c
->argv
[3]->ptr
,NULL
);
5863 /* Parse "WITHSCORES": note that if the command was called with
5864 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
5865 * enter the following paths to parse WITHSCORES and LIMIT. */
5866 if (c
->argc
== 5 || c
->argc
== 8) {
5867 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
5872 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
5876 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
5881 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
5882 addReply(c
,shared
.syntaxerr
);
5884 } else if (c
->argc
== (7 + withscores
)) {
5885 offset
= atoi(c
->argv
[5]->ptr
);
5886 limit
= atoi(c
->argv
[6]->ptr
);
5887 if (offset
< 0) offset
= 0;
5890 /* Ok, lookup the key and get the range */
5891 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
5893 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5895 if (o
->type
!= REDIS_ZSET
) {
5896 addReply(c
,shared
.wrongtypeerr
);
5898 zset
*zsetobj
= o
->ptr
;
5899 zskiplist
*zsl
= zsetobj
->zsl
;
5901 robj
*ele
, *lenobj
= NULL
;
5902 unsigned long rangelen
= 0;
5904 /* Get the first node with the score >= min, or with
5905 * score > min if 'minex' is true. */
5906 ln
= zslFirstWithScore(zsl
,min
);
5907 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
5910 /* No element matching the speciifed interval */
5911 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
5915 /* We don't know in advance how many matching elements there
5916 * are in the list, so we push this object that will represent
5917 * the multi-bulk length in the output buffer, and will "fix"
 * it later */
5920 lenobj
= createObject(REDIS_STRING
,NULL
);
5922 decrRefCount(lenobj
);
5925 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
5928 ln
= ln
->forward
[0];
5931 if (limit
== 0) break;
5934 addReplyBulk(c
,ele
);
5936 addReplyDouble(c
,ln
->score
);
5938 ln
= ln
->forward
[0];
5940 if (limit
> 0) limit
--;
5943 addReplyLong(c
,(long)rangelen
);
5945 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
5946 withscores
? (rangelen
*2) : rangelen
);
/* ZRANGEBYSCORE entry point: genericZrangebyscoreCommand() with
 * justcount == 0, so the matching elements are returned. */
5952 static void zrangebyscoreCommand(redisClient
*c
) {
5953 genericZrangebyscoreCommand(c
,0);
/* ZCOUNT entry point: genericZrangebyscoreCommand() with justcount == 1,
 * so just the count is returned. */
5956 static void zcountCommand(redisClient
*c
) {
5957 genericZrangebyscoreCommand(c
,1);
/* ZCARD entry point: replies with the length of the skip list of the
 * sorted set stored at argv[1]. */
5960 static void zcardCommand(redisClient
*c
) {
/* Return early when the lookup yields NULL (shared.czero is the reply handed
 * to lookupKeyReadOrReply() for that case) or when checkType() against
 * REDIS_ZSET returns non-zero. */
5964 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5965 checkType(c
,o
,REDIS_ZSET
)) return;
5968 addReplyUlong(c
,zs
->zsl
->length
);
5971 static void zscoreCommand(redisClient
*c
) {
5976 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5977 checkType(c
,o
,REDIS_ZSET
)) return;
5980 de
= dictFind(zs
->dict
,c
->argv
[2]);
5982 addReply(c
,shared
.nullbulk
);
5984 double *score
= dictGetEntryVal(de
);
5986 addReplyDouble(c
,*score
);
5990 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
5998 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5999 checkType(c
,o
,REDIS_ZSET
)) return;
6003 de
= dictFind(zs
->dict
,c
->argv
[2]);
6005 addReply(c
,shared
.nullbulk
);
6009 score
= dictGetEntryVal(de
);
6010 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6013 addReplyLong(c
, zsl
->length
- rank
);
6015 addReplyLong(c
, rank
-1);
6018 addReply(c
,shared
.nullbulk
);
/* ZRANK entry point: zrankGenericCommand() with reverse == 0. */
6022 static void zrankCommand(redisClient
*c
) {
6023 zrankGenericCommand(c
, 0);
/* ZREVRANK entry point: zrankGenericCommand() with reverse == 1. */
6026 static void zrevrankCommand(redisClient
*c
) {
6027 zrankGenericCommand(c
, 1);
6030 /* =================================== Hashes =============================== */
6031 static void hsetCommand(redisClient
*c
) {
6033 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6036 o
= createHashObject();
6037 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6038 incrRefCount(c
->argv
[1]);
6040 if (o
->type
!= REDIS_HASH
) {
6041 addReply(c
,shared
.wrongtypeerr
);
6045 /* We want to convert the zipmap into a hash table right now if the
6046 * entry to be added is too big. Note that we check if the object
6047 * is integer encoded before trying to fetch the length in the test below.
6048 * This is because integers are small, but currently stringObjectLen()
6049 * performs a slow conversion: not worth it. */
6050 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
&&
6051 ((c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
&&
6052 sdslen(c
->argv
[2]->ptr
) > server
.hash_max_zipmap_value
) ||
6053 (c
->argv
[3]->encoding
== REDIS_ENCODING_RAW
&&
6054 sdslen(c
->argv
[3]->ptr
) > server
.hash_max_zipmap_value
)))
6056 convertToRealHash(o
);
6059 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6060 unsigned char *zm
= o
->ptr
;
6061 robj
*valobj
= getDecodedObject(c
->argv
[3]);
6063 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6064 valobj
->ptr
,sdslen(valobj
->ptr
),&update
);
6065 decrRefCount(valobj
);
6068 /* And here there is the second check for hash conversion. */
6069 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6070 convertToRealHash(o
);
6072 c
->argv
[2] = tryObjectEncoding(c
->argv
[2]);
6073 /* note that c->argv[3] is already encoded, as the latest arg
6074 * of a bulk command is always integer encoded if possible. */
6075 if (dictReplace(o
->ptr
,c
->argv
[2],c
->argv
[3])) {
6076 incrRefCount(c
->argv
[2]);
6080 incrRefCount(c
->argv
[3]);
6083 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",update
== 0));
6086 static void hmsetCommand(redisClient
*c
) {
6088 robj
*o
, *key
, *val
;
6090 if ((c
->argc
% 2) == 1) {
6091 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6095 if ((o
= lookupKeyWrite(c
->db
,c
->argv
[1])) == NULL
) {
6096 o
= createHashObject();
6097 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6098 incrRefCount(c
->argv
[1]);
6100 if (o
->type
!= REDIS_HASH
) {
6101 addReply(c
,shared
.wrongtypeerr
);
6106 /* We want to convert the zipmap into a hash table right now if the
6107 * entry to be added is too big. */
6108 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6109 for (i
= 2; i
< c
->argc
; i
+=2) {
6110 if ((c
->argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6111 sdslen(c
->argv
[i
]->ptr
) > server
.hash_max_zipmap_value
) ||
6112 (c
->argv
[i
+1]->encoding
== REDIS_ENCODING_RAW
&&
6113 sdslen(c
->argv
[i
+1]->ptr
) > server
.hash_max_zipmap_value
)) {
6114 convertToRealHash(o
);
6120 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6121 unsigned char *zm
= o
->ptr
;
6123 for (i
= 2; i
< c
->argc
; i
+=2) {
6124 key
= getDecodedObject(c
->argv
[i
]);
6125 val
= getDecodedObject(c
->argv
[i
+1]);
6126 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
6127 val
->ptr
,sdslen(val
->ptr
),NULL
);
6133 /* And here there is the second check for hash conversion. */
6134 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6135 convertToRealHash(o
);
6137 for (i
= 2; i
< c
->argc
; i
+=2) {
6138 key
= tryObjectEncoding(c
->argv
[i
]);
6139 val
= tryObjectEncoding(c
->argv
[i
+1]);
6140 if (dictReplace(o
->ptr
,key
,val
)) {
6147 addReply(c
, shared
.ok
);
6150 static void hincrbyCommand(redisClient
*c
) {
6151 long long value
= 0, incr
= 0;
6152 robj
*o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
6155 o
= createHashObject();
6156 dictAdd(c
->db
->dict
,c
->argv
[1],o
);
6157 incrRefCount(c
->argv
[1]);
6159 if (o
->type
!= REDIS_HASH
) {
6160 addReply(c
,shared
.wrongtypeerr
);
6165 if (getLongLongFromObject(c
, c
->argv
[3], &incr
) != REDIS_OK
) return;
6167 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6168 unsigned char *zm
= o
->ptr
;
6169 unsigned char *zval
;
6172 /* Find value if already present in hash */
6173 if (zipmapGet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6175 /* strtoll needs the char* to have a trailing \0, but
6176 * the zipmap doesn't include them. */
6177 sds szval
= sdsnewlen(zval
, zvlen
);
6178 value
= strtoll(szval
,NULL
,10);
6183 sds svalue
= sdscatprintf(sdsempty(),"%lld",value
);
6184 zm
= zipmapSet(zm
,c
->argv
[2]->ptr
,sdslen(c
->argv
[2]->ptr
),
6185 (unsigned char*)svalue
,sdslen(svalue
),NULL
);
6189 /* Check if the zipmap needs to be converted. */
6190 if (zipmapLen(zm
) > server
.hash_max_zipmap_entries
)
6191 convertToRealHash(o
);
6196 /* Find value if already present in hash */
6197 de
= dictFind(o
->ptr
,c
->argv
[2]);
6199 hval
= dictGetEntryVal(de
);
6200 if (hval
->encoding
== REDIS_ENCODING_RAW
)
6201 value
= strtoll(hval
->ptr
,NULL
,10);
6202 else if (hval
->encoding
== REDIS_ENCODING_INT
)
6203 value
= (long)hval
->ptr
;
6205 redisAssert(1 != 1);
6209 hval
= createObject(REDIS_STRING
,sdscatprintf(sdsempty(),"%lld",value
));
6210 hval
= tryObjectEncoding(hval
);
6211 if (dictReplace(o
->ptr
,c
->argv
[2],hval
)) {
6212 incrRefCount(c
->argv
[2]);
6217 addReplyLongLong(c
, value
);
6220 static void hgetCommand(redisClient
*c
) {
6223 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6224 checkType(c
,o
,REDIS_HASH
)) return;
6226 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6227 unsigned char *zm
= o
->ptr
;
6232 field
= getDecodedObject(c
->argv
[2]);
6233 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
), &val
,&vlen
)) {
6234 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6235 addReplySds(c
,sdsnewlen(val
,vlen
));
6236 addReply(c
,shared
.crlf
);
6237 decrRefCount(field
);
6240 addReply(c
,shared
.nullbulk
);
6241 decrRefCount(field
);
6245 struct dictEntry
*de
;
6247 de
= dictFind(o
->ptr
,c
->argv
[2]);
6249 addReply(c
,shared
.nullbulk
);
6251 robj
*e
= dictGetEntryVal(de
);
6258 static void hmgetCommand(redisClient
*c
) {
6261 robj
*o
= lookupKeyRead(c
->db
, c
->argv
[1]);
6263 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6264 for (i
= 2; i
< c
->argc
; i
++) {
6265 addReply(c
,shared
.nullbulk
);
6269 if (o
->type
!= REDIS_HASH
) {
6270 addReply(c
,shared
.wrongtypeerr
);
6275 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
6276 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6277 unsigned char *zm
= o
->ptr
;
6282 for (i
= 2; i
< c
->argc
; i
++) {
6283 field
= getDecodedObject(c
->argv
[i
]);
6284 if (zipmapGet(zm
,field
->ptr
,sdslen(field
->ptr
),&v
,&vlen
)) {
6285 addReplySds(c
,sdscatprintf(sdsempty(),"$%u\r\n", vlen
));
6286 addReplySds(c
,sdsnewlen(v
,vlen
));
6287 addReply(c
,shared
.crlf
);
6289 addReply(c
,shared
.nullbulk
);
6291 decrRefCount(field
);
6296 for (i
= 2; i
< c
->argc
; i
++) {
6297 de
= dictFind(o
->ptr
,c
->argv
[i
]);
6299 addReplyBulk(c
,(robj
*)dictGetEntryVal(de
));
6301 addReply(c
,shared
.nullbulk
);
6307 static void hdelCommand(redisClient
*c
) {
6311 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6312 checkType(c
,o
,REDIS_HASH
)) return;
6314 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6315 robj
*field
= getDecodedObject(c
->argv
[2]);
6317 o
->ptr
= zipmapDel((unsigned char*) o
->ptr
,
6318 (unsigned char*) field
->ptr
,
6319 sdslen(field
->ptr
), &deleted
);
6320 decrRefCount(field
);
6321 if (zipmapLen((unsigned char*) o
->ptr
) == 0)
6322 deleteKey(c
->db
,c
->argv
[1]);
6324 deleted
= dictDelete((dict
*)o
->ptr
,c
->argv
[2]) == DICT_OK
;
6325 if (htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6326 if (dictSize((dict
*)o
->ptr
) == 0) deleteKey(c
->db
,c
->argv
[1]);
6328 if (deleted
) server
.dirty
++;
6329 addReply(c
,deleted
? shared
.cone
: shared
.czero
);
/* HLEN entry point: replies with the number of entries of the hash stored
 * at argv[1]. */
6332 static void hlenCommand(redisClient
*c
) {
/* Return early when the lookup yields NULL (shared.czero is the reply handed
 * to lookupKeyReadOrReply() for that case) or when checkType() against
 * REDIS_HASH returns non-zero. */
6336 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6337 checkType(c
,o
,REDIS_HASH
)) return;
/* The length comes from zipmapLen() for zipmap encoded hashes and from
 * dictSize() for every other encoding. */
6339 len
= (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6340 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6341 addReplyUlong(c
,len
);
6344 #define REDIS_GETALL_KEYS 1
6345 #define REDIS_GETALL_VALS 2
6346 static void genericHgetallCommand(redisClient
*c
, int flags
) {
6348 unsigned long count
= 0;
6350 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6351 || checkType(c
,o
,REDIS_HASH
)) return;
6353 lenobj
= createObject(REDIS_STRING
,NULL
);
6355 decrRefCount(lenobj
);
6357 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6358 unsigned char *p
= zipmapRewind(o
->ptr
);
6359 unsigned char *field
, *val
;
6360 unsigned int flen
, vlen
;
6362 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
6365 if (flags
& REDIS_GETALL_KEYS
) {
6366 aux
= createStringObject((char*)field
,flen
);
6367 addReplyBulk(c
,aux
);
6371 if (flags
& REDIS_GETALL_VALS
) {
6372 aux
= createStringObject((char*)val
,vlen
);
6373 addReplyBulk(c
,aux
);
6379 dictIterator
*di
= dictGetIterator(o
->ptr
);
6382 while((de
= dictNext(di
)) != NULL
) {
6383 robj
*fieldobj
= dictGetEntryKey(de
);
6384 robj
*valobj
= dictGetEntryVal(de
);
6386 if (flags
& REDIS_GETALL_KEYS
) {
6387 addReplyBulk(c
,fieldobj
);
6390 if (flags
& REDIS_GETALL_VALS
) {
6391 addReplyBulk(c
,valobj
);
6395 dictReleaseIterator(di
);
6397 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
/* HKEYS entry point: genericHgetallCommand() with only the
 * REDIS_GETALL_KEYS flag set, so only field names are added to the reply. */
6400 static void hkeysCommand(redisClient
*c
) {
6401 genericHgetallCommand(c
,REDIS_GETALL_KEYS
);
/* HVALS entry point: genericHgetallCommand() with only the
 * REDIS_GETALL_VALS flag set, so only values are added to the reply. */
6404 static void hvalsCommand(redisClient
*c
) {
6405 genericHgetallCommand(c
,REDIS_GETALL_VALS
);
/* HGETALL entry point: genericHgetallCommand() with both REDIS_GETALL_KEYS
 * and REDIS_GETALL_VALS set, so field names and values are both added to
 * the reply. */
6408 static void hgetallCommand(redisClient
*c
) {
6409 genericHgetallCommand(c
,REDIS_GETALL_KEYS
|REDIS_GETALL_VALS
);
/* HEXISTS entry point: replies shared.cone when field argv[2] is present in
 * the hash stored at argv[1], shared.czero otherwise. */
6412 static void hexistsCommand(redisClient
*c
) {
/* Return early when the lookup yields NULL (shared.czero is the reply handed
 * to lookupKeyReadOrReply() for that case) or when checkType() against
 * REDIS_HASH returns non-zero. */
6416 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6417 checkType(c
,o
,REDIS_HASH
)) return;
6419 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6421 unsigned char *zm
= o
->ptr
;
/* The zipmap is queried with the decoded form of the field name; the
 * reference obtained from getDecodedObject() is released right after. */
6423 field
= getDecodedObject(c
->argv
[2]);
6424 exists
= zipmapExists(zm
,field
->ptr
,sdslen(field
->ptr
));
6425 decrRefCount(field
);
/* Lookup through dictFind(), passing argv[2] as it is. */
6427 exists
= dictFind(o
->ptr
,c
->argv
[2]) != NULL
;
6429 addReply(c
,exists
? shared
.cone
: shared
.czero
);
/* Convert the hash object 'o' from its zipmap representation to a
 * dictionary created with hashDictType, then mark the object as
 * REDIS_ENCODING_HT.
 *
 * o: object of type REDIS_HASH whose encoding is not yet REDIS_ENCODING_HT
 *    (both conditions are asserted below).
 *
 * NOTE(review): the lines that store the new dictionary into o->ptr and
 * dispose of the old zipmap are not visible in this listing. */
6432 static void convertToRealHash(robj
*o
) {
6433 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
6434 unsigned int klen
, vlen
;
6435 dict
*dict
= dictCreate(&hashDictType
,NULL
);
6437 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
/* Iterate over every field/value pair stored in the zipmap. */
6438 p
= zipmapRewind(zm
);
6439 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
6440 robj
*keyobj
, *valobj
;
/* Field and value are turned into string objects built from their explicit
 * lengths, passed through tryObjectEncoding(), and added to the dictionary. */
6442 keyobj
= createStringObject((char*)key
,klen
);
6443 valobj
= createStringObject((char*)val
,vlen
);
6444 keyobj
= tryObjectEncoding(keyobj
);
6445 valobj
= tryObjectEncoding(valobj
);
6446 dictAdd(dict
,keyobj
,valobj
);
6448 o
->encoding
= REDIS_ENCODING_HT
;
6453 /* ========================= Non type-specific commands ==================== */
/* FLUSHDB entry point: empties both the key dictionary and the expires
 * dictionary of the client's current DB, then replies with shared.ok. */
6455 static void flushdbCommand(redisClient
*c
) {
/* Account for the change before emptying: server.dirty grows by the number
 * of keys currently held in the DB. */
6456 server
.dirty
+= dictSize(c
->db
->dict
);
6457 dictEmpty(c
->db
->dict
);
6458 dictEmpty(c
->db
->expires
);
6459 addReply(c
,shared
.ok
);
/* FLUSHALL entry point: calls emptyDb(), replies with shared.ok and then
 * calls rdbSave() on server.dbfilename. */
6462 static void flushallCommand(redisClient
*c
) {
/* The value returned by emptyDb() is added to server.dirty. */
6463 server
.dirty
+= emptyDb();
6464 addReply(c
,shared
.ok
);
/* When a background save child is recorded (bgsavechildpid != -1) it is
 * sent SIGKILL and rdbRemoveTempFile() is called with its pid. */
6465 if (server
.bgsavechildpid
!= -1) {
6466 kill(server
.bgsavechildpid
,SIGKILL
);
6467 rdbRemoveTempFile(server
.bgsavechildpid
);
6469 rdbSave(server
.dbfilename
);
/* Allocate a redisSortOperation with zmalloc() and store 'pattern' in it.
 * The pattern pointer is stored as it is.
 *
 * NOTE(review): the lines that store 'type' and return the new operation
 * are not visible in this listing; confirm against the full source. */
6473 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
6474 redisSortOperation
*so
= zmalloc(sizeof(*so
));
6476 so
->pattern
= pattern
;
6480 /* Return the value associated to the key with a name obtained
6481 * substituting the first occurrence of '*' in 'pattern' with 'subst' */
6482 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
6486 int prefixlen
, sublen
, postfixlen
;
6487 /* Exploit the internal sds representation to create an sds string allocated on the stack in order to make this function faster */
6491 char buf
[REDIS_SORTKEY_MAX
+1];
6494 /* If the pattern is "#" return the substitution object itself in order
6495 * to implement the "SORT ... GET #" feature. */
6496 spat
= pattern
->ptr
;
6497 if (spat
[0] == '#' && spat
[1] == '\0') {
6501 /* The substitution object may be specially encoded. If so we create
6502 * a decoded object on the fly. Otherwise getDecodedObject will just
6503 * increment the ref count, that we'll decrement later. */
6504 subst
= getDecodedObject(subst
);
6507 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
6508 p
= strchr(spat
,'*');
6510 decrRefCount(subst
);
6515 sublen
= sdslen(ssub
);
6516 postfixlen
= sdslen(spat
)-(prefixlen
+1);
6517 memcpy(keyname
.buf
,spat
,prefixlen
);
6518 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
6519 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
6520 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
6521 keyname
.len
= prefixlen
+sublen
+postfixlen
;
6523 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2))
6524 decrRefCount(subst
);
6526 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
6527 return lookupKeyRead(db
,&keyobj
);
6530 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6531 * the additional parameter is not standard but BSD-specific, we have to
6532 * pass sorting parameters via the global 'server' structure */
6533 static int sortCompare(const void *s1
, const void *s2
) {
6534 const redisSortObject
*so1
= s1
, *so2
= s2
;
6537 if (!server
.sort_alpha
) {
6538 /* Numeric sorting. Here it's trivial as we precomputed scores */
6539 if (so1
->u
.score
> so2
->u
.score
) {
6541 } else if (so1
->u
.score
< so2
->u
.score
) {
6547 /* Alphanumeric sorting */
6548 if (server
.sort_bypattern
) {
6549 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
6550 /* At least one compare object is NULL */
6551 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
6553 else if (so1
->u
.cmpobj
== NULL
)
6558 /* We have both the objects, use strcoll */
6559 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
6562 /* Compare elements directly */
6565 dec1
= getDecodedObject(so1
->obj
);
6566 dec2
= getDecodedObject(so2
->obj
);
6567 cmp
= strcoll(dec1
->ptr
,dec2
->ptr
);
6572 return server
.sort_desc
? -cmp
: cmp
;
6575 /* The SORT command is the most complex command in Redis. Warning: this code
6576 * is optimized for speed and a bit less for readability */
6577 static void sortCommand(redisClient
*c
) {
6580 int desc
= 0, alpha
= 0;
6581 int limit_start
= 0, limit_count
= -1, start
, end
;
6582 int j
, dontsort
= 0, vectorlen
;
6583 int getop
= 0; /* GET operation counter */
6584 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
6585 redisSortObject
*vector
; /* Resulting vector to sort */
6587 /* Lookup the key to sort. It must be of the right types */
6588 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
6589 if (sortval
== NULL
) {
6590 addReply(c
,shared
.emptymultibulk
);
6593 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
6594 sortval
->type
!= REDIS_ZSET
)
6596 addReply(c
,shared
.wrongtypeerr
);
6600 /* Create a list of operations to perform for every sorted element.
6601 * Operations can be GET/DEL/INCR/DECR */
6602 operations
= listCreate();
6603 listSetFreeMethod(operations
,zfree
);
6606 /* Now we need to protect sortval incrementing its count, in the future
6607 * SORT may have options able to overwrite/delete keys during the sorting
6608 * and the sorted key itself may get destroyed */
6609 incrRefCount(sortval
);
6611 /* The SORT command has an SQL-alike syntax, parse it */
6612 while(j
< c
->argc
) {
6613 int leftargs
= c
->argc
-j
-1;
6614 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
6616 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
6618 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
6620 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
6621 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
6622 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
6624 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
6625 storekey
= c
->argv
[j
+1];
6627 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
6628 sortby
= c
->argv
[j
+1];
6629 /* If the BY pattern does not contain '*', i.e. it is constant,
6630 * we don't need to sort nor to lookup the weight keys. */
6631 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
6633 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
6634 listAddNodeTail(operations
,createSortOperation(
6635 REDIS_SORT_GET
,c
->argv
[j
+1]));
6639 decrRefCount(sortval
);
6640 listRelease(operations
);
6641 addReply(c
,shared
.syntaxerr
);
6647 /* Load the sorting vector with all the objects to sort */
6648 switch(sortval
->type
) {
6649 case REDIS_LIST
: vectorlen
= listLength((list
*)sortval
->ptr
); break;
6650 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
6651 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
6652 default: vectorlen
= 0; redisAssert(0); /* Avoid GCC warning */
6654 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
6657 if (sortval
->type
== REDIS_LIST
) {
6658 list
*list
= sortval
->ptr
;
6662 listRewind(list
,&li
);
6663 while((ln
= listNext(&li
))) {
6664 robj
*ele
= ln
->value
;
6665 vector
[j
].obj
= ele
;
6666 vector
[j
].u
.score
= 0;
6667 vector
[j
].u
.cmpobj
= NULL
;
6675 if (sortval
->type
== REDIS_SET
) {
6678 zset
*zs
= sortval
->ptr
;
6682 di
= dictGetIterator(set
);
6683 while((setele
= dictNext(di
)) != NULL
) {
6684 vector
[j
].obj
= dictGetEntryKey(setele
);
6685 vector
[j
].u
.score
= 0;
6686 vector
[j
].u
.cmpobj
= NULL
;
6689 dictReleaseIterator(di
);
6691 redisAssert(j
== vectorlen
);
6693 /* Now it's time to load the right scores in the sorting vector */
6694 if (dontsort
== 0) {
6695 for (j
= 0; j
< vectorlen
; j
++) {
6699 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
6700 if (!byval
|| byval
->type
!= REDIS_STRING
) continue;
6702 vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
6704 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
6705 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
6707 /* Don't need to decode the object if it's
6708 * integer-encoded (the only encoding supported) so
6709 * far. We can just cast it */
6710 if (byval
->encoding
== REDIS_ENCODING_INT
) {
6711 vector
[j
].u
.score
= (long)byval
->ptr
;
6713 redisAssert(1 != 1);
6718 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_RAW
)
6719 vector
[j
].u
.score
= strtod(vector
[j
].obj
->ptr
,NULL
);
6721 if (vector
[j
].obj
->encoding
== REDIS_ENCODING_INT
)
6722 vector
[j
].u
.score
= (long) vector
[j
].obj
->ptr
;
6724 redisAssert(1 != 1);
6731 /* We are ready to sort the vector... perform a bit of sanity check
6732 * on the LIMIT option too. We'll use a partial version of quicksort. */
6733 start
= (limit_start
< 0) ? 0 : limit_start
;
6734 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
6735 if (start
>= vectorlen
) {
6736 start
= vectorlen
-1;
6739 if (end
>= vectorlen
) end
= vectorlen
-1;
6741 if (dontsort
== 0) {
6742 server
.sort_desc
= desc
;
6743 server
.sort_alpha
= alpha
;
6744 server
.sort_bypattern
= sortby
? 1 : 0;
6745 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
6746 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
6748 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
6751 /* Send command output to the output buffer, performing the specified
6752 * GET/DEL/INCR/DECR operations if any. */
6753 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
6754 if (storekey
== NULL
) {
6755 /* STORE option not specified, sent the sorting result to client */
6756 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
6757 for (j
= start
; j
<= end
; j
++) {
6761 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
6762 listRewind(operations
,&li
);
6763 while((ln
= listNext(&li
))) {
6764 redisSortOperation
*sop
= ln
->value
;
6765 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6768 if (sop
->type
== REDIS_SORT_GET
) {
6769 if (!val
|| val
->type
!= REDIS_STRING
) {
6770 addReply(c
,shared
.nullbulk
);
6772 addReplyBulk(c
,val
);
6775 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6780 robj
*listObject
= createListObject();
6781 list
*listPtr
= (list
*) listObject
->ptr
;
6783 /* STORE option specified, set the sorting result as a List object */
6784 for (j
= start
; j
<= end
; j
++) {
6789 listAddNodeTail(listPtr
,vector
[j
].obj
);
6790 incrRefCount(vector
[j
].obj
);
6792 listRewind(operations
,&li
);
6793 while((ln
= listNext(&li
))) {
6794 redisSortOperation
*sop
= ln
->value
;
6795 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
6798 if (sop
->type
== REDIS_SORT_GET
) {
6799 if (!val
|| val
->type
!= REDIS_STRING
) {
6800 listAddNodeTail(listPtr
,createStringObject("",0));
6802 listAddNodeTail(listPtr
,val
);
6806 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
6810 if (dictReplace(c
->db
->dict
,storekey
,listObject
)) {
6811 incrRefCount(storekey
);
6813 /* Note: we add 1 because the DB is dirty anyway since even if the
6814 * SORT result is empty a new key is set and maybe the old content
6816 server
.dirty
+= 1+outputlen
;
6817 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
6821 decrRefCount(sortval
);
6822 listRelease(operations
);
6823 for (j
= 0; j
< vectorlen
; j
++) {
6824 if (sortby
&& alpha
&& vector
[j
].u
.cmpobj
)
6825 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result (at most 21
 * characters plus the terminating NUL). The buffer is always written:
 * values of 1 TiB and above are rendered with the T and P suffixes, and
 * anything larger falls back to the plain byte count. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too large for any suffix: print the raw byte count. */
        sprintf(s,"%lluB",n);
    }
}
6851 /* Create the string returned by the INFO command. This is decoupled
6852 * by the INFO command itself as we need to report the same information
6853 * on memory corruption problems. */
6854 static sds
genRedisInfoString(void) {
6856 time_t uptime
= time(NULL
)-server
.stat_starttime
;
6860 bytesToHuman(hmem
,zmalloc_used_memory());
6861 info
= sdscatprintf(sdsempty(),
6862 "redis_version:%s\r\n"
6864 "multiplexing_api:%s\r\n"
6865 "process_id:%ld\r\n"
6866 "uptime_in_seconds:%ld\r\n"
6867 "uptime_in_days:%ld\r\n"
6868 "connected_clients:%d\r\n"
6869 "connected_slaves:%d\r\n"
6870 "blocked_clients:%d\r\n"
6871 "used_memory:%zu\r\n"
6872 "used_memory_human:%s\r\n"
6873 "changes_since_last_save:%lld\r\n"
6874 "bgsave_in_progress:%d\r\n"
6875 "last_save_time:%ld\r\n"
6876 "bgrewriteaof_in_progress:%d\r\n"
6877 "total_connections_received:%lld\r\n"
6878 "total_commands_processed:%lld\r\n"
6879 "expired_keys:%lld\r\n"
6880 "hash_max_zipmap_entries:%ld\r\n"
6881 "hash_max_zipmap_value:%ld\r\n"
6882 "pubsub_channels:%ld\r\n"
6883 "pubsub_patterns:%u\r\n"
6887 (sizeof(long) == 8) ? "64" : "32",
6892 listLength(server
.clients
)-listLength(server
.slaves
),
6893 listLength(server
.slaves
),
6894 server
.blpop_blocked_clients
,
6895 zmalloc_used_memory(),
6898 server
.bgsavechildpid
!= -1,
6900 server
.bgrewritechildpid
!= -1,
6901 server
.stat_numconnections
,
6902 server
.stat_numcommands
,
6903 server
.stat_expiredkeys
,
6904 server
.hash_max_zipmap_entries
,
6905 server
.hash_max_zipmap_value
,
6906 dictSize(server
.pubsub_channels
),
6907 listLength(server
.pubsub_patterns
),
6908 server
.vm_enabled
!= 0,
6909 server
.masterhost
== NULL
? "master" : "slave"
6911 if (server
.masterhost
) {
6912 info
= sdscatprintf(info
,
6913 "master_host:%s\r\n"
6914 "master_port:%d\r\n"
6915 "master_link_status:%s\r\n"
6916 "master_last_io_seconds_ago:%d\r\n"
6919 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
6921 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
6924 if (server
.vm_enabled
) {
6926 info
= sdscatprintf(info
,
6927 "vm_conf_max_memory:%llu\r\n"
6928 "vm_conf_page_size:%llu\r\n"
6929 "vm_conf_pages:%llu\r\n"
6930 "vm_stats_used_pages:%llu\r\n"
6931 "vm_stats_swapped_objects:%llu\r\n"
6932 "vm_stats_swappin_count:%llu\r\n"
6933 "vm_stats_swappout_count:%llu\r\n"
6934 "vm_stats_io_newjobs_len:%lu\r\n"
6935 "vm_stats_io_processing_len:%lu\r\n"
6936 "vm_stats_io_processed_len:%lu\r\n"
6937 "vm_stats_io_active_threads:%lu\r\n"
6938 "vm_stats_blocked_clients:%lu\r\n"
6939 ,(unsigned long long) server
.vm_max_memory
,
6940 (unsigned long long) server
.vm_page_size
,
6941 (unsigned long long) server
.vm_pages
,
6942 (unsigned long long) server
.vm_stats_used_pages
,
6943 (unsigned long long) server
.vm_stats_swapped_objects
,
6944 (unsigned long long) server
.vm_stats_swapins
,
6945 (unsigned long long) server
.vm_stats_swapouts
,
6946 (unsigned long) listLength(server
.io_newjobs
),
6947 (unsigned long) listLength(server
.io_processing
),
6948 (unsigned long) listLength(server
.io_processed
),
6949 (unsigned long) server
.io_active_threads
,
6950 (unsigned long) server
.vm_blocked_clients
6954 for (j
= 0; j
< server
.dbnum
; j
++) {
6955 long long keys
, vkeys
;
6957 keys
= dictSize(server
.db
[j
].dict
);
6958 vkeys
= dictSize(server
.db
[j
].expires
);
6959 if (keys
|| vkeys
) {
6960 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
6967 static void infoCommand(redisClient
*c
) {
6968 sds info
= genRedisInfoString();
6969 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
6970 (unsigned long)sdslen(info
)));
6971 addReplySds(c
,info
);
6972 addReply(c
,shared
.crlf
);
6975 static void monitorCommand(redisClient
*c
) {
6976 /* ignore MONITOR if aleady slave or in monitor mode */
6977 if (c
->flags
& REDIS_SLAVE
) return;
6979 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
6981 listAddNodeTail(server
.monitors
,c
);
6982 addReply(c
,shared
.ok
);
6985 /* ================================= Expire ================================= */
6986 static int removeExpire(redisDb
*db
, robj
*key
) {
6987 if (dictDelete(db
->expires
,key
) == DICT_OK
) {
6994 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
6995 if (dictAdd(db
->expires
,key
,(void*)when
) == DICT_ERR
) {
7003 /* Return the expire time of the specified key, or -1 if no expire
7004 * is associated with this key (i.e. the key is non volatile) */
7005 static time_t getExpire(redisDb
*db
, robj
*key
) {
7008 /* No expire? return ASAP */
7009 if (dictSize(db
->expires
) == 0 ||
7010 (de
= dictFind(db
->expires
,key
)) == NULL
) return -1;
7012 return (time_t) dictGetEntryVal(de
);
7015 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7019 /* No expire? return ASAP */
7020 if (dictSize(db
->expires
) == 0 ||
7021 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7023 /* Lookup the expire */
7024 when
= (time_t) dictGetEntryVal(de
);
7025 if (time(NULL
) <= when
) return 0;
7027 /* Delete the key */
7028 dictDelete(db
->expires
,key
);
7029 server
.stat_expiredkeys
++;
7030 return dictDelete(db
->dict
,key
) == DICT_OK
;
7033 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7036 /* No expire? return ASAP */
7037 if (dictSize(db
->expires
) == 0 ||
7038 (de
= dictFind(db
->expires
,key
)) == NULL
) return 0;
7040 /* Delete the key */
7042 server
.stat_expiredkeys
++;
7043 dictDelete(db
->expires
,key
);
7044 return dictDelete(db
->dict
,key
) == DICT_OK
;
7047 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7051 if (getLongFromObject(c
, param
, &seconds
) != REDIS_OK
) return;
7055 de
= dictFind(c
->db
->dict
,key
);
7057 addReply(c
,shared
.czero
);
7061 if (deleteKey(c
->db
,key
)) server
.dirty
++;
7062 addReply(c
, shared
.cone
);
7065 time_t when
= time(NULL
)+seconds
;
7066 if (setExpire(c
->db
,key
,when
)) {
7067 addReply(c
,shared
.cone
);
7070 addReply(c
,shared
.czero
);
7076 static void expireCommand(redisClient
*c
) {
7077 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7080 static void expireatCommand(redisClient
*c
) {
7081 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7084 static void ttlCommand(redisClient
*c
) {
7088 expire
= getExpire(c
->db
,c
->argv
[1]);
7090 ttl
= (int) (expire
-time(NULL
));
7091 if (ttl
< 0) ttl
= -1;
7093 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7096 /* ================================ MULTI/EXEC ============================== */
7098 /* Client state initialization for MULTI/EXEC */
7099 static void initClientMultiState(redisClient
*c
) {
7100 c
->mstate
.commands
= NULL
;
7101 c
->mstate
.count
= 0;
7104 /* Release all the resources associated with MULTI/EXEC state */
7105 static void freeClientMultiState(redisClient
*c
) {
7108 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7110 multiCmd
*mc
= c
->mstate
.commands
+j
;
7112 for (i
= 0; i
< mc
->argc
; i
++)
7113 decrRefCount(mc
->argv
[i
]);
7116 zfree(c
->mstate
.commands
);
7119 /* Add a new command into the MULTI commands queue */
7120 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7124 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7125 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7126 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7129 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7130 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7131 for (j
= 0; j
< c
->argc
; j
++)
7132 incrRefCount(mc
->argv
[j
]);
7136 static void multiCommand(redisClient
*c
) {
7137 c
->flags
|= REDIS_MULTI
;
7138 addReply(c
,shared
.ok
);
7141 static void discardCommand(redisClient
*c
) {
7142 if (!(c
->flags
& REDIS_MULTI
)) {
7143 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7147 freeClientMultiState(c
);
7148 initClientMultiState(c
);
7149 c
->flags
&= (~REDIS_MULTI
);
7150 addReply(c
,shared
.ok
);
7153 static void execCommand(redisClient
*c
) {
7158 if (!(c
->flags
& REDIS_MULTI
)) {
7159 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7163 orig_argv
= c
->argv
;
7164 orig_argc
= c
->argc
;
7165 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7166 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7167 c
->argc
= c
->mstate
.commands
[j
].argc
;
7168 c
->argv
= c
->mstate
.commands
[j
].argv
;
7169 call(c
,c
->mstate
.commands
[j
].cmd
);
7171 c
->argv
= orig_argv
;
7172 c
->argc
= orig_argc
;
7173 freeClientMultiState(c
);
7174 initClientMultiState(c
);
7175 c
->flags
&= (~REDIS_MULTI
);
7178 /* =========================== Blocking Operations ========================= */
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, we use BLPOP as example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
7209 /* Set a client in blocking mode for the specified key, with the specified
7211 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7216 c
->blockingkeys
= zmalloc(sizeof(robj
*)*numkeys
);
7217 c
->blockingkeysnum
= numkeys
;
7218 c
->blockingto
= timeout
;
7219 for (j
= 0; j
< numkeys
; j
++) {
7220 /* Add the key in the client structure, to map clients -> keys */
7221 c
->blockingkeys
[j
] = keys
[j
];
7222 incrRefCount(keys
[j
]);
7224 /* And in the other "side", to map keys -> clients */
7225 de
= dictFind(c
->db
->blockingkeys
,keys
[j
]);
7229 /* For every key we take a list of clients blocked for it */
7231 retval
= dictAdd(c
->db
->blockingkeys
,keys
[j
],l
);
7232 incrRefCount(keys
[j
]);
7233 assert(retval
== DICT_OK
);
7235 l
= dictGetEntryVal(de
);
7237 listAddNodeTail(l
,c
);
7239 /* Mark the client as a blocked client */
7240 c
->flags
|= REDIS_BLOCKED
;
7241 server
.blpop_blocked_clients
++;
7244 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7245 static void unblockClientWaitingData(redisClient
*c
) {
7250 assert(c
->blockingkeys
!= NULL
);
7251 /* The client may wait for multiple keys, so unblock it for every key. */
7252 for (j
= 0; j
< c
->blockingkeysnum
; j
++) {
7253 /* Remove this client from the list of clients waiting for this key. */
7254 de
= dictFind(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7256 l
= dictGetEntryVal(de
);
7257 listDelNode(l
,listSearchKey(l
,c
));
7258 /* If the list is empty we need to remove it to avoid wasting memory */
7259 if (listLength(l
) == 0)
7260 dictDelete(c
->db
->blockingkeys
,c
->blockingkeys
[j
]);
7261 decrRefCount(c
->blockingkeys
[j
]);
7263 /* Cleanup the client structure */
7264 zfree(c
->blockingkeys
);
7265 c
->blockingkeys
= NULL
;
7266 c
->flags
&= (~REDIS_BLOCKED
);
7267 server
.blpop_blocked_clients
--;
7268 /* We want to process data if there is some command waiting
7269 * in the input buffer. Note that this is safe even if
7270 * unblockClientWaitingData() gets called from freeClient() because
7271 * freeClient() will be smart enough to call this function
7272 * *after* c->querybuf was set to NULL. */
7273 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
7276 /* This should be called from any function PUSHing into lists.
7277 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7278 * 'ele' is the element pushed.
7280 * If the function returns 0 there was no client waiting for a list push
7283 * If the function returns 1 there was a client waiting for a list push
7284 * against this key, the element was passed to this client thus it's not
7285 * needed to actually add it to the list and the caller should return asap. */
7286 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
7287 struct dictEntry
*de
;
7288 redisClient
*receiver
;
7292 de
= dictFind(c
->db
->blockingkeys
,key
);
7293 if (de
== NULL
) return 0;
7294 l
= dictGetEntryVal(de
);
7297 receiver
= ln
->value
;
7299 addReplySds(receiver
,sdsnew("*2\r\n"));
7300 addReplyBulk(receiver
,key
);
7301 addReplyBulk(receiver
,ele
);
7302 unblockClientWaitingData(receiver
);
7306 /* Blocking RPOP/LPOP */
7307 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
7312 for (j
= 1; j
< c
->argc
-1; j
++) {
7313 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
7315 if (o
->type
!= REDIS_LIST
) {
7316 addReply(c
,shared
.wrongtypeerr
);
7319 list
*list
= o
->ptr
;
7320 if (listLength(list
) != 0) {
7321 /* If the list contains elements fall back to the usual
7322 * non-blocking POP operation */
7323 robj
*argv
[2], **orig_argv
;
7326 /* We need to alter the command arguments before to call
7327 * popGenericCommand() as the command takes a single key. */
7328 orig_argv
= c
->argv
;
7329 orig_argc
= c
->argc
;
7330 argv
[1] = c
->argv
[j
];
7334 /* Also the return value is different, we need to output
7335 * the multi bulk reply header and the key name. The
7336 * "real" command will add the last element (the value)
7337 * for us. If this souds like an hack to you it's just
7338 * because it is... */
7339 addReplySds(c
,sdsnew("*2\r\n"));
7340 addReplyBulk(c
,argv
[1]);
7341 popGenericCommand(c
,where
);
7343 /* Fix the client structure with the original stuff */
7344 c
->argv
= orig_argv
;
7345 c
->argc
= orig_argc
;
7351 /* If the list is empty or the key does not exists we must block */
7352 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
7353 if (timeout
> 0) timeout
+= time(NULL
);
7354 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
7357 static void blpopCommand(redisClient
*c
) {
7358 blockingPopGenericCommand(c
,REDIS_HEAD
);
7361 static void brpopCommand(redisClient
*c
) {
7362 blockingPopGenericCommand(c
,REDIS_TAIL
);
7365 /* =============================== Replication ============================= */
7367 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7368 ssize_t nwritten
, ret
= size
;
7369 time_t start
= time(NULL
);
7373 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
7374 nwritten
= write(fd
,ptr
,size
);
7375 if (nwritten
== -1) return -1;
7379 if ((time(NULL
)-start
) > timeout
) {
7387 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
7388 ssize_t nread
, totread
= 0;
7389 time_t start
= time(NULL
);
7393 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
7394 nread
= read(fd
,ptr
,size
);
7395 if (nread
== -1) return -1;
7400 if ((time(NULL
)-start
) > timeout
) {
/* Read a line from 'fd' one byte at a time into 'ptr', a buffer of 'size'
 * bytes. The trailing "\n" (and a preceding "\r", if any) is stripped and
 * the result is always NUL terminated.
 *
 * Returns the number of characters stored before the newline was seen
 * (a stripped "\r" is still counted), or -1 if syncRead() fails. Reading
 * stops once size-1 characters have been stored, so a line longer than
 * the buffer is truncated instead of being written past its end. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    *ptr = '\0';
    size--; /* Reserve one byte for the terminating NUL. */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
            size--;
        }
    }
    return nread;
}
7429 static void syncCommand(redisClient
*c
) {
7430 /* ignore SYNC if aleady slave or in monitor mode */
7431 if (c
->flags
& REDIS_SLAVE
) return;
7433 /* SYNC can't be issued when the server has pending data to send to
7434 * the client about already issued commands. We need a fresh reply
7435 * buffer registering the differences between the BGSAVE and the current
7436 * dataset, so that we can copy to other slaves if needed. */
7437 if (listLength(c
->reply
) != 0) {
7438 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7442 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
7443 /* Here we need to check if there is a background saving operation
7444 * in progress, or if it is required to start one */
7445 if (server
.bgsavechildpid
!= -1) {
7446 /* Ok a background save is in progress. Let's check if it is a good
7447 * one for replication, i.e. if there is another slave that is
7448 * registering differences since the server forked to save */
7453 listRewind(server
.slaves
,&li
);
7454 while((ln
= listNext(&li
))) {
7456 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
7459 /* Perfect, the server is already registering differences for
7460 * another slave. Set the right state, and copy the buffer. */
7461 listRelease(c
->reply
);
7462 c
->reply
= listDup(slave
->reply
);
7463 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7464 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
7466 /* No way, we need to wait for the next BGSAVE in order to
7467 * register differences */
7468 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7469 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
7472 /* Ok we don't have a BGSAVE in progress, let's start one */
7473 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
7474 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7475 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
7476 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
7479 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7482 c
->flags
|= REDIS_SLAVE
;
7484 listAddNodeTail(server
.slaves
,c
);
7488 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
7489 redisClient
*slave
= privdata
;
7491 REDIS_NOTUSED(mask
);
7492 char buf
[REDIS_IOBUF_LEN
];
7493 ssize_t nwritten
, buflen
;
7495 if (slave
->repldboff
== 0) {
7496 /* Write the bulk write count before to transfer the DB. In theory here
7497 * we don't know how much room there is in the output buffer of the
7498 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
7499 * operations) will never be smaller than the few bytes we need. */
7502 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
7504 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
7512 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
7513 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
7515 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
7516 (buflen
== 0) ? "premature EOF" : strerror(errno
));
7520 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
7521 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
7526 slave
->repldboff
+= nwritten
;
7527 if (slave
->repldboff
== slave
->repldbsize
) {
7528 close(slave
->repldbfd
);
7529 slave
->repldbfd
= -1;
7530 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7531 slave
->replstate
= REDIS_REPL_ONLINE
;
7532 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
7533 sendReplyToClient
, slave
) == AE_ERR
) {
7537 addReplySds(slave
,sdsempty());
7538 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
7542 /* This function is called at the end of every backgrond saving.
7543 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
7544 * otherwise REDIS_ERR is passed to the function.
7546 * The goal of this function is to handle slaves waiting for a successful
7547 * background saving in order to perform non-blocking synchronization. */
7548 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
7550 int startbgsave
= 0;
7553 listRewind(server
.slaves
,&li
);
7554 while((ln
= listNext(&li
))) {
7555 redisClient
*slave
= ln
->value
;
7557 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
7559 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
7560 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
7561 struct redis_stat buf
;
7563 if (bgsaveerr
!= REDIS_OK
) {
7565 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
7568 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
7569 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
7571 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
7574 slave
->repldboff
= 0;
7575 slave
->repldbsize
= buf
.st_size
;
7576 slave
->replstate
= REDIS_REPL_SEND_BULK
;
7577 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
7578 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
7585 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
7588 listRewind(server
.slaves
,&li
);
7589 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
7590 while((ln
= listNext(&li
))) {
7591 redisClient
*slave
= ln
->value
;
7593 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
7600 static int syncWithMaster(void) {
7601 char buf
[1024], tmpfile
[256], authcmd
[1024];
7603 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
7604 int dfd
, maxtries
= 5;
7607 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
7612 /* AUTH with the master if required. */
7613 if(server
.masterauth
) {
7614 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
7615 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
7617 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
7621 /* Read the AUTH result. */
7622 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7624 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
7628 if (buf
[0] != '+') {
7630 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
7635 /* Issue the SYNC command */
7636 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
7638 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
7642 /* Read the bulk write count */
7643 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
7645 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
7649 if (buf
[0] != '$') {
7651 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
7654 dumpsize
= strtol(buf
+1,NULL
,10);
7655 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
7656 /* Read the bulk write data on a temp file */
7658 snprintf(tmpfile
,256,
7659 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
7660 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
7661 if (dfd
!= -1) break;
7666 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
7670 int nread
, nwritten
;
7672 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
7674 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
7680 nwritten
= write(dfd
,buf
,nread
);
7681 if (nwritten
== -1) {
7682 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
7690 if (rename(tmpfile
,server
.dbfilename
) == -1) {
7691 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
7697 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
7698 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
7702 server
.master
= createClient(fd
);
7703 server
.master
->flags
|= REDIS_MASTER
;
7704 server
.master
->authenticated
= 1;
7705 server
.replstate
= REDIS_REPL_CONNECTED
;
7709 static void slaveofCommand(redisClient
*c
) {
7710 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
7711 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
7712 if (server
.masterhost
) {
7713 sdsfree(server
.masterhost
);
7714 server
.masterhost
= NULL
;
7715 if (server
.master
) freeClient(server
.master
);
7716 server
.replstate
= REDIS_REPL_NONE
;
7717 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
7720 sdsfree(server
.masterhost
);
7721 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
7722 server
.masterport
= atoi(c
->argv
[2]->ptr
);
7723 if (server
.master
) freeClient(server
.master
);
7724 server
.replstate
= REDIS_REPL_CONNECT
;
7725 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
7726 server
.masterhost
, server
.masterport
);
7728 addReply(c
,shared
.ok
);
7731 /* ============================ Maxmemory directive ======================== */
7733 /* Try to free one object form the pre-allocated objects free list.
7734 * This is useful under low mem conditions as by default we take 1 million
7735 * free objects allocated. On success REDIS_OK is returned, otherwise
7737 static int tryFreeOneObjectFromFreelist(void) {
7740 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
7741 if (listLength(server
.objfreelist
)) {
7742 listNode
*head
= listFirst(server
.objfreelist
);
7743 o
= listNodeValue(head
);
7744 listDelNode(server
.objfreelist
,head
);
7745 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7749 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
7754 /* This function gets called when 'maxmemory' is set on the config file to limit
7755 * the max memory used by the server, and we are out of memory.
7756 * This function will try to, in order:
7758 * - Free objects from the free list
7759 * - Try to remove keys with an EXPIRE set
7761 * It is not possible to free enough memory to reach used-memory < maxmemory
7762 * the server will start refusing commands that will enlarge even more the
7765 static void freeMemoryIfNeeded(void) {
7766 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
7767 int j
, k
, freed
= 0;
7769 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
7770 for (j
= 0; j
< server
.dbnum
; j
++) {
7772 robj
*minkey
= NULL
;
7773 struct dictEntry
*de
;
7775 if (dictSize(server
.db
[j
].expires
)) {
7777 /* From a sample of three keys drop the one nearest to
7778 * the natural expire */
7779 for (k
= 0; k
< 3; k
++) {
7782 de
= dictGetRandomKey(server
.db
[j
].expires
);
7783 t
= (time_t) dictGetEntryVal(de
);
7784 if (minttl
== -1 || t
< minttl
) {
7785 minkey
= dictGetEntryKey(de
);
7789 deleteKey(server
.db
+j
,minkey
);
7792 if (!freed
) return; /* nothing to free... */
7796 /* ============================== Append Only file ========================== */
7798 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
7799 sds buf
= sdsempty();
7805 /* The DB this command was targetting is not the same as the last command
7806 * we appendend. To issue a SELECT command is needed. */
7807 if (dictid
!= server
.appendseldb
) {
7810 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
7811 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
7812 (unsigned long)strlen(seldb
),seldb
);
7813 server
.appendseldb
= dictid
;
7816 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
7817 * EXPIREs into EXPIREATs calls */
7818 if (cmd
->proc
== expireCommand
) {
7821 tmpargv
[0] = createStringObject("EXPIREAT",8);
7822 tmpargv
[1] = argv
[1];
7823 incrRefCount(argv
[1]);
7824 when
= time(NULL
)+strtol(argv
[2]->ptr
,NULL
,10);
7825 tmpargv
[2] = createObject(REDIS_STRING
,
7826 sdscatprintf(sdsempty(),"%ld",when
));
7830 /* Append the actual command */
7831 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
7832 for (j
= 0; j
< argc
; j
++) {
7835 o
= getDecodedObject(o
);
7836 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
7837 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
7838 buf
= sdscatlen(buf
,"\r\n",2);
7842 /* Free the objects from the modified argv for EXPIREAT */
7843 if (cmd
->proc
== expireCommand
) {
7844 for (j
= 0; j
< 3; j
++)
7845 decrRefCount(argv
[j
]);
7848 /* We want to perform a single write. This should be guaranteed atomic
7849 * at least if the filesystem we are writing is a real physical one.
7850 * While this will save us against the server being killed I don't think
7851 * there is much to do about the whole server stopping for power problems
7853 nwritten
= write(server
.appendfd
,buf
,sdslen(buf
));
7854 if (nwritten
!= (signed)sdslen(buf
)) {
7855 /* Ooops, we are in troubles. The best thing to do for now is
7856 * to simply exit instead to give the illusion that everything is
7857 * working as expected. */
7858 if (nwritten
== -1) {
7859 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
7861 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
7865 /* If a background append only file rewriting is in progress we want to
7866 * accumulate the differences between the child DB and the current one
7867 * in a buffer, so that when the child process will do its work we
7868 * can append the differences to the new append only file. */
7869 if (server
.bgrewritechildpid
!= -1)
7870 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
7874 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
7875 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
7876 now
-server
.lastfsync
> 1))
7878 fsync(server
.appendfd
); /* Let's try to get this data on the disk */
7879 server
.lastfsync
= now
;
7883 /* In Redis commands are always executed in the context of a client, so in
7884 * order to load the append only file we need to create a fake client. */
7885 static struct redisClient
*createFakeClient(void) {
7886 struct redisClient
*c
= zmalloc(sizeof(*c
));
7890 c
->querybuf
= sdsempty();
7894 /* We set the fake client as a slave waiting for the synchronization
7895 * so that Redis will not try to send replies to this client. */
7896 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
7897 c
->reply
= listCreate();
7898 listSetFreeMethod(c
->reply
,decrRefCount
);
7899 listSetDupMethod(c
->reply
,dupClientReplyValue
);
7903 static void freeFakeClient(struct redisClient
*c
) {
7904 sdsfree(c
->querybuf
);
7905 listRelease(c
->reply
);
7909 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
7910 * error (the append only file is zero-length) REDIS_ERR is returned. On
7911 * fatal error an error message is logged and the program exists. */
7912 int loadAppendOnlyFile(char *filename
) {
7913 struct redisClient
*fakeClient
;
7914 FILE *fp
= fopen(filename
,"r");
7915 struct redis_stat sb
;
7916 unsigned long long loadedkeys
= 0;
7918 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
7922 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
7926 fakeClient
= createFakeClient();
7933 struct redisCommand
*cmd
;
7935 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
7941 if (buf
[0] != '*') goto fmterr
;
7943 argv
= zmalloc(sizeof(robj
*)*argc
);
7944 for (j
= 0; j
< argc
; j
++) {
7945 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
7946 if (buf
[0] != '$') goto fmterr
;
7947 len
= strtol(buf
+1,NULL
,10);
7948 argsds
= sdsnewlen(NULL
,len
);
7949 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
7950 argv
[j
] = createObject(REDIS_STRING
,argsds
);
7951 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
7954 /* Command lookup */
7955 cmd
= lookupCommand(argv
[0]->ptr
);
7957 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
7960 /* Try object encoding */
7961 if (cmd
->flags
& REDIS_CMD_BULK
)
7962 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
7963 /* Run the command in the context of a fake client */
7964 fakeClient
->argc
= argc
;
7965 fakeClient
->argv
= argv
;
7966 cmd
->proc(fakeClient
);
7967 /* Discard the reply objects list from the fake client */
7968 while(listLength(fakeClient
->reply
))
7969 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
7970 /* Clean up, ready for the next command */
7971 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
7973 /* Handle swapping while loading big datasets when VM is on */
7975 if (server
.vm_enabled
&& (loadedkeys
% 5000) == 0) {
7976 while (zmalloc_used_memory() > server
.vm_max_memory
) {
7977 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
7982 freeFakeClient(fakeClient
);
7987 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
7989 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
7993 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
7997 /* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
7998 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8002 /* Avoid the incr/decr ref count business if possible to help
8003 * copy-on-write (we are often in a child process when this function
8005 * Also makes sure that key objects don't get incrRefCount-ed when VM
8007 if (obj
->encoding
!= REDIS_ENCODING_RAW
) {
8008 obj
= getDecodedObject(obj
);
8011 snprintf(buf
,sizeof(buf
),"$%ld\r\n",(long)sdslen(obj
->ptr
));
8012 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) goto err
;
8013 if (sdslen(obj
->ptr
) && fwrite(obj
->ptr
,sdslen(obj
->ptr
),1,fp
) == 0)
8015 if (fwrite("\r\n",2,1,fp
) == 0) goto err
;
8016 if (decrrc
) decrRefCount(obj
);
8019 if (decrrc
) decrRefCount(obj
);
/* Write a binary-safe string into a file in the bulk format
 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload (it may be empty when len is 0).
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned: use the matching conversion specifier. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the value printed with "%.17g".
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char header[128], payload[128];
    unsigned long digits;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    digits = (unsigned long)strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",digits);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,strlen(payload),1,fp) == 0) return 0;
    return 1;
}
/* Write a long value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the value printed in decimal.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char header[128], payload[128];
    unsigned long digits;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the advertised length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    digits = (unsigned long)strlen(payload)-2;
    snprintf(header,sizeof(header),"$%lu\r\n",digits);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    if (fwrite(payload,strlen(payload),1,fp) == 0) return 0;
    return 1;
}
8057 /* Write a sequence of commands able to fully rebuild the dataset into
8058 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8059 static int rewriteAppendOnlyFile(char *filename
) {
8060 dictIterator
*di
= NULL
;
8065 time_t now
= time(NULL
);
8067 /* Note that we have to use a different temp name here compared to the
8068 * one used by rewriteAppendOnlyFileBackground() function. */
8069 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8070 fp
= fopen(tmpfile
,"w");
8072 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8075 for (j
= 0; j
< server
.dbnum
; j
++) {
8076 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8077 redisDb
*db
= server
.db
+j
;
8079 if (dictSize(d
) == 0) continue;
8080 di
= dictGetIterator(d
);
8086 /* SELECT the new DB */
8087 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8088 if (fwriteBulkLong(fp
,j
) == 0) goto werr
;
8090 /* Iterate this DB writing every entry */
8091 while((de
= dictNext(di
)) != NULL
) {
8096 key
= dictGetEntryKey(de
);
8097 /* If the value for this key is swapped, load a preview in memory.
8098 * We use a "swapped" flag to remember if we need to free the
8099 * value object instead to just increment the ref count anyway
8100 * in order to avoid copy-on-write of pages if we are forked() */
8101 if (!server
.vm_enabled
|| key
->storage
== REDIS_VM_MEMORY
||
8102 key
->storage
== REDIS_VM_SWAPPING
) {
8103 o
= dictGetEntryVal(de
);
8106 o
= vmPreviewObject(key
);
8109 expiretime
= getExpire(db
,key
);
8111 /* Save the key and associated value */
8112 if (o
->type
== REDIS_STRING
) {
8113 /* Emit a SET command */
8114 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8115 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8117 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8118 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8119 } else if (o
->type
== REDIS_LIST
) {
8120 /* Emit the RPUSHes needed to rebuild the list */
8121 list
*list
= o
->ptr
;
8125 listRewind(list
,&li
);
8126 while((ln
= listNext(&li
))) {
8127 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8128 robj
*eleobj
= listNodeValue(ln
);
8130 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8131 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8132 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8134 } else if (o
->type
== REDIS_SET
) {
8135 /* Emit the SADDs needed to rebuild the set */
8137 dictIterator
*di
= dictGetIterator(set
);
8140 while((de
= dictNext(di
)) != NULL
) {
8141 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
8142 robj
*eleobj
= dictGetEntryKey(de
);
8144 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8145 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8146 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8148 dictReleaseIterator(di
);
8149 } else if (o
->type
== REDIS_ZSET
) {
8150 /* Emit the ZADDs needed to rebuild the sorted set */
8152 dictIterator
*di
= dictGetIterator(zs
->dict
);
8155 while((de
= dictNext(di
)) != NULL
) {
8156 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
8157 robj
*eleobj
= dictGetEntryKey(de
);
8158 double *score
= dictGetEntryVal(de
);
8160 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8161 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8162 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
8163 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8165 dictReleaseIterator(di
);
8166 } else if (o
->type
== REDIS_HASH
) {
8167 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
8169 /* Emit the HSETs needed to rebuild the hash */
8170 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8171 unsigned char *p
= zipmapRewind(o
->ptr
);
8172 unsigned char *field
, *val
;
8173 unsigned int flen
, vlen
;
8175 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
8176 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8177 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8178 if (fwriteBulkString(fp
,(char*)field
,flen
) == -1)
8180 if (fwriteBulkString(fp
,(char*)val
,vlen
) == -1)
8184 dictIterator
*di
= dictGetIterator(o
->ptr
);
8187 while((de
= dictNext(di
)) != NULL
) {
8188 robj
*field
= dictGetEntryKey(de
);
8189 robj
*val
= dictGetEntryVal(de
);
8191 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8192 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8193 if (fwriteBulkObject(fp
,field
) == -1) return -1;
8194 if (fwriteBulkObject(fp
,val
) == -1) return -1;
8196 dictReleaseIterator(di
);
8201 /* Save the expire time */
8202 if (expiretime
!= -1) {
8203 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
8204 /* If this key is already expired skip it */
8205 if (expiretime
< now
) continue;
8206 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8207 if (fwriteBulkObject(fp
,key
) == 0) goto werr
;
8208 if (fwriteBulkLong(fp
,expiretime
) == 0) goto werr
;
8210 if (swapped
) decrRefCount(o
);
8212 dictReleaseIterator(di
);
8215 /* Make sure data will not remain on the OS's output buffers */
8220 /* Use RENAME to make sure the DB file is changed atomically only
8221 * if the generate DB file is ok. */
8222 if (rename(tmpfile
,filename
) == -1) {
8223 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
8227 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
8233 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
8234 if (di
) dictReleaseIterator(di
);
8238 /* This is how rewriting of the append only file in background works:
8240 * 1) The user calls BGREWRITEAOF
8241 * 2) Redis calls this function, that forks():
8242 * 2a) the child rewrite the append only file in a temp file.
8243 * 2b) the parent accumulates differences in server.bgrewritebuf.
8244 * 3) When the child finished '2a' exists.
8245 * 4) The parent will trap the exit code, if it's OK, will append the
8246 * data accumulated into server.bgrewritebuf into the temp file, and
8247 * finally will rename(2) the temp file in the actual file name.
8248 * The the new file is reopened as the new append only file. Profit!
8250 static int rewriteAppendOnlyFileBackground(void) {
8253 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
8254 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
8255 if ((childpid
= fork()) == 0) {
8259 if (server
.vm_enabled
) vmReopenSwapFile();
8261 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8262 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
8269 if (childpid
== -1) {
8270 redisLog(REDIS_WARNING
,
8271 "Can't rewrite append only file in background: fork: %s",
8275 redisLog(REDIS_NOTICE
,
8276 "Background append only file rewriting started by pid %d",childpid
);
8277 server
.bgrewritechildpid
= childpid
;
8278 updateDictResizePolicy();
8279 /* We set appendseldb to -1 in order to force the next call to the
8280 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8281 * accumulated by the parent into server.bgrewritebuf will start
8282 * with a SELECT statement and it will be safe to merge. */
8283 server
.appendseldb
= -1;
8286 return REDIS_OK
; /* unreached */
8289 static void bgrewriteaofCommand(redisClient
*c
) {
8290 if (server
.bgrewritechildpid
!= -1) {
8291 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8294 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
8295 char *status
= "+Background append only file rewriting started\r\n";
8296 addReplySds(c
,sdsnew(status
));
8298 addReply(c
,shared
.err
);
/* Remove the temp file that the background rewrite child with pid
 * 'childpid' writes to (same naming scheme used by
 * rewriteAppendOnlyFileBackground()). */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8309 /* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
8311 * - Threaded Virtual Memory I/O
8312 * The two parts are not fully decoupled, but functions are split among two
8313 * different sections of the source code (delimited by comments) in order to
8314 * make more clear what functionality is about the blocking VM and what about
8315 * the threaded (not blocking) VM.
8319 * Redis VM is a blocking VM (one that blocks reading swapped values from
8320 * disk into memory when a value swapped out is needed in memory) that is made
8321 * unblocking by trying to examine the command argument vector in order to
8322 * load in background values that will likely be needed in order to exec
8323 * the command. The command is executed only once all the relevant keys
8324 * are loaded into memory.
 * This basically is almost as simple as a blocking VM, but almost as parallel
8327 * as a fully non-blocking VM.
8330 /* =================== Virtual Memory - Blocking Side ====================== */
8332 /* substitute the first occurrence of '%p' with the process pid in the
8333 * swap file name. */
8334 static void expandVmSwapFilename(void) {
8335 char *p
= strstr(server
.vm_swap_file
,"%p");
8341 new = sdscat(new,server
.vm_swap_file
);
8342 new = sdscatprintf(new,"%ld",(long) getpid());
8343 new = sdscat(new,p
+2);
8344 zfree(server
.vm_swap_file
);
8345 server
.vm_swap_file
= new;
8348 static void vmInit(void) {
8353 if (server
.vm_max_threads
!= 0)
8354 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8356 expandVmSwapFilename();
8357 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
8358 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
8359 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
8361 if (server
.vm_fp
== NULL
) {
8362 redisLog(REDIS_WARNING
,
8363 "Impossible to open the swap file: %s. Exiting.",
8367 server
.vm_fd
= fileno(server
.vm_fp
);
8368 server
.vm_next_page
= 0;
8369 server
.vm_near_pages
= 0;
8370 server
.vm_stats_used_pages
= 0;
8371 server
.vm_stats_swapped_objects
= 0;
8372 server
.vm_stats_swapouts
= 0;
8373 server
.vm_stats_swapins
= 0;
8374 totsize
= server
.vm_pages
*server
.vm_page_size
;
8375 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
8376 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
8377 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
8381 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
8383 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
8384 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
8385 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
8386 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
8388 /* Initialize threaded I/O (used by Virtual Memory) */
8389 server
.io_newjobs
= listCreate();
8390 server
.io_processing
= listCreate();
8391 server
.io_processed
= listCreate();
8392 server
.io_ready_clients
= listCreate();
8393 pthread_mutex_init(&server
.io_mutex
,NULL
);
8394 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
8395 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
8396 server
.io_active_threads
= 0;
8397 if (pipe(pipefds
) == -1) {
8398 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
8402 server
.io_ready_pipe_read
= pipefds
[0];
8403 server
.io_ready_pipe_write
= pipefds
[1];
8404 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
8405 /* LZF requires a lot of stack */
8406 pthread_attr_init(&server
.io_threads_attr
);
8407 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
8408 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
8409 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
8410 /* Listen for events in the threaded I/O pipe */
8411 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
8412 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
8413 oom("creating file event");
8416 /* Mark the page as used */
8417 static void vmMarkPageUsed(off_t page
) {
8418 off_t byte
= page
/8;
8420 redisAssert(vmFreePage(page
) == 1);
8421 server
.vm_bitmap
[byte
] |= 1<<bit
;
8424 /* Mark N contiguous pages as used, with 'page' being the first. */
8425 static void vmMarkPagesUsed(off_t page
, off_t count
) {
8428 for (j
= 0; j
< count
; j
++)
8429 vmMarkPageUsed(page
+j
);
8430 server
.vm_stats_used_pages
+= count
;
8431 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
8432 (long long)count
, (long long)page
);
8435 /* Mark the page as free */
8436 static void vmMarkPageFree(off_t page
) {
8437 off_t byte
= page
/8;
8439 redisAssert(vmFreePage(page
) == 0);
8440 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
8443 /* Mark N contiguous pages as free, with 'page' being the first. */
8444 static void vmMarkPagesFree(off_t page
, off_t count
) {
8447 for (j
= 0; j
< count
; j
++)
8448 vmMarkPageFree(page
+j
);
8449 server
.vm_stats_used_pages
-= count
;
8450 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
8451 (long long)count
, (long long)page
);
8454 /* Test if the page is free */
8455 static int vmFreePage(off_t page
) {
8456 off_t byte
= page
/8;
8458 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
8461 /* Find N contiguous free pages storing the first page of the cluster in *first.
8462 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
8463 * REDIS_ERR is returned.
8465 * This function uses a simple algorithm: we try to allocate
8466 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
8467 * again from the start of the swap file searching for free spaces.
8469 * If it looks pretty clear that there are no free pages near our offset
8470 * we try to find less populated places doing a forward jump of
8471 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
8472 * without hurry, and then we jump again and so forth...
8474 * This function can be improved using a free list to avoid to guess
8475 * too much, since we could collect data about freed pages.
8477 * note: I implemented this function just after watching an episode of
8478 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
8480 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
8481 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
8483 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
8484 server
.vm_near_pages
= 0;
8485 server
.vm_next_page
= 0;
8487 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
8488 base
= server
.vm_next_page
;
8490 while(offset
< server
.vm_pages
) {
8491 off_t
this = base
+offset
;
8493 /* If we overflow, restart from page zero */
8494 if (this >= server
.vm_pages
) {
8495 this -= server
.vm_pages
;
8497 /* Just overflowed, what we found on tail is no longer
8498 * interesting, as it's no longer contiguous. */
8502 if (vmFreePage(this)) {
8503 /* This is a free page */
8505 /* Already got N free pages? Return to the caller, with success */
8507 *first
= this-(n
-1);
8508 server
.vm_next_page
= this+1;
8509 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
8513 /* The current one is not a free page */
8517 /* Fast-forward if the current page is not free and we already
8518 * searched enough near this place. */
8520 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
8521 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
8523 /* Note that even if we rewind after the jump, we are don't need
8524 * to make sure numfree is set to zero as we only jump *if* it
8525 * is set to zero. */
8527 /* Otherwise just check the next page */
8534 /* Write the specified object at the specified page of the swap file */
8535 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
8536 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8537 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8538 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8539 redisLog(REDIS_WARNING
,
8540 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
8544 rdbSaveObject(server
.vm_fp
,o
);
8545 fflush(server
.vm_fp
);
8546 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8550 /* Swap the 'val' object relative to 'key' into disk. Store all the information
8551 * needed to later retrieve the object into the key object.
8552 * If we can't find enough contiguous empty pages to swap the object on disk
8553 * REDIS_ERR is returned. */
8554 static int vmSwapObjectBlocking(robj
*key
, robj
*val
) {
8555 off_t pages
= rdbSavedObjectPages(val
,NULL
);
8558 assert(key
->storage
== REDIS_VM_MEMORY
);
8559 assert(key
->refcount
== 1);
8560 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return REDIS_ERR
;
8561 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return REDIS_ERR
;
8562 key
->vm
.page
= page
;
8563 key
->vm
.usedpages
= pages
;
8564 key
->storage
= REDIS_VM_SWAPPED
;
8565 key
->vtype
= val
->type
;
8566 decrRefCount(val
); /* Deallocate the object from memory. */
8567 vmMarkPagesUsed(page
,pages
);
8568 redisLog(REDIS_DEBUG
,"VM: object %s swapped out at %lld (%lld pages)",
8569 (unsigned char*) key
->ptr
,
8570 (unsigned long long) page
, (unsigned long long) pages
);
8571 server
.vm_stats_swapped_objects
++;
8572 server
.vm_stats_swapouts
++;
8576 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
8579 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
8580 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
8581 redisLog(REDIS_WARNING
,
8582 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
8586 o
= rdbLoadObject(type
,server
.vm_fp
);
8588 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
8591 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
8595 /* Load the value object relative to the 'key' object from swap to memory.
8596 * The newly allocated object is returned.
8598 * If preview is true the unserialized object is returned to the caller but
8599 * no changes are made to the key object, nor the pages are marked as freed */
8600 static robj
*vmGenericLoadObject(robj
*key
, int preview
) {
8603 redisAssert(key
->storage
== REDIS_VM_SWAPPED
|| key
->storage
== REDIS_VM_LOADING
);
8604 val
= vmReadObjectFromSwap(key
->vm
.page
,key
->vtype
);
8606 key
->storage
= REDIS_VM_MEMORY
;
8607 key
->vm
.atime
= server
.unixtime
;
8608 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8609 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk",
8610 (unsigned char*) key
->ptr
);
8611 server
.vm_stats_swapped_objects
--;
8613 redisLog(REDIS_DEBUG
, "VM: object %s previewed from disk",
8614 (unsigned char*) key
->ptr
);
8616 server
.vm_stats_swapins
++;
8620 /* Plain object loading, from swap to memory */
8621 static robj
*vmLoadObject(robj
*key
) {
8622 /* If we are loading the object in background, stop it, we
8623 * need to load this object synchronously ASAP. */
8624 if (key
->storage
== REDIS_VM_LOADING
)
8625 vmCancelThreadedIOJob(key
);
8626 return vmGenericLoadObject(key
,0);
8629 /* Just load the value on disk, without to modify the key.
8630 * This is useful when we want to perform some operation on the value
8631 * without to really bring it from swap to memory, like while saving the
8632 * dataset or rewriting the append only log. */
8633 static robj
*vmPreviewObject(robj
*key
) {
8634 return vmGenericLoadObject(key
,1);
8637 /* How a good candidate is this object for swapping?
8638 * The better candidate it is, the greater the returned value.
8640 * Currently we try to perform a fast estimation of the object size in
8641 * memory, and combine it with aging informations.
8643 * Basically swappability = idle-time * log(estimated size)
8645 * Bigger objects are preferred over smaller objects, but not
8646 * proportionally, this is why we use the logarithm. This algorithm is
8647 * just a first try and will probably be tuned later. */
8648 static double computeObjectSwappability(robj
*o
) {
8649 time_t age
= server
.unixtime
- o
->vm
.atime
;
8653 struct dictEntry
*de
;
8656 if (age
<= 0) return 0;
8659 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
8662 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
8667 listNode
*ln
= listFirst(l
);
8669 asize
= sizeof(list
);
8671 robj
*ele
= ln
->value
;
8674 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8675 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8677 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
8682 z
= (o
->type
== REDIS_ZSET
);
8683 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
8685 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8686 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
8691 de
= dictGetRandomKey(d
);
8692 ele
= dictGetEntryKey(de
);
8693 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8694 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8696 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8697 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
8701 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
8702 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
8703 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
8704 unsigned int klen
, vlen
;
8705 unsigned char *key
, *val
;
8707 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
8711 asize
= len
*(klen
+vlen
+3);
8712 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
8714 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
8719 de
= dictGetRandomKey(d
);
8720 ele
= dictGetEntryKey(de
);
8721 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8722 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8724 ele
= dictGetEntryVal(de
);
8725 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
8726 (sizeof(*o
)+sdslen(ele
->ptr
)) :
8728 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
8733 return (double)age
*log(1+asize
);
8736 /* Try to swap an object that's a good candidate for swapping.
8737 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
8738 * to swap any object at all.
8740 * If 'usethreaded' is true, Redis will try to swap the object in background
8741 * using I/O threads. */
8742 static int vmSwapOneObject(int usethreads
) {
8744 struct dictEntry
*best
= NULL
;
8745 double best_swappability
= 0;
8746 redisDb
*best_db
= NULL
;
8749 for (j
= 0; j
< server
.dbnum
; j
++) {
8750 redisDb
*db
= server
.db
+j
;
8751 /* Why maxtries is set to 100?
8752 * Because this way (usually) we'll find 1 object even if just 1% - 2%
8753 * are swappable objects */
8756 if (dictSize(db
->dict
) == 0) continue;
8757 for (i
= 0; i
< 5; i
++) {
8759 double swappability
;
8761 if (maxtries
) maxtries
--;
8762 de
= dictGetRandomKey(db
->dict
);
8763 key
= dictGetEntryKey(de
);
8764 val
= dictGetEntryVal(de
);
8765 /* Only swap objects that are currently in memory.
8767 * Also don't swap shared objects if threaded VM is on, as we
8768 * try to ensure that the main thread does not touch the
8769 * object while the I/O thread is using it, but we can't
8770 * control other keys without adding additional mutex. */
8771 if (key
->storage
!= REDIS_VM_MEMORY
||
8772 (server
.vm_max_threads
!= 0 && val
->refcount
!= 1)) {
8773 if (maxtries
) i
--; /* don't count this try */
8776 swappability
= computeObjectSwappability(val
);
8777 if (!best
|| swappability
> best_swappability
) {
8779 best_swappability
= swappability
;
8784 if (best
== NULL
) return REDIS_ERR
;
8785 key
= dictGetEntryKey(best
);
8786 val
= dictGetEntryVal(best
);
8788 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
8789 key
->ptr
, best_swappability
);
8791 /* Unshare the key if needed */
8792 if (key
->refcount
> 1) {
8793 robj
*newkey
= dupStringObject(key
);
8795 key
= dictGetEntryKey(best
) = newkey
;
8799 vmSwapObjectThreaded(key
,val
,best_db
);
8802 if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
8803 dictGetEntryVal(best
) = NULL
;
/* Swap one object out using the blocking path (usethreads == 0).
 * Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking(void) {
    return vmSwapOneObject(0);
}
/* Swap one object out using the threaded path (usethreads == 1).
 * Returns whatever vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded(void) {
    return vmSwapOneObject(1);
}
8819 /* Return true if it's safe to swap out objects in a given moment.
8820 * Basically we don't want to swap objects out while there is a BGSAVE
8821 * or a BGAEOREWRITE running in backgroud. */
8822 static int vmCanSwapOut(void) {
8823 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
8826 /* Delete a key if swapped. Returns 1 if the key was found, was swapped
8827 * and was deleted. Otherwise 0 is returned. */
8828 static int deleteIfSwapped(redisDb
*db
, robj
*key
) {
8832 if ((de
= dictFind(db
->dict
,key
)) == NULL
) return 0;
8833 foundkey
= dictGetEntryKey(de
);
8834 if (foundkey
->storage
== REDIS_VM_MEMORY
) return 0;
8839 /* =================== Virtual Memory - Threaded I/O ======================= */
8841 static void freeIOJob(iojob
*j
) {
8842 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
8843 j
->type
== REDIS_IOJOB_DO_SWAP
||
8844 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
8845 decrRefCount(j
->val
);
8846 /* We don't decrRefCount the j->key field as we did't incremented
8847 * the count creating IO Jobs. This is because the key field here is
8848 * just used as an indentifier and if a key is removed the Job should
8849 * never be touched again. */
8853 /* Every time a thread finished a Job, it writes a byte into the write side
8854 * of an unix pipe in order to "awake" the main thread, and this function
8856 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
8860 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
8862 REDIS_NOTUSED(mask
);
8863 REDIS_NOTUSED(privdata
);
8865 /* For every byte we read in the read side of the pipe, there is one
8866 * I/O job completed to process. */
8867 while((retval
= read(fd
,buf
,1)) == 1) {
8871 struct dictEntry
*de
;
8873 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
8875 /* Get the processed element (the oldest one) */
8877 assert(listLength(server
.io_processed
) != 0);
8878 if (toprocess
== -1) {
8879 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
8880 if (toprocess
<= 0) toprocess
= 1;
8882 ln
= listFirst(server
.io_processed
);
8884 listDelNode(server
.io_processed
,ln
);
8886 /* If this job is marked as canceled, just ignore it */
8891 /* Post process it in the main thread, as there are things we
8892 * can do just here to avoid race conditions and/or invasive locks */
8893 redisLog(REDIS_DEBUG
,"Job %p type: %d, key at %p (%s) refcount: %d\n", (void*) j
, j
->type
, (void*)j
->key
, (char*)j
->key
->ptr
, j
->key
->refcount
);
8894 de
= dictFind(j
->db
->dict
,j
->key
);
8896 key
= dictGetEntryKey(de
);
8897 if (j
->type
== REDIS_IOJOB_LOAD
) {
8900 /* Key loaded, bring it at home */
8901 key
->storage
= REDIS_VM_MEMORY
;
8902 key
->vm
.atime
= server
.unixtime
;
8903 vmMarkPagesFree(key
->vm
.page
,key
->vm
.usedpages
);
8904 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
8905 (unsigned char*) key
->ptr
);
8906 server
.vm_stats_swapped_objects
--;
8907 server
.vm_stats_swapins
++;
8908 dictGetEntryVal(de
) = j
->val
;
8909 incrRefCount(j
->val
);
8912 /* Handle clients waiting for this key to be loaded. */
8913 handleClientsBlockedOnSwappedKey(db
,key
);
8914 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
8915 /* Now we know the amount of pages required to swap this object.
8916 * Let's find some space for it, and queue this task again
8917 * rebranded as REDIS_IOJOB_DO_SWAP. */
8918 if (!vmCanSwapOut() ||
8919 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
8921 /* Ooops... no space or we can't swap as there is
8922 * a fork()ed Redis trying to save stuff on disk. */
8924 key
->storage
= REDIS_VM_MEMORY
; /* undo operation */
8926 /* Note that we need to mark this pages as used now,
8927 * if the job will be canceled, we'll mark them as freed
8929 vmMarkPagesUsed(j
->page
,j
->pages
);
8930 j
->type
= REDIS_IOJOB_DO_SWAP
;
8935 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
8938 /* Key swapped. We can finally free some memory. */
8939 if (key
->storage
!= REDIS_VM_SWAPPING
) {
8940 printf("key->storage: %d\n",key
->storage
);
8941 printf("key->name: %s\n",(char*)key
->ptr
);
8942 printf("key->refcount: %d\n",key
->refcount
);
8943 printf("val: %p\n",(void*)j
->val
);
8944 printf("val->type: %d\n",j
->val
->type
);
8945 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
8947 redisAssert(key
->storage
== REDIS_VM_SWAPPING
);
8948 val
= dictGetEntryVal(de
);
8949 key
->vm
.page
= j
->page
;
8950 key
->vm
.usedpages
= j
->pages
;
8951 key
->storage
= REDIS_VM_SWAPPED
;
8952 key
->vtype
= j
->val
->type
;
8953 decrRefCount(val
); /* Deallocate the object from memory. */
8954 dictGetEntryVal(de
) = NULL
;
8955 redisLog(REDIS_DEBUG
,
8956 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
8957 (unsigned char*) key
->ptr
,
8958 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
8959 server
.vm_stats_swapped_objects
++;
8960 server
.vm_stats_swapouts
++;
8962 /* Put a few more swap requests in queue if we are still
8964 if (trytoswap
&& vmCanSwapOut() &&
8965 zmalloc_used_memory() > server
.vm_max_memory
)
8970 more
= listLength(server
.io_newjobs
) <
8971 (unsigned) server
.vm_max_threads
;
8973 /* Don't waste CPU time if swappable objects are rare. */
8974 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
8982 if (processed
== toprocess
) return;
8984 if (retval
< 0 && errno
!= EAGAIN
) {
8985 redisLog(REDIS_WARNING
,
8986 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
8991 static void lockThreadedIO(void) {
8992 pthread_mutex_lock(&server
.io_mutex
);
8995 static void unlockThreadedIO(void) {
8996 pthread_mutex_unlock(&server
.io_mutex
);
8999 /* Remove the specified object from the threaded I/O queue if still not
9000 * processed, otherwise make sure to flag it as canceled. */
9001 static void vmCancelThreadedIOJob(robj
*o
) {
9003 server
.io_newjobs
, /* 0 */
9004 server
.io_processing
, /* 1 */
9005 server
.io_processed
/* 2 */
9009 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9012 /* Search for a matching key in one of the queues */
9013 for (i
= 0; i
< 3; i
++) {
9017 listRewind(lists
[i
],&li
);
9018 while ((ln
= listNext(&li
)) != NULL
) {
9019 iojob
*job
= ln
->value
;
9021 if (job
->canceled
) continue; /* Skip this, already canceled. */
9022 if (job
->key
== o
) {
9023 redisLog(REDIS_DEBUG
,"*** CANCELED %p (%s) (type %d) (LIST ID %d)\n",
9024 (void*)job
, (char*)o
->ptr
, job
->type
, i
);
9025 /* Mark the pages as free since the swap didn't happened
9026 * or happened but is now discarded. */
9027 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9028 vmMarkPagesFree(job
->page
,job
->pages
);
9029 /* Cancel the job. It depends on the list the job is
9032 case 0: /* io_newjobs */
9033 /* If the job was yet not processed the best thing to do
9034 * is to remove it from the queue at all */
9036 listDelNode(lists
[i
],ln
);
9038 case 1: /* io_processing */
9039 /* Oh Shi- the thread is messing with the Job:
9041 * Probably it's accessing the object if this is a
9042 * PREPARE_SWAP or DO_SWAP job.
9043 * If it's a LOAD job it may be reading from disk and
9044 * if we don't wait for the job to terminate before to
9045 * cancel it, maybe in a few microseconds data can be
9046 * corrupted in this pages. So the short story is:
9048 * Better to wait for the job to move into the
9049 * next queue (processed)... */
9051 /* We try again and again until the job is completed. */
9053 /* But let's wait some time for the I/O thread
9054 * to finish with this job. After all this condition
9055 * should be very rare. */
9058 case 2: /* io_processed */
9059 /* The job was already processed, that's easy...
9060 * just mark it as canceled so that we'll ignore it
9061 * when processing completed jobs. */
9065 /* Finally we have to adjust the storage type of the object
9066 * in order to "UNDO" the operaiton. */
9067 if (o
->storage
== REDIS_VM_LOADING
)
9068 o
->storage
= REDIS_VM_SWAPPED
;
9069 else if (o
->storage
== REDIS_VM_SWAPPING
)
9070 o
->storage
= REDIS_VM_MEMORY
;
9077 assert(1 != 1); /* We should never reach this */
9080 static void *IOThreadEntryPoint(void *arg
) {
9085 pthread_detach(pthread_self());
9087 /* Get a new job to process */
9089 if (listLength(server
.io_newjobs
) == 0) {
9090 /* No new jobs in queue, exit. */
9091 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9092 (long) pthread_self());
9093 server
.io_active_threads
--;
9097 ln
= listFirst(server
.io_newjobs
);
9099 listDelNode(server
.io_newjobs
,ln
);
9100 /* Add the job in the processing queue */
9101 j
->thread
= pthread_self();
9102 listAddNodeTail(server
.io_processing
,j
);
9103 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9105 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9106 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9108 /* Process the Job */
9109 if (j
->type
== REDIS_IOJOB_LOAD
) {
9110 j
->val
= vmReadObjectFromSwap(j
->page
,j
->key
->vtype
);
9111 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9112 FILE *fp
= fopen("/dev/null","w+");
9113 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9115 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9116 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9120 /* Done: insert the job into the processed queue */
9121 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9122 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9124 listDelNode(server
.io_processing
,ln
);
9125 listAddNodeTail(server
.io_processed
,j
);
9128 /* Signal the main thread there is new stuff to process */
9129 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9131 return NULL
; /* never reached */
9134 static void spawnIOThread(void) {
9136 sigset_t mask
, omask
;
9140 sigaddset(&mask
,SIGCHLD
);
9141 sigaddset(&mask
,SIGHUP
);
9142 sigaddset(&mask
,SIGPIPE
);
9143 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
9144 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
9145 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
9149 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
9150 server
.io_active_threads
++;
9153 /* We need to wait for the last thread to exit before we are able to
9154 * fork() in order to BGSAVE or BGREWRITEAOF. */
9155 static void waitEmptyIOJobsQueue(void) {
9157 int io_processed_len
;
9160 if (listLength(server
.io_newjobs
) == 0 &&
9161 listLength(server
.io_processing
) == 0 &&
9162 server
.io_active_threads
== 0)
9167 /* While waiting for empty jobs queue condition we post-process some
9168 * finshed job, as I/O threads may be hanging trying to write against
9169 * the io_ready_pipe_write FD but there are so much pending jobs that
9171 io_processed_len
= listLength(server
.io_processed
);
9173 if (io_processed_len
) {
9174 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
9175 usleep(1000); /* 1 millisecond */
9177 usleep(10000); /* 10 milliseconds */
9182 static void vmReopenSwapFile(void) {
9183 /* Note: we don't close the old one as we are in the child process
9184 * and don't want to mess at all with the original file object. */
9185 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
9186 if (server
.vm_fp
== NULL
) {
9187 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
9188 server
.vm_swap_file
);
9191 server
.vm_fd
= fileno(server
.vm_fp
);
9194 /* This function must be called while with threaded IO locked */
9195 static void queueIOJob(iojob
*j
) {
9196 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
9197 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
9198 listAddNodeTail(server
.io_newjobs
,j
);
9199 if (server
.io_active_threads
< server
.vm_max_threads
)
9203 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
9206 assert(key
->storage
== REDIS_VM_MEMORY
);
9207 assert(key
->refcount
== 1);
9209 j
= zmalloc(sizeof(*j
));
9210 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
9216 j
->thread
= (pthread_t
) -1;
9217 key
->storage
= REDIS_VM_SWAPPING
;
9225 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
9227 /* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9228 * If there is not already a job loading the key, it is craeted.
9229 * The key is added to the io_keys list in the client structure, and also
9230 * in the hash table mapping swapped keys to waiting clients, that is,
9231 * server.io_waited_keys. */
9232 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
9233 struct dictEntry
*de
;
9237 /* If the key does not exist or is already in RAM we don't need to
9238 * block the client at all. */
9239 de
= dictFind(c
->db
->dict
,key
);
9240 if (de
== NULL
) return 0;
9241 o
= dictGetEntryKey(de
);
9242 if (o
->storage
== REDIS_VM_MEMORY
) {
9244 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
9245 /* We were swapping the key, undo it! */
9246 vmCancelThreadedIOJob(o
);
9250 /* OK: the key is either swapped, or being loaded just now. */
9252 /* Add the key to the list of keys this client is waiting for.
9253 * This maps clients to keys they are waiting for. */
9254 listAddNodeTail(c
->io_keys
,key
);
9257 /* Add the client to the swapped keys => clients waiting map. */
9258 de
= dictFind(c
->db
->io_keys
,key
);
9262 /* For every key we take a list of clients blocked for it */
9264 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
9266 assert(retval
== DICT_OK
);
9268 l
= dictGetEntryVal(de
);
9270 listAddNodeTail(l
,c
);
9272 /* Are we already loading the key from disk? If not create a job */
9273 if (o
->storage
== REDIS_VM_SWAPPED
) {
9276 o
->storage
= REDIS_VM_LOADING
;
9277 j
= zmalloc(sizeof(*j
));
9278 j
->type
= REDIS_IOJOB_LOAD
;
9281 j
->key
->vtype
= o
->vtype
;
9282 j
->page
= o
->vm
.page
;
9285 j
->thread
= (pthread_t
) -1;
9293 /* Preload keys needed for the ZUNION and ZINTER commands. */
9294 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
) {
9296 num
= atoi(c
->argv
[2]->ptr
);
9297 for (i
= 0; i
< num
; i
++) {
9298 waitForSwappedKey(c
,c
->argv
[3+i
]);
9302 /* Is this client attempting to run a command against swapped keys?
9303 * If so, block it ASAP, load the keys in background, then resume it.
9305 * The important idea about this function is that it can fail! If keys will
9306 * still be swapped when the client is resumed, this key lookups will
9307 * just block loading keys from disk. In practical terms this should only
9308 * happen with SORT BY command or if there is a bug in this function.
9310 * Return 1 if the client is marked as blocked, 0 if the client can
9311 * continue as the keys it is going to access appear to be in memory. */
9312 static int blockClientOnSwappedKeys(struct redisCommand
*cmd
, redisClient
*c
) {
9315 if (cmd
->vm_preload_proc
!= NULL
) {
9316 cmd
->vm_preload_proc(c
);
9318 if (cmd
->vm_firstkey
== 0) return 0;
9319 last
= cmd
->vm_lastkey
;
9320 if (last
< 0) last
= c
->argc
+last
;
9321 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
)
9322 waitForSwappedKey(c
,c
->argv
[j
]);
9325 /* If the client was blocked for at least one key, mark it as blocked. */
9326 if (listLength(c
->io_keys
)) {
9327 c
->flags
|= REDIS_IO_WAIT
;
9328 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
9329 server
.vm_blocked_clients
++;
9336 /* Remove the 'key' from the list of blocked keys for a given client.
9338 * The function returns 1 when there are no longer blocking keys after
9339 * the current one was removed (and the client can be unblocked). */
9340 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
9344 struct dictEntry
*de
;
9346 /* Remove the key from the list of keys this client is waiting for. */
9347 listRewind(c
->io_keys
,&li
);
9348 while ((ln
= listNext(&li
)) != NULL
) {
9349 if (compareStringObjects(ln
->value
,key
) == 0) {
9350 listDelNode(c
->io_keys
,ln
);
9356 /* Remove the client form the key => waiting clients map. */
9357 de
= dictFind(c
->db
->io_keys
,key
);
9359 l
= dictGetEntryVal(de
);
9360 ln
= listSearchKey(l
,c
);
9363 if (listLength(l
) == 0)
9364 dictDelete(c
->db
->io_keys
,key
);
9366 return listLength(c
->io_keys
) == 0;
9369 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
9370 struct dictEntry
*de
;
9375 de
= dictFind(db
->io_keys
,key
);
9378 l
= dictGetEntryVal(de
);
9379 len
= listLength(l
);
9380 /* Note: we can't use something like while(listLength(l)) as the list
9381 * can be freed by the calling function when we remove the last element. */
9384 redisClient
*c
= ln
->value
;
9386 if (dontWaitForSwappedKey(c
,key
)) {
9387 /* Put the client in the list of clients ready to go as we
9388 * loaded all the keys about it. */
9389 listAddNodeTail(server
.io_ready_clients
,c
);
9394 /* =========================== Remote Configuration ========================= */
9396 static void configSetCommand(redisClient
*c
) {
9397 robj
*o
= getDecodedObject(c
->argv
[3]);
9398 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
9399 zfree(server
.dbfilename
);
9400 server
.dbfilename
= zstrdup(o
->ptr
);
9401 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
9402 zfree(server
.requirepass
);
9403 server
.requirepass
= zstrdup(o
->ptr
);
9404 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
9405 zfree(server
.masterauth
);
9406 server
.masterauth
= zstrdup(o
->ptr
);
9407 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
9408 server
.maxmemory
= strtoll(o
->ptr
, NULL
, 10);
9410 addReplySds(c
,sdscatprintf(sdsempty(),
9411 "-ERR not supported CONFIG parameter %s\r\n",
9412 (char*)c
->argv
[2]->ptr
));
9417 addReply(c
,shared
.ok
);
9420 static void configGetCommand(redisClient
*c
) {
9421 robj
*o
= getDecodedObject(c
->argv
[2]);
9422 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
9423 char *pattern
= o
->ptr
;
9427 decrRefCount(lenobj
);
9429 if (stringmatch(pattern
,"dbfilename",0)) {
9430 addReplyBulkCString(c
,"dbfilename");
9431 addReplyBulkCString(c
,server
.dbfilename
);
9434 if (stringmatch(pattern
,"requirepass",0)) {
9435 addReplyBulkCString(c
,"requirepass");
9436 addReplyBulkCString(c
,server
.requirepass
);
9439 if (stringmatch(pattern
,"masterauth",0)) {
9440 addReplyBulkCString(c
,"masterauth");
9441 addReplyBulkCString(c
,server
.masterauth
);
9444 if (stringmatch(pattern
,"maxmemory",0)) {
9447 snprintf(buf
,128,"%llu\n",server
.maxmemory
);
9448 addReplyBulkCString(c
,"maxmemory");
9449 addReplyBulkCString(c
,buf
);
9453 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
9456 static void configCommand(redisClient
*c
) {
9457 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
9458 if (c
->argc
!= 4) goto badarity
;
9459 configSetCommand(c
);
9460 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
9461 if (c
->argc
!= 3) goto badarity
;
9462 configGetCommand(c
);
9463 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
9464 if (c
->argc
!= 2) goto badarity
;
9465 server
.stat_numcommands
= 0;
9466 server
.stat_numconnections
= 0;
9467 server
.stat_expiredkeys
= 0;
9468 server
.stat_starttime
= time(NULL
);
9469 addReply(c
,shared
.ok
);
9471 addReplySds(c
,sdscatprintf(sdsempty(),
9472 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
9477 addReplySds(c
,sdscatprintf(sdsempty(),
9478 "-ERR Wrong number of arguments for CONFIG %s\r\n",
9479 (char*) c
->argv
[1]->ptr
));
9482 /* =========================== Pubsub implementation ======================== */
9484 static void freePubsubPattern(void *p
) {
9485 pubsubPattern
*pat
= p
;
9487 decrRefCount(pat
->pattern
);
9491 static int listMatchPubsubPattern(void *a
, void *b
) {
9492 pubsubPattern
*pa
= a
, *pb
= b
;
9494 return (pa
->client
== pb
->client
) &&
9495 (compareStringObjects(pa
->pattern
,pb
->pattern
) == 0);
9498 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
9499 * 0 if the client was already subscribed to that channel. */
9500 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
9501 struct dictEntry
*de
;
9502 list
*clients
= NULL
;
9505 /* Add the channel to the client -> channels hash table */
9506 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
9508 incrRefCount(channel
);
9509 /* Add the client to the channel -> list of clients hash table */
9510 de
= dictFind(server
.pubsub_channels
,channel
);
9512 clients
= listCreate();
9513 dictAdd(server
.pubsub_channels
,channel
,clients
);
9514 incrRefCount(channel
);
9516 clients
= dictGetEntryVal(de
);
9518 listAddNodeTail(clients
,c
);
9520 /* Notify the client */
9521 addReply(c
,shared
.mbulk3
);
9522 addReply(c
,shared
.subscribebulk
);
9523 addReplyBulk(c
,channel
);
9524 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9528 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9529 * 0 if the client was not subscribed to the specified channel. */
9530 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
9531 struct dictEntry
*de
;
9536 /* Remove the channel from the client -> channels hash table */
9537 incrRefCount(channel
); /* channel may be just a pointer to the same object
9538 we have in the hash tables. Protect it... */
9539 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
9541 /* Remove the client from the channel -> clients list hash table */
9542 de
= dictFind(server
.pubsub_channels
,channel
);
9544 clients
= dictGetEntryVal(de
);
9545 ln
= listSearchKey(clients
,c
);
9547 listDelNode(clients
,ln
);
9548 if (listLength(clients
) == 0) {
9549 /* Free the list and associated hash entry at all if this was
9550 * the latest client, so that it will be possible to abuse
9551 * Redis PUBSUB creating millions of channels. */
9552 dictDelete(server
.pubsub_channels
,channel
);
9555 /* Notify the client */
9557 addReply(c
,shared
.mbulk3
);
9558 addReply(c
,shared
.unsubscribebulk
);
9559 addReplyBulk(c
,channel
);
9560 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9561 listLength(c
->pubsub_patterns
));
9564 decrRefCount(channel
); /* it is finally safe to release it */
9568 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
9569 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
9572 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
9575 listAddNodeTail(c
->pubsub_patterns
,pattern
);
9576 incrRefCount(pattern
);
9577 pat
= zmalloc(sizeof(*pat
));
9578 pat
->pattern
= getDecodedObject(pattern
);
9580 listAddNodeTail(server
.pubsub_patterns
,pat
);
9582 /* Notify the client */
9583 addReply(c
,shared
.mbulk3
);
9584 addReply(c
,shared
.psubscribebulk
);
9585 addReplyBulk(c
,pattern
);
9586 addReplyLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
9590 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
9591 * 0 if the client was not subscribed to the specified channel. */
9592 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
9597 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
9598 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
9600 listDelNode(c
->pubsub_patterns
,ln
);
9602 pat
.pattern
= pattern
;
9603 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
9604 listDelNode(server
.pubsub_patterns
,ln
);
9606 /* Notify the client */
9608 addReply(c
,shared
.mbulk3
);
9609 addReply(c
,shared
.punsubscribebulk
);
9610 addReplyBulk(c
,pattern
);
9611 addReplyLong(c
,dictSize(c
->pubsub_channels
)+
9612 listLength(c
->pubsub_patterns
));
9614 decrRefCount(pattern
);
9618 /* Unsubscribe from all the channels. Return the number of channels the
9619 * client was subscribed from. */
9620 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
9621 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
9625 while((de
= dictNext(di
)) != NULL
) {
9626 robj
*channel
= dictGetEntryKey(de
);
9628 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
9630 dictReleaseIterator(di
);
9634 /* Unsubscribe from all the patterns. Return the number of patterns the
9635 * client was subscribed from. */
9636 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
9641 listRewind(c
->pubsub_patterns
,&li
);
9642 while ((ln
= listNext(&li
)) != NULL
) {
9643 robj
*pattern
= ln
->value
;
9645 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
9650 /* Publish a message */
9651 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
9653 struct dictEntry
*de
;
9657 /* Send to clients listening for that channel */
9658 de
= dictFind(server
.pubsub_channels
,channel
);
9660 list
*list
= dictGetEntryVal(de
);
9664 listRewind(list
,&li
);
9665 while ((ln
= listNext(&li
)) != NULL
) {
9666 redisClient
*c
= ln
->value
;
9668 addReply(c
,shared
.mbulk3
);
9669 addReply(c
,shared
.messagebulk
);
9670 addReplyBulk(c
,channel
);
9671 addReplyBulk(c
,message
);
9675 /* Send to clients listening to matching channels */
9676 if (listLength(server
.pubsub_patterns
)) {
9677 listRewind(server
.pubsub_patterns
,&li
);
9678 channel
= getDecodedObject(channel
);
9679 while ((ln
= listNext(&li
)) != NULL
) {
9680 pubsubPattern
*pat
= ln
->value
;
9682 if (stringmatchlen((char*)pat
->pattern
->ptr
,
9683 sdslen(pat
->pattern
->ptr
),
9684 (char*)channel
->ptr
,
9685 sdslen(channel
->ptr
),0)) {
9686 addReply(pat
->client
,shared
.mbulk3
);
9687 addReply(pat
->client
,shared
.messagebulk
);
9688 addReplyBulk(pat
->client
,channel
);
9689 addReplyBulk(pat
->client
,message
);
9693 decrRefCount(channel
);
9698 static void subscribeCommand(redisClient
*c
) {
9701 for (j
= 1; j
< c
->argc
; j
++)
9702 pubsubSubscribeChannel(c
,c
->argv
[j
]);
9705 static void unsubscribeCommand(redisClient
*c
) {
9707 pubsubUnsubscribeAllChannels(c
,1);
9712 for (j
= 1; j
< c
->argc
; j
++)
9713 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
9717 static void psubscribeCommand(redisClient
*c
) {
9720 for (j
= 1; j
< c
->argc
; j
++)
9721 pubsubSubscribePattern(c
,c
->argv
[j
]);
9724 static void punsubscribeCommand(redisClient
*c
) {
9726 pubsubUnsubscribeAllPatterns(c
,1);
9731 for (j
= 1; j
< c
->argc
; j
++)
9732 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
9736 static void publishCommand(redisClient
*c
) {
9737 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
9738 addReplyLong(c
,receivers
);
9741 /* ================================= Debugging ============================== */
9743 static void debugCommand(redisClient
*c
) {
9744 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
9746 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
9747 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
9748 addReply(c
,shared
.err
);
9752 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
9753 addReply(c
,shared
.err
);
9756 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
9757 addReply(c
,shared
.ok
);
9758 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
9760 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
9761 addReply(c
,shared
.err
);
9764 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
9765 addReply(c
,shared
.ok
);
9766 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
9767 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9771 addReply(c
,shared
.nokeyerr
);
9774 key
= dictGetEntryKey(de
);
9775 val
= dictGetEntryVal(de
);
9776 if (!server
.vm_enabled
|| (key
->storage
== REDIS_VM_MEMORY
||
9777 key
->storage
== REDIS_VM_SWAPPING
)) {
9781 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
9782 strenc
= strencoding
[val
->encoding
];
9784 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
9787 addReplySds(c
,sdscatprintf(sdsempty(),
9788 "+Key at:%p refcount:%d, value at:%p refcount:%d "
9789 "encoding:%s serializedlength:%lld\r\n",
9790 (void*)key
, key
->refcount
, (void*)val
, val
->refcount
,
9791 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
9793 addReplySds(c
,sdscatprintf(sdsempty(),
9794 "+Key at:%p refcount:%d, value swapped at: page %llu "
9795 "using %llu pages\r\n",
9796 (void*)key
, key
->refcount
, (unsigned long long) key
->vm
.page
,
9797 (unsigned long long) key
->vm
.usedpages
));
9799 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
9800 lookupKeyRead(c
->db
,c
->argv
[2]);
9801 addReply(c
,shared
.ok
);
9802 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
9803 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]);
9806 if (!server
.vm_enabled
) {
9807 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
9811 addReply(c
,shared
.nokeyerr
);
9814 key
= dictGetEntryKey(de
);
9815 val
= dictGetEntryVal(de
);
9816 /* If the key is shared we want to create a copy */
9817 if (key
->refcount
> 1) {
9818 robj
*newkey
= dupStringObject(key
);
9820 key
= dictGetEntryKey(de
) = newkey
;
9823 if (key
->storage
!= REDIS_VM_MEMORY
) {
9824 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
9825 } else if (vmSwapObjectBlocking(key
,val
) == REDIS_OK
) {
9826 dictGetEntryVal(de
) = NULL
;
9827 addReply(c
,shared
.ok
);
9829 addReply(c
,shared
.err
);
9832 addReplySds(c
,sdsnew(
9833 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
9837 static void _redisAssert(char *estr
, char *file
, int line
) {
9838 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
9839 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true\n",file
,line
,estr
);
9840 #ifdef HAVE_BACKTRACE
9841 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
9846 /* =================================== Main! ================================ */
9849 int linuxOvercommitMemoryValue(void) {
9850 FILE *fp
= fopen("/proc/sys/vm/overcommit_memory","r");
9854 if (fgets(buf
,64,fp
) == NULL
) {
9863 void linuxOvercommitMemoryWarning(void) {
9864 if (linuxOvercommitMemoryValue() == 0) {
9865 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
9868 #endif /* __linux__ */
9870 static void daemonize(void) {
9874 if (fork() != 0) exit(0); /* parent exits */
9875 setsid(); /* create a new session */
9877 /* Every output goes to /dev/null. If Redis is daemonized but
9878 * the 'logfile' is set to 'stdout' in the configuration file
9879 * it will not log at all. */
9880 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
9881 dup2(fd
, STDIN_FILENO
);
9882 dup2(fd
, STDOUT_FILENO
);
9883 dup2(fd
, STDERR_FILENO
);
9884 if (fd
> STDERR_FILENO
) close(fd
);
9886 /* Try to write the pid file */
9887 fp
= fopen(server
.pidfile
,"w");
9889 fprintf(fp
,"%d\n",getpid());
9894 static void version() {
9895 printf("Redis server version %s\n", REDIS_VERSION
);
/* Print the command line synopsis on standard error and exit with
 * status 1. */
static void usage() {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr,"       ./redis-server - (read config from stdin)\n");
    exit(1);
}
/* Program entry point.
 *
 * Steps visible in this excerpt, in order:
 *  - argv[1] equal to "-v" or "--version" calls version(), "--help" calls
 *    usage(); afterwards the save parameters are reset and argv[1] is
 *    handed to loadServerConfig().
 *  - a separate branch exists for argc > 2, and a warning about running
 *    with the default configuration is logged (its guard is not visible).
 *  - daemonize() is called when server.daemonize is set.
 *  - the startup banner is logged and linuxOvercommitMemoryWarning() runs.
 *  - the dataset is loaded with loadAppendOnlyFile() or rdbLoad(); on
 *    REDIS_OK the elapsed seconds since 'start' are logged.
 *  - the listening port is logged, beforeSleep is registered on the event
 *    loop server.el, and the loop is finally released with
 *    aeDeleteEventLoop().
 *
 * NOTE(review): several original lines are missing from this view (for
 * example the declaration of 'start', some branch headers and closers, the
 * code between aeSetBeforeSleepProc() and aeDeleteEventLoop(), and the
 * return statement), so this list is not exhaustive. */
9905 int main(int argc
, char **argv
) {
/* NOTE(review): the condition guarding the argv[1] accesses below is not
 * visible here. */
9910 if (strcmp(argv
[1], "-v") == 0 ||
9911 strcmp(argv
[1], "--version") == 0) version();
9912 if (strcmp(argv
[1], "--help") == 0) usage();
9913 resetServerSaveParams();
9914 loadServerConfig(argv
[1]);
9915 } else if ((argc
> 2)) {
9918 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
9920 if (server
.daemonize
) daemonize();
9922 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
9924 linuxOvercommitMemoryWarning();
/* Dataset loading: the append only file is used when server.appendonly is
 * set. */
9927 if (server
.appendonly
) {
9928 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
9929 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
/* NOTE(review): presumably the alternative branch taken when append only
 * mode is off; the branch header is not visible here. */
9931 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
9932 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
9934 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
9935 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
9937 aeDeleteEventLoop(server
.el
);
9941 /* ============================= Backtrace support ========================= */
9943 #ifdef HAVE_BACKTRACE
/* Forward declaration: segvHandler() calls findFuncName() before the
 * definition appears further down in the file. */
9944 static char *findFuncName(void *pointer
, unsigned long *offset
);
/* Return, as a void pointer, the program counter value stored in the
 * machine context of 'uc'.
 *
 * The field that is read is chosen at compile time:
 *  - FreeBSD:            uc_mcontext.mc_eip
 *  - dietlibc:           uc_mcontext.eip
 *  - Apple (both the pre-10.6 and the 10.6 branch): uc_mcontext->__ss.__rip
 *    or uc_mcontext->__ss.__eip
 *  - i386 / x86_64:      uc_mcontext.gregs[REG_EIP]
 *  - IA64:               uc_mcontext.sc_ip
 *
 * NOTE(review): the inner #if/#else/#endif lines that choose between the
 * __rip and __eip variants, any fallback for other platforms and the
 * closing brace are only partly visible in this excerpt. */
9946 static void *getMcontextEip(ucontext_t
*uc
) {
9947 #if defined(__FreeBSD__)
9948 return (void*) uc
->uc_mcontext
.mc_eip
;
9949 #elif defined(__dietlibc__)
9950 return (void*) uc
->uc_mcontext
.eip
;
9951 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
9953 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9955 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9957 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
9958 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
9959 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
9961 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
9963 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
9964 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
9965 #elif defined(__ia64__) /* Linux IA64 */
9966 return (void*) uc
->uc_mcontext
.sc_ip
;
/* Signal handler that logs diagnostic information about a fatal signal.
 *
 * sig:    signal number, printed in the banner line
 * info:   unused (explicitly marked with REDIS_NOTUSED)
 * secret: handler context, used as a ucontext_t pointer
 *
 * Visible behaviour: a banner with REDIS_VERSION and the signal number is
 * logged, followed by the string returned by genRedisInfoString(). Up to
 * 100 return addresses are collected with backtrace(); when
 * getMcontextEip() yields a non-NULL address it replaces trace[1]. Each
 * frame from index 1 on is then logged, either as the text produced by
 * backtrace_symbols() or as a line built from the name and offset returned
 * by findFuncName().
 *
 * NOTE(review): the declarations of 'trace' and 'infostring', several
 * branch headers/closers and the end of the function are not visible in
 * this excerpt. No NULL check of the backtrace_symbols() result is visible
 * either. */
9972 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
9974 char **messages
= NULL
;
9975 int i
, trace_size
= 0;
9976 unsigned long offset
=0;
9977 ucontext_t
*uc
= (ucontext_t
*) secret
;
9979 REDIS_NOTUSED(info
);
9981 redisLog(REDIS_WARNING
,
9982 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
9983 infostring
= genRedisInfoString();
9984 redisLog(REDIS_WARNING
, "%s",infostring
);
9985 /* It's not safe to sdsfree() the returned string under memory
 * corruption conditions. Let it leak as we are going to abort */
9988 trace_size
= backtrace(trace
, 100);
9989 /* overwrite sigaction with caller's address */
9990 if (getMcontextEip(uc
) != NULL
) {
9991 trace
[1] = getMcontextEip(uc
);
9993 messages
= backtrace_symbols(trace
, trace_size
);
/* Frame 0 is skipped: the loop starts at index 1. */
9995 for (i
=1; i
<trace_size
; ++i
) {
9996 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
/* The number after '+' in the symbol text is compared with the offset
 * computed by findFuncName(); the raw symbol text is logged when no name
 * was found or when that number is the smaller one. */
9998 p
= strchr(messages
[i
],'+');
9999 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
10000 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
/* NOTE(review): the last "%d" is paired with an unsigned int argument;
 * "%u" would match the type. */
10002 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
10005 /* free(messages); Don't call free() with possibly corrupted memory. */
10009 static void setupSigSegvAction(void) {
10010 struct sigaction act
;
10012 sigemptyset (&act
.sa_mask
);
10013 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
10014 * is used. Otherwise, sa_handler is used */
10015 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
10016 act
.sa_sigaction
= segvHandler
;
10017 sigaction (SIGSEGV
, &act
, NULL
);
10018 sigaction (SIGBUS
, &act
, NULL
);
10019 sigaction (SIGFPE
, &act
, NULL
);
10020 sigaction (SIGILL
, &act
, NULL
);
10021 sigaction (SIGBUS
, &act
, NULL
);
10025 #include "staticsymbols.h"
10026 /* This function tries to convert a pointer into a function name. It's used
 * in order to provide a backtrace under segmentation fault that's able to
 * display functions declared as static (otherwise the backtrace is useless).
 *
 * pointer: address to look up
 * offset:  out parameter; NOTE(review): the store into it is not visible in
 *          this excerpt, presumably it receives the distance from the start
 *          of the matched symbol - confirm.
 *
 * Returns symsTable[ret].name for the selected entry, or NULL when no entry
 * was selected (ret == -1).
 *
 * NOTE(review): the declarations of 'i' and 'ret', the statements that
 * record a new best candidate and the closing braces are not visible here.
 * symsTable is presumably provided by the "staticsymbols.h" include placed
 * right above - confirm. */
10029 static char *findFuncName(void *pointer
, unsigned long *offset
){
10031 unsigned long off
, minoff
= 0;
10033 /* Try to match against the Symbol with the smallest offset */
/* The table is walked until an entry whose 'pointer' field is zero. */
10034 for (i
=0; symsTable
[i
].pointer
; i
++) {
10035 unsigned long lp
= (unsigned long) pointer
;
/* Only symbols that start at or before the address are candidates; an
 * address equal to (unsigned long)-1 never matches. */
10037 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
10038 off
=lp
-symsTable
[i
].pointer
;
/* First candidate, or one closer to the address than the best so far. */
10039 if (ret
< 0 || off
< minoff
) {
10045 if (ret
== -1) return NULL
;
10047 return symsTable
[ret
].name
;
10049 #else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined (the #else branch of the
 * backtrace support section). No statement is visible in its body in this
 * excerpt; the closing brace is also outside the visible lines. */
10050 static void setupSigSegvAction(void) {
10052 #endif /* HAVE_BACKTRACE */