2 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
30 #define REDIS_VERSION "2.1.1"
45 #endif /* HAVE_BACKTRACE */
53 #include <arpa/inet.h>
57 #include <sys/resource.h>
65 #include "solarisfixes.h"
69 #include "ae.h" /* Event driven programming library */
70 #include "sds.h" /* Dynamic safe strings */
71 #include "anet.h" /* Networking the easy way */
72 #include "dict.h" /* Hash tables */
73 #include "adlist.h" /* Linked lists */
74 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
75 #include "lzf.h" /* LZF compression library */
76 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
77 #include "zipmap.h" /* Compact dictionary-alike data structure */
78 #include "ziplist.h" /* Compact list data structure */
79 #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
80 #include "release.h" /* Release and/or git repository information */
86 /* Static server configuration */
87 #define REDIS_SERVERPORT 6379 /* TCP port */
88 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
89 #define REDIS_IOBUF_LEN 1024
90 #define REDIS_LOADBUF_LEN 1024
91 #define REDIS_STATIC_ARGS 8
92 #define REDIS_DEFAULT_DBNUM 16
93 #define REDIS_CONFIGLINE_MAX 1024
94 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
95 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
96 #define REDIS_EXPIRELOOKUPS_PER_CRON 10 /* lookup 10 expires per loop */
97 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
98 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
100 /* If more than REDIS_WRITEV_THRESHOLD write packets are pending, use writev */
101 #define REDIS_WRITEV_THRESHOLD 3
102 /* Max number of iovecs used for each writev call */
103 #define REDIS_WRITEV_IOVEC_COUNT 256
105 /* Hash table parameters */
106 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
109 #define REDIS_CMD_BULK 1 /* Bulk write command */
110 #define REDIS_CMD_INLINE 2 /* Inline command */
111 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
112 this flag will return an error when the 'maxmemory' option is set in the
113 config file and the server is using more than maxmemory bytes of memory.
114 In short, these commands are denied on low memory conditions. */
115 #define REDIS_CMD_DENYOOM 4
116 #define REDIS_CMD_FORCE_REPLICATION 8 /* Force replication even if dirty is 0 */
119 #define REDIS_STRING 0
124 #define REDIS_VMPOINTER 8
126 /* Objects encoding. Some kinds of objects, like Strings and Hashes, can be
127 * internally represented in multiple ways. The 'encoding' field of the object
128 * is set to one of these values for this object. */
129 #define REDIS_ENCODING_RAW 0 /* Raw representation */
130 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
131 #define REDIS_ENCODING_HT 2 /* Encoded as hash table */
132 #define REDIS_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
133 #define REDIS_ENCODING_LIST 4 /* Encoded as linked list */
134 #define REDIS_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
136 static char* strencoding
[] = {
137 "raw", "int", "hashtable", "zipmap", "list", "ziplist"
140 /* Object types only used for dumping to disk */
141 #define REDIS_EXPIRETIME 253
142 #define REDIS_SELECTDB 254
143 #define REDIS_EOF 255
145 /* Defines related to the dump file format. To store 32 bits lengths for short
146 * keys requires a lot of space, so we check the most significant 2 bits of
147 * the first byte to interpret the length:
149 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
150 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
151 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
152 * 11|000000 this means: specially encoded object will follow. The six bits
153 * number specifies the kind of object that follows.
154 * See the REDIS_RDB_ENC_* defines.
156 * Lengths up to 63 are stored using a single byte, most DB keys, and many
157 * values, will fit inside. */
158 #define REDIS_RDB_6BITLEN 0
159 #define REDIS_RDB_14BITLEN 1
160 #define REDIS_RDB_32BITLEN 2
161 #define REDIS_RDB_ENCVAL 3
162 #define REDIS_RDB_LENERR UINT_MAX
164 /* When a length of a string object stored on disk has the first two bits
165 * set, the remaining six bits specify a special encoding for the object
166 * according to the following defines: */
167 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
168 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
169 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
170 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
172 /* Virtual memory object->where field. */
173 #define REDIS_VM_MEMORY 0 /* The object is on memory */
174 #define REDIS_VM_SWAPPED 1 /* The object is on disk */
175 #define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
176 #define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
178 /* Virtual memory static configuration stuff.
179 * Check vmFindContiguousPages() to know more about these magic numbers. */
180 #define REDIS_VM_MAX_NEAR_PAGES 65536
181 #define REDIS_VM_MAX_RANDOM_JUMP 4096
182 #define REDIS_VM_MAX_THREADS 32
183 #define REDIS_THREAD_STACK_SIZE (1024*1024*4)
184 /* The following is the *percentage* of completed I/O jobs to process when the
185 * handler is called. While Virtual Memory I/O operations are performed by
186 * threads, these operations must be processed by the main thread when completed
187 * in order to take effect. */
188 #define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
191 #define REDIS_SLAVE 1 /* This client is a slave server */
192 #define REDIS_MASTER 2 /* This client is a master server */
193 #define REDIS_MONITOR 4 /* This client is a slave monitor, see MONITOR */
194 #define REDIS_MULTI 8 /* This client is in a MULTI context */
195 #define REDIS_BLOCKED 16 /* The client is waiting in a blocking operation */
196 #define REDIS_IO_WAIT 32 /* The client is waiting for Virtual Memory I/O */
197 #define REDIS_DIRTY_CAS 64 /* Watched keys modified. EXEC will fail. */
199 /* Slave replication state - slave side */
200 #define REDIS_REPL_NONE 0 /* No active replication */
201 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
202 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
204 /* Slave replication state - from the point of view of the master.
205 * Note that in SEND_BULK and ONLINE state the slave receives new updates
206 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
207 * to start the next background saving in order to send updates to it. */
208 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits for a bgsave to start before feeding it */
209 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits for bgsave to end before the bulk DB transmission */
210 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
211 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
213 /* List related stuff */
217 /* Sort operations */
218 #define REDIS_SORT_GET 0
219 #define REDIS_SORT_ASC 1
220 #define REDIS_SORT_DESC 2
221 #define REDIS_SORTKEY_MAX 1024
224 #define REDIS_DEBUG 0
225 #define REDIS_VERBOSE 1
226 #define REDIS_NOTICE 2
227 #define REDIS_WARNING 3
/* Anti-warning macro: evaluates V and discards the result, so an otherwise
 * unused parameter or variable does not trigger an "unused" diagnostic.
 * The argument is parenthesized so that an expression argument such as
 * REDIS_NOTUSED(a + b) is cast to void as a whole. */
#define REDIS_NOTUSED(V) ((void) (V))
232 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
233 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
235 /* Append only defines */
236 #define APPENDFSYNC_NO 0
237 #define APPENDFSYNC_ALWAYS 1
238 #define APPENDFSYNC_EVERYSEC 2
240 /* Hashes related defaults */
241 #define REDIS_HASH_MAX_ZIPMAP_ENTRIES 64
242 #define REDIS_HASH_MAX_ZIPMAP_VALUE 512
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(_e): when _e is false, reports the stringified expression and
 *                  the source location through _redisAssert(), then calls
 *                  _exit(1). Does nothing when _e is true.
 * redisPanic(_e):  unconditionally reports #_e and the source location
 *                  through _redisPanic(), then calls _exit(1).
 *
 * Both expansions are fully parenthesized so each macro is one expression.
 * Without the outer parentheses `c ? (void)0 : redisPanic(x)` would parse as
 * `(c ? (void)0 : _redisPanic(...)), _exit(1)` and exit unconditionally. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
250 /*================================= Data types ============================== */
252 /* A redis object, that is a type able to hold a string / list / set */
254 /* The actual Redis Object */
255 typedef struct redisObject
{
257 unsigned storage
:2; /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
259 unsigned lru
:22; /* lru time (relative to server.lruclock) */
262 /* VM fields, these are only allocated if VM is active, otherwise the
263 * object allocation function will just allocate
264 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
265 * Redis without VM active will not have any overhead. */
268 /* The VM pointer structure - identifies an object in the swap file.
270 * This object is stored in place of the value
271 * object in the main key->value hash table representing a database.
272 * Note that the first fields (type, storage) are the same as the redisObject
273 * structure so that vmPointer structures can be accessed even when cast
274 * as redisObject structures.
276 * This is useful as we don't know if a value object is on disk or not, but we
277 * are always able to read obj->storage to check this. For vmPointer
278 * structures "type" is set to REDIS_VMPOINTER (even though without this field
279 * it is still possible to check the kind of object from the value of 'storage').*/
280 typedef struct vmPointer
{
282 unsigned storage
:2; /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
284 unsigned int vtype
; /* type of the object stored in the swap file */
285 off_t page
; /* the page at which the object is stored on disk */
286 off_t usedpages
; /* number of pages used on disk */
289 /* Macro used to initialize a Redis object allocated on the stack.
290 * Note that this macro is kept near the structure definition to make sure
291 * we'll update it when the structure is changed, to avoid bugs like
292 * bug #85 introduced exactly in this way. */
293 #define initStaticStringObject(_var,_ptr) do { \
295 _var.type = REDIS_STRING; \
296 _var.encoding = REDIS_ENCODING_RAW; \
298 _var.storage = REDIS_VM_MEMORY; \
301 typedef struct redisDb
{
302 dict
*dict
; /* The keyspace for this DB */
303 dict
*expires
; /* Timeout of keys with a timeout set */
304 dict
*blocking_keys
; /* Keys with clients waiting for data (BLPOP) */
305 dict
*io_keys
; /* Keys with clients waiting for VM I/O */
306 dict
*watched_keys
; /* WATCHED keys for MULTI/EXEC CAS */
310 /* Client MULTI/EXEC state */
311 typedef struct multiCmd
{
314 struct redisCommand
*cmd
;
317 typedef struct multiState
{
318 multiCmd
*commands
; /* Array of MULTI commands */
319 int count
; /* Total number of MULTI commands */
322 /* With multiplexing we need to keep per-client state.
323 * Clients are kept in a linked list. */
324 typedef struct redisClient
{
329 robj
**argv
, **mbargv
;
331 int bulklen
; /* bulk read len. -1 if not in bulk read mode */
332 int multibulk
; /* multi bulk command format active */
335 time_t lastinteraction
; /* time of the last interaction, used for timeout */
336 int flags
; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
337 int slaveseldb
; /* slave selected db, if this client is a slave */
338 int authenticated
; /* when requirepass is non-NULL */
339 int replstate
; /* replication state if this is a slave */
340 int repldbfd
; /* replication DB file descriptor */
341 long repldboff
; /* replication DB file offset */
342 off_t repldbsize
; /* replication DB file size */
343 multiState mstate
; /* MULTI/EXEC state */
344 robj
**blocking_keys
; /* The key we are waiting to terminate a blocking
345 * operation such as BLPOP. Otherwise NULL. */
346 int blocking_keys_num
; /* Number of blocking keys */
347 time_t blockingto
; /* Blocking operation timeout. If UNIX current time
348 * is >= blockingto then the operation timed out. */
349 list
*io_keys
; /* Keys this client is waiting to be loaded from the
350 * swap file in order to continue. */
351 list
*watched_keys
; /* Keys WATCHED for MULTI/EXEC CAS */
352 dict
*pubsub_channels
; /* channels a client is interested in (SUBSCRIBE) */
353 list
*pubsub_patterns
; /* patterns a client is interested in (SUBSCRIBE) */
361 /* Global server state structure */
366 long long dirty
; /* changes to DB from the last save */
368 list
*slaves
, *monitors
;
369 char neterr
[ANET_ERR_LEN
];
371 int cronloops
; /* number of times the cron function run */
372 list
*objfreelist
; /* A list of freed objects to avoid malloc() */
373 time_t lastsave
; /* Unix time of the last successful save */
374 /* Fields used only for stats */
375 time_t stat_starttime
; /* server start time */
376 long long stat_numcommands
; /* number of processed commands */
377 long long stat_numconnections
; /* number of connections received */
378 long long stat_expiredkeys
; /* number of expired keys */
387 int no_appendfsync_on_rewrite
;
393 pid_t bgsavechildpid
;
394 pid_t bgrewritechildpid
;
395 sds bgrewritebuf
; /* buffer taken by parent during append only rewrite */
396 sds aofbuf
; /* AOF buffer, written before entering the event loop */
397 struct saveparam
*saveparams
;
402 char *appendfilename
;
406 /* Replication related */
411 redisClient
*master
; /* client that is master for this slave */
413 unsigned int maxclients
;
414 unsigned long long maxmemory
;
415 unsigned int blpop_blocked_clients
;
416 unsigned int vm_blocked_clients
;
417 /* Sort parameters - qsort_r() is only available under BSD so we
418 * have to take this state global, in order to pass it to sortCompare() */
422 /* Virtual memory configuration */
427 unsigned long long vm_max_memory
;
429 size_t hash_max_zipmap_entries
;
430 size_t hash_max_zipmap_value
;
431 /* Virtual memory state */
434 off_t vm_next_page
; /* Next probably empty page */
435 off_t vm_near_pages
; /* Number of pages allocated sequentially */
436 unsigned char *vm_bitmap
; /* Bitmap of free/used pages */
437 time_t unixtime
; /* Unix time sampled every second. */
438 /* Virtual memory I/O threads stuff */
439 /* An I/O thread processes an element taken from the io_newjobs queue and
440 * puts the result of the operation in the io_processed list. While the
441 * job is being processed, it's put on the io_processing queue. */
442 list
*io_newjobs
; /* List of VM I/O jobs yet to be processed */
443 list
*io_processing
; /* List of VM I/O jobs being processed */
444 list
*io_processed
; /* List of VM I/O jobs already processed */
445 list
*io_ready_clients
; /* Clients ready to be unblocked. All keys loaded */
446 pthread_mutex_t io_mutex
; /* lock to access io_jobs/io_done/io_thread_job */
447 pthread_mutex_t obj_freelist_mutex
; /* safe redis objects creation/free */
448 pthread_mutex_t io_swapfile_mutex
; /* So we can lseek + write */
449 pthread_attr_t io_threads_attr
; /* attributes for threads creation */
450 int io_active_threads
; /* Number of running I/O threads */
451 int vm_max_threads
; /* Max number of I/O threads running at the same time */
452 /* Our main thread is blocked on the event loop, looking for sockets ready
453 * to be read or written, so when a threaded I/O operation is ready to be
454 * processed by the main thread, the I/O thread will use a unix pipe to
455 * wake up the main thread. The following are the two pipe FDs. */
456 int io_ready_pipe_read
;
457 int io_ready_pipe_write
;
458 /* Virtual memory stats */
459 unsigned long long vm_stats_used_pages
;
460 unsigned long long vm_stats_swapped_objects
;
461 unsigned long long vm_stats_swapouts
;
462 unsigned long long vm_stats_swapins
;
464 dict
*pubsub_channels
; /* Map channels to list of subscribed clients */
465 list
*pubsub_patterns
; /* A list of pubsub_patterns */
468 unsigned lruclock
:22; /* clock incrementing every minute, for LRU */
469 unsigned lruclock_padding
:10;
472 typedef struct pubsubPattern
{
477 typedef void redisCommandProc(redisClient
*c
);
478 typedef void redisVmPreloadProc(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
479 struct redisCommand
{
481 redisCommandProc
*proc
;
484 /* Use a function to determine which keys need to be loaded
485 * in the background prior to executing this command. Takes precedence
486 * over vm_firstkey and others, ignored when NULL */
487 redisVmPreloadProc
*vm_preload_proc
;
488 /* What keys should be loaded in background when calling this command? */
489 int vm_firstkey
; /* The first argument that's a key (0 = no keys) */
490 int vm_lastkey
; /* The last argument that's a key */
491 int vm_keystep
; /* The step between first and last key */
494 struct redisFunctionSym
{
496 unsigned long pointer
;
499 typedef struct _redisSortObject
{
507 typedef struct _redisSortOperation
{
510 } redisSortOperation
;
512 /* ZSETs use a specialized version of Skiplists */
514 typedef struct zskiplistNode
{
515 struct zskiplistNode
**forward
;
516 struct zskiplistNode
*backward
;
522 typedef struct zskiplist
{
523 struct zskiplistNode
*header
, *tail
;
524 unsigned long length
;
528 typedef struct zset
{
533 /* Our shared "common" objects */
535 #define REDIS_SHARED_INTEGERS 10000
536 struct sharedObjectsStruct
{
537 robj
*crlf
, *ok
, *err
, *emptybulk
, *czero
, *cone
, *pong
, *space
,
538 *colon
, *nullbulk
, *nullmultibulk
, *queued
,
539 *emptymultibulk
, *wrongtypeerr
, *nokeyerr
, *syntaxerr
, *sameobjecterr
,
540 *outofrangeerr
, *plus
,
541 *select0
, *select1
, *select2
, *select3
, *select4
,
542 *select5
, *select6
, *select7
, *select8
, *select9
,
543 *messagebulk
, *pmessagebulk
, *subscribebulk
, *unsubscribebulk
, *mbulk3
,
544 *mbulk4
, *psubscribebulk
, *punsubscribebulk
,
545 *integers
[REDIS_SHARED_INTEGERS
];
548 /* Global vars that are actually used as constants. The following double
549 * values are used for double on-disk serialization, and are initialized
550 * at runtime to avoid strange compiler optimizations. */
552 static double R_Zero
, R_PosInf
, R_NegInf
, R_Nan
;
554 /* VM threaded I/O request message */
555 #define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
556 #define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
557 #define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
558 typedef struct iojob
{
559 int type
; /* Request type, REDIS_IOJOB_* */
560 redisDb
*db
;/* Redis database */
561 robj
*key
; /* This I/O request is about swapping this key */
562 robj
*id
; /* Unique identifier of this job:
563 this is the object to swap for REDIS_IOJOB_*_SWAP, or the
564 vmpointer object for REDIS_IOJOB_LOAD. */
565 robj
*val
; /* the value to swap for REDIS_IOJOB_*_SWAP, otherwise this
566 * field is populated by the I/O thread for REDIS_IOJOB_LOAD. */
567 off_t page
; /* Swap page where to read/write the object */
568 off_t pages
; /* Swap pages needed to save object. PREPARE_SWAP return val */
569 int canceled
; /* True if this command was canceled by blocking side of VM */
570 pthread_t thread
; /* ID of the thread processing this entry */
573 /*================================ Prototypes =============================== */
575 static void freeStringObject(robj
*o
);
576 static void freeListObject(robj
*o
);
577 static void freeSetObject(robj
*o
);
578 static void decrRefCount(void *o
);
579 static robj
*createObject(int type
, void *ptr
);
580 static void freeClient(redisClient
*c
);
581 static int rdbLoad(char *filename
);
582 static void addReply(redisClient
*c
, robj
*obj
);
583 static void addReplySds(redisClient
*c
, sds s
);
584 static void incrRefCount(robj
*o
);
585 static int rdbSaveBackground(char *filename
);
586 static robj
*createStringObject(char *ptr
, size_t len
);
587 static robj
*dupStringObject(robj
*o
);
588 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
);
589 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
);
590 static void flushAppendOnlyFile(void);
591 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
);
592 static int syncWithMaster(void);
593 static robj
*tryObjectEncoding(robj
*o
);
594 static robj
*getDecodedObject(robj
*o
);
595 static int removeExpire(redisDb
*db
, robj
*key
);
596 static int expireIfNeeded(redisDb
*db
, robj
*key
);
597 static int deleteIfVolatile(redisDb
*db
, robj
*key
);
598 static int dbDelete(redisDb
*db
, robj
*key
);
599 static time_t getExpire(redisDb
*db
, robj
*key
);
600 static int setExpire(redisDb
*db
, robj
*key
, time_t when
);
601 static void updateSlavesWaitingBgsave(int bgsaveerr
);
602 static void freeMemoryIfNeeded(void);
603 static int processCommand(redisClient
*c
);
604 static void setupSigSegvAction(void);
605 static void rdbRemoveTempFile(pid_t childpid
);
606 static void aofRemoveTempFile(pid_t childpid
);
607 static size_t stringObjectLen(robj
*o
);
608 static void processInputBuffer(redisClient
*c
);
609 static zskiplist
*zslCreate(void);
610 static void zslFree(zskiplist
*zsl
);
611 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
);
612 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
613 static void initClientMultiState(redisClient
*c
);
614 static void freeClientMultiState(redisClient
*c
);
615 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
);
616 static void unblockClientWaitingData(redisClient
*c
);
617 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
);
618 static void vmInit(void);
619 static void vmMarkPagesFree(off_t page
, off_t count
);
620 static robj
*vmLoadObject(robj
*o
);
621 static robj
*vmPreviewObject(robj
*o
);
622 static int vmSwapOneObjectBlocking(void);
623 static int vmSwapOneObjectThreaded(void);
624 static int vmCanSwapOut(void);
625 static int tryFreeOneObjectFromFreelist(void);
626 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
627 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
628 static void vmCancelThreadedIOJob(robj
*o
);
629 static void lockThreadedIO(void);
630 static void unlockThreadedIO(void);
631 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
);
632 static void freeIOJob(iojob
*j
);
633 static void queueIOJob(iojob
*j
);
634 static int vmWriteObjectOnSwap(robj
*o
, off_t page
);
635 static robj
*vmReadObjectFromSwap(off_t page
, int type
);
636 static void waitEmptyIOJobsQueue(void);
637 static void vmReopenSwapFile(void);
638 static int vmFreePage(off_t page
);
639 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
640 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
);
641 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
);
642 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
);
643 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
);
644 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
);
645 static struct redisCommand
*lookupCommand(char *name
);
646 static void call(redisClient
*c
, struct redisCommand
*cmd
);
647 static void resetClient(redisClient
*c
);
648 static void convertToRealHash(robj
*o
);
649 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
);
650 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
);
651 static void freePubsubPattern(void *p
);
652 static int listMatchPubsubPattern(void *a
, void *b
);
653 static int compareStringObjects(robj
*a
, robj
*b
);
654 static int equalStringObjects(robj
*a
, robj
*b
);
656 static int rewriteAppendOnlyFileBackground(void);
657 static vmpointer
*vmSwapObjectBlocking(robj
*val
);
/* Forward declaration. Declared with (void) so it is a real prototype: a call
 * that passes arguments is diagnosed at compile time, matching the other
 * no-argument declarations in this list. */
static int prepareForShutdown(void);
659 static void touchWatchedKey(redisDb
*db
, robj
*key
);
660 static void touchWatchedKeysOnFlush(int dbid
);
661 static void unwatchAllKeys(redisClient
*c
);
663 static void authCommand(redisClient
*c
);
664 static void pingCommand(redisClient
*c
);
665 static void echoCommand(redisClient
*c
);
666 static void setCommand(redisClient
*c
);
667 static void setnxCommand(redisClient
*c
);
668 static void setexCommand(redisClient
*c
);
669 static void getCommand(redisClient
*c
);
670 static void delCommand(redisClient
*c
);
671 static void existsCommand(redisClient
*c
);
672 static void incrCommand(redisClient
*c
);
673 static void decrCommand(redisClient
*c
);
674 static void incrbyCommand(redisClient
*c
);
675 static void decrbyCommand(redisClient
*c
);
676 static void selectCommand(redisClient
*c
);
677 static void randomkeyCommand(redisClient
*c
);
678 static void keysCommand(redisClient
*c
);
679 static void dbsizeCommand(redisClient
*c
);
680 static void lastsaveCommand(redisClient
*c
);
681 static void saveCommand(redisClient
*c
);
682 static void bgsaveCommand(redisClient
*c
);
683 static void bgrewriteaofCommand(redisClient
*c
);
684 static void shutdownCommand(redisClient
*c
);
685 static void moveCommand(redisClient
*c
);
686 static void renameCommand(redisClient
*c
);
687 static void renamenxCommand(redisClient
*c
);
688 static void lpushCommand(redisClient
*c
);
689 static void rpushCommand(redisClient
*c
);
690 static void lpopCommand(redisClient
*c
);
691 static void rpopCommand(redisClient
*c
);
692 static void llenCommand(redisClient
*c
);
693 static void lindexCommand(redisClient
*c
);
694 static void lrangeCommand(redisClient
*c
);
695 static void ltrimCommand(redisClient
*c
);
696 static void typeCommand(redisClient
*c
);
697 static void lsetCommand(redisClient
*c
);
698 static void saddCommand(redisClient
*c
);
699 static void sremCommand(redisClient
*c
);
700 static void smoveCommand(redisClient
*c
);
701 static void sismemberCommand(redisClient
*c
);
702 static void scardCommand(redisClient
*c
);
703 static void spopCommand(redisClient
*c
);
704 static void srandmemberCommand(redisClient
*c
);
705 static void sinterCommand(redisClient
*c
);
706 static void sinterstoreCommand(redisClient
*c
);
707 static void sunionCommand(redisClient
*c
);
708 static void sunionstoreCommand(redisClient
*c
);
709 static void sdiffCommand(redisClient
*c
);
710 static void sdiffstoreCommand(redisClient
*c
);
711 static void syncCommand(redisClient
*c
);
712 static void flushdbCommand(redisClient
*c
);
713 static void flushallCommand(redisClient
*c
);
714 static void sortCommand(redisClient
*c
);
715 static void lremCommand(redisClient
*c
);
716 static void rpoplpushcommand(redisClient
*c
);
717 static void infoCommand(redisClient
*c
);
718 static void mgetCommand(redisClient
*c
);
719 static void monitorCommand(redisClient
*c
);
720 static void expireCommand(redisClient
*c
);
721 static void expireatCommand(redisClient
*c
);
722 static void getsetCommand(redisClient
*c
);
723 static void ttlCommand(redisClient
*c
);
724 static void slaveofCommand(redisClient
*c
);
725 static void debugCommand(redisClient
*c
);
726 static void msetCommand(redisClient
*c
);
727 static void msetnxCommand(redisClient
*c
);
728 static void zaddCommand(redisClient
*c
);
729 static void zincrbyCommand(redisClient
*c
);
730 static void zrangeCommand(redisClient
*c
);
731 static void zrangebyscoreCommand(redisClient
*c
);
732 static void zcountCommand(redisClient
*c
);
733 static void zrevrangeCommand(redisClient
*c
);
734 static void zcardCommand(redisClient
*c
);
735 static void zremCommand(redisClient
*c
);
736 static void zscoreCommand(redisClient
*c
);
737 static void zremrangebyscoreCommand(redisClient
*c
);
738 static void multiCommand(redisClient
*c
);
739 static void execCommand(redisClient
*c
);
740 static void discardCommand(redisClient
*c
);
741 static void blpopCommand(redisClient
*c
);
742 static void brpopCommand(redisClient
*c
);
743 static void appendCommand(redisClient
*c
);
744 static void substrCommand(redisClient
*c
);
745 static void zrankCommand(redisClient
*c
);
746 static void zrevrankCommand(redisClient
*c
);
747 static void hsetCommand(redisClient
*c
);
748 static void hsetnxCommand(redisClient
*c
);
749 static void hgetCommand(redisClient
*c
);
750 static void hmsetCommand(redisClient
*c
);
751 static void hmgetCommand(redisClient
*c
);
752 static void hdelCommand(redisClient
*c
);
753 static void hlenCommand(redisClient
*c
);
754 static void zremrangebyrankCommand(redisClient
*c
);
755 static void zunionstoreCommand(redisClient
*c
);
756 static void zinterstoreCommand(redisClient
*c
);
757 static void hkeysCommand(redisClient
*c
);
758 static void hvalsCommand(redisClient
*c
);
759 static void hgetallCommand(redisClient
*c
);
760 static void hexistsCommand(redisClient
*c
);
761 static void configCommand(redisClient
*c
);
762 static void hincrbyCommand(redisClient
*c
);
763 static void subscribeCommand(redisClient
*c
);
764 static void unsubscribeCommand(redisClient
*c
);
765 static void psubscribeCommand(redisClient
*c
);
766 static void punsubscribeCommand(redisClient
*c
);
767 static void publishCommand(redisClient
*c
);
768 static void watchCommand(redisClient
*c
);
769 static void unwatchCommand(redisClient
*c
);
771 /*================================= Globals ================================= */
774 static struct redisServer server
; /* server global state */
775 static struct redisCommand
*commandTable
;
776 static struct redisCommand readonlyCommandTable
[] = {
777 {"get",getCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
778 {"set",setCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
779 {"setnx",setnxCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
780 {"setex",setexCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,0,0,0},
781 {"append",appendCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
782 {"substr",substrCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
783 {"del",delCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
784 {"exists",existsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
785 {"incr",incrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
786 {"decr",decrCommand
,2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
787 {"mget",mgetCommand
,-2,REDIS_CMD_INLINE
,NULL
,1,-1,1},
788 {"rpush",rpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
789 {"lpush",lpushCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
790 {"rpop",rpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
791 {"lpop",lpopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
792 {"brpop",brpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
793 {"blpop",blpopCommand
,-3,REDIS_CMD_INLINE
,NULL
,1,1,1},
794 {"llen",llenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
795 {"lindex",lindexCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
796 {"lset",lsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
797 {"lrange",lrangeCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
798 {"ltrim",ltrimCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
799 {"lrem",lremCommand
,4,REDIS_CMD_BULK
,NULL
,1,1,1},
800 {"rpoplpush",rpoplpushcommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,2,1},
801 {"sadd",saddCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
802 {"srem",sremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
803 {"smove",smoveCommand
,4,REDIS_CMD_BULK
,NULL
,1,2,1},
804 {"sismember",sismemberCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
805 {"scard",scardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
806 {"spop",spopCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
807 {"srandmember",srandmemberCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
808 {"sinter",sinterCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
809 {"sinterstore",sinterstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
810 {"sunion",sunionCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
811 {"sunionstore",sunionstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
812 {"sdiff",sdiffCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,-1,1},
813 {"sdiffstore",sdiffstoreCommand
,-3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,2,-1,1},
814 {"smembers",sinterCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
815 {"zadd",zaddCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
816 {"zincrby",zincrbyCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
817 {"zrem",zremCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
818 {"zremrangebyscore",zremrangebyscoreCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
819 {"zremrangebyrank",zremrangebyrankCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
820 {"zunionstore",zunionstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
821 {"zinterstore",zinterstoreCommand
,-4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,zunionInterBlockClientOnSwappedKeys
,0,0,0},
822 {"zrange",zrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
823 {"zrangebyscore",zrangebyscoreCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
824 {"zcount",zcountCommand
,4,REDIS_CMD_INLINE
,NULL
,1,1,1},
825 {"zrevrange",zrevrangeCommand
,-4,REDIS_CMD_INLINE
,NULL
,1,1,1},
826 {"zcard",zcardCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
827 {"zscore",zscoreCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
828 {"zrank",zrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
829 {"zrevrank",zrevrankCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
830 {"hset",hsetCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
831 {"hsetnx",hsetnxCommand
,4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
832 {"hget",hgetCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
833 {"hmset",hmsetCommand
,-4,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
834 {"hmget",hmgetCommand
,-3,REDIS_CMD_BULK
,NULL
,1,1,1},
835 {"hincrby",hincrbyCommand
,4,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
836 {"hdel",hdelCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
837 {"hlen",hlenCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
838 {"hkeys",hkeysCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
839 {"hvals",hvalsCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
840 {"hgetall",hgetallCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
841 {"hexists",hexistsCommand
,3,REDIS_CMD_BULK
,NULL
,1,1,1},
842 {"incrby",incrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
843 {"decrby",decrbyCommand
,3,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
844 {"getset",getsetCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
845 {"mset",msetCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
846 {"msetnx",msetnxCommand
,-3,REDIS_CMD_BULK
|REDIS_CMD_DENYOOM
,NULL
,1,-1,2},
847 {"randomkey",randomkeyCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
848 {"select",selectCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
849 {"move",moveCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
850 {"rename",renameCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
851 {"renamenx",renamenxCommand
,3,REDIS_CMD_INLINE
,NULL
,1,1,1},
852 {"expire",expireCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
853 {"expireat",expireatCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
854 {"keys",keysCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
855 {"dbsize",dbsizeCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
856 {"auth",authCommand
,2,REDIS_CMD_INLINE
,NULL
,0,0,0},
857 {"ping",pingCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
858 {"echo",echoCommand
,2,REDIS_CMD_BULK
,NULL
,0,0,0},
859 {"save",saveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
860 {"bgsave",bgsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
861 {"bgrewriteaof",bgrewriteaofCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
862 {"shutdown",shutdownCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
863 {"lastsave",lastsaveCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
864 {"type",typeCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
865 {"multi",multiCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
866 {"exec",execCommand
,1,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,execBlockClientOnSwappedKeys
,0,0,0},
867 {"discard",discardCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
868 {"sync",syncCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
869 {"flushdb",flushdbCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
870 {"flushall",flushallCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
871 {"sort",sortCommand
,-2,REDIS_CMD_INLINE
|REDIS_CMD_DENYOOM
,NULL
,1,1,1},
872 {"info",infoCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
873 {"monitor",monitorCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0},
874 {"ttl",ttlCommand
,2,REDIS_CMD_INLINE
,NULL
,1,1,1},
875 {"slaveof",slaveofCommand
,3,REDIS_CMD_INLINE
,NULL
,0,0,0},
876 {"debug",debugCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
877 {"config",configCommand
,-2,REDIS_CMD_BULK
,NULL
,0,0,0},
878 {"subscribe",subscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
879 {"unsubscribe",unsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
880 {"psubscribe",psubscribeCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
881 {"punsubscribe",punsubscribeCommand
,-1,REDIS_CMD_INLINE
,NULL
,0,0,0},
882 {"publish",publishCommand
,3,REDIS_CMD_BULK
|REDIS_CMD_FORCE_REPLICATION
,NULL
,0,0,0},
883 {"watch",watchCommand
,-2,REDIS_CMD_INLINE
,NULL
,0,0,0},
884 {"unwatch",unwatchCommand
,1,REDIS_CMD_INLINE
,NULL
,0,0,0}
887 /*============================ Utility functions ============================ */
889 /* Glob-style pattern matching. */
890 static int stringmatchlen(const char *pattern
, int patternLen
,
891 const char *string
, int stringLen
, int nocase
)
896 while (pattern
[1] == '*') {
901 return 1; /* match */
903 if (stringmatchlen(pattern
+1, patternLen
-1,
904 string
, stringLen
, nocase
))
905 return 1; /* match */
909 return 0; /* no match */
913 return 0; /* no match */
923 not = pattern
[0] == '^';
930 if (pattern
[0] == '\\') {
933 if (pattern
[0] == string
[0])
935 } else if (pattern
[0] == ']') {
937 } else if (patternLen
== 0) {
941 } else if (pattern
[1] == '-' && patternLen
>= 3) {
942 int start
= pattern
[0];
943 int end
= pattern
[2];
951 start
= tolower(start
);
957 if (c
>= start
&& c
<= end
)
961 if (pattern
[0] == string
[0])
964 if (tolower((int)pattern
[0]) == tolower((int)string
[0]))
974 return 0; /* no match */
980 if (patternLen
>= 2) {
987 if (pattern
[0] != string
[0])
988 return 0; /* no match */
990 if (tolower((int)pattern
[0]) != tolower((int)string
[0]))
991 return 0; /* no match */
999 if (stringLen
== 0) {
1000 while(*pattern
== '*') {
1007 if (patternLen
== 0 && stringLen
== 0)
1012 static int stringmatch(const char *pattern
, const char *string
, int nocase
) {
1013 return stringmatchlen(pattern
,strlen(pattern
),string
,strlen(string
),nocase
);
1016 /* Convert a string representing an amount of memory into the number of
1017 * bytes, so for instance memtoll("1Gi") will return 1073741824 that is
1020 * On parsing error, if *err is not NULL, it's set to 1, otherwise it's
1022 static long long memtoll(const char *p
, int *err
) {
1025 long mul
; /* unit multiplier */
1027 unsigned int digits
;
1030 /* Search the first non digit character. */
1033 while(*u
&& isdigit(*u
)) u
++;
1034 if (*u
== '\0' || !strcasecmp(u
,"b")) {
1036 } else if (!strcasecmp(u
,"k")) {
1038 } else if (!strcasecmp(u
,"kb")) {
1040 } else if (!strcasecmp(u
,"m")) {
1042 } else if (!strcasecmp(u
,"mb")) {
1044 } else if (!strcasecmp(u
,"g")) {
1045 mul
= 1000L*1000*1000;
1046 } else if (!strcasecmp(u
,"gb")) {
1047 mul
= 1024L*1024*1024;
1053 if (digits
>= sizeof(buf
)) {
1057 memcpy(buf
,p
,digits
);
1059 val
= strtoll(buf
,NULL
,10);
1063 /* Convert a long long into a string. Returns the number of
1064 * characters needed to represent the number, that can be shorter if passed
1065 * buffer length is not enough to store the whole number. */
1066 static int ll2string(char *s
, size_t len
, long long value
) {
1068 unsigned long long v
;
1071 if (len
== 0) return 0;
1072 v
= (value
< 0) ? -value
: value
;
1073 p
= buf
+31; /* point to the last character */
1078 if (value
< 0) *p
-- = '-';
1081 if (l
+1 > len
) l
= len
-1; /* Make sure it fits, including the nul term */
1087 static void redisLog(int level
, const char *fmt
, ...) {
1091 fp
= (server
.logfile
== NULL
) ? stdout
: fopen(server
.logfile
,"a");
1095 if (level
>= server
.verbosity
) {
1101 strftime(buf
,64,"%d %b %H:%M:%S",localtime(&now
));
1102 fprintf(fp
,"[%d] %s %c ",(int)getpid(),buf
,c
[level
]);
1103 vfprintf(fp
, fmt
, ap
);
1109 if (server
.logfile
) fclose(fp
);
1112 /*====================== Hash table type implementation ==================== */
1114 /* This is a hash table type that uses the SDS dynamic strings library as
1115 * keys and redis objects as values (objects can hold SDS strings,
1118 static void dictVanillaFree(void *privdata
, void *val
)
1120 DICT_NOTUSED(privdata
);
1124 static void dictListDestructor(void *privdata
, void *val
)
1126 DICT_NOTUSED(privdata
);
1127 listRelease((list
*)val
);
1130 static int dictSdsKeyCompare(void *privdata
, const void *key1
,
1134 DICT_NOTUSED(privdata
);
1136 l1
= sdslen((sds
)key1
);
1137 l2
= sdslen((sds
)key2
);
1138 if (l1
!= l2
) return 0;
1139 return memcmp(key1
, key2
, l1
) == 0;
1142 static void dictRedisObjectDestructor(void *privdata
, void *val
)
1144 DICT_NOTUSED(privdata
);
1146 if (val
== NULL
) return; /* Values of swapped out keys as set to NULL */
1150 static void dictSdsDestructor(void *privdata
, void *val
)
1152 DICT_NOTUSED(privdata
);
1157 static int dictObjKeyCompare(void *privdata
, const void *key1
,
1160 const robj
*o1
= key1
, *o2
= key2
;
1161 return dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1164 static unsigned int dictObjHash(const void *key
) {
1165 const robj
*o
= key
;
1166 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1169 static unsigned int dictSdsHash(const void *key
) {
1170 return dictGenHashFunction((unsigned char*)key
, sdslen((char*)key
));
1173 static int dictEncObjKeyCompare(void *privdata
, const void *key1
,
1176 robj
*o1
= (robj
*) key1
, *o2
= (robj
*) key2
;
1179 if (o1
->encoding
== REDIS_ENCODING_INT
&&
1180 o2
->encoding
== REDIS_ENCODING_INT
)
1181 return o1
->ptr
== o2
->ptr
;
1183 o1
= getDecodedObject(o1
);
1184 o2
= getDecodedObject(o2
);
1185 cmp
= dictSdsKeyCompare(privdata
,o1
->ptr
,o2
->ptr
);
1191 static unsigned int dictEncObjHash(const void *key
) {
1192 robj
*o
= (robj
*) key
;
1194 if (o
->encoding
== REDIS_ENCODING_RAW
) {
1195 return dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1197 if (o
->encoding
== REDIS_ENCODING_INT
) {
1201 len
= ll2string(buf
,32,(long)o
->ptr
);
1202 return dictGenHashFunction((unsigned char*)buf
, len
);
1206 o
= getDecodedObject(o
);
1207 hash
= dictGenHashFunction(o
->ptr
, sdslen((sds
)o
->ptr
));
1215 static dictType setDictType
= {
1216 dictEncObjHash
, /* hash function */
1219 dictEncObjKeyCompare
, /* key compare */
1220 dictRedisObjectDestructor
, /* key destructor */
1221 NULL
/* val destructor */
1224 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1225 static dictType zsetDictType
= {
1226 dictEncObjHash
, /* hash function */
1229 dictEncObjKeyCompare
, /* key compare */
1230 dictRedisObjectDestructor
, /* key destructor */
1231 dictVanillaFree
/* val destructor of malloc(sizeof(double)) */
1234 /* Db->dict, keys are sds strings, vals are Redis objects. */
1235 static dictType dbDictType
= {
1236 dictSdsHash
, /* hash function */
1239 dictSdsKeyCompare
, /* key compare */
1240 dictSdsDestructor
, /* key destructor */
1241 dictRedisObjectDestructor
/* val destructor */
1245 static dictType keyptrDictType
= {
1246 dictSdsHash
, /* hash function */
1249 dictSdsKeyCompare
, /* key compare */
1250 dictSdsDestructor
, /* key destructor */
1251 NULL
/* val destructor */
1254 /* Hash type hash table (note that small hashes are represented with zipmaps) */
1255 static dictType hashDictType
= {
1256 dictEncObjHash
, /* hash function */
1259 dictEncObjKeyCompare
, /* key compare */
1260 dictRedisObjectDestructor
, /* key destructor */
1261 dictRedisObjectDestructor
/* val destructor */
1264 /* Keylist hash table type has unencoded redis objects as keys and
1265 * lists as values. It's used for blocking operations (BLPOP) and to
1266 * map swapped keys to a list of clients waiting for these keys to be loaded. */
1267 static dictType keylistDictType
= {
1268 dictObjHash
, /* hash function */
1271 dictObjKeyCompare
, /* key compare */
1272 dictRedisObjectDestructor
, /* key destructor */
1273 dictListDestructor
/* val destructor */
1276 static void version();
1278 /* ========================= Random utility functions ======================= */
1280 /* Redis generally does not try to recover from out of memory conditions
1281 * when allocating objects or strings, it is not clear if it will be possible
1282 * to report this condition to the client since the networking layer itself
1283 * is based on heap allocation for send buffers, so we simply abort.
1284 * At least the code will be simpler to read... */
1285 static void oom(const char *msg
) {
1286 redisLog(REDIS_WARNING
, "%s: Out of memory\n",msg
);
1291 /* ====================== Redis server networking stuff ===================== */
1292 static void closeTimedoutClients(void) {
1295 time_t now
= time(NULL
);
1298 listRewind(server
.clients
,&li
);
1299 while ((ln
= listNext(&li
)) != NULL
) {
1300 c
= listNodeValue(ln
);
1301 if (server
.maxidletime
&&
1302 !(c
->flags
& REDIS_SLAVE
) && /* no timeout for slaves */
1303 !(c
->flags
& REDIS_MASTER
) && /* no timeout for masters */
1304 dictSize(c
->pubsub_channels
) == 0 && /* no timeout for pubsub */
1305 listLength(c
->pubsub_patterns
) == 0 &&
1306 (now
- c
->lastinteraction
> server
.maxidletime
))
1308 redisLog(REDIS_VERBOSE
,"Closing idle client");
1310 } else if (c
->flags
& REDIS_BLOCKED
) {
1311 if (c
->blockingto
!= 0 && c
->blockingto
< now
) {
1312 addReply(c
,shared
.nullmultibulk
);
1313 unblockClientWaitingData(c
);
1319 static int htNeedsResize(dict
*dict
) {
1320 long long size
, used
;
1322 size
= dictSlots(dict
);
1323 used
= dictSize(dict
);
1324 return (size
&& used
&& size
> DICT_HT_INITIAL_SIZE
&&
1325 (used
*100/size
< REDIS_HT_MINFILL
));
1328 /* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL
1329 * we resize the hash table to save memory */
1330 static void tryResizeHashTables(void) {
1333 for (j
= 0; j
< server
.dbnum
; j
++) {
1334 if (htNeedsResize(server
.db
[j
].dict
))
1335 dictResize(server
.db
[j
].dict
);
1336 if (htNeedsResize(server
.db
[j
].expires
))
1337 dictResize(server
.db
[j
].expires
);
1341 /* Our hash table implementation performs rehashing incrementally while
1342 * we write/read from the hash table. Still if the server is idle, the hash
1343 * table will use two tables for a long time. So we try to use 1 millisecond
1344 * of CPU time at every serverCron() loop in order to rehash some key. */
1345 static void incrementallyRehash(void) {
1348 for (j
= 0; j
< server
.dbnum
; j
++) {
1349 if (dictIsRehashing(server
.db
[j
].dict
)) {
1350 dictRehashMilliseconds(server
.db
[j
].dict
,1);
1351 break; /* already used our millisecond for this loop... */
1356 /* A background saving child (BGSAVE) terminated its work. Handle this. */
1357 void backgroundSaveDoneHandler(int statloc
) {
1358 int exitcode
= WEXITSTATUS(statloc
);
1359 int bysignal
= WIFSIGNALED(statloc
);
1361 if (!bysignal
&& exitcode
== 0) {
1362 redisLog(REDIS_NOTICE
,
1363 "Background saving terminated with success");
1365 server
.lastsave
= time(NULL
);
1366 } else if (!bysignal
&& exitcode
!= 0) {
1367 redisLog(REDIS_WARNING
, "Background saving error");
1369 redisLog(REDIS_WARNING
,
1370 "Background saving terminated by signal %d", WTERMSIG(statloc
));
1371 rdbRemoveTempFile(server
.bgsavechildpid
);
1373 server
.bgsavechildpid
= -1;
1374 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1375 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1376 updateSlavesWaitingBgsave(exitcode
== 0 ? REDIS_OK
: REDIS_ERR
);
1379 /* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1381 void backgroundRewriteDoneHandler(int statloc
) {
1382 int exitcode
= WEXITSTATUS(statloc
);
1383 int bysignal
= WIFSIGNALED(statloc
);
1385 if (!bysignal
&& exitcode
== 0) {
1389 redisLog(REDIS_NOTICE
,
1390 "Background append only file rewriting terminated with success");
1391 /* Now it's time to flush the differences accumulated by the parent */
1392 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) server
.bgrewritechildpid
);
1393 fd
= open(tmpfile
,O_WRONLY
|O_APPEND
);
1395 redisLog(REDIS_WARNING
, "Not able to open the temp append only file produced by the child: %s", strerror(errno
));
1398 /* Flush our data... */
1399 if (write(fd
,server
.bgrewritebuf
,sdslen(server
.bgrewritebuf
)) !=
1400 (signed) sdslen(server
.bgrewritebuf
)) {
1401 redisLog(REDIS_WARNING
, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno
));
1405 redisLog(REDIS_NOTICE
,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server
.bgrewritebuf
));
1406 /* Now our work is to rename the temp file into the stable file. And
1407 * switch the file descriptor used by the server for append only. */
1408 if (rename(tmpfile
,server
.appendfilename
) == -1) {
1409 redisLog(REDIS_WARNING
,"Can't rename the temp append only file into the stable one: %s", strerror(errno
));
1413 /* Mission completed... almost */
1414 redisLog(REDIS_NOTICE
,"Append only file successfully rewritten.");
1415 if (server
.appendfd
!= -1) {
1416 /* If append only is actually enabled... */
1417 close(server
.appendfd
);
1418 server
.appendfd
= fd
;
1419 if (server
.appendfsync
!= APPENDFSYNC_NO
) aof_fsync(fd
);
1420 server
.appendseldb
= -1; /* Make sure it will issue SELECT */
1421 redisLog(REDIS_NOTICE
,"The new append only file was selected for future appends.");
1423 /* If append only is disabled we just generate a dump in this
1424 * format. Why not? */
1427 } else if (!bysignal
&& exitcode
!= 0) {
1428 redisLog(REDIS_WARNING
, "Background append only file rewriting error");
1430 redisLog(REDIS_WARNING
,
1431 "Background append only file rewriting terminated by signal %d",
1435 sdsfree(server
.bgrewritebuf
);
1436 server
.bgrewritebuf
= sdsempty();
1437 aofRemoveTempFile(server
.bgrewritechildpid
);
1438 server
.bgrewritechildpid
= -1;
1441 /* This function is called once a background process of some kind terminates,
1442 * as we want to avoid resizing the hash tables when there is a child in order
1443 * to play well with copy-on-write (otherwise when a resize happens lots of
1444 * memory pages are copied). The goal of this function is to update the ability
1445 * for dict.c to resize the hash tables according to whether or not we have
1446 * running children. */
1447 static void updateDictResizePolicy(void) {
1448 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1)
1451 dictDisableResize();
1454 static int serverCron(struct aeEventLoop
*eventLoop
, long long id
, void *clientData
) {
1455 int j
, loops
= server
.cronloops
++;
1456 REDIS_NOTUSED(eventLoop
);
1458 REDIS_NOTUSED(clientData
);
1460 /* We take a cached value of the unix time in the global state because
1461 * with virtual memory and aging we need to store the current time
1462 * in objects at every object access, and accuracy is not needed.
1463 * To access a global var is faster than calling time(NULL) */
1464 server
.unixtime
= time(NULL
);
1465 /* We have just 21 bits per object for LRU information.
1466 * So we use an (eventually wrapping) LRU clock with minutes resolution.
1468 * When we need to select what object to swap, we compute the minimum
1469 * time distance between the current lruclock and the object last access
1470 * lruclock info. Even if clocks will wrap on overflow, there is
1471 * the interesting property that we are sure that at least
1472 * ABS(A-B) minutes passed between current time and timestamp B.
1474 * This is not precise, but we don't need precision at all, just
1475 * something statistically reasonable.
1477 server
.lruclock
= (time(NULL
)/60)&((1<<21)-1);
1479 /* We received a SIGTERM, shutting down here in a safe way, as it is
1480 * not ok doing so inside the signal handler. */
1481 if (server
.shutdown_asap
) {
1482 if (prepareForShutdown() == REDIS_OK
) exit(0);
1483 redisLog(REDIS_WARNING
,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1486 /* Show some info about non-empty databases */
1487 for (j
= 0; j
< server
.dbnum
; j
++) {
1488 long long size
, used
, vkeys
;
1490 size
= dictSlots(server
.db
[j
].dict
);
1491 used
= dictSize(server
.db
[j
].dict
);
1492 vkeys
= dictSize(server
.db
[j
].expires
);
1493 if (!(loops
% 50) && (used
|| vkeys
)) {
1494 redisLog(REDIS_VERBOSE
,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j
,used
,vkeys
,size
);
1495 /* dictPrintStats(server.dict); */
1499 /* We don't want to resize the hash tables while a background saving
1500 * is in progress: the saving child is created using fork() that is
1501 * implemented with a copy-on-write semantic in most modern systems, so
1502 * if we resize the HT while there is the saving child at work actually
1503 * a lot of memory movements in the parent will cause a lot of pages
1505 if (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1) {
1506 if (!(loops
% 10)) tryResizeHashTables();
1507 if (server
.activerehashing
) incrementallyRehash();
1510 /* Show information about connected clients */
1511 if (!(loops
% 50)) {
1512 redisLog(REDIS_VERBOSE
,"%d clients connected (%d slaves), %zu bytes in use",
1513 listLength(server
.clients
)-listLength(server
.slaves
),
1514 listLength(server
.slaves
),
1515 zmalloc_used_memory());
1518 /* Close connections of timedout clients */
1519 if ((server
.maxidletime
&& !(loops
% 100)) || server
.blpop_blocked_clients
)
1520 closeTimedoutClients();
1522 /* Check if a background saving or AOF rewrite in progress terminated */
1523 if (server
.bgsavechildpid
!= -1 || server
.bgrewritechildpid
!= -1) {
1527 if ((pid
= wait3(&statloc
,WNOHANG
,NULL
)) != 0) {
1528 if (pid
== server
.bgsavechildpid
) {
1529 backgroundSaveDoneHandler(statloc
);
1531 backgroundRewriteDoneHandler(statloc
);
1533 updateDictResizePolicy();
1536 /* If there is not a background saving in progress check if
1537 * we have to save now */
1538 time_t now
= time(NULL
);
1539 for (j
= 0; j
< server
.saveparamslen
; j
++) {
1540 struct saveparam
*sp
= server
.saveparams
+j
;
1542 if (server
.dirty
>= sp
->changes
&&
1543 now
-server
.lastsave
> sp
->seconds
) {
1544 redisLog(REDIS_NOTICE
,"%d changes in %d seconds. Saving...",
1545 sp
->changes
, sp
->seconds
);
1546 rdbSaveBackground(server
.dbfilename
);
1552 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1553 * will use few CPU cycles if there are few expiring keys, otherwise
1554 * it will get more aggressive to avoid that too much memory is used by
1555 * keys that can be removed from the keyspace. */
1556 for (j
= 0; j
< server
.dbnum
; j
++) {
1558 redisDb
*db
= server
.db
+j
;
1560 /* Continue to expire if at the end of the cycle more than 25%
1561 * of the keys were expired. */
1563 long num
= dictSize(db
->expires
);
1564 time_t now
= time(NULL
);
1567 if (num
> REDIS_EXPIRELOOKUPS_PER_CRON
)
1568 num
= REDIS_EXPIRELOOKUPS_PER_CRON
;
1573 if ((de
= dictGetRandomKey(db
->expires
)) == NULL
) break;
1574 t
= (time_t) dictGetEntryVal(de
);
1576 sds key
= dictGetEntryKey(de
);
1577 robj
*keyobj
= createStringObject(key
,sdslen(key
));
1579 dbDelete(db
,keyobj
);
1580 decrRefCount(keyobj
);
1582 server
.stat_expiredkeys
++;
1585 } while (expired
> REDIS_EXPIRELOOKUPS_PER_CRON
/4);
1588 /* Swap a few keys on disk if we are over the memory limit and VM
1589 * is enabled. Try to free objects from the free list first. */
1590 if (vmCanSwapOut()) {
1591 while (server
.vm_enabled
&& zmalloc_used_memory() >
1592 server
.vm_max_memory
)
1596 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
1597 retval
= (server
.vm_max_threads
== 0) ?
1598 vmSwapOneObjectBlocking() :
1599 vmSwapOneObjectThreaded();
1600 if (retval
== REDIS_ERR
&& !(loops
% 300) &&
1601 zmalloc_used_memory() >
1602 (server
.vm_max_memory
+server
.vm_max_memory
/10))
1604 redisLog(REDIS_WARNING
,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1606 /* Note that when using threaded I/O we free just one object,
1607 * because anyway when the I/O thread in charge to swap this
1608 * object out will finish, the handler of completed jobs
1609 * will try to swap more objects if we are still out of memory. */
1610 if (retval
== REDIS_ERR
|| server
.vm_max_threads
> 0) break;
1614 /* Check if we should connect to a MASTER */
1615 if (server
.replstate
== REDIS_REPL_CONNECT
&& !(loops
% 10)) {
1616 redisLog(REDIS_NOTICE
,"Connecting to MASTER...");
1617 if (syncWithMaster() == REDIS_OK
) {
1618 redisLog(REDIS_NOTICE
,"MASTER <-> SLAVE sync succeeded");
1619 if (server
.appendonly
) rewriteAppendOnlyFileBackground();
1625 /* This function gets called every time Redis is entering the
1626 * main loop of the event driven library, that is, before sleeping
1627 * for ready file descriptors. */
1628 static void beforeSleep(struct aeEventLoop
*eventLoop
) {
1629 REDIS_NOTUSED(eventLoop
);
1631 /* Awake clients that got all the swapped keys they requested */
1632 if (server
.vm_enabled
&& listLength(server
.io_ready_clients
)) {
1636 listRewind(server
.io_ready_clients
,&li
);
1637 while((ln
= listNext(&li
))) {
1638 redisClient
*c
= ln
->value
;
1639 struct redisCommand
*cmd
;
1641 /* Resume the client. */
1642 listDelNode(server
.io_ready_clients
,ln
);
1643 c
->flags
&= (~REDIS_IO_WAIT
);
1644 server
.vm_blocked_clients
--;
1645 aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
1646 readQueryFromClient
, c
);
1647 cmd
= lookupCommand(c
->argv
[0]->ptr
);
1648 assert(cmd
!= NULL
);
1651 /* There may be more data to process in the input buffer. */
1652 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0)
1653 processInputBuffer(c
);
1656 /* Write the AOF buffer on disk */
1657 flushAppendOnlyFile();
1660 static void createSharedObjects(void) {
1663 shared
.crlf
= createObject(REDIS_STRING
,sdsnew("\r\n"));
1664 shared
.ok
= createObject(REDIS_STRING
,sdsnew("+OK\r\n"));
1665 shared
.err
= createObject(REDIS_STRING
,sdsnew("-ERR\r\n"));
1666 shared
.emptybulk
= createObject(REDIS_STRING
,sdsnew("$0\r\n\r\n"));
1667 shared
.czero
= createObject(REDIS_STRING
,sdsnew(":0\r\n"));
1668 shared
.cone
= createObject(REDIS_STRING
,sdsnew(":1\r\n"));
1669 shared
.nullbulk
= createObject(REDIS_STRING
,sdsnew("$-1\r\n"));
1670 shared
.nullmultibulk
= createObject(REDIS_STRING
,sdsnew("*-1\r\n"));
1671 shared
.emptymultibulk
= createObject(REDIS_STRING
,sdsnew("*0\r\n"));
1672 shared
.pong
= createObject(REDIS_STRING
,sdsnew("+PONG\r\n"));
1673 shared
.queued
= createObject(REDIS_STRING
,sdsnew("+QUEUED\r\n"));
1674 shared
.wrongtypeerr
= createObject(REDIS_STRING
,sdsnew(
1675 "-ERR Operation against a key holding the wrong kind of value\r\n"));
1676 shared
.nokeyerr
= createObject(REDIS_STRING
,sdsnew(
1677 "-ERR no such key\r\n"));
1678 shared
.syntaxerr
= createObject(REDIS_STRING
,sdsnew(
1679 "-ERR syntax error\r\n"));
1680 shared
.sameobjecterr
= createObject(REDIS_STRING
,sdsnew(
1681 "-ERR source and destination objects are the same\r\n"));
1682 shared
.outofrangeerr
= createObject(REDIS_STRING
,sdsnew(
1683 "-ERR index out of range\r\n"));
1684 shared
.space
= createObject(REDIS_STRING
,sdsnew(" "));
1685 shared
.colon
= createObject(REDIS_STRING
,sdsnew(":"));
1686 shared
.plus
= createObject(REDIS_STRING
,sdsnew("+"));
1687 shared
.select0
= createStringObject("select 0\r\n",10);
1688 shared
.select1
= createStringObject("select 1\r\n",10);
1689 shared
.select2
= createStringObject("select 2\r\n",10);
1690 shared
.select3
= createStringObject("select 3\r\n",10);
1691 shared
.select4
= createStringObject("select 4\r\n",10);
1692 shared
.select5
= createStringObject("select 5\r\n",10);
1693 shared
.select6
= createStringObject("select 6\r\n",10);
1694 shared
.select7
= createStringObject("select 7\r\n",10);
1695 shared
.select8
= createStringObject("select 8\r\n",10);
1696 shared
.select9
= createStringObject("select 9\r\n",10);
1697 shared
.messagebulk
= createStringObject("$7\r\nmessage\r\n",13);
1698 shared
.pmessagebulk
= createStringObject("$8\r\npmessage\r\n",14);
1699 shared
.subscribebulk
= createStringObject("$9\r\nsubscribe\r\n",15);
1700 shared
.unsubscribebulk
= createStringObject("$11\r\nunsubscribe\r\n",18);
1701 shared
.psubscribebulk
= createStringObject("$10\r\npsubscribe\r\n",17);
1702 shared
.punsubscribebulk
= createStringObject("$12\r\npunsubscribe\r\n",19);
1703 shared
.mbulk3
= createStringObject("*3\r\n",4);
1704 shared
.mbulk4
= createStringObject("*4\r\n",4);
1705 for (j
= 0; j
< REDIS_SHARED_INTEGERS
; j
++) {
1706 shared
.integers
[j
] = createObject(REDIS_STRING
,(void*)(long)j
);
1707 shared
.integers
[j
]->encoding
= REDIS_ENCODING_INT
;
1711 static void appendServerSaveParams(time_t seconds
, int changes
) {
1712 server
.saveparams
= zrealloc(server
.saveparams
,sizeof(struct saveparam
)*(server
.saveparamslen
+1));
1713 server
.saveparams
[server
.saveparamslen
].seconds
= seconds
;
1714 server
.saveparams
[server
.saveparamslen
].changes
= changes
;
1715 server
.saveparamslen
++;
1718 static void resetServerSaveParams() {
1719 zfree(server
.saveparams
);
1720 server
.saveparams
= NULL
;
1721 server
.saveparamslen
= 0;
/* Fill the global `server` structure with the built-in default
 * configuration: general settings, append-only file settings, default file
 * names, virtual memory settings, hash encoding limits, three default save
 * points and the replication state. Finally computes the floating point
 * constants R_PosInf, R_NegInf and R_Nan from R_Zero.
 *
 * NOTE(review): R_Zero is read below but no assignment to it is visible in
 * this function as extracted -- confirm it is initialized before use. */
1724 static void initServerConfig() {
1725 server
.dbnum
= REDIS_DEFAULT_DBNUM
;
1726 server
.port
= REDIS_SERVERPORT
;
1727 server
.verbosity
= REDIS_VERBOSE
;
1728 server
.maxidletime
= REDIS_MAXIDLETIME
;
1729 server
.saveparams
= NULL
;
1730 server
.logfile
= NULL
; /* NULL = log on standard output */
1731 server
.bindaddr
= NULL
;
1732 server
.glueoutputbuf
= 1;
1733 server
.daemonize
= 0;
/* Append-only file defaults: disabled, fsync policy "everysec". */
1734 server
.appendonly
= 0;
1735 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
1736 server
.no_appendfsync_on_rewrite
= 0;
1737 server
.lastfsync
= time(NULL
);
1738 server
.appendfd
= -1;
1739 server
.appendseldb
= -1; /* Make sure the first time will not match */
/* File names are heap copies (zstrdup) so that the configuration loader
 * can zfree() and replace them. */
1740 server
.pidfile
= zstrdup("/var/run/redis.pid");
1741 server
.dbfilename
= zstrdup("dump.rdb");
1742 server
.appendfilename
= zstrdup("appendonly.aof");
1743 server
.requirepass
= NULL
;
1744 server
.rdbcompression
= 1;
1745 server
.activerehashing
= 1;
1746 server
.maxclients
= 0;
1747 server
.blpop_blocked_clients
= 0;
1748 server
.maxmemory
= 0;
/* Virtual memory defaults: disabled. */
1749 server
.vm_enabled
= 0;
1750 server
.vm_swap_file
= zstrdup("/tmp/redis-%p.vm");
1751 server
.vm_page_size
= 256; /* 256 bytes per page */
1752 server
.vm_pages
= 1024*1024*100; /* 104 millions of pages */
1753 server
.vm_max_memory
= 1024LL*1024*1024*1; /* 1 GB of RAM */
1754 server
.vm_max_threads
= 4;
1755 server
.vm_blocked_clients
= 0;
1756 server
.hash_max_zipmap_entries
= REDIS_HASH_MAX_ZIPMAP_ENTRIES
;
1757 server
.hash_max_zipmap_value
= REDIS_HASH_MAX_ZIPMAP_VALUE
;
1758 server
.shutdown_asap
= 0;
/* Start from an empty save-point list, then install the defaults. */
1760 resetServerSaveParams();
1762 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1763 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1764 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1765 /* Replication related */
1767 server
.masterauth
= NULL
;
1768 server
.masterhost
= NULL
;
1769 server
.masterport
= 6379;
1770 server
.master
= NULL
;
1771 server
.replstate
= REDIS_REPL_NONE
;
1773 /* Double constants initialization */
1775 R_PosInf
= 1.0/R_Zero
;
1776 R_NegInf
= -1.0/R_Zero
;
1777 R_Nan
= R_Zero
/R_Zero
;
/* Build the runtime state of the server from the configuration stored in
 * `server`: signal dispositions, the /dev/null stream, client/slave/monitor
 * lists, shared objects, the event loop, the database array with its
 * per-database dictionaries, the listening TCP socket, Pub/Sub structures,
 * statistics counters, the serverCron timer and the accept handler. When
 * append-only mode is enabled the append-only file is opened, and when
 * virtual memory is enabled vmInit() is called. */
1780 static void initServer() {
/* SIGHUP and SIGPIPE are ignored. */
1783 signal(SIGHUP
, SIG_IGN
);
1784 signal(SIGPIPE
, SIG_IGN
);
1785 setupSigSegvAction();
1787 server
.devnull
= fopen("/dev/null","w");
1788 if (server
.devnull
== NULL
) {
/* NOTE(review): the message prints server.neterr, but fopen() reports its
 * failure through errno; server.neterr is only passed to anetTcpServer()
 * further down, so it looks unrelated to this failure -- confirm whether
 * strerror(errno) was intended. */
1789 redisLog(REDIS_WARNING
, "Can't open /dev/null: %s", server
.neterr
);
1792 server
.clients
= listCreate();
1793 server
.slaves
= listCreate();
1794 server
.monitors
= listCreate();
1795 server
.objfreelist
= listCreate();
1796 createSharedObjects();
1797 server
.el
= aeCreateEventLoop();
1798 server
.db
= zmalloc(sizeof(redisDb
)*server
.dbnum
);
/* Open the listening socket; a result of -1 is logged together with the
 * error text left in server.neterr. */
1799 server
.fd
= anetTcpServer(server
.neterr
, server
.port
, server
.bindaddr
);
1800 if (server
.fd
== -1) {
1801 redisLog(REDIS_WARNING
, "Opening TCP port: %s", server
.neterr
);
/* Create the dictionaries of every database. */
1804 for (j
= 0; j
< server
.dbnum
; j
++) {
1805 server
.db
[j
].dict
= dictCreate(&dbDictType
,NULL
);
1806 server
.db
[j
].expires
= dictCreate(&keyptrDictType
,NULL
);
1807 server
.db
[j
].blocking_keys
= dictCreate(&keylistDictType
,NULL
);
1808 server
.db
[j
].watched_keys
= dictCreate(&keylistDictType
,NULL
);
/* io_keys is only created when virtual memory is enabled. */
1809 if (server
.vm_enabled
)
1810 server
.db
[j
].io_keys
= dictCreate(&keylistDictType
,NULL
);
1811 server
.db
[j
].id
= j
;
/* Pub/Sub state: channel dictionary and pattern list. */
1813 server
.pubsub_channels
= dictCreate(&keylistDictType
,NULL
);
1814 server
.pubsub_patterns
= listCreate();
1815 listSetFreeMethod(server
.pubsub_patterns
,freePubsubPattern
);
1816 listSetMatchMethod(server
.pubsub_patterns
,listMatchPubsubPattern
);
1817 server
.cronloops
= 0;
/* -1 for the two child pid fields. */
1818 server
.bgsavechildpid
= -1;
1819 server
.bgrewritechildpid
= -1;
1820 server
.bgrewritebuf
= sdsempty();
1821 server
.aofbuf
= sdsempty();
1822 server
.lastsave
= time(NULL
);
1824 server
.stat_numcommands
= 0;
1825 server
.stat_numconnections
= 0;
1826 server
.stat_expiredkeys
= 0;
1827 server
.stat_starttime
= time(NULL
);
1828 server
.unixtime
= time(NULL
);
/* Register the serverCron timer and the handler that accepts new
 * connections on the listening socket. */
1829 aeCreateTimeEvent(server
.el
, 1, serverCron
, NULL
, NULL
);
1830 if (aeCreateFileEvent(server
.el
, server
.fd
, AE_READABLE
,
1831 acceptHandler
, NULL
) == AE_ERR
) oom("creating file event");
/* Open (creating it if needed) the append-only file in append mode. */
1833 if (server
.appendonly
) {
1834 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
1835 if (server
.appendfd
== -1) {
1836 redisLog(REDIS_WARNING
, "Can't open the append-only file: %s",
1842 if (server
.vm_enabled
) vmInit();
1845 /* Empty the whole database */
/* For every database, adds the number of keys in its main dictionary to
 * `removed` and then empties both the main and the expires dictionaries.
 * The return type is long long; presumably `removed` is what is returned
 * (the return statement is not visible here -- confirm). */
1846 static long long emptyDb() {
1848 long long removed
= 0;
1850 for (j
= 0; j
< server
.dbnum
; j
++) {
/* Count the keys before dropping them. */
1851 removed
+= dictSize(server
.db
[j
].dict
);
1852 dictEmpty(server
.db
[j
].dict
);
1853 dictEmpty(server
.db
[j
].expires
);
/* Translate a "yes"/"no" string into an integer, ignoring case:
 * "yes" gives 1 and "no" gives 0. The fallback for any other string is not
 * visible here; the configuration loader compares the result against -1, so
 * presumably -1 is returned in that case -- confirm. */
1858 static int yesnotoi(char *s
) {
1859 if (!strcasecmp(s
,"yes")) return 1;
1860 else if (!strcasecmp(s
,"no")) return 0;
1864 /* I agree, this is a very rudimentary way to load a configuration...
1865 will improve later if the config gets more complex */
/* Read the configuration file `filename` line by line and apply every
 * directive to the global `server` structure.
 *
 * Each line is trimmed of blanks, tabs, CR and LF; lines that are empty or
 * start with '#' are skipped. The remaining lines are split on single
 * spaces: the first token (lowercased) is the directive name and the
 * following tokens are its arguments. Every directive requires an exact
 * argument count. On an invalid value, or an unknown directive, `err` is
 * set and control jumps to the `loaderr` label, where the line number, the
 * offending line and the error text are printed to stderr.
 *
 * The filename "-" is treated specially; presumably it selects standard
 * input, as suggested by the `fp != stdin` test before fclose() -- confirm.
 *
 * Numeric arguments are parsed with atoi(), which gives no indication of
 * malformed input. */
1866 static void loadServerConfig(char *filename
) {
1868 char buf
[REDIS_CONFIGLINE_MAX
+1], *err
= NULL
;
1872 if (filename
[0] == '-' && filename
[1] == '\0')
1875 if ((fp
= fopen(filename
,"r")) == NULL
) {
1876 redisLog(REDIS_WARNING
, "Fatal error, can't open config file '%s'", filename
);
1881 while(fgets(buf
,REDIS_CONFIGLINE_MAX
+1,fp
) != NULL
) {
1887 line
= sdstrim(line
," \t\r\n");
1889 /* Skip comments and blank lines*/
1890 if (line
[0] == '#' || line
[0] == '\0') {
1895 /* Split into arguments */
1896 argv
= sdssplitlen(line
,sdslen(line
)," ",1,&argc
);
1897 sdstolower(argv
[0]);
1899 /* Execute config directives */
1900 if (!strcasecmp(argv
[0],"timeout") && argc
== 2) {
1901 server
.maxidletime
= atoi(argv
[1]);
1902 if (server
.maxidletime
< 0) {
1903 err
= "Invalid timeout value"; goto loaderr
;
1905 } else if (!strcasecmp(argv
[0],"port") && argc
== 2) {
1906 server
.port
= atoi(argv
[1]);
1907 if (server
.port
< 1 || server
.port
> 65535) {
1908 err
= "Invalid port"; goto loaderr
;
/* NOTE(review): unlike pidfile/dbfilename below, the previous bindaddr value
 * is not released before being overwritten; a repeated "bind" directive
 * would leak the earlier copy. */
1910 } else if (!strcasecmp(argv
[0],"bind") && argc
== 2) {
1911 server
.bindaddr
= zstrdup(argv
[1]);
/* "save <seconds> <changes>" adds one more save point. */
1912 } else if (!strcasecmp(argv
[0],"save") && argc
== 3) {
1913 int seconds
= atoi(argv
[1]);
1914 int changes
= atoi(argv
[2]);
1915 if (seconds
< 1 || changes
< 0) {
1916 err
= "Invalid save parameters"; goto loaderr
;
1918 appendServerSaveParams(seconds
,changes
);
/* "dir" changes the working directory of the process immediately. */
1919 } else if (!strcasecmp(argv
[0],"dir") && argc
== 2) {
1920 if (chdir(argv
[1]) == -1) {
1921 redisLog(REDIS_WARNING
,"Can't chdir to '%s': %s",
1922 argv
[1], strerror(errno
));
1925 } else if (!strcasecmp(argv
[0],"loglevel") && argc
== 2) {
1926 if (!strcasecmp(argv
[1],"debug")) server
.verbosity
= REDIS_DEBUG
;
1927 else if (!strcasecmp(argv
[1],"verbose")) server
.verbosity
= REDIS_VERBOSE
;
1928 else if (!strcasecmp(argv
[1],"notice")) server
.verbosity
= REDIS_NOTICE
;
1929 else if (!strcasecmp(argv
[1],"warning")) server
.verbosity
= REDIS_WARNING
;
/* NOTE(review): the error text below does not list "verbose", although that
 * level is accepted above. */
1931 err
= "Invalid log level. Must be one of debug, notice, warning";
1934 } else if (!strcasecmp(argv
[0],"logfile") && argc
== 2) {
/* The special name "stdout" maps to a NULL logfile. */
1937 server
.logfile
= zstrdup(argv
[1]);
1938 if (!strcasecmp(server
.logfile
,"stdout")) {
1939 zfree(server
.logfile
);
1940 server
.logfile
= NULL
;
1942 if (server
.logfile
) {
1943 /* Test if we are able to open the file. The server will not
1944 * be able to abort just for this problem later... */
1945 logfp
= fopen(server
.logfile
,"a");
1946 if (logfp
== NULL
) {
1947 err
= sdscatprintf(sdsempty(),
1948 "Can't open the log file: %s", strerror(errno
));
1953 } else if (!strcasecmp(argv
[0],"databases") && argc
== 2) {
1954 server
.dbnum
= atoi(argv
[1]);
1955 if (server
.dbnum
< 1) {
1956 err
= "Invalid number of databases"; goto loaderr
;
/* "include" loads another configuration file through a recursive call. */
1958 } else if (!strcasecmp(argv
[0],"include") && argc
== 2) {
1959 loadServerConfig(argv
[1]);
1960 } else if (!strcasecmp(argv
[0],"maxclients") && argc
== 2) {
1961 server
.maxclients
= atoi(argv
[1]);
1962 } else if (!strcasecmp(argv
[0],"maxmemory") && argc
== 2) {
1963 server
.maxmemory
= memtoll(argv
[1],NULL
);
/* "slaveof <host> <port>" records the master address and moves the
 * replication state to REDIS_REPL_CONNECT. */
1964 } else if (!strcasecmp(argv
[0],"slaveof") && argc
== 3) {
1965 server
.masterhost
= sdsnew(argv
[1]);
1966 server
.masterport
= atoi(argv
[2]);
1967 server
.replstate
= REDIS_REPL_CONNECT
;
/* NOTE(review): previous masterauth value is overwritten without zfree(). */
1968 } else if (!strcasecmp(argv
[0],"masterauth") && argc
== 2) {
1969 server
.masterauth
= zstrdup(argv
[1]);
/* The yes/no directives below store the yesnotoi() result directly in the
 * field and reject the line when that result is -1. */
1970 } else if (!strcasecmp(argv
[0],"glueoutputbuf") && argc
== 2) {
1971 if ((server
.glueoutputbuf
= yesnotoi(argv
[1])) == -1) {
1972 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1974 } else if (!strcasecmp(argv
[0],"rdbcompression") && argc
== 2) {
1975 if ((server
.rdbcompression
= yesnotoi(argv
[1])) == -1) {
1976 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1978 } else if (!strcasecmp(argv
[0],"activerehashing") && argc
== 2) {
1979 if ((server
.activerehashing
= yesnotoi(argv
[1])) == -1) {
1980 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1982 } else if (!strcasecmp(argv
[0],"daemonize") && argc
== 2) {
1983 if ((server
.daemonize
= yesnotoi(argv
[1])) == -1) {
1984 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1986 } else if (!strcasecmp(argv
[0],"appendonly") && argc
== 2) {
1987 if ((server
.appendonly
= yesnotoi(argv
[1])) == -1) {
1988 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1990 } else if (!strcasecmp(argv
[0],"appendfilename") && argc
== 2) {
1991 zfree(server
.appendfilename
);
1992 server
.appendfilename
= zstrdup(argv
[1]);
1993 } else if (!strcasecmp(argv
[0],"no-appendfsync-on-rewrite")
1995 if ((server
.no_appendfsync_on_rewrite
= yesnotoi(argv
[1])) == -1) {
1996 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
1998 } else if (!strcasecmp(argv
[0],"appendfsync") && argc
== 2) {
1999 if (!strcasecmp(argv
[1],"no")) {
2000 server
.appendfsync
= APPENDFSYNC_NO
;
2001 } else if (!strcasecmp(argv
[1],"always")) {
2002 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
2003 } else if (!strcasecmp(argv
[1],"everysec")) {
2004 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
2006 err
= "argument must be 'no', 'always' or 'everysec'";
/* NOTE(review): previous requirepass value is overwritten without zfree(). */
2009 } else if (!strcasecmp(argv
[0],"requirepass") && argc
== 2) {
2010 server
.requirepass
= zstrdup(argv
[1]);
2011 } else if (!strcasecmp(argv
[0],"pidfile") && argc
== 2) {
2012 zfree(server
.pidfile
);
2013 server
.pidfile
= zstrdup(argv
[1]);
2014 } else if (!strcasecmp(argv
[0],"dbfilename") && argc
== 2) {
2015 zfree(server
.dbfilename
);
2016 server
.dbfilename
= zstrdup(argv
[1]);
2017 } else if (!strcasecmp(argv
[0],"vm-enabled") && argc
== 2) {
2018 if ((server
.vm_enabled
= yesnotoi(argv
[1])) == -1) {
2019 err
= "argument must be 'yes' or 'no'"; goto loaderr
;
2021 } else if (!strcasecmp(argv
[0],"vm-swap-file") && argc
== 2) {
2022 zfree(server
.vm_swap_file
);
2023 server
.vm_swap_file
= zstrdup(argv
[1]);
2024 } else if (!strcasecmp(argv
[0],"vm-max-memory") && argc
== 2) {
2025 server
.vm_max_memory
= memtoll(argv
[1],NULL
);
2026 } else if (!strcasecmp(argv
[0],"vm-page-size") && argc
== 2) {
2027 server
.vm_page_size
= memtoll(argv
[1], NULL
);
2028 } else if (!strcasecmp(argv
[0],"vm-pages") && argc
== 2) {
2029 server
.vm_pages
= memtoll(argv
[1], NULL
);
2030 } else if (!strcasecmp(argv
[0],"vm-max-threads") && argc
== 2) {
2031 server
.vm_max_threads
= strtoll(argv
[1], NULL
, 10);
2032 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-entries") && argc
== 2){
2033 server
.hash_max_zipmap_entries
= memtoll(argv
[1], NULL
);
2034 } else if (!strcasecmp(argv
[0],"hash-max-zipmap-value") && argc
== 2){
2035 server
.hash_max_zipmap_value
= memtoll(argv
[1], NULL
);
2037 err
= "Bad directive or wrong number of arguments"; goto loaderr
;
2039 for (j
= 0; j
< argc
; j
++)
/* The stream is closed unless it is stdin. */
2044 if (fp
!= stdin
) fclose(fp
);
/* Error report: line number, offending line and error message on stderr. */
2048 fprintf(stderr
, "\n*** FATAL CONFIG FILE ERROR ***\n");
2049 fprintf(stderr
, "Reading the configuration file, at line %d\n", linenum
);
2050 fprintf(stderr
, ">>> '%s'\n", line
);
2051 fprintf(stderr
, "%s\n", err
);
/* Drop one reference from every object in the client's argument vectors:
 * first the c->argc entries of c->argv, then the c->mbargc entries of
 * c->mbargv (the multi-bulk vector). The arrays themselves are not freed by
 * the visible statements. */
2055 static void freeClientArgv(redisClient
*c
) {
2058 for (j
= 0; j
< c
->argc
; j
++)
2059 decrRefCount(c
->argv
[j
]);
2060 for (j
= 0; j
< c
->mbargc
; j
++)
2061 decrRefCount(c
->mbargv
[j
]);
/* Tear down a client and detach it from every server-side structure that
 * references it. In order, the visible steps are: free the query buffer,
 * unblock the client if it is blocked, release the watched-keys list,
 * unsubscribe from all Pub/Sub channels and patterns, delete the readable
 * and writable file events, release the reply list, remove the client from
 * server.clients, undo virtual memory bookkeeping (io_ready_clients and
 * io_keys), look the client up in the slaves or monitors list when it is a
 * slave, reset replication state when the client was the master, and free
 * the MULTI state. */
2066 static void freeClient(redisClient
*c
) {
2069 /* Note that if the client we are freeing is blocked into a blocking
2070 * call, we have to set querybuf to NULL *before* to call
2071 * unblockClientWaitingData() to avoid processInputBuffer() will get
2072 * called. Also it is important to remove the file events after
2073 * this, because this call adds the READABLE event. */
2074 sdsfree(c
->querybuf
);
2076 if (c
->flags
& REDIS_BLOCKED
)
2077 unblockClientWaitingData(c
);
2079 /* UNWATCH all the keys */
2081 listRelease(c
->watched_keys
);
2082 /* Unsubscribe from all the pubsub channels */
2083 pubsubUnsubscribeAllChannels(c
,0);
2084 pubsubUnsubscribeAllPatterns(c
,0);
2085 dictRelease(c
->pubsub_channels
);
2086 listRelease(c
->pubsub_patterns
);
2087 /* Obvious cleanup */
2088 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
2089 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
2090 listRelease(c
->reply
);
2093 /* Remove from the list of clients */
/* The client is asserted to be present in server.clients. */
2094 ln
= listSearchKey(server
.clients
,c
);
2095 redisAssert(ln
!= NULL
);
2096 listDelNode(server
.clients
,ln
);
2097 /* Remove from the list of clients that are now ready to be restarted
2098 * after waiting for swapped keys */
2099 if (c
->flags
& REDIS_IO_WAIT
&& listLength(c
->io_keys
) == 0) {
2100 ln
= listSearchKey(server
.io_ready_clients
,c
);
2102 listDelNode(server
.io_ready_clients
,ln
);
2103 server
.vm_blocked_clients
--;
2106 /* Remove from the list of clients waiting for swapped keys */
/* The loop runs until io_keys is empty, so it relies on
 * dontWaitForSwappedKey() removing the head entry -- TODO confirm. */
2107 while (server
.vm_enabled
&& listLength(c
->io_keys
)) {
2108 ln
= listFirst(c
->io_keys
);
2109 dontWaitForSwappedKey(c
,ln
->value
);
2111 listRelease(c
->io_keys
);
2112 /* Master/slave cleanup */
2113 if (c
->flags
& REDIS_SLAVE
) {
2114 if (c
->replstate
== REDIS_REPL_SEND_BULK
&& c
->repldbfd
!= -1)
/* A slave flagged as MONITOR lives in server.monitors, otherwise in
 * server.slaves. */
2116 list
*l
= (c
->flags
& REDIS_MONITOR
) ? server
.monitors
: server
.slaves
;
2117 ln
= listSearchKey(l
,c
);
2118 redisAssert(ln
!= NULL
);
/* Losing the master connection puts replication back into the CONNECT
 * state. */
2121 if (c
->flags
& REDIS_MASTER
) {
2122 server
.master
= NULL
;
2123 server
.replstate
= REDIS_REPL_CONNECT
;
2125 /* Release memory */
2128 freeClientMultiState(c
);
/* Upper bound, in bytes, of the stack buffer used to glue reply objects. */
2132 #define GLUEREPLY_UP_TO (1024)
/* Merge reply objects of the client into one single object.
 *
 * The reply list is walked and, while the accumulated length plus the
 * current object still fits in GLUEREPLY_UP_TO bytes, the object's bytes are
 * copied into a local buffer and its node is removed from the list. If
 * nothing was copied the function returns early; otherwise a new string
 * object built from the buffer is pushed at the head of the reply list. */
2133 static void glueReplyBuffersIfNeeded(redisClient
*c
) {
2135 char buf
[GLUEREPLY_UP_TO
];
2140 listRewind(c
->reply
,&li
);
2141 while((ln
= listNext(&li
))) {
2145 objlen
= sdslen(o
->ptr
);
2146 if (copylen
+ objlen
<= GLUEREPLY_UP_TO
) {
2147 memcpy(buf
+copylen
,o
->ptr
,objlen
);
2149 listDelNode(c
->reply
,ln
);
2151 if (copylen
== 0) return;
2155 /* Now the output buffer is empty, add the new single element */
2156 o
= createObject(REDIS_STRING
,sdsnewlen(buf
,copylen
));
2157 listAddNodeHead(c
->reply
,o
);
/* Event loop handler invoked when the client socket `fd` is writable.
 * `privdata` is the redisClient; `mask` is unused.
 *
 * When output gluing is disabled, the reply list is longer than
 * REDIS_WRITEV_THRESHOLD and the client is not the master, the work is
 * delegated to sendReplyToClientWritev(). Otherwise the head object of the
 * reply list is written with write(), tracking partial writes in
 * c->sentlen, until the list is empty, a write returns <= 0, or more than
 * REDIS_MAX_WRITE_PER_EVENT bytes were written in this call. When the reply
 * list becomes empty the writable event is removed. */
2160 static void sendReplyToClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2161 redisClient
*c
= privdata
;
2162 int nwritten
= 0, totwritten
= 0, objlen
;
2165 REDIS_NOTUSED(mask
);
2167 /* Use writev() if we have enough buffers to send */
2168 if (!server
.glueoutputbuf
&&
2169 listLength(c
->reply
) > REDIS_WRITEV_THRESHOLD
&&
2170 !(c
->flags
& REDIS_MASTER
))
2172 sendReplyToClientWritev(el
, fd
, privdata
, mask
);
2176 while(listLength(c
->reply
)) {
/* Gluing is attempted only when there is more than one pending object. */
2177 if (server
.glueoutputbuf
&& listLength(c
->reply
) > 1)
2178 glueReplyBuffersIfNeeded(c
);
2180 o
= listNodeValue(listFirst(c
->reply
));
2181 objlen
= sdslen(o
->ptr
);
2184 listDelNode(c
->reply
,listFirst(c
->reply
));
2188 if (c
->flags
& REDIS_MASTER
) {
2189 /* Don't reply to a master */
/* The remaining bytes are accounted as written without touching the
 * socket. */
2190 nwritten
= objlen
- c
->sentlen
;
2192 nwritten
= write(fd
, ((char*)o
->ptr
)+c
->sentlen
, objlen
- c
->sentlen
);
2193 if (nwritten
<= 0) break;
2195 c
->sentlen
+= nwritten
;
2196 totwritten
+= nwritten
;
2197 /* If we fully sent the object on head go to the next one */
2198 if (c
->sentlen
== objlen
) {
2199 listDelNode(c
->reply
,listFirst(c
->reply
));
2202 /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
2203 * bytes, in a single threaded server it's a good idea to serve
2204 * other clients as well, even if a very large request comes from
2205 * super fast link that is always able to accept data (in real world
2206 * scenario think about 'KEYS *' against the loopback interface) */
2207 if (totwritten
> REDIS_MAX_WRITE_PER_EVENT
) break;
/* EAGAIN is tested separately from the other errors, which are logged. */
2209 if (nwritten
== -1) {
2210 if (errno
== EAGAIN
) {
2213 redisLog(REDIS_VERBOSE
,
2214 "Error writing to client: %s", strerror(errno
));
2219 if (totwritten
> 0) c
->lastinteraction
= time(NULL
);
/* Nothing left to send: stop watching the socket for writability. */
2220 if (listLength(c
->reply
) == 0) {
2222 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* writev() based variant of the writable-socket handler. `privdata` is the
 * redisClient; `mask` is unused.
 *
 * While the reply list is not empty, up to REDIS_WRITEV_IOVEC_COUNT iovec
 * entries are filled from consecutive reply objects (the first one starting
 * at offset c->sentlen) and written with a single writev() call. The number
 * of bytes written is then used to remove the fully sent objects from the
 * list and to record in c->sentlen how much of the new head was sent. When
 * the reply list becomes empty the writable event is removed.
 *
 * NOTE(review): `willwrite` is declared without an initializer and is
 * updated with += below; confirm it is reset in lines not visible here. */
2226 static void sendReplyToClientWritev(aeEventLoop
*el
, int fd
, void *privdata
, int mask
)
2228 redisClient
*c
= privdata
;
2229 int nwritten
= 0, totwritten
= 0, objlen
, willwrite
;
2231 struct iovec iov
[REDIS_WRITEV_IOVEC_COUNT
];
2232 int offset
, ion
= 0;
2234 REDIS_NOTUSED(mask
);
2237 while (listLength(c
->reply
)) {
2238 offset
= c
->sentlen
;
2242 /* fill-in the iov[] array */
2243 for(node
= listFirst(c
->reply
); node
; node
= listNextNode(node
)) {
2244 o
= listNodeValue(node
);
2245 objlen
= sdslen(o
->ptr
);
/* Per-event byte budget check. */
2247 if (totwritten
+ objlen
- offset
> REDIS_MAX_WRITE_PER_EVENT
)
2250 if(ion
== REDIS_WRITEV_IOVEC_COUNT
)
2251 break; /* no more iovecs */
2253 iov
[ion
].iov_base
= ((char*)o
->ptr
) + offset
;
2254 iov
[ion
].iov_len
= objlen
- offset
;
2255 willwrite
+= objlen
- offset
;
2256 offset
= 0; /* just for the first item */
2263 /* write all collected blocks at once */
2264 if((nwritten
= writev(fd
, iov
, ion
)) < 0) {
2265 if (errno
!= EAGAIN
) {
2266 redisLog(REDIS_VERBOSE
,
2267 "Error writing to client: %s", strerror(errno
));
2274 totwritten
+= nwritten
;
2275 offset
= c
->sentlen
;
2277 /* remove written robjs from c->reply */
2278 while (nwritten
&& listLength(c
->reply
)) {
2279 o
= listNodeValue(listFirst(c
->reply
));
2280 objlen
= sdslen(o
->ptr
);
/* The head object was sent completely: drop it and keep consuming. */
2282 if(nwritten
>= objlen
- offset
) {
2283 listDelNode(c
->reply
, listFirst(c
->reply
));
2284 nwritten
-= objlen
- offset
;
2288 c
->sentlen
+= nwritten
;
2296 c
->lastinteraction
= time(NULL
);
2298 if (listLength(c
->reply
) == 0) {
2300 aeDeleteFileEvent(server
.el
,c
->fd
,AE_WRITABLE
);
/* Comparison callback with the qsort()/bsearch() signature: both arguments
 * point to struct redisCommand and are compared by their `name` field. The
 * comparison function applied to the two names is not visible in this
 * extract. */
2304 static int qsortRedisCommands(const void *r1
, const void *r2
) {
2306 ((struct redisCommand
*)r1
)->name
,
2307 ((struct redisCommand
*)r2
)->name
);
/* Build the runtime command table: allocate `commandTable`, copy the
 * read-only table into it and order the copy using qsortRedisCommands as
 * comparison callback.
 *
 * NOTE(review): the malloc() result is passed to memcpy() without a NULL
 * check in the visible lines. */
2310 static void sortCommandTable() {
2311 /* Copy and sort the read-only version of the command table */
2312 commandTable
= (struct redisCommand
*)malloc(sizeof(readonlyCommandTable
));
2313 memcpy(commandTable
,readonlyCommandTable
,sizeof(readonlyCommandTable
));
2315 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2316 sizeof(struct redisCommand
),qsortRedisCommands
);
/* Look up a command by name. A temporary struct redisCommand whose first
 * field is `name` is used as the search key, and qsortRedisCommands is the
 * comparison callback, with an element count derived from
 * readonlyCommandTable. The search call itself is not visible here;
 * processCommand() tests the result before using it, so presumably NULL
 * means "not found" -- confirm. */
2319 static struct redisCommand
*lookupCommand(char *name
) {
2320 struct redisCommand tmp
= {name
,NULL
,0,0,NULL
,0,0,0};
2324 sizeof(readonlyCommandTable
)/sizeof(struct redisCommand
),
2325 sizeof(struct redisCommand
),
2326 qsortRedisCommands
);
2329 /* resetClient prepare the client to process the next command */
/* NOTE(review): the body of this function is not visible in this extract. */
2330 static void resetClient(redisClient
*c
) {
2336 /* Call() is the core of Redis execution of a command */
/* server.dirty is sampled before and after the command so that `dirty`
 * holds the number of changes the command produced. Based on that value the
 * command is propagated:
 *  - to the append-only file, when appendonly is on and dirty != 0;
 *  - to the slaves, when there are slaves and either dirty != 0 or the
 *    command has the REDIS_CMD_FORCE_REPLICATION flag;
 *  - to the monitors, whenever at least one monitor is attached.
 * Finally the executed-commands counter is incremented. The invocation of
 * the command itself is not visible in this extract. */
2337 static void call(redisClient
*c
, struct redisCommand
*cmd
) {
2340 dirty
= server
.dirty
;
2342 dirty
= server
.dirty
-dirty
;
2344 if (server
.appendonly
&& dirty
)
2345 feedAppendOnlyFile(cmd
,c
->db
->id
,c
->argv
,c
->argc
);
2346 if ((dirty
|| cmd
->flags
& REDIS_CMD_FORCE_REPLICATION
) &&
2347 listLength(server
.slaves
))
2348 replicationFeedSlaves(server
.slaves
,c
->db
->id
,c
->argv
,c
->argc
);
2349 if (listLength(server
.monitors
))
2350 replicationFeedMonitors(server
.monitors
,c
->db
->id
,c
->argv
,c
->argc
);
2351 server
.stat_numcommands
++;
2354 /* If this function gets called we already read a whole
2355 * command, arguments are in the client argv/argc fields.
2356 * processCommand() execute the command or prepare the
2357 * server for a bulk read from the client.
2359 * If 1 is returned the client is still alive and valid and
2360 * other operations can be performed by the caller. Otherwise
2361 * if 0 is returned the client was destroyed (i.e. after QUIT). */
/* Overview of the visible stages:
 *  1. optional memory reclaim when maxmemory is set;
 *  2. multi-bulk protocol state machine ("*<count>" header, then
 *     "$<len>" + payload for each argument, collected in c->mbargv and
 *     finally swapped with c->argv/c->argc);
 *  3. QUIT special case;
 *  4. command lookup, arity check and bulk-argument handling;
 *  5. authentication, maxmemory and Pub/Sub context checks;
 *  6. queuing inside MULTI, or blocking on swapped keys when VM is enabled. */
2362 static int processCommand(redisClient
*c
) {
2363 struct redisCommand
*cmd
;
2365 /* Free some memory if needed (maxmemory setting) */
2366 if (server
.maxmemory
) freeMemoryIfNeeded();
2368 /* Handle the multi bulk command type. This is an alternative protocol
2369 * supported by Redis in order to receive commands that are composed of
2370 * multiple binary-safe "bulk" arguments. The latency of processing is
2371 * a bit higher but this allows things like multi-sets, so if this
2372 * protocol is used only for MSET and similar commands this is a big win. */
2373 if (c
->multibulk
== 0 && c
->argc
== 1 && ((char*)(c
->argv
[0]->ptr
))[0] == '*') {
/* The argument count follows the '*' character. */
2374 c
->multibulk
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2375 if (c
->multibulk
<= 0) {
2379 decrRefCount(c
->argv
[c
->argc
-1]);
2383 } else if (c
->multibulk
) {
2384 if (c
->bulklen
== -1) {
/* Every multi-bulk argument must be announced by a "$<len>" line. */
2385 if (((char*)c
->argv
[0]->ptr
)[0] != '$') {
2386 addReplySds(c
,sdsnew("-ERR multi bulk protocol error\r\n"));
2390 int bulklen
= atoi(((char*)c
->argv
[0]->ptr
)+1);
2391 decrRefCount(c
->argv
[0]);
/* Lengths outside the range 0..1GB are rejected. */
2392 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2394 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2399 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
/* Append the argument just read to the multi-bulk argument vector. */
2403 c
->mbargv
= zrealloc(c
->mbargv
,(sizeof(robj
*))*(c
->mbargc
+1));
2404 c
->mbargv
[c
->mbargc
] = c
->argv
[0];
2408 if (c
->multibulk
== 0) {
2412 /* Here we need to swap the multi-bulk argc/argv with the
2413 * normal argc/argv of the client structure. */
2415 c
->argv
= c
->mbargv
;
2416 c
->mbargv
= auxargv
;
2419 c
->argc
= c
->mbargc
;
2420 c
->mbargc
= auxargc
;
2422 /* We need to set bulklen to something different than -1
2423 * in order for the code below to process the command without
2424 * to try to read the last argument of a bulk command as
2425 * a special argument. */
2427 /* continue below and process the command */
2434 /* -- end of multi bulk commands processing -- */
2436 /* The QUIT command is handled as a special case. Normal command
2437 * procs are unable to close the client connection safely */
2438 if (!strcasecmp(c
->argv
[0]->ptr
,"quit")) {
2443 /* Now lookup the command and check ASAP about trivial error conditions
2444 * such wrong arity, bad command name and so forth. */
2445 cmd
= lookupCommand(c
->argv
[0]->ptr
);
2448 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2449 (char*)c
->argv
[0]->ptr
));
/* Arity check: a positive arity must match argc exactly; a negative arity
 * -N means "at least N arguments". */
2452 } else if ((cmd
->arity
> 0 && cmd
->arity
!= c
->argc
) ||
2453 (c
->argc
< -cmd
->arity
)) {
2455 sdscatprintf(sdsempty(),
2456 "-ERR wrong number of arguments for '%s' command\r\n",
2460 } else if (cmd
->flags
& REDIS_CMD_BULK
&& c
->bulklen
== -1) {
2461 /* This is a bulk command, we have to read the last argument yet. */
2462 int bulklen
= atoi(c
->argv
[c
->argc
-1]->ptr
);
2464 decrRefCount(c
->argv
[c
->argc
-1]);
2465 if (bulklen
< 0 || bulklen
> 1024*1024*1024) {
2467 addReplySds(c
,sdsnew("-ERR invalid bulk write count\r\n"));
2472 c
->bulklen
= bulklen
+2; /* add two bytes for CR+LF */
2473 /* It is possible that the bulk read is already in the
2474 * buffer. Check this condition and handle it accordingly.
2475 * This is just a fast path, alternative to call processInputBuffer().
2476 * It's a good idea since the code is small and this condition
2477 * happens most of the times. */
2478 if ((signed)sdslen(c
->querybuf
) >= c
->bulklen
) {
/* The payload excludes the trailing CRLF, hence bulklen-2. */
2479 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2481 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2483 /* Otherwise return... there is to read the last argument
2484 * from the socket. */
2488 /* Let's try to encode the bulk object to save space. */
2489 if (cmd
->flags
& REDIS_CMD_BULK
)
2490 c
->argv
[c
->argc
-1] = tryObjectEncoding(c
->argv
[c
->argc
-1]);
2492 /* Check if the user is authenticated */
2493 if (server
.requirepass
&& !c
->authenticated
&& cmd
->proc
!= authCommand
) {
2494 addReplySds(c
,sdsnew("-ERR operation not permitted\r\n"));
2499 /* Handle the maxmemory directive */
2500 if (server
.maxmemory
&& (cmd
->flags
& REDIS_CMD_DENYOOM
) &&
2501 zmalloc_used_memory() > server
.maxmemory
)
2503 addReplySds(c
,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2508 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
2509 if ((dictSize(c
->pubsub_channels
) > 0 || listLength(c
->pubsub_patterns
) > 0)
2511 cmd
->proc
!= subscribeCommand
&& cmd
->proc
!= unsubscribeCommand
&&
2512 cmd
->proc
!= psubscribeCommand
&& cmd
->proc
!= punsubscribeCommand
) {
2513 addReplySds(c
,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
2518 /* Exec the command */
/* Inside MULTI every command except EXEC, DISCARD, MULTI and WATCH is queued
 * and answered with the shared "queued" reply. */
2519 if (c
->flags
& REDIS_MULTI
&&
2520 cmd
->proc
!= execCommand
&& cmd
->proc
!= discardCommand
&&
2521 cmd
->proc
!= multiCommand
&& cmd
->proc
!= watchCommand
)
2523 queueMultiCommand(c
,cmd
);
2524 addReply(c
,shared
.queued
);
/* With VM and I/O threads enabled, return 1 without executing when
 * blockClientOnSwappedKeys() reports that the client was blocked. */
2526 if (server
.vm_enabled
&& server
.vm_max_threads
> 0 &&
2527 blockClientOnSwappedKeys(c
,cmd
)) return 1;
2531 /* Prepare the client for the next command */
/* Send a command to every slave in `slaves`, encoded with the multi-bulk
 * protocol.
 *
 * The command is rendered once into an array of objects (`outv`): one
 * "*<argc>\r\n" header, then for every argument a "$<len>\r\n" object, the
 * argument itself and the shared CRLF object. A stack array is used when
 * argc <= REDIS_STATIC_ARGS, a heap array otherwise. Slaves still in the
 * REDIS_REPL_WAIT_BGSAVE_START state are skipped. When the database last
 * selected on a slave differs from `dictid`, a SELECT command is sent first
 * (a shared object for databases 0-9, a freshly built one otherwise). */
2536 static void replicationFeedSlaves(list
*slaves
, int dictid
, robj
**argv
, int argc
) {
2541 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2542 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2543 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2544 robj
*static_outv
[REDIS_STATIC_ARGS
*3+1];
2547 if (argc
<= REDIS_STATIC_ARGS
) {
2550 outv
= zmalloc(sizeof(robj
*)*(argc
*3+1));
2553 lenobj
= createObject(REDIS_STRING
,
2554 sdscatprintf(sdsempty(), "*%d\r\n", argc
));
/* Objects created here get refcount 0: the incrRefCount/decrRefCount pair
 * around the slave loop is what finally frees them. */
2555 lenobj
->refcount
= 0;
2556 outv
[outc
++] = lenobj
;
2557 for (j
= 0; j
< argc
; j
++) {
2558 lenobj
= createObject(REDIS_STRING
,
2559 sdscatprintf(sdsempty(),"$%lu\r\n",
2560 (unsigned long) stringObjectLen(argv
[j
])));
2561 lenobj
->refcount
= 0;
2562 outv
[outc
++] = lenobj
;
2563 outv
[outc
++] = argv
[j
];
2564 outv
[outc
++] = shared
.crlf
;
2567 /* Increment all the refcounts at start and decrement at end in order to
2568 * be sure to free objects if there is no slave in a replication state
2569 * able to be feed with commands */
2570 for (j
= 0; j
< outc
; j
++) incrRefCount(outv
[j
]);
2571 listRewind(slaves
,&li
);
2572 while((ln
= listNext(&li
))) {
2573 redisClient
*slave
= ln
->value
;
2575 /* Don't feed slaves that are still waiting for BGSAVE to start */
2576 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) continue;
2578 /* Feed all the other slaves, MONITORs and so on */
2579 if (slave
->slaveseldb
!= dictid
) {
2583 case 0: selectcmd
= shared
.select0
; break;
2584 case 1: selectcmd
= shared
.select1
; break;
2585 case 2: selectcmd
= shared
.select2
; break;
2586 case 3: selectcmd
= shared
.select3
; break;
2587 case 4: selectcmd
= shared
.select4
; break;
2588 case 5: selectcmd
= shared
.select5
; break;
2589 case 6: selectcmd
= shared
.select6
; break;
2590 case 7: selectcmd
= shared
.select7
; break;
2591 case 8: selectcmd
= shared
.select8
; break;
2592 case 9: selectcmd
= shared
.select9
; break;
2594 selectcmd
= createObject(REDIS_STRING
,
2595 sdscatprintf(sdsempty(),"select %d\r\n",dictid
));
2596 selectcmd
->refcount
= 0;
2599 addReply(slave
,selectcmd
);
2600 slave
->slaveseldb
= dictid
;
2602 for (j
= 0; j
< outc
; j
++) addReply(slave
,outv
[j
]);
2604 for (j
= 0; j
< outc
; j
++) decrRefCount(outv
[j
]);
/* Only the heap-allocated array needs to be released. */
2605 if (outv
!= static_outv
) zfree(outv
);
/* Append to the sds string `s` a double-quoted, escaped representation of
 * the `len` bytes pointed to by `p`, and return the resulting sds string.
 * The visible cases emit a backslash escape for some characters, the
 * character itself, or a "\xHH" hexadecimal escape; the conditions that
 * select between the last two are not visible in this extract.
 *
 * NOTE(review): the escapes for '\n', '\r', '\t', '\a' and '\b' pass a
 * two-character literal to sdscatlen() with a length of 1, so only the
 * backslash is appended and the escape letter is dropped -- a length of 2
 * looks intended; confirm. */
2608 static sds
sdscatrepr(sds s
, char *p
, size_t len
) {
2609 s
= sdscatlen(s
,"\"",1);
2614 s
= sdscatprintf(s
,"\\%c",*p
);
2616 case '\n': s
= sdscatlen(s
,"\\n",1); break;
2617 case '\r': s
= sdscatlen(s
,"\\r",1); break;
2618 case '\t': s
= sdscatlen(s
,"\\t",1); break;
2619 case '\a': s
= sdscatlen(s
,"\\a",1); break;
2620 case '\b': s
= sdscatlen(s
,"\\b",1); break;
2623 s
= sdscatprintf(s
,"%c",*p
);
2625 s
= sdscatprintf(s
,"\\x%02x",(unsigned char)*p
);
2630 return sdscatlen(s
,"\"",1);
/* Send a textual description of a command to every client in `monitors`.
 *
 * The line starts with "+", followed by the current time as
 * "<seconds>.<microseconds> ", by "(db <id>) " when `dictid` is not zero,
 * and by the arguments: integer-encoded objects are printed as numbers, the
 * others through sdscatrepr(). The line ends with CRLF. A single object is
 * built and queued on every monitor, then its reference is dropped. */
2633 static void replicationFeedMonitors(list
*monitors
, int dictid
, robj
**argv
, int argc
) {
2637 sds cmdrepr
= sdsnew("+");
2641 gettimeofday(&tv
,NULL
);
2642 cmdrepr
= sdscatprintf(cmdrepr
,"%ld.%ld ",(long)tv
.tv_sec
,(long)tv
.tv_usec
);
2643 if (dictid
!= 0) cmdrepr
= sdscatprintf(cmdrepr
,"(db %d) ", dictid
);
2645 for (j
= 0; j
< argc
; j
++) {
/* For integer-encoded objects the value is stored in the pointer itself. */
2646 if (argv
[j
]->encoding
== REDIS_ENCODING_INT
) {
2647 cmdrepr
= sdscatprintf(cmdrepr
, "%ld", (long)argv
[j
]->ptr
);
2649 cmdrepr
= sdscatrepr(cmdrepr
,(char*)argv
[j
]->ptr
,
2650 sdslen(argv
[j
]->ptr
));
2653 cmdrepr
= sdscatlen(cmdrepr
," ",1);
2655 cmdrepr
= sdscatlen(cmdrepr
,"\r\n",2);
2656 cmdobj
= createObject(REDIS_STRING
,cmdrepr
);
2658 listRewind(monitors
,&li
);
2659 while((ln
= listNext(&li
))) {
2660 redisClient
*monitor
= ln
->value
;
2661 addReply(monitor
,cmdobj
);
2663 decrRefCount(cmdobj
);
/* Parse the client's query buffer and execute the commands found in it.
 *
 * Two modes are handled, selected by c->bulklen:
 *  - bulklen == -1: a newline-terminated line is extracted from the buffer,
 *    stripped of LF and optional CR, split on spaces and turned into the
 *    client's argv; then processCommand() is called. A buffer of
 *    REDIS_REQUEST_MAX_SIZE bytes or more is logged as a protocol error.
 *  - otherwise: a bulk payload of c->bulklen bytes (CRLF included) is
 *    awaited; once available it becomes the last argument and
 *    processCommand() is called.
 * After a command that leaves the client valid, remaining buffered data is
 * parsed by jumping back to the `again` label. */
2666 static void processInputBuffer(redisClient
*c
) {
2668 /* Before to process the input buffer, make sure the client is not
2669 * waiting for a blocking operation such as BLPOP. Note that the first
2670 * iteration the client is never blocked, otherwise the processInputBuffer
2671 * would not be called at all, but after the execution of the first commands
2672 * in the input buffer the client may be blocked, and the "goto again"
2673 * will try to reiterate. The following line will make it return asap. */
2674 if (c
->flags
& REDIS_BLOCKED
|| c
->flags
& REDIS_IO_WAIT
) return;
2675 if (c
->bulklen
== -1) {
2676 /* Read the first line of the query */
2677 char *p
= strchr(c
->querybuf
,'\n');
/* The whole buffer becomes `query`; whatever follows the first line is
 * copied into a fresh query buffer. */
2684 query
= c
->querybuf
;
2685 c
->querybuf
= sdsempty();
2686 querylen
= 1+(p
-(query
));
2687 if (sdslen(query
) > querylen
) {
2688 /* leave data after the first line of the query in the buffer */
2689 c
->querybuf
= sdscatlen(c
->querybuf
,query
+querylen
,sdslen(query
)-querylen
);
2691 *p
= '\0'; /* remove "\n" */
/* NOTE(review): when the newline is the first byte of the buffer, p-1 points
 * before the start of the string data -- confirm this read is safe. */
2692 if (*(p
-1) == '\r') *(p
-1) = '\0'; /* and "\r" if any */
2693 sdsupdatelen(query
);
2695 /* Now we can split the query in arguments */
2696 argv
= sdssplitlen(query
,sdslen(query
)," ",1,&argc
);
2699 if (c
->argv
) zfree(c
->argv
);
2700 c
->argv
= zmalloc(sizeof(robj
*)*argc
);
2702 for (j
= 0; j
< argc
; j
++) {
/* Only non-empty tokens become arguments. */
2703 if (sdslen(argv
[j
])) {
2704 c
->argv
[c
->argc
] = createObject(REDIS_STRING
,argv
[j
]);
2712 /* Execute the command. If the client is still valid
2713 * after processCommand() return and there is something
2714 * on the query buffer try to process the next command. */
2715 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
2717 /* Nothing to process, argc == 0. Just process the query
2718 * buffer if it's not empty or return to the caller */
2719 if (sdslen(c
->querybuf
)) goto again
;
2722 } else if (sdslen(c
->querybuf
) >= REDIS_REQUEST_MAX_SIZE
) {
2723 redisLog(REDIS_VERBOSE
, "Client protocol error");
2728 /* Bulk read handling. Note that if we are at this point
2729 the client already sent a command terminated with a newline,
2730 we are reading the bulk data that is actually the last
2731 argument of the command. */
2732 int qbl
= sdslen(c
->querybuf
);
2734 if (c
->bulklen
<= qbl
) {
2735 /* Copy everything but the final CRLF as final argument */
2736 c
->argv
[c
->argc
] = createStringObject(c
->querybuf
,c
->bulklen
-2);
2738 c
->querybuf
= sdsrange(c
->querybuf
,c
->bulklen
,-1);
2739 /* Process the command. If the client is still valid after
2740 * the processing and there is more data in the buffer
2741 * try to parse it. */
2742 if (processCommand(c
) && sdslen(c
->querybuf
)) goto again
;
/* Event loop handler invoked when the client socket `fd` is readable.
 * `privdata` is the redisClient; `mask` is unused.
 *
 * Up to REDIS_IOBUF_LEN bytes are read into a stack buffer. A read error
 * other than EAGAIN is logged, and a read of zero bytes is logged as the
 * client closing the connection. Data that was read is appended to the
 * client's query buffer, the last interaction time is refreshed and
 * processInputBuffer() is called. */
2748 static void readQueryFromClient(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2749 redisClient
*c
= (redisClient
*) privdata
;
2750 char buf
[REDIS_IOBUF_LEN
];
2753 REDIS_NOTUSED(mask
);
2755 nread
= read(fd
, buf
, REDIS_IOBUF_LEN
);
2757 if (errno
== EAGAIN
) {
2760 redisLog(REDIS_VERBOSE
, "Reading from client: %s",strerror(errno
));
2764 } else if (nread
== 0) {
2765 redisLog(REDIS_VERBOSE
, "Client closed connection");
2770 c
->querybuf
= sdscatlen(c
->querybuf
, buf
, nread
);
2771 c
->lastinteraction
= time(NULL
);
2775 processInputBuffer(c
);
/* Make database number `id` the current database of the client. The index
 * is range-checked against 0..server.dbnum-1 before c->db is pointed at the
 * selected entry of server.db. The returned status values are not visible
 * in this extract. */
2778 static int selectDb(redisClient
*c
, int id
) {
2779 if (id
< 0 || id
>= server
.dbnum
)
2781 c
->db
= &server
.db
[id
];
/* Duplication callback installed on the client reply list by
 * createClient(): takes one more reference on the object `o` instead of
 * copying it. Presumably the same pointer is returned (the return statement
 * is not visible here -- confirm). */
2785 static void *dupClientReplyValue(void *o
) {
2786 incrRefCount((robj
*)o
);
/* List match callback: reports whether the two list values `a` and `b`
 * are equal string objects, by delegating to equalStringObjects(). */
static int listMatchObjects(void *a, void *b) {
    int same = equalStringObjects(a,b);

    return same;
}
/* Allocate and initialize the client structure for the connected socket
 * `fd`. The socket is put in non-blocking mode with TCP_NODELAY, the query
 * buffer, reply list, key lists and Pub/Sub structures are created, a
 * readable event bound to readQueryFromClient() is registered, and the
 * client is appended to server.clients before its MULTI state is
 * initialized. Returns NULL when the allocation fails.
 *
 * NOTE(review): the NULL check on `c` comes after the two anet calls; they
 * do not use `c`, so this is only an ordering oddity. */
2794 static redisClient
*createClient(int fd
) {
2795 redisClient
*c
= zmalloc(sizeof(*c
));
2797 anetNonBlock(NULL
,fd
);
2798 anetTcpNoDelay(NULL
,fd
);
2799 if (!c
) return NULL
;
2802 c
->querybuf
= sdsempty();
2811 c
->lastinteraction
= time(NULL
);
2812 c
->authenticated
= 0;
2813 c
->replstate
= REDIS_REPL_NONE
;
/* Reply list: values are reference counted objects, released with
 * decrRefCount and duplicated by taking a new reference. */
2814 c
->reply
= listCreate();
2815 listSetFreeMethod(c
->reply
,decrRefCount
);
2816 listSetDupMethod(c
->reply
,dupClientReplyValue
);
2817 c
->blocking_keys
= NULL
;
2818 c
->blocking_keys_num
= 0;
2819 c
->io_keys
= listCreate();
2820 c
->watched_keys
= listCreate();
2821 listSetFreeMethod(c
->io_keys
,decrRefCount
);
2822 c
->pubsub_channels
= dictCreate(&setDictType
,NULL
);
2823 c
->pubsub_patterns
= listCreate();
2824 listSetFreeMethod(c
->pubsub_patterns
,decrRefCount
);
2825 listSetMatchMethod(c
->pubsub_patterns
,listMatchObjects
);
2826 if (aeCreateFileEvent(server
.el
, c
->fd
, AE_READABLE
,
2827 readQueryFromClient
, c
) == AE_ERR
) {
2831 listAddNodeTail(server
.clients
,c
);
2832 initClientMultiState(c
);
2836 static void addReply(redisClient
*c
, robj
*obj
) {
2837 if (listLength(c
->reply
) == 0 &&
2838 (c
->replstate
== REDIS_REPL_NONE
||
2839 c
->replstate
== REDIS_REPL_ONLINE
) &&
2840 aeCreateFileEvent(server
.el
, c
->fd
, AE_WRITABLE
,
2841 sendReplyToClient
, c
) == AE_ERR
) return;
2843 if (server
.vm_enabled
&& obj
->storage
!= REDIS_VM_MEMORY
) {
2844 obj
= dupStringObject(obj
);
2845 obj
->refcount
= 0; /* getDecodedObject() will increment the refcount */
2847 listAddNodeTail(c
->reply
,getDecodedObject(obj
));
2850 static void addReplySds(redisClient
*c
, sds s
) {
2851 robj
*o
= createObject(REDIS_STRING
,s
);
2856 static void addReplyDouble(redisClient
*c
, double d
) {
2859 snprintf(buf
,sizeof(buf
),"%.17g",d
);
2860 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
2861 (unsigned long) strlen(buf
),buf
));
2864 static void addReplyLongLong(redisClient
*c
, long long ll
) {
2869 addReply(c
,shared
.czero
);
2871 } else if (ll
== 1) {
2872 addReply(c
,shared
.cone
);
2876 len
= ll2string(buf
+1,sizeof(buf
)-1,ll
);
2879 addReplySds(c
,sdsnewlen(buf
,len
+3));
2882 static void addReplyUlong(redisClient
*c
, unsigned long ul
) {
2887 addReply(c
,shared
.czero
);
2889 } else if (ul
== 1) {
2890 addReply(c
,shared
.cone
);
2893 len
= snprintf(buf
,sizeof(buf
),":%lu\r\n",ul
);
2894 addReplySds(c
,sdsnewlen(buf
,len
));
2897 static void addReplyBulkLen(redisClient
*c
, robj
*obj
) {
2901 if (obj
->encoding
== REDIS_ENCODING_RAW
) {
2902 len
= sdslen(obj
->ptr
);
2904 long n
= (long)obj
->ptr
;
2906 /* Compute how many bytes will take this integer as a radix 10 string */
2912 while((n
= n
/10) != 0) {
2917 intlen
= ll2string(buf
+1,sizeof(buf
)-1,(long long)len
);
2918 buf
[intlen
+1] = '\r';
2919 buf
[intlen
+2] = '\n';
2920 addReplySds(c
,sdsnewlen(buf
,intlen
+3));
2923 static void addReplyBulk(redisClient
*c
, robj
*obj
) {
2924 addReplyBulkLen(c
,obj
);
2926 addReply(c
,shared
.crlf
);
2929 static void addReplyBulkSds(redisClient
*c
, sds s
) {
2930 robj
*o
= createStringObject(s
, sdslen(s
));
2935 /* In the CONFIG command we need to add vanilla C strings as bulk replies */
2936 static void addReplyBulkCString(redisClient
*c
, char *s
) {
2938 addReply(c
,shared
.nullbulk
);
2940 robj
*o
= createStringObject(s
,strlen(s
));
2946 static void acceptHandler(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
2951 REDIS_NOTUSED(mask
);
2952 REDIS_NOTUSED(privdata
);
2954 cfd
= anetAccept(server
.neterr
, fd
, cip
, &cport
);
2955 if (cfd
== AE_ERR
) {
2956 redisLog(REDIS_VERBOSE
,"Accepting client connection: %s", server
.neterr
);
2959 redisLog(REDIS_VERBOSE
,"Accepted %s:%d", cip
, cport
);
2960 if ((c
= createClient(cfd
)) == NULL
) {
2961 redisLog(REDIS_WARNING
,"Error allocating resoures for the client");
2962 close(cfd
); /* May be already closed, just ingore errors */
2965 /* If maxclient directive is set and this is one client more... close the
2966 * connection. Note that we create the client instead to check before
2967 * for this condition, since now the socket is already set in nonblocking
2968 * mode and we can send an error for free using the Kernel I/O */
2969 if (server
.maxclients
&& listLength(server
.clients
) > server
.maxclients
) {
2970 char *err
= "-ERR max number of clients reached\r\n";
2972 /* That's a best effort error message, don't check write errors */
2973 if (write(c
->fd
,err
,strlen(err
)) == -1) {
2974 /* Nothing to do, Just to avoid the warning... */
2979 server
.stat_numconnections
++;
2982 /* ======================= Redis objects implementation ===================== */
2984 static robj
*createObject(int type
, void *ptr
) {
2987 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
2988 if (listLength(server
.objfreelist
)) {
2989 listNode
*head
= listFirst(server
.objfreelist
);
2990 o
= listNodeValue(head
);
2991 listDelNode(server
.objfreelist
,head
);
2992 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2994 if (server
.vm_enabled
)
2995 pthread_mutex_unlock(&server
.obj_freelist_mutex
);
2996 o
= zmalloc(sizeof(*o
));
2999 o
->encoding
= REDIS_ENCODING_RAW
;
3002 if (server
.vm_enabled
) {
3003 /* Note that this code may run in the context of an I/O thread
3004 * and accessing server.lruclock in theory is an error
3005 * (no locks). But in practice this is safe, and even if we read
3006 * garbage Redis will not fail. */
3007 o
->lru
= server
.lruclock
;
3008 o
->storage
= REDIS_VM_MEMORY
;
3013 static robj
*createStringObject(char *ptr
, size_t len
) {
3014 return createObject(REDIS_STRING
,sdsnewlen(ptr
,len
));
3017 static robj
*createStringObjectFromLongLong(long long value
) {
3019 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3020 incrRefCount(shared
.integers
[value
]);
3021 o
= shared
.integers
[value
];
3023 if (value
>= LONG_MIN
&& value
<= LONG_MAX
) {
3024 o
= createObject(REDIS_STRING
, NULL
);
3025 o
->encoding
= REDIS_ENCODING_INT
;
3026 o
->ptr
= (void*)((long)value
);
3028 o
= createObject(REDIS_STRING
,sdsfromlonglong(value
));
3034 static robj
*dupStringObject(robj
*o
) {
3035 assert(o
->encoding
== REDIS_ENCODING_RAW
);
3036 return createStringObject(o
->ptr
,sdslen(o
->ptr
));
3039 static robj
*createListObject(void) {
3040 list
*l
= listCreate();
3041 robj
*o
= createObject(REDIS_LIST
,l
);
3042 listSetFreeMethod(l
,decrRefCount
);
3043 o
->encoding
= REDIS_ENCODING_LIST
;
3047 static robj
*createZiplistObject(void) {
3048 unsigned char *zl
= ziplistNew();
3049 robj
*o
= createObject(REDIS_LIST
,zl
);
3050 o
->encoding
= REDIS_ENCODING_ZIPLIST
;
3054 static robj
*createSetObject(void) {
3055 dict
*d
= dictCreate(&setDictType
,NULL
);
3056 return createObject(REDIS_SET
,d
);
3059 static robj
*createHashObject(void) {
3060 /* All the Hashes start as zipmaps. Will be automatically converted
3061 * into hash tables if there are enough elements or big elements
3063 unsigned char *zm
= zipmapNew();
3064 robj
*o
= createObject(REDIS_HASH
,zm
);
3065 o
->encoding
= REDIS_ENCODING_ZIPMAP
;
3069 static robj
*createZsetObject(void) {
3070 zset
*zs
= zmalloc(sizeof(*zs
));
3072 zs
->dict
= dictCreate(&zsetDictType
,NULL
);
3073 zs
->zsl
= zslCreate();
3074 return createObject(REDIS_ZSET
,zs
);
3077 static void freeStringObject(robj
*o
) {
3078 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3083 static void freeListObject(robj
*o
) {
3084 switch (o
->encoding
) {
3085 case REDIS_ENCODING_LIST
:
3086 listRelease((list
*) o
->ptr
);
3088 case REDIS_ENCODING_ZIPLIST
:
3092 redisPanic("Unknown list encoding type");
3096 static void freeSetObject(robj
*o
) {
3097 dictRelease((dict
*) o
->ptr
);
3100 static void freeZsetObject(robj
*o
) {
3103 dictRelease(zs
->dict
);
3108 static void freeHashObject(robj
*o
) {
3109 switch (o
->encoding
) {
3110 case REDIS_ENCODING_HT
:
3111 dictRelease((dict
*) o
->ptr
);
3113 case REDIS_ENCODING_ZIPMAP
:
3117 redisPanic("Unknown hash encoding type");
3122 static void incrRefCount(robj
*o
) {
3126 static void decrRefCount(void *obj
) {
3129 /* Object is a swapped out value, or in the process of being loaded. */
3130 if (server
.vm_enabled
&&
3131 (o
->storage
== REDIS_VM_SWAPPED
|| o
->storage
== REDIS_VM_LOADING
))
3133 vmpointer
*vp
= obj
;
3134 if (o
->storage
== REDIS_VM_LOADING
) vmCancelThreadedIOJob(o
);
3135 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
3136 server
.vm_stats_swapped_objects
--;
3141 if (o
->refcount
<= 0) redisPanic("decrRefCount against refcount <= 0");
3142 /* Object is in memory, or in the process of being swapped out.
3144 * If the object is being swapped out, abort the operation on
3145 * decrRefCount even if the refcount does not drop to 0: the object
3146 * is referenced at least two times, as value of the key AND as
3147 * job->val in the iojob. So if we don't invalidate the iojob, when it is
3148 * done but the relevant key was removed in the meantime, the
3149 * complete jobs handler will not find the key about the job and the
3150 * assert will fail. */
3151 if (server
.vm_enabled
&& o
->storage
== REDIS_VM_SWAPPING
)
3152 vmCancelThreadedIOJob(o
);
3153 if (--(o
->refcount
) == 0) {
3155 case REDIS_STRING
: freeStringObject(o
); break;
3156 case REDIS_LIST
: freeListObject(o
); break;
3157 case REDIS_SET
: freeSetObject(o
); break;
3158 case REDIS_ZSET
: freeZsetObject(o
); break;
3159 case REDIS_HASH
: freeHashObject(o
); break;
3160 default: redisPanic("Unknown object type"); break;
3162 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
3163 if (listLength(server
.objfreelist
) > REDIS_OBJFREELIST_MAX
||
3164 !listAddNodeHead(server
.objfreelist
,o
))
3166 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
3170 static int checkType(redisClient
*c
, robj
*o
, int type
) {
3171 if (o
->type
!= type
) {
3172 addReply(c
,shared
.wrongtypeerr
);
3178 /* Check if the nul-terminated string 's' can be represented by a long
3179 * (that is, is a number that fits into long without any other space or
3180 * character before or after the digits).
3182 * If so, the function returns REDIS_OK and *longval is set to the value
3183 * of the number. Otherwise REDIS_ERR is returned */
3184 static int isStringRepresentableAsLong(sds s
, long *longval
) {
3185 char buf
[32], *endptr
;
3189 value
= strtol(s
, &endptr
, 10);
3190 if (endptr
[0] != '\0') return REDIS_ERR
;
3191 slen
= ll2string(buf
,32,value
);
3193 /* If the number converted back into a string is not identical
3194 * then it's not possible to encode the string as integer */
3195 if (sdslen(s
) != (unsigned)slen
|| memcmp(buf
,s
,slen
)) return REDIS_ERR
;
3196 if (longval
) *longval
= value
;
3200 /* Try to encode a string object in order to save space */
3201 static robj
*tryObjectEncoding(robj
*o
) {
3205 if (o
->encoding
!= REDIS_ENCODING_RAW
)
3206 return o
; /* Already encoded */
3208 /* It's not safe to encode shared objects: shared objects can be shared
3209 * everywhere in the "object space" of Redis. Encoded objects can only
3210 * appear as "values" (and not, for instance, as keys) */
3211 if (o
->refcount
> 1) return o
;
3213 /* Currently we try to encode only strings */
3214 redisAssert(o
->type
== REDIS_STRING
);
3216 /* Check if we can represent this string as a long integer */
3217 if (isStringRepresentableAsLong(s
,&value
) == REDIS_ERR
) return o
;
3219 /* Ok, this object can be encoded */
3220 if (value
>= 0 && value
< REDIS_SHARED_INTEGERS
) {
3222 incrRefCount(shared
.integers
[value
]);
3223 return shared
.integers
[value
];
3225 o
->encoding
= REDIS_ENCODING_INT
;
3227 o
->ptr
= (void*) value
;
3232 /* Get a decoded version of an encoded object (returned as a new object).
3233 * If the object is already raw-encoded just increment the ref count. */
3234 static robj
*getDecodedObject(robj
*o
) {
3237 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3241 if (o
->type
== REDIS_STRING
&& o
->encoding
== REDIS_ENCODING_INT
) {
3244 ll2string(buf
,32,(long)o
->ptr
);
3245 dec
= createStringObject(buf
,strlen(buf
));
3248 redisPanic("Unknown encoding type");
3252 /* Compare two string objects via strcmp() or alike.
3253 * Note that the objects may be integer-encoded. In such a case we
3254 * use ll2string() to get a string representation of the numbers on the stack
3255 * and compare the strings, it's much faster than calling getDecodedObject().
3257 * Important note: if objects are not integer encoded, but binary-safe strings,
3258 * sdscmp() from sds.c will apply memcmp() so this function can be considered
3260 static int compareStringObjects(robj
*a
, robj
*b
) {
3261 redisAssert(a
->type
== REDIS_STRING
&& b
->type
== REDIS_STRING
);
3262 char bufa
[128], bufb
[128], *astr
, *bstr
;
3265 if (a
== b
) return 0;
3266 if (a
->encoding
!= REDIS_ENCODING_RAW
) {
3267 ll2string(bufa
,sizeof(bufa
),(long) a
->ptr
);
3273 if (b
->encoding
!= REDIS_ENCODING_RAW
) {
3274 ll2string(bufb
,sizeof(bufb
),(long) b
->ptr
);
3280 return bothsds
? sdscmp(astr
,bstr
) : strcmp(astr
,bstr
);
3283 /* Equal string objects return 1 if the two objects are the same from the
3284 * point of view of a string comparison, otherwise 0 is returned. Note that
3285 * this function is faster then checking for (compareStringObject(a,b) == 0)
3286 * because it can perform some more optimization. */
3287 static int equalStringObjects(robj
*a
, robj
*b
) {
3288 if (a
->encoding
!= REDIS_ENCODING_RAW
&& b
->encoding
!= REDIS_ENCODING_RAW
){
3289 return a
->ptr
== b
->ptr
;
3291 return compareStringObjects(a
,b
) == 0;
3295 static size_t stringObjectLen(robj
*o
) {
3296 redisAssert(o
->type
== REDIS_STRING
);
3297 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3298 return sdslen(o
->ptr
);
3302 return ll2string(buf
,32,(long)o
->ptr
);
3306 static int getDoubleFromObject(robj
*o
, double *target
) {
3313 redisAssert(o
->type
== REDIS_STRING
);
3314 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3315 value
= strtod(o
->ptr
, &eptr
);
3316 if (eptr
[0] != '\0') return REDIS_ERR
;
3317 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3318 value
= (long)o
->ptr
;
3320 redisPanic("Unknown string encoding");
3328 static int getDoubleFromObjectOrReply(redisClient
*c
, robj
*o
, double *target
, const char *msg
) {
3330 if (getDoubleFromObject(o
, &value
) != REDIS_OK
) {
3332 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3334 addReplySds(c
, sdsnew("-ERR value is not a double\r\n"));
3343 static int getLongLongFromObject(robj
*o
, long long *target
) {
3350 redisAssert(o
->type
== REDIS_STRING
);
3351 if (o
->encoding
== REDIS_ENCODING_RAW
) {
3352 value
= strtoll(o
->ptr
, &eptr
, 10);
3353 if (eptr
[0] != '\0') return REDIS_ERR
;
3354 } else if (o
->encoding
== REDIS_ENCODING_INT
) {
3355 value
= (long)o
->ptr
;
3357 redisPanic("Unknown string encoding");
3365 static int getLongLongFromObjectOrReply(redisClient
*c
, robj
*o
, long long *target
, const char *msg
) {
3367 if (getLongLongFromObject(o
, &value
) != REDIS_OK
) {
3369 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3371 addReplySds(c
, sdsnew("-ERR value is not an integer\r\n"));
3380 static int getLongFromObjectOrReply(redisClient
*c
, robj
*o
, long *target
, const char *msg
) {
3383 if (getLongLongFromObjectOrReply(c
, o
, &value
, msg
) != REDIS_OK
) return REDIS_ERR
;
3384 if (value
< LONG_MIN
|| value
> LONG_MAX
) {
3386 addReplySds(c
, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg
));
3388 addReplySds(c
, sdsnew("-ERR value is out of range\r\n"));
3397 /* =========================== Keyspace access API ========================== */
3399 static robj
*lookupKey(redisDb
*db
, robj
*key
) {
3400 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
3402 robj
*val
= dictGetEntryVal(de
);
3404 if (server
.vm_enabled
) {
3405 if (val
->storage
== REDIS_VM_MEMORY
||
3406 val
->storage
== REDIS_VM_SWAPPING
)
3408 /* If we were swapping the object out, cancel the operation */
3409 if (val
->storage
== REDIS_VM_SWAPPING
)
3410 vmCancelThreadedIOJob(val
);
3411 /* Update the access time for the aging algorithm. */
3412 val
->lru
= server
.lruclock
;
3414 int notify
= (val
->storage
== REDIS_VM_LOADING
);
3416 /* Our value was swapped on disk. Bring it at home. */
3417 redisAssert(val
->type
== REDIS_VMPOINTER
);
3418 val
= vmLoadObject(val
);
3419 dictGetEntryVal(de
) = val
;
3421 /* Clients blocked by the VM subsystem may be waiting for
3423 if (notify
) handleClientsBlockedOnSwappedKey(db
,key
);
3432 static robj
*lookupKeyRead(redisDb
*db
, robj
*key
) {
3433 expireIfNeeded(db
,key
);
3434 return lookupKey(db
,key
);
3437 static robj
*lookupKeyWrite(redisDb
*db
, robj
*key
) {
3438 deleteIfVolatile(db
,key
);
3439 touchWatchedKey(db
,key
);
3440 return lookupKey(db
,key
);
3443 static robj
*lookupKeyReadOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3444 robj
*o
= lookupKeyRead(c
->db
, key
);
3445 if (!o
) addReply(c
,reply
);
3449 static robj
*lookupKeyWriteOrReply(redisClient
*c
, robj
*key
, robj
*reply
) {
3450 robj
*o
= lookupKeyWrite(c
->db
, key
);
3451 if (!o
) addReply(c
,reply
);
3455 /* Add the key to the DB. If the key already exists REDIS_ERR is returned,
3456 * otherwise REDIS_OK is returned, and the caller should increment the
3457 * refcount of 'val'. */
3458 static int dbAdd(redisDb
*db
, robj
*key
, robj
*val
) {
3459 /* Perform a lookup before adding the key, as we need to copy the
3461 if (dictFind(db
->dict
, key
->ptr
) != NULL
) {
3464 sds copy
= sdsdup(key
->ptr
);
3465 dictAdd(db
->dict
, copy
, val
);
3470 /* If the key does not exist, this is just like dbAdd(). Otherwise
3471 * the value associated to the key is replaced with the new one.
3473 * On update (key already existed) 0 is returned. Otherwise 1. */
3474 static int dbReplace(redisDb
*db
, robj
*key
, robj
*val
) {
3475 if (dictFind(db
->dict
,key
->ptr
) == NULL
) {
3476 sds copy
= sdsdup(key
->ptr
);
3477 dictAdd(db
->dict
, copy
, val
);
3480 dictReplace(db
->dict
, key
->ptr
, val
);
3485 static int dbExists(redisDb
*db
, robj
*key
) {
3486 return dictFind(db
->dict
,key
->ptr
) != NULL
;
3489 /* Return a random key, in form of a Redis object.
3490 * If there are no keys, NULL is returned.
3492 * The function makes sure to return keys not already expired. */
3493 static robj
*dbRandomKey(redisDb
*db
) {
3494 struct dictEntry
*de
;
3500 de
= dictGetRandomKey(db
->dict
);
3501 if (de
== NULL
) return NULL
;
3503 key
= dictGetEntryKey(de
);
3504 keyobj
= createStringObject(key
,sdslen(key
));
3505 if (dictFind(db
->expires
,key
)) {
3506 if (expireIfNeeded(db
,keyobj
)) {
3507 decrRefCount(keyobj
);
3508 continue; /* search for another key. This expired. */
3515 /* Delete a key, value, and associated expiration entry if any, from the DB */
3516 static int dbDelete(redisDb
*db
, robj
*key
) {
3519 if (dictSize(db
->expires
)) dictDelete(db
->expires
,key
->ptr
);
3520 retval
= dictDelete(db
->dict
,key
->ptr
);
3522 return retval
== DICT_OK
;
3525 /*============================ RDB saving/loading =========================== */
/* Write the one byte 'type' to 'fp'.
 * Returns 0 on success, -1 if nothing could be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
/* Write the time 't' to 'fp' as a 4 byte integer: the value is first
 * converted to int32_t and then written in the host representation.
 * Returns 0 on success, -1 if the write failed. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;
    size_t written = fwrite(&stamp,4,1,fp);

    return (written == 0) ? -1 : 0;
}
3538 /* check rdbLoadLen() comments for more info */
3539 static int rdbSaveLen(FILE *fp
, uint32_t len
) {
3540 unsigned char buf
[2];
3543 /* Save a 6 bit len */
3544 buf
[0] = (len
&0xFF)|(REDIS_RDB_6BITLEN
<<6);
3545 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3546 } else if (len
< (1<<14)) {
3547 /* Save a 14 bit len */
3548 buf
[0] = ((len
>>8)&0xFF)|(REDIS_RDB_14BITLEN
<<6);
3550 if (fwrite(buf
,2,1,fp
) == 0) return -1;
3552 /* Save a 32 bit len */
3553 buf
[0] = (REDIS_RDB_32BITLEN
<<6);
3554 if (fwrite(buf
,1,1,fp
) == 0) return -1;
3556 if (fwrite(&len
,4,1,fp
) == 0) return -1;
3561 /* Encode 'value' as an integer if possible (if integer will fit the
3562 * supported range). If the function successfully encoded the integer
3563 * then the (up to 5 bytes) encoded representation is written in the
3564 * string pointed by 'enc' and the length is returned. Otherwise
3566 static int rdbEncodeInteger(long long value
, unsigned char *enc
) {
3567 /* Finally check if it fits in our ranges */
3568 if (value
>= -(1<<7) && value
<= (1<<7)-1) {
3569 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT8
;
3570 enc
[1] = value
&0xFF;
3572 } else if (value
>= -(1<<15) && value
<= (1<<15)-1) {
3573 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT16
;
3574 enc
[1] = value
&0xFF;
3575 enc
[2] = (value
>>8)&0xFF;
3577 } else if (value
>= -((long long)1<<31) && value
<= ((long long)1<<31)-1) {
3578 enc
[0] = (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_INT32
;
3579 enc
[1] = value
&0xFF;
3580 enc
[2] = (value
>>8)&0xFF;
3581 enc
[3] = (value
>>16)&0xFF;
3582 enc
[4] = (value
>>24)&0xFF;
3589 /* String objects in the form "2391" "-100" without any space and with a
3590 * range of values that can fit in an 8, 16 or 32 bit signed value can be
3591 * encoded as integers to save space */
3592 static int rdbTryIntegerEncoding(char *s
, size_t len
, unsigned char *enc
) {
3594 char *endptr
, buf
[32];
3596 /* Check if it's possible to encode this value as a number */
3597 value
= strtoll(s
, &endptr
, 10);
3598 if (endptr
[0] != '\0') return 0;
3599 ll2string(buf
,32,value
);
3601 /* If the number converted back into a string is not identical
3602 * then it's not possible to encode the string as integer */
3603 if (strlen(buf
) != len
|| memcmp(buf
,s
,len
)) return 0;
3605 return rdbEncodeInteger(value
,enc
);
3608 static int rdbSaveLzfStringObject(FILE *fp
, unsigned char *s
, size_t len
) {
3609 size_t comprlen
, outlen
;
3613 /* We require at least four bytes compression for this to be worth it */
3614 if (len
<= 4) return 0;
3616 if ((out
= zmalloc(outlen
+1)) == NULL
) return 0;
3617 comprlen
= lzf_compress(s
, len
, out
, outlen
);
3618 if (comprlen
== 0) {
3622 /* Data compressed! Let's save it on disk */
3623 byte
= (REDIS_RDB_ENCVAL
<<6)|REDIS_RDB_ENC_LZF
;
3624 if (fwrite(&byte
,1,1,fp
) == 0) goto writeerr
;
3625 if (rdbSaveLen(fp
,comprlen
) == -1) goto writeerr
;
3626 if (rdbSaveLen(fp
,len
) == -1) goto writeerr
;
3627 if (fwrite(out
,comprlen
,1,fp
) == 0) goto writeerr
;
3636 /* Save a string object as [len][data] on disk. If the object is a string
3637 * representation of an integer value we try to save it in a special form */
3638 static int rdbSaveRawString(FILE *fp
, unsigned char *s
, size_t len
) {
3641 /* Try integer encoding */
3643 unsigned char buf
[5];
3644 if ((enclen
= rdbTryIntegerEncoding((char*)s
,len
,buf
)) > 0) {
3645 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3650 /* Try LZF compression - under 20 bytes it's unable to compress even
3651 * aaaaaaaaaaaaaaaaaa so skip it */
3652 if (server
.rdbcompression
&& len
> 20) {
3655 retval
= rdbSaveLzfStringObject(fp
,s
,len
);
3656 if (retval
== -1) return -1;
3657 if (retval
> 0) return 0;
3658 /* retval == 0 means data can't be compressed, save the old way */
3661 /* Store verbatim */
3662 if (rdbSaveLen(fp
,len
) == -1) return -1;
3663 if (len
&& fwrite(s
,len
,1,fp
) == 0) return -1;
3667 /* Save a long long value as either an encoded string or a string. */
3668 static int rdbSaveLongLongAsStringObject(FILE *fp
, long long value
) {
3669 unsigned char buf
[32];
3670 int enclen
= rdbEncodeInteger(value
,buf
);
3672 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3674 /* Encode as string */
3675 enclen
= ll2string((char*)buf
,32,value
);
3676 redisAssert(enclen
< 32);
3677 if (rdbSaveLen(fp
,enclen
) == -1) return -1;
3678 if (fwrite(buf
,enclen
,1,fp
) == 0) return -1;
3683 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
3684 static int rdbSaveStringObject(FILE *fp
, robj
*obj
) {
3685 /* Avoid to decode the object, then encode it again, if the
3686 * object is alrady integer encoded. */
3687 if (obj
->encoding
== REDIS_ENCODING_INT
) {
3688 return rdbSaveLongLongAsStringObject(fp
,(long)obj
->ptr
);
3690 redisAssert(obj
->encoding
== REDIS_ENCODING_RAW
);
3691 return rdbSaveRawString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
3695 /* Save a double value. Doubles are saved as strings prefixed by an unsigned
3696 * 8 bit integer specifying the length of the representation.
3697 * This 8 bit integer has special values in order to specify the following
3703 static int rdbSaveDoubleValue(FILE *fp
, double val
) {
3704 unsigned char buf
[128];
3710 } else if (!isfinite(val
)) {
3712 buf
[0] = (val
< 0) ? 255 : 254;
3714 #if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
3715 /* Check if the float is in a safe range to be casted into a
3716 * long long. We are assuming that long long is 64 bit here.
3717 * Also we are assuming that there are no implementations around where
3718 * double has precision < 52 bit.
3720 * Under this assumptions we test if a double is inside an interval
3721 * where casting to long long is safe. Then using two castings we
3722 * make sure the decimal part is zero. If all this is true we use
3723 * integer printing function that is much faster. */
3724 double min
= -4503599627370495; /* (2^52)-1 */
3725 double max
= 4503599627370496; /* -(2^52) */
3726 if (val
> min
&& val
< max
&& val
== ((double)((long long)val
)))
3727 ll2string((char*)buf
+1,sizeof(buf
),(long long)val
);
3730 snprintf((char*)buf
+1,sizeof(buf
)-1,"%.17g",val
);
3731 buf
[0] = strlen((char*)buf
+1);
3734 if (fwrite(buf
,len
,1,fp
) == 0) return -1;
3738 /* Save a Redis object. */
3739 static int rdbSaveObject(FILE *fp
, robj
*o
) {
3740 if (o
->type
== REDIS_STRING
) {
3741 /* Save a string value */
3742 if (rdbSaveStringObject(fp
,o
) == -1) return -1;
3743 } else if (o
->type
== REDIS_LIST
) {
3744 /* Save a list value */
3745 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
3747 unsigned char *vstr
;
3751 if (rdbSaveLen(fp
,ziplistLen(o
->ptr
)) == -1) return -1;
3752 p
= ziplistIndex(o
->ptr
,0);
3753 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
3755 if (rdbSaveRawString(fp
,vstr
,vlen
) == -1)
3758 if (rdbSaveLongLongAsStringObject(fp
,vlong
) == -1)
3761 p
= ziplistNext(o
->ptr
,p
);
3763 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
3764 list
*list
= o
->ptr
;
3768 if (rdbSaveLen(fp
,listLength(list
)) == -1) return -1;
3769 listRewind(list
,&li
);
3770 while((ln
= listNext(&li
))) {
3771 robj
*eleobj
= listNodeValue(ln
);
3772 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3775 redisPanic("Unknown list encoding");
3777 } else if (o
->type
== REDIS_SET
) {
3778 /* Save a set value */
3780 dictIterator
*di
= dictGetIterator(set
);
3783 if (rdbSaveLen(fp
,dictSize(set
)) == -1) return -1;
3784 while((de
= dictNext(di
)) != NULL
) {
3785 robj
*eleobj
= dictGetEntryKey(de
);
3787 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3789 dictReleaseIterator(di
);
3790 } else if (o
->type
== REDIS_ZSET
) {
3791 /* Save a set value */
3793 dictIterator
*di
= dictGetIterator(zs
->dict
);
3796 if (rdbSaveLen(fp
,dictSize(zs
->dict
)) == -1) return -1;
3797 while((de
= dictNext(di
)) != NULL
) {
3798 robj
*eleobj
= dictGetEntryKey(de
);
3799 double *score
= dictGetEntryVal(de
);
3801 if (rdbSaveStringObject(fp
,eleobj
) == -1) return -1;
3802 if (rdbSaveDoubleValue(fp
,*score
) == -1) return -1;
3804 dictReleaseIterator(di
);
3805 } else if (o
->type
== REDIS_HASH
) {
3806 /* Save a hash value */
3807 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
3808 unsigned char *p
= zipmapRewind(o
->ptr
);
3809 unsigned int count
= zipmapLen(o
->ptr
);
3810 unsigned char *key
, *val
;
3811 unsigned int klen
, vlen
;
3813 if (rdbSaveLen(fp
,count
) == -1) return -1;
3814 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
3815 if (rdbSaveRawString(fp
,key
,klen
) == -1) return -1;
3816 if (rdbSaveRawString(fp
,val
,vlen
) == -1) return -1;
3819 dictIterator
*di
= dictGetIterator(o
->ptr
);
3822 if (rdbSaveLen(fp
,dictSize((dict
*)o
->ptr
)) == -1) return -1;
3823 while((de
= dictNext(di
)) != NULL
) {
3824 robj
*key
= dictGetEntryKey(de
);
3825 robj
*val
= dictGetEntryVal(de
);
3827 if (rdbSaveStringObject(fp
,key
) == -1) return -1;
3828 if (rdbSaveStringObject(fp
,val
) == -1) return -1;
3830 dictReleaseIterator(di
);
3833 redisPanic("Unknown object type");
3838 /* Return the length the object will have on disk if saved with
3839 * the rdbSaveObject() function. Currently we use a trick to get
3840 * this length with very little changes to the code. In the future
3841 * we could switch to a faster solution. */
3842 static off_t
rdbSavedObjectLen(robj
*o
, FILE *fp
) {
3843 if (fp
== NULL
) fp
= server
.devnull
;
3845 assert(rdbSaveObject(fp
,o
) != 1);
3849 /* Return the number of pages required to save this object in the swap file */
3850 static off_t
rdbSavedObjectPages(robj
*o
, FILE *fp
) {
3851 off_t bytes
= rdbSavedObjectLen(o
,fp
);
3853 return (bytes
+(server
.vm_page_size
-1))/server
.vm_page_size
;
3856 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
3857 static int rdbSave(char *filename
) {
3858 dictIterator
*di
= NULL
;
3863 time_t now
= time(NULL
);
3865 /* Wait for I/O therads to terminate, just in case this is a
3866 * foreground-saving, to avoid seeking the swap file descriptor at the
3868 if (server
.vm_enabled
)
3869 waitEmptyIOJobsQueue();
3871 snprintf(tmpfile
,256,"temp-%d.rdb", (int) getpid());
3872 fp
= fopen(tmpfile
,"w");
3874 redisLog(REDIS_WARNING
, "Failed saving the DB: %s", strerror(errno
));
3877 if (fwrite("REDIS0001",9,1,fp
) == 0) goto werr
;
3878 for (j
= 0; j
< server
.dbnum
; j
++) {
3879 redisDb
*db
= server
.db
+j
;
3881 if (dictSize(d
) == 0) continue;
3882 di
= dictGetIterator(d
);
3888 /* Write the SELECT DB opcode */
3889 if (rdbSaveType(fp
,REDIS_SELECTDB
) == -1) goto werr
;
3890 if (rdbSaveLen(fp
,j
) == -1) goto werr
;
3892 /* Iterate this DB writing every entry */
3893 while((de
= dictNext(di
)) != NULL
) {
3894 sds keystr
= dictGetEntryKey(de
);
3895 robj key
, *o
= dictGetEntryVal(de
);
3898 initStaticStringObject(key
,keystr
);
3899 expiretime
= getExpire(db
,&key
);
3901 /* Save the expire time */
3902 if (expiretime
!= -1) {
3903 /* If this key is already expired skip it */
3904 if (expiretime
< now
) continue;
3905 if (rdbSaveType(fp
,REDIS_EXPIRETIME
) == -1) goto werr
;
3906 if (rdbSaveTime(fp
,expiretime
) == -1) goto werr
;
3908 /* Save the key and associated value. This requires special
3909 * handling if the value is swapped out. */
3910 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
3911 o
->storage
== REDIS_VM_SWAPPING
) {
3912 /* Save type, key, value */
3913 if (rdbSaveType(fp
,o
->type
) == -1) goto werr
;
3914 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3915 if (rdbSaveObject(fp
,o
) == -1) goto werr
;
3917 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
3919 /* Get a preview of the object in memory */
3920 po
= vmPreviewObject(o
);
3921 /* Save type, key, value */
3922 if (rdbSaveType(fp
,po
->type
) == -1) goto werr
;
3923 if (rdbSaveStringObject(fp
,&key
) == -1) goto werr
;
3924 if (rdbSaveObject(fp
,po
) == -1) goto werr
;
3925 /* Remove the loaded object from memory */
3929 dictReleaseIterator(di
);
3932 if (rdbSaveType(fp
,REDIS_EOF
) == -1) goto werr
;
3934 /* Make sure data will not remain on the OS's output buffers */
3939 /* Use RENAME to make sure the DB file is changed atomically only
3940 * if the generate DB file is ok. */
3941 if (rename(tmpfile
,filename
) == -1) {
3942 redisLog(REDIS_WARNING
,"Error moving temp DB file on the final destination: %s", strerror(errno
));
3946 redisLog(REDIS_NOTICE
,"DB saved on disk");
3948 server
.lastsave
= time(NULL
);
3954 redisLog(REDIS_WARNING
,"Write error saving DB on disk: %s", strerror(errno
));
3955 if (di
) dictReleaseIterator(di
);
3959 static int rdbSaveBackground(char *filename
) {
3962 if (server
.bgsavechildpid
!= -1) return REDIS_ERR
;
3963 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
3964 if ((childpid
= fork()) == 0) {
3966 if (server
.vm_enabled
) vmReopenSwapFile();
3968 if (rdbSave(filename
) == REDIS_OK
) {
3975 if (childpid
== -1) {
3976 redisLog(REDIS_WARNING
,"Can't save in background: fork: %s",
3980 redisLog(REDIS_NOTICE
,"Background saving started by pid %d",childpid
);
3981 server
.bgsavechildpid
= childpid
;
3982 updateDictResizePolicy();
3985 return REDIS_OK
; /* unreached */
/* Remove the temporary dump file "temp-<pid>.rdb" belonging to the
 * background-saving child identified by 'childpid'. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
/* Read a single type byte from 'fp'.
 * Returns the byte value (0-255), or -1 when nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,sizeof(byte),1,fp) == 1) ? (int)byte : -1;
}
/* Read a 4 byte timestamp (stored in host byte order) from 'fp'.
 * Returns it widened to time_t, or -1 on a short read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t raw;

    if (fread(&raw,sizeof(raw),1,fp) != 1) return -1;
    return (time_t) raw;
}
4007 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
4008 * of this file for a description of how this are stored on disk.
4010 * isencoded is set to 1 if the readed length is not actually a length but
4011 * an "encoding type", check the above comments for more info */
4012 static uint32_t rdbLoadLen(FILE *fp
, int *isencoded
) {
4013 unsigned char buf
[2];
4017 if (isencoded
) *isencoded
= 0;
4018 if (fread(buf
,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4019 type
= (buf
[0]&0xC0)>>6;
4020 if (type
== REDIS_RDB_6BITLEN
) {
4021 /* Read a 6 bit len */
4023 } else if (type
== REDIS_RDB_ENCVAL
) {
4024 /* Read a 6 bit len encoding type */
4025 if (isencoded
) *isencoded
= 1;
4027 } else if (type
== REDIS_RDB_14BITLEN
) {
4028 /* Read a 14 bit len */
4029 if (fread(buf
+1,1,1,fp
) == 0) return REDIS_RDB_LENERR
;
4030 return ((buf
[0]&0x3F)<<8)|buf
[1];
4032 /* Read a 32 bit len */
4033 if (fread(&len
,4,1,fp
) == 0) return REDIS_RDB_LENERR
;
4038 /* Load an integer-encoded object from file 'fp', with the specified
4039 * encoding type 'enctype'. If encode is true the function may return
4040 * an integer-encoded object as reply, otherwise the returned object
4041 * will always be encoded as a raw string. */
4042 static robj
*rdbLoadIntegerObject(FILE *fp
, int enctype
, int encode
) {
4043 unsigned char enc
[4];
4046 if (enctype
== REDIS_RDB_ENC_INT8
) {
4047 if (fread(enc
,1,1,fp
) == 0) return NULL
;
4048 val
= (signed char)enc
[0];
4049 } else if (enctype
== REDIS_RDB_ENC_INT16
) {
4051 if (fread(enc
,2,1,fp
) == 0) return NULL
;
4052 v
= enc
[0]|(enc
[1]<<8);
4054 } else if (enctype
== REDIS_RDB_ENC_INT32
) {
4056 if (fread(enc
,4,1,fp
) == 0) return NULL
;
4057 v
= enc
[0]|(enc
[1]<<8)|(enc
[2]<<16)|(enc
[3]<<24);
4060 val
= 0; /* anti-warning */
4061 redisPanic("Unknown RDB integer encoding type");
4064 return createStringObjectFromLongLong(val
);
4066 return createObject(REDIS_STRING
,sdsfromlonglong(val
));
4069 static robj
*rdbLoadLzfStringObject(FILE*fp
) {
4070 unsigned int len
, clen
;
4071 unsigned char *c
= NULL
;
4074 if ((clen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4075 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4076 if ((c
= zmalloc(clen
)) == NULL
) goto err
;
4077 if ((val
= sdsnewlen(NULL
,len
)) == NULL
) goto err
;
4078 if (fread(c
,clen
,1,fp
) == 0) goto err
;
4079 if (lzf_decompress(c
,clen
,val
,len
) == 0) goto err
;
4081 return createObject(REDIS_STRING
,val
);
4088 static robj
*rdbGenericLoadStringObject(FILE*fp
, int encode
) {
4093 len
= rdbLoadLen(fp
,&isencoded
);
4096 case REDIS_RDB_ENC_INT8
:
4097 case REDIS_RDB_ENC_INT16
:
4098 case REDIS_RDB_ENC_INT32
:
4099 return rdbLoadIntegerObject(fp
,len
,encode
);
4100 case REDIS_RDB_ENC_LZF
:
4101 return rdbLoadLzfStringObject(fp
);
4103 redisPanic("Unknown RDB encoding type");
4107 if (len
== REDIS_RDB_LENERR
) return NULL
;
4108 val
= sdsnewlen(NULL
,len
);
4109 if (len
&& fread(val
,len
,1,fp
) == 0) {
4113 return createObject(REDIS_STRING
,val
);
4116 static robj
*rdbLoadStringObject(FILE *fp
) {
4117 return rdbGenericLoadStringObject(fp
,0);
4120 static robj
*rdbLoadEncodedStringObject(FILE *fp
) {
4121 return rdbGenericLoadStringObject(fp
,1);
4124 /* For information about double serialization check rdbSaveDoubleValue() */
4125 static int rdbLoadDoubleValue(FILE *fp
, double *val
) {
4129 if (fread(&len
,1,1,fp
) == 0) return -1;
4131 case 255: *val
= R_NegInf
; return 0;
4132 case 254: *val
= R_PosInf
; return 0;
4133 case 253: *val
= R_Nan
; return 0;
4135 if (fread(buf
,len
,1,fp
) == 0) return -1;
4137 sscanf(buf
, "%lg", val
);
4142 /* Load a Redis object of the specified type from the specified file.
4143 * On success a newly allocated object is returned, otherwise NULL. */
4144 static robj
*rdbLoadObject(int type
, FILE *fp
) {
4145 robj
*o
, *ele
, *dec
;
4148 redisLog(REDIS_DEBUG
,"LOADING OBJECT %d (at %d)\n",type
,ftell(fp
));
4149 if (type
== REDIS_STRING
) {
4150 /* Read string value */
4151 if ((o
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4152 o
= tryObjectEncoding(o
);
4153 } else if (type
== REDIS_LIST
) {
4154 /* Read list value */
4155 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4157 o
= createZiplistObject();
4159 /* Load every single element of the list */
4161 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4163 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4164 dec
= getDecodedObject(ele
);
4165 o
->ptr
= ziplistPush(o
->ptr
,dec
->ptr
,sdslen(dec
->ptr
),REDIS_TAIL
);
4169 ele
= tryObjectEncoding(ele
);
4170 listAddNodeTail(o
->ptr
,ele
);
4174 } else if (type
== REDIS_SET
) {
4175 /* Read list/set value */
4176 if ((len
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4177 o
= createSetObject();
4178 /* It's faster to expand the dict to the right size asap in order
4179 * to avoid rehashing */
4180 if (len
> DICT_HT_INITIAL_SIZE
)
4181 dictExpand(o
->ptr
,len
);
4182 /* Load every single element of the list/set */
4184 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4185 ele
= tryObjectEncoding(ele
);
4186 dictAdd((dict
*)o
->ptr
,ele
,NULL
);
4188 } else if (type
== REDIS_ZSET
) {
4189 /* Read list/set value */
4193 if ((zsetlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4194 o
= createZsetObject();
4196 /* Load every single element of the list/set */
4199 double *score
= zmalloc(sizeof(double));
4201 if ((ele
= rdbLoadEncodedStringObject(fp
)) == NULL
) return NULL
;
4202 ele
= tryObjectEncoding(ele
);
4203 if (rdbLoadDoubleValue(fp
,score
) == -1) return NULL
;
4204 dictAdd(zs
->dict
,ele
,score
);
4205 zslInsert(zs
->zsl
,*score
,ele
);
4206 incrRefCount(ele
); /* added to skiplist */
4208 } else if (type
== REDIS_HASH
) {
4211 if ((hashlen
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
) return NULL
;
4212 o
= createHashObject();
4213 /* Too many entries? Use an hash table. */
4214 if (hashlen
> server
.hash_max_zipmap_entries
)
4215 convertToRealHash(o
);
4216 /* Load every key/value, then set it into the zipmap or hash
4217 * table, as needed. */
4221 if ((key
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4222 if ((val
= rdbLoadStringObject(fp
)) == NULL
) return NULL
;
4223 /* If we are using a zipmap and there are too big values
4224 * the object is converted to real hash table encoding. */
4225 if (o
->encoding
!= REDIS_ENCODING_HT
&&
4226 (sdslen(key
->ptr
) > server
.hash_max_zipmap_value
||
4227 sdslen(val
->ptr
) > server
.hash_max_zipmap_value
))
4229 convertToRealHash(o
);
4232 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
4233 unsigned char *zm
= o
->ptr
;
4235 zm
= zipmapSet(zm
,key
->ptr
,sdslen(key
->ptr
),
4236 val
->ptr
,sdslen(val
->ptr
),NULL
);
4241 key
= tryObjectEncoding(key
);
4242 val
= tryObjectEncoding(val
);
4243 dictAdd((dict
*)o
->ptr
,key
,val
);
4247 redisPanic("Unknown object type");
4252 static int rdbLoad(char *filename
) {
4255 int type
, retval
, rdbver
;
4256 int swap_all_values
= 0;
4257 redisDb
*db
= server
.db
+0;
4259 time_t expiretime
, now
= time(NULL
);
4261 fp
= fopen(filename
,"r");
4262 if (!fp
) return REDIS_ERR
;
4263 if (fread(buf
,9,1,fp
) == 0) goto eoferr
;
4265 if (memcmp(buf
,"REDIS",5) != 0) {
4267 redisLog(REDIS_WARNING
,"Wrong signature trying to load DB from file");
4270 rdbver
= atoi(buf
+5);
4273 redisLog(REDIS_WARNING
,"Can't handle RDB format version %d",rdbver
);
4282 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4283 if (type
== REDIS_EXPIRETIME
) {
4284 if ((expiretime
= rdbLoadTime(fp
)) == -1) goto eoferr
;
4285 /* We read the time so we need to read the object type again */
4286 if ((type
= rdbLoadType(fp
)) == -1) goto eoferr
;
4288 if (type
== REDIS_EOF
) break;
4289 /* Handle SELECT DB opcode as a special case */
4290 if (type
== REDIS_SELECTDB
) {
4291 if ((dbid
= rdbLoadLen(fp
,NULL
)) == REDIS_RDB_LENERR
)
4293 if (dbid
>= (unsigned)server
.dbnum
) {
4294 redisLog(REDIS_WARNING
,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server
.dbnum
);
4297 db
= server
.db
+dbid
;
4301 if ((key
= rdbLoadStringObject(fp
)) == NULL
) goto eoferr
;
4303 if ((val
= rdbLoadObject(type
,fp
)) == NULL
) goto eoferr
;
4304 /* Check if the key already expired */
4305 if (expiretime
!= -1 && expiretime
< now
) {
4310 /* Add the new object in the hash table */
4311 retval
= dbAdd(db
,key
,val
);
4312 if (retval
== REDIS_ERR
) {
4313 redisLog(REDIS_WARNING
,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key
->ptr
);
4316 /* Set the expire time if needed */
4317 if (expiretime
!= -1) setExpire(db
,key
,expiretime
);
4319 /* Handle swapping while loading big datasets when VM is on */
4321 /* If we detecter we are hopeless about fitting something in memory
4322 * we just swap every new key on disk. Directly...
4323 * Note that's important to check for this condition before resorting
4324 * to random sampling, otherwise we may try to swap already
4326 if (swap_all_values
) {
4327 dictEntry
*de
= dictFind(db
->dict
,key
->ptr
);
4329 /* de may be NULL since the key already expired */
4332 val
= dictGetEntryVal(de
);
4334 if (val
->refcount
== 1 &&
4335 (vp
= vmSwapObjectBlocking(val
)) != NULL
)
4336 dictGetEntryVal(de
) = vp
;
4343 /* Flush data on disk once 32 MB of additional RAM are used... */
4345 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
4348 /* If we have still some hope of having some value fitting memory
4349 * then we try random sampling. */
4350 if (!swap_all_values
&& server
.vm_enabled
&& force_swapout
) {
4351 while (zmalloc_used_memory() > server
.vm_max_memory
) {
4352 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
4354 if (zmalloc_used_memory() > server
.vm_max_memory
)
4355 swap_all_values
= 1; /* We are already using too much mem */
4361 eoferr
: /* unexpected end of file is handled here with a fatal exit */
4362 redisLog(REDIS_WARNING
,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
4364 return REDIS_ERR
; /* Just to avoid warning */
4367 /*================================== Shutdown =============================== */
4368 static int prepareForShutdown() {
4369 redisLog(REDIS_WARNING
,"User requested shutdown, saving DB...");
4370 /* Kill the saving child if there is a background saving in progress.
4371 We want to avoid race conditions, for instance our saving child may
4372 overwrite the synchronous saving did by SHUTDOWN. */
4373 if (server
.bgsavechildpid
!= -1) {
4374 redisLog(REDIS_WARNING
,"There is a live saving child. Killing it!");
4375 kill(server
.bgsavechildpid
,SIGKILL
);
4376 rdbRemoveTempFile(server
.bgsavechildpid
);
4378 if (server
.appendonly
) {
4379 /* Append only file: fsync() the AOF and exit */
4380 aof_fsync(server
.appendfd
);
4381 if (server
.vm_enabled
) unlink(server
.vm_swap_file
);
4383 /* Snapshotting. Perform a SYNC SAVE and exit */
4384 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4385 if (server
.daemonize
)
4386 unlink(server
.pidfile
);
4387 redisLog(REDIS_WARNING
,"%zu bytes used at exit",zmalloc_used_memory());
4389 /* Ooops.. error saving! The best we can do is to continue
4390 * operating. Note that if there was a background saving process,
4391 * in the next cron() Redis will be notified that the background
4392 * saving aborted, handling special stuff like slaves pending for
4393 * synchronization... */
4394 redisLog(REDIS_WARNING
,"Error trying to save the DB, can't exit");
4398 redisLog(REDIS_WARNING
,"Server exit now, bye bye...");
4402 /*================================== Commands =============================== */
4404 static void authCommand(redisClient
*c
) {
4405 if (!server
.requirepass
|| !strcmp(c
->argv
[1]->ptr
, server
.requirepass
)) {
4406 c
->authenticated
= 1;
4407 addReply(c
,shared
.ok
);
4409 c
->authenticated
= 0;
4410 addReplySds(c
,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
4414 static void pingCommand(redisClient
*c
) {
4415 addReply(c
,shared
.pong
);
4418 static void echoCommand(redisClient
*c
) {
4419 addReplyBulk(c
,c
->argv
[1]);
4422 /*=================================== Strings =============================== */
4424 static void setGenericCommand(redisClient
*c
, int nx
, robj
*key
, robj
*val
, robj
*expire
) {
4426 long seconds
= 0; /* initialized to avoid an harmness warning */
4429 if (getLongFromObjectOrReply(c
, expire
, &seconds
, NULL
) != REDIS_OK
)
4432 addReplySds(c
,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4437 touchWatchedKey(c
->db
,key
);
4438 if (nx
) deleteIfVolatile(c
->db
,key
);
4439 retval
= dbAdd(c
->db
,key
,val
);
4440 if (retval
== REDIS_ERR
) {
4442 dbReplace(c
->db
,key
,val
);
4445 addReply(c
,shared
.czero
);
4452 removeExpire(c
->db
,key
);
4453 if (expire
) setExpire(c
->db
,key
,time(NULL
)+seconds
);
4454 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4457 static void setCommand(redisClient
*c
) {
4458 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[2],NULL
);
4461 static void setnxCommand(redisClient
*c
) {
4462 setGenericCommand(c
,1,c
->argv
[1],c
->argv
[2],NULL
);
4465 static void setexCommand(redisClient
*c
) {
4466 setGenericCommand(c
,0,c
->argv
[1],c
->argv
[3],c
->argv
[2]);
4469 static int getGenericCommand(redisClient
*c
) {
4472 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
)
4475 if (o
->type
!= REDIS_STRING
) {
4476 addReply(c
,shared
.wrongtypeerr
);
4484 static void getCommand(redisClient
*c
) {
4485 getGenericCommand(c
);
4488 static void getsetCommand(redisClient
*c
) {
4489 if (getGenericCommand(c
) == REDIS_ERR
) return;
4490 dbReplace(c
->db
,c
->argv
[1],c
->argv
[2]);
4491 incrRefCount(c
->argv
[2]);
4493 removeExpire(c
->db
,c
->argv
[1]);
4496 static void mgetCommand(redisClient
*c
) {
4499 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-1));
4500 for (j
= 1; j
< c
->argc
; j
++) {
4501 robj
*o
= lookupKeyRead(c
->db
,c
->argv
[j
]);
4503 addReply(c
,shared
.nullbulk
);
4505 if (o
->type
!= REDIS_STRING
) {
4506 addReply(c
,shared
.nullbulk
);
4514 static void msetGenericCommand(redisClient
*c
, int nx
) {
4515 int j
, busykeys
= 0;
4517 if ((c
->argc
% 2) == 0) {
4518 addReplySds(c
,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
4521 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4522 * set nothing at all if at least one already key exists. */
4524 for (j
= 1; j
< c
->argc
; j
+= 2) {
4525 if (lookupKeyWrite(c
->db
,c
->argv
[j
]) != NULL
) {
4531 addReply(c
, shared
.czero
);
4535 for (j
= 1; j
< c
->argc
; j
+= 2) {
4536 c
->argv
[j
+1] = tryObjectEncoding(c
->argv
[j
+1]);
4537 dbReplace(c
->db
,c
->argv
[j
],c
->argv
[j
+1]);
4538 incrRefCount(c
->argv
[j
+1]);
4539 removeExpire(c
->db
,c
->argv
[j
]);
4541 server
.dirty
+= (c
->argc
-1)/2;
4542 addReply(c
, nx
? shared
.cone
: shared
.ok
);
4545 static void msetCommand(redisClient
*c
) {
4546 msetGenericCommand(c
,0);
4549 static void msetnxCommand(redisClient
*c
) {
4550 msetGenericCommand(c
,1);
4553 static void incrDecrCommand(redisClient
*c
, long long incr
) {
4557 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4558 if (o
!= NULL
&& checkType(c
,o
,REDIS_STRING
)) return;
4559 if (getLongLongFromObjectOrReply(c
,o
,&value
,NULL
) != REDIS_OK
) return;
4562 o
= createStringObjectFromLongLong(value
);
4563 dbReplace(c
->db
,c
->argv
[1],o
);
4565 addReply(c
,shared
.colon
);
4567 addReply(c
,shared
.crlf
);
4570 static void incrCommand(redisClient
*c
) {
4571 incrDecrCommand(c
,1);
4574 static void decrCommand(redisClient
*c
) {
4575 incrDecrCommand(c
,-1);
4578 static void incrbyCommand(redisClient
*c
) {
4581 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4582 incrDecrCommand(c
,incr
);
4585 static void decrbyCommand(redisClient
*c
) {
4588 if (getLongLongFromObjectOrReply(c
, c
->argv
[2], &incr
, NULL
) != REDIS_OK
) return;
4589 incrDecrCommand(c
,-incr
);
4592 static void appendCommand(redisClient
*c
) {
4597 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4599 /* Create the key */
4600 retval
= dbAdd(c
->db
,c
->argv
[1],c
->argv
[2]);
4601 incrRefCount(c
->argv
[2]);
4602 totlen
= stringObjectLen(c
->argv
[2]);
4604 if (o
->type
!= REDIS_STRING
) {
4605 addReply(c
,shared
.wrongtypeerr
);
4608 /* If the object is specially encoded or shared we have to make
4610 if (o
->refcount
!= 1 || o
->encoding
!= REDIS_ENCODING_RAW
) {
4611 robj
*decoded
= getDecodedObject(o
);
4613 o
= createStringObject(decoded
->ptr
, sdslen(decoded
->ptr
));
4614 decrRefCount(decoded
);
4615 dbReplace(c
->db
,c
->argv
[1],o
);
4618 if (c
->argv
[2]->encoding
== REDIS_ENCODING_RAW
) {
4619 o
->ptr
= sdscatlen(o
->ptr
,
4620 c
->argv
[2]->ptr
, sdslen(c
->argv
[2]->ptr
));
4622 o
->ptr
= sdscatprintf(o
->ptr
, "%ld",
4623 (unsigned long) c
->argv
[2]->ptr
);
4625 totlen
= sdslen(o
->ptr
);
4628 addReplySds(c
,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen
));
4631 static void substrCommand(redisClient
*c
) {
4633 long start
= atoi(c
->argv
[2]->ptr
);
4634 long end
= atoi(c
->argv
[3]->ptr
);
4635 size_t rangelen
, strlen
;
4638 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
4639 checkType(c
,o
,REDIS_STRING
)) return;
4641 o
= getDecodedObject(o
);
4642 strlen
= sdslen(o
->ptr
);
4644 /* convert negative indexes */
4645 if (start
< 0) start
= strlen
+start
;
4646 if (end
< 0) end
= strlen
+end
;
4647 if (start
< 0) start
= 0;
4648 if (end
< 0) end
= 0;
4650 /* indexes sanity checks */
4651 if (start
> end
|| (size_t)start
>= strlen
) {
4652 /* Out of range start or start > end result in null reply */
4653 addReply(c
,shared
.nullbulk
);
4657 if ((size_t)end
>= strlen
) end
= strlen
-1;
4658 rangelen
= (end
-start
)+1;
4660 /* Return the result */
4661 addReplySds(c
,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen
));
4662 range
= sdsnewlen((char*)o
->ptr
+start
,rangelen
);
4663 addReplySds(c
,range
);
4664 addReply(c
,shared
.crlf
);
4668 /* ========================= Type agnostic commands ========================= */
4670 static void delCommand(redisClient
*c
) {
4673 for (j
= 1; j
< c
->argc
; j
++) {
4674 if (dbDelete(c
->db
,c
->argv
[j
])) {
4675 touchWatchedKey(c
->db
,c
->argv
[j
]);
4680 addReplyLongLong(c
,deleted
);
4683 static void existsCommand(redisClient
*c
) {
4684 expireIfNeeded(c
->db
,c
->argv
[1]);
4685 if (dbExists(c
->db
,c
->argv
[1])) {
4686 addReply(c
, shared
.cone
);
4688 addReply(c
, shared
.czero
);
4692 static void selectCommand(redisClient
*c
) {
4693 int id
= atoi(c
->argv
[1]->ptr
);
4695 if (selectDb(c
,id
) == REDIS_ERR
) {
4696 addReplySds(c
,sdsnew("-ERR invalid DB index\r\n"));
4698 addReply(c
,shared
.ok
);
4702 static void randomkeyCommand(redisClient
*c
) {
4705 if ((key
= dbRandomKey(c
->db
)) == NULL
) {
4706 addReply(c
,shared
.nullbulk
);
4710 addReplyBulk(c
,key
);
4714 static void keysCommand(redisClient
*c
) {
4717 sds pattern
= c
->argv
[1]->ptr
;
4718 int plen
= sdslen(pattern
);
4719 unsigned long numkeys
= 0;
4720 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
4722 di
= dictGetIterator(c
->db
->dict
);
4724 decrRefCount(lenobj
);
4725 while((de
= dictNext(di
)) != NULL
) {
4726 sds key
= dictGetEntryKey(de
);
4729 if ((pattern
[0] == '*' && pattern
[1] == '\0') ||
4730 stringmatchlen(pattern
,plen
,key
,sdslen(key
),0)) {
4731 keyobj
= createStringObject(key
,sdslen(key
));
4732 if (expireIfNeeded(c
->db
,keyobj
) == 0) {
4733 addReplyBulk(c
,keyobj
);
4736 decrRefCount(keyobj
);
4739 dictReleaseIterator(di
);
4740 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",numkeys
);
4743 static void dbsizeCommand(redisClient
*c
) {
4745 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c
->db
->dict
)));
4748 static void lastsaveCommand(redisClient
*c
) {
4750 sdscatprintf(sdsempty(),":%lu\r\n",server
.lastsave
));
4753 static void typeCommand(redisClient
*c
) {
4757 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
4762 case REDIS_STRING
: type
= "+string"; break;
4763 case REDIS_LIST
: type
= "+list"; break;
4764 case REDIS_SET
: type
= "+set"; break;
4765 case REDIS_ZSET
: type
= "+zset"; break;
4766 case REDIS_HASH
: type
= "+hash"; break;
4767 default: type
= "+unknown"; break;
4770 addReplySds(c
,sdsnew(type
));
4771 addReply(c
,shared
.crlf
);
4774 static void saveCommand(redisClient
*c
) {
4775 if (server
.bgsavechildpid
!= -1) {
4776 addReplySds(c
,sdsnew("-ERR background save in progress\r\n"));
4779 if (rdbSave(server
.dbfilename
) == REDIS_OK
) {
4780 addReply(c
,shared
.ok
);
4782 addReply(c
,shared
.err
);
4786 static void bgsaveCommand(redisClient
*c
) {
4787 if (server
.bgsavechildpid
!= -1) {
4788 addReplySds(c
,sdsnew("-ERR background save already in progress\r\n"));
4791 if (rdbSaveBackground(server
.dbfilename
) == REDIS_OK
) {
4792 char *status
= "+Background saving started\r\n";
4793 addReplySds(c
,sdsnew(status
));
4795 addReply(c
,shared
.err
);
4799 static void shutdownCommand(redisClient
*c
) {
4800 if (prepareForShutdown() == REDIS_OK
)
4802 addReplySds(c
, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
4805 static void renameGenericCommand(redisClient
*c
, int nx
) {
4808 /* To use the same key as src and dst is probably an error */
4809 if (sdscmp(c
->argv
[1]->ptr
,c
->argv
[2]->ptr
) == 0) {
4810 addReply(c
,shared
.sameobjecterr
);
4814 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
)) == NULL
)
4818 deleteIfVolatile(c
->db
,c
->argv
[2]);
4819 if (dbAdd(c
->db
,c
->argv
[2],o
) == REDIS_ERR
) {
4822 addReply(c
,shared
.czero
);
4825 dbReplace(c
->db
,c
->argv
[2],o
);
4827 dbDelete(c
->db
,c
->argv
[1]);
4828 touchWatchedKey(c
->db
,c
->argv
[2]);
4830 addReply(c
,nx
? shared
.cone
: shared
.ok
);
4833 static void renameCommand(redisClient
*c
) {
4834 renameGenericCommand(c
,0);
4837 static void renamenxCommand(redisClient
*c
) {
4838 renameGenericCommand(c
,1);
4841 static void moveCommand(redisClient
*c
) {
4846 /* Obtain source and target DB pointers */
4849 if (selectDb(c
,atoi(c
->argv
[2]->ptr
)) == REDIS_ERR
) {
4850 addReply(c
,shared
.outofrangeerr
);
4854 selectDb(c
,srcid
); /* Back to the source DB */
4856 /* If the user is moving using as target the same
4857 * DB as the source DB it is probably an error. */
4859 addReply(c
,shared
.sameobjecterr
);
4863 /* Check if the element exists and get a reference */
4864 o
= lookupKeyWrite(c
->db
,c
->argv
[1]);
4866 addReply(c
,shared
.czero
);
4870 /* Try to add the element to the target DB */
4871 deleteIfVolatile(dst
,c
->argv
[1]);
4872 if (dbAdd(dst
,c
->argv
[1],o
) == REDIS_ERR
) {
4873 addReply(c
,shared
.czero
);
4878 /* OK! key moved, free the entry in the source DB */
4879 dbDelete(src
,c
->argv
[1]);
4881 addReply(c
,shared
.cone
);
4884 /* =================================== Lists ================================ */
4885 static void lPush(robj
*subject
, robj
*value
, int where
) {
4886 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4887 int pos
= (where
== REDIS_HEAD
) ? ZIPLIST_HEAD
: ZIPLIST_TAIL
;
4888 value
= getDecodedObject(value
);
4889 subject
->ptr
= ziplistPush(subject
->ptr
,value
->ptr
,sdslen(value
->ptr
),pos
);
4890 decrRefCount(value
);
4891 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4892 if (where
== REDIS_HEAD
) {
4893 listAddNodeHead(subject
->ptr
,value
);
4895 listAddNodeTail(subject
->ptr
,value
);
4897 incrRefCount(value
);
4899 redisPanic("Unknown list encoding");
4903 static robj
*lPop(robj
*subject
, int where
) {
4905 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4907 unsigned char *vstr
;
4910 int pos
= (where
== REDIS_HEAD
) ? 0 : -1;
4911 p
= ziplistIndex(subject
->ptr
,pos
);
4912 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
4914 value
= createStringObject((char*)vstr
,vlen
);
4916 value
= createStringObjectFromLongLong(vlong
);
4918 /* We only need to delete an element when it exists */
4919 subject
->ptr
= ziplistDelete(subject
->ptr
,&p
);
4921 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4922 list
*list
= subject
->ptr
;
4924 if (where
== REDIS_HEAD
) {
4925 ln
= listFirst(list
);
4927 ln
= listLast(list
);
4930 value
= listNodeValue(ln
);
4931 incrRefCount(value
);
4932 listDelNode(list
,ln
);
4935 redisPanic("Unknown list encoding");
4940 static unsigned long lLength(robj
*subject
) {
4941 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4942 return ziplistLen(subject
->ptr
);
4943 } else if (subject
->encoding
== REDIS_ENCODING_LIST
) {
4944 return listLength((list
*)subject
->ptr
);
4946 redisPanic("Unknown list encoding");
4950 /* Structure to hold set iteration abstraction. */
4953 unsigned char encoding
;
4954 unsigned char direction
; /* Iteration direction */
4959 /* Structure for an entry while iterating over a list. */
4962 unsigned char *zi
; /* Entry in ziplist */
4963 listNode
*ln
; /* Entry in linked list */
4966 /* Initialize an iterator at the specified index. */
4967 static lIterator
*lInitIterator(robj
*subject
, int index
, unsigned char direction
) {
4968 lIterator
*li
= zmalloc(sizeof(lIterator
));
4969 li
->subject
= subject
;
4970 li
->encoding
= subject
->encoding
;
4971 li
->direction
= direction
;
4972 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4973 li
->zi
= ziplistIndex(subject
->ptr
,index
);
4974 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
4975 li
->ln
= listIndex(subject
->ptr
,index
);
4977 redisPanic("Unknown list encoding");
4982 /* Clean up the iterator. */
4983 static void lReleaseIterator(lIterator
*li
) {
4987 /* Stores pointer to current the entry in the provided entry structure
4988 * and advances the position of the iterator. Returns 1 when the current
4989 * entry is in fact an entry, 0 otherwise. */
4990 static int lNext(lIterator
*li
, lEntry
*entry
) {
4992 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
4994 if (entry
->zi
!= NULL
) {
4995 if (li
->direction
== REDIS_TAIL
)
4996 li
->zi
= ziplistNext(li
->subject
->ptr
,li
->zi
);
4998 li
->zi
= ziplistPrev(li
->subject
->ptr
,li
->zi
);
5001 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5003 if (entry
->ln
!= NULL
) {
5004 if (li
->direction
== REDIS_TAIL
)
5005 li
->ln
= li
->ln
->next
;
5007 li
->ln
= li
->ln
->prev
;
5011 redisPanic("Unknown list encoding");
5016 /* Return entry or NULL at the current position of the iterator. */
5017 static robj
*lGet(lEntry
*entry
) {
5018 lIterator
*li
= entry
->li
;
5020 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5021 unsigned char *vstr
;
5024 redisAssert(entry
->zi
!= NULL
);
5025 if (ziplistGet(entry
->zi
,&vstr
,&vlen
,&vlong
)) {
5027 value
= createStringObject((char*)vstr
,vlen
);
5029 value
= createStringObjectFromLongLong(vlong
);
5032 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5033 redisAssert(entry
->ln
!= NULL
);
5034 value
= listNodeValue(entry
->ln
);
5035 incrRefCount(value
);
5037 redisPanic("Unknown list encoding");
5042 /* Compare the given object with the entry at the current position. */
5043 static int lEqual(lEntry
*entry
, robj
*o
) {
5044 lIterator
*li
= entry
->li
;
5045 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5046 redisAssert(o
->encoding
== REDIS_ENCODING_RAW
);
5047 return ziplistCompare(entry
->zi
,o
->ptr
,sdslen(o
->ptr
));
5048 } else if (li
->encoding
== REDIS_ENCODING_LIST
) {
5049 return equalStringObjects(o
,listNodeValue(entry
->ln
));
5051 redisPanic("Unknown list encoding");
5055 /* Delete the element pointed to. */
5056 static void lDelete(lEntry
*entry
) {
5057 lIterator
*li
= entry
->li
;
5058 if (li
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5059 unsigned char *p
= entry
->zi
;
5060 li
->subject
->ptr
= ziplistDelete(li
->subject
->ptr
,&p
);
5062 /* Update position of the iterator depending on the direction */
5063 if (li
->direction
== REDIS_TAIL
)
5066 li
->zi
= ziplistPrev(li
->subject
->ptr
,p
);
5067 } else if (entry
->li
->encoding
== REDIS_ENCODING_LIST
) {
5069 if (li
->direction
== REDIS_TAIL
)
5070 next
= entry
->ln
->next
;
5072 next
= entry
->ln
->prev
;
5073 listDelNode(li
->subject
->ptr
,entry
->ln
);
5076 redisPanic("Unknown list encoding");
5080 static void pushGenericCommand(redisClient
*c
, int where
) {
5081 robj
*lobj
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5083 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5084 addReply(c
,shared
.cone
);
5087 lobj
= createZiplistObject();
5088 dbAdd(c
->db
,c
->argv
[1],lobj
);
5090 if (lobj
->type
!= REDIS_LIST
) {
5091 addReply(c
,shared
.wrongtypeerr
);
5094 if (handleClientsWaitingListPush(c
,c
->argv
[1],c
->argv
[2])) {
5095 addReply(c
,shared
.cone
);
5099 lPush(lobj
,c
->argv
[2],where
);
5100 addReplyLongLong(c
,lLength(lobj
));
5104 static void lpushCommand(redisClient
*c
) {
5105 pushGenericCommand(c
,REDIS_HEAD
);
5108 static void rpushCommand(redisClient
*c
) {
5109 pushGenericCommand(c
,REDIS_TAIL
);
5112 static void llenCommand(redisClient
*c
) {
5113 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
);
5114 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5115 addReplyUlong(c
,lLength(o
));
5118 static void lindexCommand(redisClient
*c
) {
5119 robj
*o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5120 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5121 int index
= atoi(c
->argv
[2]->ptr
);
5124 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5126 unsigned char *vstr
;
5129 p
= ziplistIndex(o
->ptr
,index
);
5130 if (ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
5132 value
= createStringObject((char*)vstr
,vlen
);
5134 value
= createStringObjectFromLongLong(vlong
);
5136 addReplyBulk(c
,value
);
5137 decrRefCount(value
);
5139 addReply(c
,shared
.nullbulk
);
5141 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5142 listNode
*ln
= listIndex(o
->ptr
,index
);
5144 value
= listNodeValue(ln
);
5145 addReplyBulk(c
,value
);
5147 addReply(c
,shared
.nullbulk
);
5150 redisPanic("Unknown list encoding");
5154 static void lsetCommand(redisClient
*c
) {
5155 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nokeyerr
);
5156 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5157 int index
= atoi(c
->argv
[2]->ptr
);
5158 robj
*value
= c
->argv
[3];
5160 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
5161 unsigned char *p
, *zl
= o
->ptr
;
5162 p
= ziplistIndex(zl
,index
);
5164 addReply(c
,shared
.outofrangeerr
);
5166 o
->ptr
= ziplistDelete(o
->ptr
,&p
);
5167 value
= getDecodedObject(value
);
5168 o
->ptr
= ziplistInsert(o
->ptr
,p
,value
->ptr
,sdslen(value
->ptr
));
5169 decrRefCount(value
);
5170 addReply(c
,shared
.ok
);
5173 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
5174 listNode
*ln
= listIndex(o
->ptr
,index
);
5176 addReply(c
,shared
.outofrangeerr
);
5178 decrRefCount((robj
*)listNodeValue(ln
));
5179 listNodeValue(ln
) = value
;
5180 incrRefCount(value
);
5181 addReply(c
,shared
.ok
);
5185 redisPanic("Unknown list encoding");
5189 static void popGenericCommand(redisClient
*c
, int where
) {
5190 robj
*o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
);
5191 if (o
== NULL
|| checkType(c
,o
,REDIS_LIST
)) return;
5193 robj
*value
= lPop(o
,where
);
5194 if (value
== NULL
) {
5195 addReply(c
,shared
.nullbulk
);
5197 addReplyBulk(c
,value
);
5198 decrRefCount(value
);
5199 if (lLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5204 static void lpopCommand(redisClient
*c
) {
5205 popGenericCommand(c
,REDIS_HEAD
);
5208 static void rpopCommand(redisClient
*c
) {
5209 popGenericCommand(c
,REDIS_TAIL
);
/* LRANGE command: reply with the elements of the list stored at argv[1]
 * between the indexes argv[2] (start) and argv[3] (end), both inclusive.
 *
 * Negative indexes are offsets from the end of the list; indexes that are
 * still negative afterwards are clamped to 0 and 'end' is clamped to the last
 * element. The reply is a multi-bulk of 'rangelen' bulk items, or
 * shared.emptymultibulk when the key is missing or the range is empty. */
5212 static void lrangeCommand(redisClient
*c
) {
/* NOTE(review): atoi() performs no validation, non-numeric input reads as 0. */
5214 int start
= atoi(c
->argv
[2]->ptr
);
5215 int end
= atoi(c
->argv
[3]->ptr
);
5220 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
5221 || checkType(c
,o
,REDIS_LIST
)) return;
/* NOTE(review): 'llen' is assigned on a line not visible in this chunk
 * (presumably the list length) - confirm. */
5224 /* convert negative indexes */
5225 if (start
< 0) start
= llen
+start
;
5226 if (end
< 0) end
= llen
+end
;
5227 if (start
< 0) start
= 0;
5228 if (end
< 0) end
= 0;
5230 /* indexes sanity checks */
5231 if (start
> end
|| start
>= llen
) {
5232 /* Out of range start or start > end result in empty list */
5233 addReply(c
,shared
.emptymultibulk
);
5236 if (end
>= llen
) end
= llen
-1;
5237 rangelen
= (end
-start
)+1;
5239 /* Return the result in form of a multi-bulk reply */
5240 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",rangelen
));
5241 lIterator
*li
= lInitIterator(o
,start
,REDIS_TAIL
);
5242 for (j
= 0; j
< rangelen
; j
++) {
/* NOTE(review): the iterator is advanced inside redisAssert(); this relies
 * on redisAssert() always evaluating its argument - confirm. */
5243 redisAssert(lNext(li
,&entry
));
/* Each value obtained from lGet() is released after being queued. */
5244 value
= lGet(&entry
);
5245 addReplyBulk(c
,value
);
5246 decrRefCount(value
);
5248 lReleaseIterator(li
);
/* LTRIM command: trim the list stored at argv[1] so that only the elements
 * between the indexes argv[2] (start) and argv[3] (end) remain.
 *
 * 'ltrim' elements are removed from the head and 'rtrim' from the tail.
 * The key is deleted when the list ends up empty, and the reply is
 * shared.ok (which is also the fallback reply of the key lookup).
 *
 * NOTE(review): the assignments of 'llen', 'ltrim' and 'rtrim' are not
 * visible in this chunk. */
5251 static void ltrimCommand(redisClient
*c
) {
/* NOTE(review): atoi() performs no validation, non-numeric input reads as 0. */
5253 int start
= atoi(c
->argv
[2]->ptr
);
5254 int end
= atoi(c
->argv
[3]->ptr
);
5256 int j
, ltrim
, rtrim
;
5260 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.ok
)) == NULL
||
5261 checkType(c
,o
,REDIS_LIST
)) return;
5264 /* convert negative indexes */
5265 if (start
< 0) start
= llen
+start
;
5266 if (end
< 0) end
= llen
+end
;
5267 if (start
< 0) start
= 0;
5268 if (end
< 0) end
= 0;
5270 /* indexes sanity checks */
5271 if (start
> end
|| start
>= llen
) {
5272 /* Out of range start or start > end result in empty list */
5276 if (end
>= llen
) end
= llen
-1;
5281 /* Remove list elements to perform the trim */
5282 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
/* Ziplist: one range delete starting at index 0 for the head part, one
 * starting at index -rtrim for the tail part. */
5283 o
->ptr
= ziplistDeleteRange(o
->ptr
,0,ltrim
);
5284 o
->ptr
= ziplistDeleteRange(o
->ptr
,-rtrim
,rtrim
);
5285 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
/* Linked list: unlink nodes one at a time from each end. */
5287 for (j
= 0; j
< ltrim
; j
++) {
5288 ln
= listFirst(list
);
5289 listDelNode(list
,ln
);
5291 for (j
= 0; j
< rtrim
; j
++) {
5292 ln
= listLast(list
);
5293 listDelNode(list
,ln
);
5296 redisPanic("Unknown list encoding");
5298 if (lLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5300 addReply(c
,shared
.ok
);
/* LREM command: remove elements equal to argv[3] from the list stored at
 * argv[1]. argv[2] is the count ('toremove'): when it is non-zero the scan
 * stops as soon as that many elements have been removed.
 *
 * Two iterator setups are visible: from index -1 towards the head (after
 * negating 'toremove') and from index 0 towards the tail.
 * NOTE(review): the condition choosing between them is not visible here
 * (presumably the sign of 'toremove').
 *
 * Replies with the number of removed elements as an integer reply and
 * deletes the key when the list becomes empty. */
5303 static void lremCommand(redisClient
*c
) {
5304 robj
*subject
, *obj
= c
->argv
[3];
/* NOTE(review): atoi() performs no validation, non-numeric input reads as 0. */
5305 int toremove
= atoi(c
->argv
[2]->ptr
);
5309 subject
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
);
5310 if (subject
== NULL
|| checkType(c
,subject
,REDIS_LIST
)) return;
5312 /* Make sure obj is raw when we're dealing with a ziplist */
5313 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5314 obj
= getDecodedObject(obj
);
5318 toremove
= -toremove
;
5319 li
= lInitIterator(subject
,-1,REDIS_HEAD
);
5321 li
= lInitIterator(subject
,0,REDIS_TAIL
);
5324 while (lNext(li
,&entry
)) {
5325 if (lEqual(&entry
,obj
)) {
/* A zero 'toremove' never triggers the break, so the whole list is scanned. */
5329 if (toremove
&& removed
== toremove
) break;
5332 lReleaseIterator(li
);
5334 /* Clean up raw encoded object */
5335 if (subject
->encoding
== REDIS_ENCODING_ZIPLIST
)
5338 if (lLength(subject
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5339 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",removed
));
5342 /* These are the semantics of this command:
5343 * RPOPLPUSH srclist dstlist:
5344 * IF LLEN(srclist) > 0
5345 * element = RPOP srclist
5346 * LPUSH dstlist element
5353 * The idea is to be able to get an element from a list in a reliable way
5354 * since the element is not just returned but pushed against another list
5355 * as well. This command was originally proposed by Ezra Zygmuntowicz.
/* RPOPLPUSH command: pop the tail element of the list at argv[1] (source)
 * and push it on the head of the list at argv[2] (destination), replying
 * with the moved element as a bulk reply.
 *
 * shared.nullbulk is the reply when the source key is missing or the source
 * list is empty. The element is handed to handleClientsWaitingListPush()
 * first and only pushed on the destination when that call returns 0. */
5357 static void rpoplpushcommand(redisClient
*c
) {
5359 if ((sobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5360 checkType(c
,sobj
,REDIS_LIST
)) return;
5362 if (lLength(sobj
) == 0) {
5363 addReply(c
,shared
.nullbulk
);
/* The destination type is validated before anything is popped, so a wrong
 * type leaves the source list untouched. */
5365 robj
*dobj
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5366 if (dobj
&& checkType(c
,dobj
,REDIS_LIST
)) return;
5367 value
= lPop(sobj
,REDIS_TAIL
);
5369 /* Add the element to the target list (unless it's directly
5370 * passed to some BLPOP-ing client */
5371 if (!handleClientsWaitingListPush(c
,c
->argv
[2],value
)) {
5372 /* Create the list if the key does not exist */
5374 dobj
= createZiplistObject();
5375 dbAdd(c
->db
,c
->argv
[2],dobj
);
5377 lPush(dobj
,value
,REDIS_HEAD
);
5380 /* Send the element to the client as reply as well */
5381 addReplyBulk(c
,value
);
5383 /* lPop returns an object with its refcount incremented */
5384 decrRefCount(value
);
5386 /* Delete the source list when it is empty */
5387 if (lLength(sobj
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5392 /* ==================================== Sets ================================ */
/* SADD command: add the member argv[2] to the set stored at argv[1].
 *
 * A new set object is created and registered under the key on a path whose
 * guard is not visible in this chunk (presumably when the lookup returns
 * NULL). A value that is not a REDIS_SET gets shared.wrongtypeerr.
 * Replies shared.cone when dictAdd() succeeds, shared.czero otherwise. */
5394 static void saddCommand(redisClient
*c
) {
5397 set
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5399 set
= createSetObject();
5400 dbAdd(c
->db
,c
->argv
[1],set
);
5402 if (set
->type
!= REDIS_SET
) {
5403 addReply(c
,shared
.wrongtypeerr
);
/* Members are stored as dict keys with a NULL value; the set takes its own
 * reference on the member object once it has been added. */
5407 if (dictAdd(set
->ptr
,c
->argv
[2],NULL
) == DICT_OK
) {
5408 incrRefCount(c
->argv
[2]);
5410 addReply(c
,shared
.cone
);
5412 addReply(c
,shared
.czero
);
/* SREM command: remove the member argv[2] from the set stored at argv[1].
 *
 * Replies shared.cone when dictDelete() reports DICT_OK and shared.czero
 * otherwise (shared.czero is also the fallback reply of the key lookup).
 * After a successful removal the hash table may be resized and the key is
 * deleted when the set is empty. */
5416 static void sremCommand(redisClient
*c
) {
5419 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5420 checkType(c
,set
,REDIS_SET
)) return;
5422 if (dictDelete(set
->ptr
,c
->argv
[2]) == DICT_OK
) {
5424 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5425 if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]);
5426 addReply(c
,shared
.cone
);
5428 addReply(c
,shared
.czero
);
/* SMOVE command: move the member argv[3] from the set at argv[1] (source)
 * to the set at argv[2] (destination).
 *
 * Visible replies: shared.czero when the source key is missing or the member
 * is not in the source set, shared.wrongtypeerr when either key holds a
 * non-set value, shared.cone once the member has been moved. */
5432 static void smoveCommand(redisClient
*c
) {
5433 robj
*srcset
, *dstset
;
5435 srcset
= lookupKeyWrite(c
->db
,c
->argv
[1]);
5436 dstset
= lookupKeyWrite(c
->db
,c
->argv
[2]);
5438 /* If the source key does not exist return 0, if it's of the wrong type
5440 if (srcset
== NULL
|| srcset
->type
!= REDIS_SET
) {
5441 addReply(c
, srcset
? shared
.wrongtypeerr
: shared
.czero
);
5444 /* Error if the destination key is not a set as well */
5445 if (dstset
&& dstset
->type
!= REDIS_SET
) {
5446 addReply(c
,shared
.wrongtypeerr
);
5449 /* Remove the element from the source set */
5450 if (dictDelete(srcset
->ptr
,c
->argv
[3]) == DICT_ERR
) {
5451 /* Key not found in the src set! return zero */
5452 addReply(c
,shared
.czero
);
/* The emptied source key is dropped, except when source and destination are
 * the same object: the member is about to be added back to it. */
5455 if (dictSize((dict
*)srcset
->ptr
) == 0 && srcset
!= dstset
)
5456 dbDelete(c
->db
,c
->argv
[1]);
5458 /* Add the element to the destination set */
/* NOTE(review): the guard for creating the destination set is not visible
 * here (presumably a missing destination key). */
5460 dstset
= createSetObject();
5461 dbAdd(c
->db
,c
->argv
[2],dstset
);
/* The reference is only taken when the member was not already present. */
5463 if (dictAdd(dstset
->ptr
,c
->argv
[3],NULL
) == DICT_OK
)
5464 incrRefCount(c
->argv
[3]);
5465 addReply(c
,shared
.cone
);
/* SISMEMBER command: test whether argv[2] is a member of the set stored at
 * argv[1]. Replies shared.cone when dictFind() locates the member and
 * shared.czero otherwise (also the fallback reply of the key lookup). */
5468 static void sismemberCommand(redisClient
*c
) {
5471 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5472 checkType(c
,set
,REDIS_SET
)) return;
5474 if (dictFind(set
->ptr
,c
->argv
[2]))
5475 addReply(c
,shared
.cone
);
5477 addReply(c
,shared
.czero
);
/* SCARD command: reply with the number of members of the set stored at
 * argv[1], using shared.czero as the fallback reply of the key lookup.
 * NOTE(review): 's' is assigned on a line not visible in this chunk
 * (presumably the dict behind o->ptr). */
5480 static void scardCommand(redisClient
*c
) {
5484 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
5485 checkType(c
,o
,REDIS_SET
)) return;
5488 addReplyUlong(c
,dictSize(s
));
/* SPOP command: remove a random member from the set stored at argv[1] and
 * reply with it as a bulk reply. shared.nullbulk is the fallback reply of
 * the key lookup and is also sent on a path whose guard is not visible here
 * (presumably when dictGetRandomKey() returns NULL). */
5491 static void spopCommand(redisClient
*c
) {
5495 if ((set
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5496 checkType(c
,set
,REDIS_SET
)) return;
5498 de
= dictGetRandomKey(set
->ptr
);
5500 addReply(c
,shared
.nullbulk
);
5502 robj
*ele
= dictGetEntryKey(de
);
/* The member is queued for the reply before it is removed from the dict. */
5504 addReplyBulk(c
,ele
);
5505 dictDelete(set
->ptr
,ele
);
5506 if (htNeedsResize(set
->ptr
)) dictResize(set
->ptr
);
5507 if (dictSize((dict
*)set
->ptr
) == 0) dbDelete(c
->db
,c
->argv
[1]);
/* SRANDMEMBER command: reply with a random member of the set stored at
 * argv[1] as a bulk reply; no removal call is visible in this block.
 * shared.nullbulk is the fallback reply of the key lookup and is also sent
 * on a path whose guard is not visible here (presumably a NULL result from
 * dictGetRandomKey()). */
5512 static void srandmemberCommand(redisClient
*c
) {
5516 if ((set
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
5517 checkType(c
,set
,REDIS_SET
)) return;
5519 de
= dictGetRandomKey(set
->ptr
);
5521 addReply(c
,shared
.nullbulk
);
5523 robj
*ele
= dictGetEntryKey(de
);
5525 addReplyBulk(c
,ele
);
5529 static int qsortCompareSetsByCardinality(const void *s1
, const void *s2
) {
5530 dict
**d1
= (void*) s1
, **d2
= (void*) s2
;
5532 return dictSize(*d1
)-dictSize(*d2
);
/* Shared implementation of SINTER and SINTERSTORE.
 *
 * c        - the client to reply to.
 * setskeys - array of 'setsnum' key names of the sets to intersect.
 * dstkey   - key receiving the result for the STORE variant; sinterCommand()
 *            passes NULL here.
 *
 * The dict of every input set is collected into 'dv', sorted by ascending
 * cardinality, and then each member of the first (smallest) set is checked
 * against all the others. Matching members are either queued as bulk
 * replies or added to 'dstset'.
 * NOTE(review): several guards (missing key, dstkey tests, cleanup of 'dv')
 * are on lines not visible in this chunk. */
5535 static void sinterGenericCommand(redisClient
*c
, robj
**setskeys
, unsigned long setsnum
, robj
*dstkey
) {
5536 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5539 robj
*lenobj
= NULL
, *dstset
= NULL
;
5540 unsigned long j
, cardinality
= 0;
/* Resolve every key: a write lookup or a read lookup is used depending on a
 * condition that is not visible here. */
5542 for (j
= 0; j
< setsnum
; j
++) {
5546 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5547 lookupKeyRead(c
->db
,setskeys
[j
]);
5551 if (dbDelete(c
->db
,dstkey
))
5553 addReply(c
,shared
.czero
);
5555 addReply(c
,shared
.emptymultibulk
);
5559 if (setobj
->type
!= REDIS_SET
) {
5561 addReply(c
,shared
.wrongtypeerr
);
5564 dv
[j
] = setobj
->ptr
;
5566 /* Sort sets from the smallest to largest, this will improve our
5567 * algorithm's performance */
5568 qsort(dv
,setsnum
,sizeof(dict
*),qsortCompareSetsByCardinality
);
5570 /* The first thing we should output is the total number of elements...
5571 * since this is a multi-bulk write, but at this stage we don't know
5572 * the intersection set size, so we use a trick, append an empty object
5573 * to the output list and save the pointer to later modify it with the
5576 lenobj
= createObject(REDIS_STRING
,NULL
);
5578 decrRefCount(lenobj
);
5580 /* If we have a target key where to store the resulting set
5581 * create this key with an empty set inside */
5582 dstset
= createSetObject();
5585 /* Iterate all the elements of the first (smallest) set, and test
5586 * the element against all the other sets, if at least one set does
5587 * not include the element it is discarded */
5588 di
= dictGetIterator(dv
[0]);
5590 while((de
= dictNext(di
)) != NULL
) {
5593 for (j
= 1; j
< setsnum
; j
++)
5594 if (dictFind(dv
[j
],dictGetEntryKey(de
)) == NULL
) break;
5596 continue; /* at least one set does not contain the member */
5597 ele
= dictGetEntryKey(de
);
5599 addReplyBulk(c
,ele
);
5602 dictAdd(dstset
->ptr
,ele
,NULL
);
5606 dictReleaseIterator(di
);
5609 /* Store the resulting set into the target, if the intersection
5610 * is not an empty set. */
5611 dbDelete(c
->db
,dstkey
);
5612 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5613 dbAdd(c
->db
,dstkey
,dstset
);
5614 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5616 decrRefCount(dstset
);
5617 addReply(c
,shared
.czero
);
/* Fill in the multi-bulk header object created earlier, now that the number
 * of matching members is known. */
5621 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",cardinality
);
5626 static void sinterCommand(redisClient
*c
) {
5627 sinterGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
);
5630 static void sinterstoreCommand(redisClient
*c
) {
5631 sinterGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1]);
/* Operation selectors passed as the 'op' argument of
 * sunionDiffGenericCommand() and zunionInterGenericCommand(). */
5634 #define REDIS_OP_UNION 0
5635 #define REDIS_OP_DIFF 1
5636 #define REDIS_OP_INTER 2
/* Shared implementation of SUNION, SUNIONSTORE, SDIFF and SDIFFSTORE.
 *
 * c        - the client to reply to.
 * setskeys - array of 'setsnum' key names of the input sets.
 * dstkey   - key receiving the result for the STORE variants; the non-STORE
 *            callers in this file pass NULL.
 * op       - REDIS_OP_UNION or REDIS_OP_DIFF.
 *
 * The result is accumulated in a temporary set object: for a union every
 * member of every input is added; for a difference the members of the first
 * set are added and the members of the following sets are deleted from it.
 * NOTE(review): the updates of 'cardinality' and several guards are on lines
 * not visible in this chunk. */
5638 static void sunionDiffGenericCommand(redisClient
*c
, robj
**setskeys
, int setsnum
, robj
*dstkey
, int op
) {
5639 dict
**dv
= zmalloc(sizeof(dict
*)*setsnum
);
5642 robj
*dstset
= NULL
;
5643 int j
, cardinality
= 0;
/* Resolve every key: a write lookup or a read lookup is used depending on a
 * condition that is not visible here. */
5645 for (j
= 0; j
< setsnum
; j
++) {
5649 lookupKeyWrite(c
->db
,setskeys
[j
]) :
5650 lookupKeyRead(c
->db
,setskeys
[j
]);
5655 if (setobj
->type
!= REDIS_SET
) {
5657 addReply(c
,shared
.wrongtypeerr
);
5660 dv
[j
] = setobj
->ptr
;
5663 /* We need a temp set object to store our union. If the dstkey
5664 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5665 * this set object will be the resulting object to set into the target key*/
5666 dstset
= createSetObject();
5668 /* Iterate all the elements of all the sets, add every element a single
5669 * time to the result set */
5670 for (j
= 0; j
< setsnum
; j
++) {
5671 if (op
== REDIS_OP_DIFF
&& j
== 0 && !dv
[j
]) break; /* result set is empty */
5672 if (!dv
[j
]) continue; /* non existing keys are like empty sets */
5674 di
= dictGetIterator(dv
[j
]);
5676 while((de
= dictNext(di
)) != NULL
) {
5679 /* dictAdd will not add the same element multiple times */
5680 ele
= dictGetEntryKey(de
);
/* Union, or first set of a difference: add. Later sets of a difference:
 * delete. */
5681 if (op
== REDIS_OP_UNION
|| j
== 0) {
5682 if (dictAdd(dstset
->ptr
,ele
,NULL
) == DICT_OK
) {
5686 } else if (op
== REDIS_OP_DIFF
) {
5687 if (dictDelete(dstset
->ptr
,ele
) == DICT_OK
) {
5692 dictReleaseIterator(di
);
5694 /* result set is empty? Exit asap. */
5695 if (op
== REDIS_OP_DIFF
&& cardinality
== 0) break;
5698 /* Output the content of the resulting set, if not in STORE mode */
5700 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",cardinality
));
5701 di
= dictGetIterator(dstset
->ptr
);
5702 while((de
= dictNext(di
)) != NULL
) {
5705 ele
= dictGetEntryKey(de
);
5706 addReplyBulk(c
,ele
);
5708 dictReleaseIterator(di
);
5709 decrRefCount(dstset
);
5711 /* If we have a target key where to store the resulting set
5712 * create this key with the result set inside */
5713 dbDelete(c
->db
,dstkey
);
5714 if (dictSize((dict
*)dstset
->ptr
) > 0) {
5715 dbAdd(c
->db
,dstkey
,dstset
);
5716 addReplyLongLong(c
,dictSize((dict
*)dstset
->ptr
));
5718 decrRefCount(dstset
);
5719 addReply(c
,shared
.czero
);
5726 static void sunionCommand(redisClient
*c
) {
5727 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_UNION
);
5730 static void sunionstoreCommand(redisClient
*c
) {
5731 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_UNION
);
5734 static void sdiffCommand(redisClient
*c
) {
5735 sunionDiffGenericCommand(c
,c
->argv
+1,c
->argc
-1,NULL
,REDIS_OP_DIFF
);
5738 static void sdiffstoreCommand(redisClient
*c
) {
5739 sunionDiffGenericCommand(c
,c
->argv
+2,c
->argc
-2,c
->argv
[1],REDIS_OP_DIFF
);
5742 /* ==================================== ZSets =============================== */
5744 /* ZSETs are ordered sets using two data structures to hold the same elements
5745 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5748 * The elements are added to a hash table mapping Redis objects to scores.
5749 * At the same time the elements are added to a skip list mapping scores
5750 * to Redis objects (so objects are sorted by scores in this "view"). */
5752 /* This skiplist implementation is almost a C translation of the original
5753 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5754 * Alternative to Balanced Trees", modified in three ways:
5755 * a) this implementation allows for repeated values.
5756 * b) the comparison is not just by key (our 'score') but by satellite data.
5757 * c) there is a back pointer, so it's a doubly linked list with the back
5758 * pointers being only at "level 1". This allows to traverse the list
5759 * from tail to head, useful for ZREVRANGE. */
/* Allocate a skiplist node with room for 'level' forward pointers and
 * 'level'-1 span counters (there is no span slot for the lowest level,
 * which is why the rest of the code indexes span with i-1).
 * NOTE(review): the code storing 'score' and 'obj', any guard around the
 * span allocation and the return statement are not visible in this chunk. */
5761 static zskiplistNode
*zslCreateNode(int level
, double score
, robj
*obj
) {
5762 zskiplistNode
*zn
= zmalloc(sizeof(*zn
));
5764 zn
->forward
= zmalloc(sizeof(zskiplistNode
*) * level
);
5766 zn
->span
= zmalloc(sizeof(unsigned int) * (level
- 1));
/* Allocate a new skiplist. The header is a node created with
 * ZSKIPLIST_MAXLEVEL levels, a score of 0 and a NULL object; all its forward
 * pointers are set to NULL, its spans to 0 and its backward pointer to NULL.
 * NOTE(review): the initialization of the remaining zskiplist fields and the
 * return statement are not visible in this chunk. */
5774 static zskiplist
*zslCreate(void) {
5778 zsl
= zmalloc(sizeof(*zsl
));
5781 zsl
->header
= zslCreateNode(ZSKIPLIST_MAXLEVEL
,0,NULL
);
5782 for (j
= 0; j
< ZSKIPLIST_MAXLEVEL
; j
++) {
5783 zsl
->header
->forward
[j
] = NULL
;
5785 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5786 if (j
< ZSKIPLIST_MAXLEVEL
-1)
5787 zsl
->header
->span
[j
] = 0;
5789 zsl
->header
->backward
= NULL
;
/* Release a skiplist node: drops the node's reference on its object and
 * frees the forward pointer array.
 * NOTE(review): freeing of the span array and of the node itself is not
 * visible in this chunk. */
5794 static void zslFreeNode(zskiplistNode
*node
) {
5795 decrRefCount(node
->obj
);
5796 zfree(node
->forward
);
/* Release a whole skiplist. The header's arrays are freed directly with
 * zfree() (the header was created with a NULL object), after the first real
 * node has been saved in 'node' for the level-0 walk.
 * NOTE(review): the loop releasing each node and the final frees are only
 * partly visible in this chunk. */
5801 static void zslFree(zskiplist
*zsl
) {
5802 zskiplistNode
*node
= zsl
->header
->forward
[0], *next
;
5804 zfree(zsl
->header
->forward
);
5805 zfree(zsl
->header
->span
);
/* Fetch the successor before the current node is released. */
5808 next
= node
->forward
[0];
/* Pick a random level for a new skiplist node. The loop keeps going while
 * the low 16 bits of random() fall below ZSKIPLIST_P * 0xFFFF, and the
 * result is capped at ZSKIPLIST_MAXLEVEL.
 * NOTE(review): the declaration of 'level' and the loop body are not visible
 * in this chunk. */
5815 static int zslRandomLevel(void) {
5817 while ((random()&0xFFFF) < (ZSKIPLIST_P
* 0xFFFF))
5819 return (level
<ZSKIPLIST_MAXLEVEL
) ? level
: ZSKIPLIST_MAXLEVEL
;
/* Insert a new node holding 'obj' with the given 'score' into the skiplist.
 *
 * For every level, update[i] records the rightmost node that sorts before
 * the new element (by score, then by compareStringObjects() on the object)
 * and rank[i] the number of elements crossed to reach it. Both are then used
 * to splice in the new node and to fix up the span counters.
 * NOTE(review): the initialization of 'x', the update[] assignments and the
 * length/tail/level bookkeeping are on lines not visible in this chunk. */
5822 static void zslInsert(zskiplist
*zsl
, double score
, robj
*obj
) {
5823 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5824 unsigned int rank
[ZSKIPLIST_MAXLEVEL
];
5828 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5829 /* store rank that is crossed to reach the insert position */
5830 rank
[i
] = i
== (zsl
->level
-1) ? 0 : rank
[i
+1];
5832 while (x
->forward
[i
] &&
5833 (x
->forward
[i
]->score
< score
||
5834 (x
->forward
[i
]->score
== score
&&
5835 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0))) {
/* Level 0 has no span slot: each step there counts as one element. */
5836 rank
[i
] += i
> 0 ? x
->span
[i
-1] : 1;
5841 /* we assume the key is not already inside, since we allow duplicated
5842 * scores, and the re-insertion of score and redis object should never
5843 * happen since the caller of zslInsert() should test in the hash table
5844 * if the element is already inside or not. */
5845 level
= zslRandomLevel();
/* Levels that did not exist yet start at the header and span the whole
 * list. */
5846 if (level
> zsl
->level
) {
5847 for (i
= zsl
->level
; i
< level
; i
++) {
5849 update
[i
] = zsl
->header
;
5850 update
[i
]->span
[i
-1] = zsl
->length
;
5854 x
= zslCreateNode(level
,score
,obj
);
5855 for (i
= 0; i
< level
; i
++) {
5856 x
->forward
[i
] = update
[i
]->forward
[i
];
5857 update
[i
]->forward
[i
] = x
;
5859 /* update span covered by update[i] as x is inserted here */
5861 x
->span
[i
-1] = update
[i
]->span
[i
-1] - (rank
[0] - rank
[i
]);
5862 update
[i
]->span
[i
-1] = (rank
[0] - rank
[i
]) + 1;
5866 /* increment span for untouched levels */
5867 for (i
= level
; i
< zsl
->level
; i
++) {
5868 update
[i
]->span
[i
-1]++;
/* The header is never exposed as a backward link. */
5871 x
->backward
= (update
[0] == zsl
->header
) ? NULL
: update
[0];
5873 x
->forward
[0]->backward
= x
;
5879 /* Internal function used by zslDelete, zslDeleteRangeByScore and zslDeleteRangeByRank */
/* Unlink node 'x' from the skiplist. 'update' must hold, for every level,
 * the node preceding 'x' as computed by the caller's search.
 *
 * Levels where update[i] points directly at x are re-linked past it and
 * inherit its span; a span adjustment of -1 is applied on the other visible
 * branch. Backward/tail pointers are fixed afterwards and the loop at the end
 * runs while the topmost level has no nodes left.
 * The node itself is not released by the visible code: zslDeleteRangeByScore()
 * and zslDeleteRangeByRank() still read x->obj after calling this. */
5880 void zslDeleteNode(zskiplist
*zsl
, zskiplistNode
*x
, zskiplistNode
**update
) {
5882 for (i
= 0; i
< zsl
->level
; i
++) {
5883 if (update
[i
]->forward
[i
] == x
) {
5885 update
[i
]->span
[i
-1] += x
->span
[i
-1] - 1;
5887 update
[i
]->forward
[i
] = x
->forward
[i
];
5889 /* invariant: i > 0, because update[0]->forward[0]
5890 * is always equal to x */
5891 update
[i
]->span
[i
-1] -= 1;
5894 if (x
->forward
[0]) {
5895 x
->forward
[0]->backward
= x
->backward
;
5897 zsl
->tail
= x
->backward
;
5899 while(zsl
->level
> 1 && zsl
->header
->forward
[zsl
->level
-1] == NULL
)
5904 /* Delete an element with matching score/object from the skiplist. */
/* Search for the node matching both 'score' and 'obj' and unlink it with
 * zslDeleteNode(). Returns 0 when no matching node is found.
 * NOTE(review): the success path after zslDeleteNode() (freeing the node
 * and the non-zero return the callers assert on) is not visible here. */
5905 static int zslDelete(zskiplist
*zsl
, double score
, robj
*obj
) {
5906 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
/* Same ordering as zslInsert(): by score first, then by
 * compareStringObjects() on the object. */
5910 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5911 while (x
->forward
[i
] &&
5912 (x
->forward
[i
]->score
< score
||
5913 (x
->forward
[i
]->score
== score
&&
5914 compareStringObjects(x
->forward
[i
]->obj
,obj
) < 0)))
5918 /* We may have multiple elements with the same score, what we need
5919 * is to find the element with both the right score and object. */
5921 if (x
&& score
== x
->score
&& equalStringObjects(x
->obj
,obj
)) {
5922 zslDeleteNode(zsl
, x
, update
);
5926 return 0; /* not found */
5928 return 0; /* not found */
5931 /* Delete all the elements with score between min and max from the skiplist.
5932 * Min and max are inclusive, so a score >= min && score <= max is deleted.
5933 * Note that this function takes the reference to the hash table view of the
5934 * sorted set, in order to remove the elements from the hash table too. */
/* Remove from the skiplist, and from the companion hash table 'dict', the
 * nodes whose score is not below 'min' and not above 'max'. Returns the
 * 'removed' counter.
 * NOTE(review): the update[] bookkeeping, the increment of 'removed' and the
 * release of each node are on lines not visible in this chunk. */
5935 static unsigned long zslDeleteRangeByScore(zskiplist
*zsl
, double min
, double max
, dict
*dict
) {
5936 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5937 unsigned long removed
= 0;
/* Skip everything scoring below 'min'. */
5941 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5942 while (x
->forward
[i
] && x
->forward
[i
]->score
< min
)
5946 /* We may have multiple elements with the same score, what we need
5947 * is to find the element with both the right score and object. */
5949 while (x
&& x
->score
<= max
) {
/* Save the successor first: x is unlinked below. */
5950 zskiplistNode
*next
= x
->forward
[0];
5951 zslDeleteNode(zsl
, x
, update
);
5952 dictDelete(dict
,x
->obj
);
5957 return removed
; /* not found */
5960 /* Delete all the elements with rank between start and end from the skiplist.
5961 * Start and end are inclusive. Note that start and end need to be 1-based */
/* Remove from the skiplist, and from the companion hash table 'dict', the
 * nodes whose rank lies between 'start' and 'end'. 'traversed' tracks the
 * rank reached so far while descending the levels.
 * NOTE(review): the update[] bookkeeping, the counter increments, the release
 * of each node and the return statement are not visible in this chunk. */
5962 static unsigned long zslDeleteRangeByRank(zskiplist
*zsl
, unsigned int start
, unsigned int end
, dict
*dict
) {
5963 zskiplistNode
*update
[ZSKIPLIST_MAXLEVEL
], *x
;
5964 unsigned long traversed
= 0, removed
= 0;
/* Advance while the next hop stays strictly before rank 'start'; level 0 has
 * no span slot, so each step there counts as one element. */
5968 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5969 while (x
->forward
[i
] && (traversed
+ (i
> 0 ? x
->span
[i
-1] : 1)) < start
) {
5970 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
5978 while (x
&& traversed
<= end
) {
/* Save the successor first: x is unlinked below. */
5979 zskiplistNode
*next
= x
->forward
[0];
5980 zslDeleteNode(zsl
, x
, update
);
5981 dictDelete(dict
,x
->obj
);
5990 /* Find the first node having a score equal or greater than the specified one.
5991 * Returns NULL if there is no match. */
/* Descend the skiplist past every node scoring below 'score' and return the
 * level-0 successor of the node reached, which is NULL when no node has a
 * score greater than or equal to 'score'. */
5992 static zskiplistNode
*zslFirstWithScore(zskiplist
*zsl
, double score
) {
5997 for (i
= zsl
->level
-1; i
>= 0; i
--) {
5998 while (x
->forward
[i
] && x
->forward
[i
]->score
< score
)
6001 /* We may have multiple elements with the same score, what we need
6002 * is to find the element with both the right score and object. */
6003 return x
->forward
[0];
6006 /* Find the rank for an element by both score and key.
6007 * Returns 0 when the element cannot be found, rank otherwise.
6008 * Note that the rank is 1-based due to the span of zsl->header to the
/* Compute the rank of the element identified by 'score' and object 'o',
 * accumulating the spans crossed while descending the levels. Unlike the
 * insert/delete searches, the walk also steps onto nodes comparing equal to
 * 'o' (the comparison is <= 0), so it can land on the element itself.
 * NOTE(review): the return statements are not visible in this chunk. */
6010 static unsigned long zslGetRank(zskiplist
*zsl
, double score
, robj
*o
) {
6012 unsigned long rank
= 0;
6016 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6017 while (x
->forward
[i
] &&
6018 (x
->forward
[i
]->score
< score
||
6019 (x
->forward
[i
]->score
== score
&&
6020 compareStringObjects(x
->forward
[i
]->obj
,o
) <= 0))) {
6021 rank
+= i
> 0 ? x
->span
[i
-1] : 1;
6025 /* x might be equal to zsl->header, so test if obj is non-NULL */
6026 if (x
->obj
&& equalStringObjects(x
->obj
,o
)) {
6033 /* Finds an element by its rank. The rank argument needs to be 1-based. */
/* Locate a node by rank: descend the levels, advancing while the rank of the
 * next hop does not exceed 'rank', and stop once 'traversed' equals 'rank'.
 * NOTE(review): the return statements are not visible in this chunk. */
6034 zskiplistNode
* zslGetElementByRank(zskiplist
*zsl
, unsigned long rank
) {
6036 unsigned long traversed
= 0;
6040 for (i
= zsl
->level
-1; i
>= 0; i
--) {
6041 while (x
->forward
[i
] && (traversed
+ (i
>0 ? x
->span
[i
-1] : 1)) <= rank
)
6043 traversed
+= i
> 0 ? x
->span
[i
-1] : 1;
6046 if (traversed
== rank
) {
6053 /* The actual Z-commands implementations */
6055 /* This generic command implements both ZADD and ZINCRBY.
6056 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
6057 * the increment if the operation is a ZINCRBY (doincrement == 1). */
/* Shared implementation of ZADD and ZINCRBY.
 *
 * c           - the client to reply to.
 * key         - name of the sorted set.
 * ele         - the member to add or update.
 * scoreval    - the score (ZADD) or the increment (ZINCRBY).
 * doincrement - 0 for ZADD, 1 for ZINCRBY (as passed by the two callers).
 *
 * A NaN 'scoreval' or a NaN resulting score is answered with an error.
 * The new score lives in a heap-allocated double that is stored as the value
 * of the member in the hash table.
 * NOTE(review): which branch each reply belongs to (doincrement vs. plain
 * add) is decided on lines not visible in this chunk. */
6058 static void zaddGenericCommand(redisClient
*c
, robj
*key
, robj
*ele
, double scoreval
, int doincrement
) {
6063 if (isnan(scoreval
)) {
6064 addReplySds(c
,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
/* Create the sorted set on first use; reject keys holding another type. */
6068 zsetobj
= lookupKeyWrite(c
->db
,key
);
6069 if (zsetobj
== NULL
) {
6070 zsetobj
= createZsetObject();
6071 dbAdd(c
->db
,key
,zsetobj
);
6073 if (zsetobj
->type
!= REDIS_ZSET
) {
6074 addReply(c
,shared
.wrongtypeerr
);
6080 /* Ok now since we implement both ZADD and ZINCRBY here the code
6081 * needs to handle the two different conditions. It's all about setting
6082 * '*score', that is, the new score to set, to the right value. */
6083 score
= zmalloc(sizeof(double));
6087 /* Read the old score. If the element was not present starts from 0 */
6088 de
= dictFind(zs
->dict
,ele
);
6090 double *oldscore
= dictGetEntryVal(de
);
6091 *score
= *oldscore
+ scoreval
;
6095 if (isnan(*score
)) {
6097 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
6099 /* Note that we don't need to check if the zset may be empty and
6100 * should be removed here, as we can only obtain Nan as score if
6101 * there was already an element in the sorted set. */
6108 /* What follows is a simple remove and re-insert operation that is common
6109 * to both ZADD and ZINCRBY... */
6110 if (dictAdd(zs
->dict
,ele
,score
) == DICT_OK
) {
6111 /* case 1: New element */
6112 incrRefCount(ele
); /* added to hash */
6113 zslInsert(zs
->zsl
,*score
,ele
);
6114 incrRefCount(ele
); /* added to skiplist */
6117 addReplyDouble(c
,*score
);
6119 addReply(c
,shared
.cone
);
6124 /* case 2: Score update operation */
6125 de
= dictFind(zs
->dict
,ele
);
6126 redisAssert(de
!= NULL
);
6127 oldscore
= dictGetEntryVal(de
);
/* The skiplist is only touched when the score actually changes. */
6128 if (*score
!= *oldscore
) {
6131 /* Remove and insert the element in the skip list with new score */
6132 deleted
= zslDelete(zs
->zsl
,*oldscore
,ele
);
6133 redisAssert(deleted
!= 0);
6134 zslInsert(zs
->zsl
,*score
,ele
);
6136 /* Update the score in the hash table */
6137 dictReplace(zs
->dict
,ele
,score
);
6143 addReplyDouble(c
,*score
);
6145 addReply(c
,shared
.czero
);
/* ZADD command: parse the score from argv[2] (returning early unless the
 * parser reports REDIS_OK) and add member argv[3] to the sorted set at
 * argv[1] through zaddGenericCommand() with doincrement == 0. */
6149 static void zaddCommand(redisClient
*c
) {
6152 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6153 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,0);
/* ZINCRBY command: parse the increment from argv[2] (returning early unless
 * the parser reports REDIS_OK) and apply it to member argv[3] of the sorted
 * set at argv[1] through zaddGenericCommand() with doincrement == 1. */
6156 static void zincrbyCommand(redisClient
*c
) {
6159 if (getDoubleFromObjectOrReply(c
, c
->argv
[2], &scoreval
, NULL
) != REDIS_OK
) return;
6160 zaddGenericCommand(c
,c
->argv
[1],c
->argv
[3],scoreval
,1);
/* ZREM command: remove member argv[2] from the sorted set stored at argv[1].
 *
 * The member's score is read from the hash table and used to delete the
 * skiplist node; the hash table entry is removed afterwards. Replies
 * shared.cone on removal; shared.czero is the fallback reply of the key
 * lookup and is also sent on a path whose guard is not visible here
 * (presumably when dictFind() returns NULL). */
6163 static void zremCommand(redisClient
*c
) {
6170 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6171 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6174 de
= dictFind(zs
->dict
,c
->argv
[2]);
6176 addReply(c
,shared
.czero
);
6179 /* Delete from the skiplist */
6180 oldscore
= dictGetEntryVal(de
);
6181 deleted
= zslDelete(zs
->zsl
,*oldscore
,c
->argv
[2]);
6182 redisAssert(deleted
!= 0);
6184 /* Delete from the hash table */
6185 dictDelete(zs
->dict
,c
->argv
[2]);
6186 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6187 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6189 addReply(c
,shared
.cone
);
/* ZREMRANGEBYSCORE command: remove from the sorted set at argv[1] the
 * members selected by zslDeleteRangeByScore() for the bounds argv[2] (min)
 * and argv[3] (max). Replies with the number of removed members, adds it to
 * server.dirty and deletes the key when the set becomes empty. */
6192 static void zremrangebyscoreCommand(redisClient
*c
) {
/* The bounds are parsed before the key is looked up. */
6199 if ((getDoubleFromObjectOrReply(c
, c
->argv
[2], &min
, NULL
) != REDIS_OK
) ||
6200 (getDoubleFromObjectOrReply(c
, c
->argv
[3], &max
, NULL
) != REDIS_OK
)) return;
6202 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6203 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6206 deleted
= zslDeleteRangeByScore(zs
->zsl
,min
,max
,zs
->dict
);
6207 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6208 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6209 server
.dirty
+= deleted
;
6210 addReplyLongLong(c
,deleted
);
/* ZREMRANGEBYRANK command: remove from the sorted set at argv[1] the members
 * whose 0-based rank lies between argv[2] (start) and argv[3] (end).
 *
 * Negative ranks are offsets from the end; ranks that are still negative are
 * clamped to 0 and 'end' is clamped to the last element. An empty range
 * replies shared.czero. Otherwise the reply is the number of removed
 * members, which is also added to server.dirty. */
6213 static void zremrangebyrankCommand(redisClient
*c
) {
6221 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6222 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6224 if ((zsetobj
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6225 checkType(c
,zsetobj
,REDIS_ZSET
)) return;
6227 llen
= zs
->zsl
->length
;
6229 /* convert negative indexes */
6230 if (start
< 0) start
= llen
+start
;
6231 if (end
< 0) end
= llen
+end
;
6232 if (start
< 0) start
= 0;
6233 if (end
< 0) end
= 0;
6235 /* indexes sanity checks */
6236 if (start
> end
|| start
>= llen
) {
6237 addReply(c
,shared
.czero
);
6240 if (end
>= llen
) end
= llen
-1;
6242 /* increment start and end because zsl*Rank functions
6243 * use 1-based rank */
6244 deleted
= zslDeleteRangeByRank(zs
->zsl
,start
+1,end
+1,zs
->dict
);
6245 if (htNeedsResize(zs
->dict
)) dictResize(zs
->dict
);
6246 if (dictSize(zs
->dict
) == 0) dbDelete(c
->db
,c
->argv
[1]);
6247 server
.dirty
+= deleted
;
6248 addReplyLongLong(c
, deleted
);
6256 static int qsortCompareZsetopsrcByCardinality(const void *s1
, const void *s2
) {
6257 zsetopsrc
*d1
= (void*) s1
, *d2
= (void*) s2
;
6258 unsigned long size1
, size2
;
6259 size1
= d1
->dict
? dictSize(d1
->dict
) : 0;
6260 size2
= d2
->dict
? dictSize(d2
->dict
) : 0;
6261 return size1
- size2
;
/* Aggregation modes understood by zunionInterAggregate(). */
6264 #define REDIS_AGGR_SUM 1
6265 #define REDIS_AGGR_MIN 2
6266 #define REDIS_AGGR_MAX 3
/* Score stored as the value of dict entry _e, or 1.0 when the entry has a
 * NULL value (the set commands in this file add members with a NULL value).
 * Note that _e is evaluated twice. */
6267 #define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))
6269 inline static void zunionInterAggregate(double *target
, double val
, int aggregate
) {
6270 if (aggregate
== REDIS_AGGR_SUM
) {
6271 *target
= *target
+ val
;
6272 } else if (aggregate
== REDIS_AGGR_MIN
) {
6273 *target
= val
< *target
? val
: *target
;
6274 } else if (aggregate
== REDIS_AGGR_MAX
) {
6275 *target
= val
> *target
? val
: *target
;
6278 redisPanic("Unknown ZUNION/INTER aggregate type");
/* Shared implementation of ZUNIONSTORE and ZINTERSTORE.
 *
 * c      - the client to reply to.
 * dstkey - key receiving the resulting sorted set.
 * op     - REDIS_OP_UNION or REDIS_OP_INTER (anything else fails the assert
 *          at the end of the operator dispatch).
 *
 * argv[2] is the number of input keys, which follow from argv[3]. Inputs may
 * be sorted sets or plain sets. The optional "weights" and "aggregate"
 * arguments are parsed afterwards; weights default to 1.0 and the aggregate
 * to REDIS_AGGR_SUM. The sources are sorted by ascending cardinality before
 * the result is computed into a fresh sorted set object.
 * NOTE(review): several guards, error-path cleanups and loop headers are on
 * lines not visible in this chunk. */
6282 static void zunionInterGenericCommand(redisClient
*c
, robj
*dstkey
, int op
) {
6284 int aggregate
= REDIS_AGGR_SUM
;
6291 /* expect setnum input keys to be given */
/* NOTE(review): atoi() performs no validation, non-numeric input reads as 0. */
6292 setnum
= atoi(c
->argv
[2]->ptr
);
6294 addReplySds(c
,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
6298 /* test if the expected number of keys would overflow */
6299 if (3+setnum
> c
->argc
) {
6300 addReply(c
,shared
.syntaxerr
);
6304 /* read keys to be used for input */
6305 src
= zmalloc(sizeof(zsetopsrc
) * setnum
);
6306 for (i
= 0, j
= 3; i
< setnum
; i
++, j
++) {
6307 robj
*obj
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
/* Sorted sets contribute their member->score dict, plain sets their member
 * dict directly. */
6311 if (obj
->type
== REDIS_ZSET
) {
6312 src
[i
].dict
= ((zset
*)obj
->ptr
)->dict
;
6313 } else if (obj
->type
== REDIS_SET
) {
6314 src
[i
].dict
= (obj
->ptr
);
6317 addReply(c
,shared
.wrongtypeerr
);
6322 /* default all weights to 1 */
6323 src
[i
].weight
= 1.0;
6326 /* parse optional extra arguments */
6328 int remaining
= c
->argc
- j
;
/* "weights" must be followed by one value per input key. */
6331 if (remaining
>= (setnum
+ 1) && !strcasecmp(c
->argv
[j
]->ptr
,"weights")) {
6333 for (i
= 0; i
< setnum
; i
++, j
++, remaining
--) {
6334 if (getDoubleFromObjectOrReply(c
, c
->argv
[j
], &src
[i
].weight
, NULL
) != REDIS_OK
)
6337 } else if (remaining
>= 2 && !strcasecmp(c
->argv
[j
]->ptr
,"aggregate")) {
6339 if (!strcasecmp(c
->argv
[j
]->ptr
,"sum")) {
6340 aggregate
= REDIS_AGGR_SUM
;
6341 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"min")) {
6342 aggregate
= REDIS_AGGR_MIN
;
6343 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"max")) {
6344 aggregate
= REDIS_AGGR_MAX
;
6347 addReply(c
,shared
.syntaxerr
);
6353 addReply(c
,shared
.syntaxerr
);
6359 /* sort sets from the smallest to largest, this will improve our
6360 * algorithm's performance */
6361 qsort(src
,setnum
,sizeof(zsetopsrc
),qsortCompareZsetopsrcByCardinality
);
6363 dstobj
= createZsetObject();
6364 dstzset
= dstobj
->ptr
;
6366 if (op
== REDIS_OP_INTER
) {
6367 /* skip going over all entries if the smallest zset is NULL or empty */
6368 if (src
[0].dict
&& dictSize(src
[0].dict
) > 0) {
6369 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6370 * from small to large, all src[i > 0].dict are non-empty too */
6371 di
= dictGetIterator(src
[0].dict
);
6372 while((de
= dictNext(di
)) != NULL
) {
/* Each candidate gets its own heap-allocated score, seeded from the
 * smallest source. */
6373 double *score
= zmalloc(sizeof(double)), value
;
6374 *score
= src
[0].weight
* zunionInterDictValue(de
);
6376 for (j
= 1; j
< setnum
; j
++) {
6377 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6379 value
= src
[j
].weight
* zunionInterDictValue(other
);
6380 zunionInterAggregate(score
, value
, aggregate
);
6386 /* skip entry when not present in every source dict */
6390 robj
*o
= dictGetEntryKey(de
);
6391 dictAdd(dstzset
->dict
,o
,score
);
6392 incrRefCount(o
); /* added to dictionary */
6393 zslInsert(dstzset
->zsl
,*score
,o
);
6394 incrRefCount(o
); /* added to skiplist */
6397 dictReleaseIterator(di
);
6399 } else if (op
== REDIS_OP_UNION
) {
6400 for (i
= 0; i
< setnum
; i
++) {
6401 if (!src
[i
].dict
) continue;
6403 di
= dictGetIterator(src
[i
].dict
);
6404 while((de
= dictNext(di
)) != NULL
) {
6405 /* skip key when already processed */
6406 if (dictFind(dstzset
->dict
,dictGetEntryKey(de
)) != NULL
) continue;
6408 double *score
= zmalloc(sizeof(double)), value
;
6409 *score
= src
[i
].weight
* zunionInterDictValue(de
);
6411 /* because the zsets are sorted by size, it's only possible
6412 * for sets at larger indices to hold this entry */
6413 for (j
= (i
+1); j
< setnum
; j
++) {
6414 dictEntry
*other
= dictFind(src
[j
].dict
,dictGetEntryKey(de
));
6416 value
= src
[j
].weight
* zunionInterDictValue(other
);
6417 zunionInterAggregate(score
, value
, aggregate
);
6421 robj
*o
= dictGetEntryKey(de
);
6422 dictAdd(dstzset
->dict
,o
,score
);
6423 incrRefCount(o
); /* added to dictionary */
6424 zslInsert(dstzset
->zsl
,*score
,o
);
6425 incrRefCount(o
); /* added to skiplist */
6427 dictReleaseIterator(di
);
6430 /* unknown operator */
6431 redisAssert(op
== REDIS_OP_INTER
|| op
== REDIS_OP_UNION
);
/* Any previous value of the destination key is dropped; the key is only
 * recreated when the result is not empty. */
6434 dbDelete(c
->db
,dstkey
);
6435 if (dstzset
->zsl
->length
) {
6436 dbAdd(c
->db
,dstkey
,dstobj
);
6437 addReplyLongLong(c
, dstzset
->zsl
->length
);
6440 decrRefCount(dstobj
);
6441 addReply(c
, shared
.czero
);
6446 static void zunionstoreCommand(redisClient
*c
) {
6447 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_UNION
);
6450 static void zinterstoreCommand(redisClient
*c
) {
6451 zunionInterGenericCommand(c
,c
->argv
[1], REDIS_OP_INTER
);
6454 static void zrangeGenericCommand(redisClient
*c
, int reverse
) {
6466 if ((getLongFromObjectOrReply(c
, c
->argv
[2], &start
, NULL
) != REDIS_OK
) ||
6467 (getLongFromObjectOrReply(c
, c
->argv
[3], &end
, NULL
) != REDIS_OK
)) return;
6469 if (c
->argc
== 5 && !strcasecmp(c
->argv
[4]->ptr
,"withscores")) {
6471 } else if (c
->argc
>= 5) {
6472 addReply(c
,shared
.syntaxerr
);
6476 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
6477 || checkType(c
,o
,REDIS_ZSET
)) return;
6482 /* convert negative indexes */
6483 if (start
< 0) start
= llen
+start
;
6484 if (end
< 0) end
= llen
+end
;
6485 if (start
< 0) start
= 0;
6486 if (end
< 0) end
= 0;
6488 /* indexes sanity checks */
6489 if (start
> end
|| start
>= llen
) {
6490 /* Out of range start or start > end result in empty list */
6491 addReply(c
,shared
.emptymultibulk
);
6494 if (end
>= llen
) end
= llen
-1;
6495 rangelen
= (end
-start
)+1;
6497 /* check if starting point is trivial, before searching
6498 * the element in log(N) time */
6500 ln
= start
== 0 ? zsl
->tail
: zslGetElementByRank(zsl
, llen
-start
);
6503 zsl
->header
->forward
[0] : zslGetElementByRank(zsl
, start
+1);
6506 /* Return the result in form of a multi-bulk reply */
6507 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",
6508 withscores
? (rangelen
*2) : rangelen
));
6509 for (j
= 0; j
< rangelen
; j
++) {
6511 addReplyBulk(c
,ele
);
6513 addReplyDouble(c
,ln
->score
);
6514 ln
= reverse
? ln
->backward
: ln
->forward
[0];
6518 static void zrangeCommand(redisClient
*c
) {
6519 zrangeGenericCommand(c
,0);
6522 static void zrevrangeCommand(redisClient
*c
) {
6523 zrangeGenericCommand(c
,1);
6526 /* This command implements both ZRANGEBYSCORE and ZCOUNT.
6527 * If justcount is non-zero, just the count is returned. */
6528 static void genericZrangebyscoreCommand(redisClient
*c
, int justcount
) {
6531 int minex
= 0, maxex
= 0; /* are min or max exclusive? */
6532 int offset
= 0, limit
= -1;
6536 /* Parse the min-max interval. If one of the values is prefixed
6537 * by the "(" character, it's considered "open". For instance
6538 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6539 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6540 if (((char*)c
->argv
[2]->ptr
)[0] == '(') {
6541 min
= strtod((char*)c
->argv
[2]->ptr
+1,NULL
);
6544 min
= strtod(c
->argv
[2]->ptr
,NULL
);
6546 if (((char*)c
->argv
[3]->ptr
)[0] == '(') {
6547 max
= strtod((char*)c
->argv
[3]->ptr
+1,NULL
);
6550 max
= strtod(c
->argv
[3]->ptr
,NULL
);
6553 /* Parse "WITHSCORES": note that if the command was called with
6554 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6555 * enter the following paths to parse WITHSCORES and LIMIT. */
6556 if (c
->argc
== 5 || c
->argc
== 8) {
6557 if (strcasecmp(c
->argv
[c
->argc
-1]->ptr
,"withscores") == 0)
6562 if (c
->argc
!= (4 + withscores
) && c
->argc
!= (7 + withscores
))
6566 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
6571 if (c
->argc
== (7 + withscores
) && strcasecmp(c
->argv
[4]->ptr
,"limit")) {
6572 addReply(c
,shared
.syntaxerr
);
6574 } else if (c
->argc
== (7 + withscores
)) {
6575 offset
= atoi(c
->argv
[5]->ptr
);
6576 limit
= atoi(c
->argv
[6]->ptr
);
6577 if (offset
< 0) offset
= 0;
6580 /* Ok, lookup the key and get the range */
6581 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
6583 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6585 if (o
->type
!= REDIS_ZSET
) {
6586 addReply(c
,shared
.wrongtypeerr
);
6588 zset
*zsetobj
= o
->ptr
;
6589 zskiplist
*zsl
= zsetobj
->zsl
;
6591 robj
*ele
, *lenobj
= NULL
;
6592 unsigned long rangelen
= 0;
6594 /* Get the first node with the score >= min, or with
6595 * score > min if 'minex' is true. */
6596 ln
= zslFirstWithScore(zsl
,min
);
6597 while (minex
&& ln
&& ln
->score
== min
) ln
= ln
->forward
[0];
6600 /* No element matching the speciifed interval */
6601 addReply(c
,justcount
? shared
.czero
: shared
.emptymultibulk
);
6605 /* We don't know in advance how many matching elements there
6606 * are in the list, so we push this object that will represent
6607 * the multi-bulk length in the output buffer, and will "fix"
6610 lenobj
= createObject(REDIS_STRING
,NULL
);
6612 decrRefCount(lenobj
);
6615 while(ln
&& (maxex
? (ln
->score
< max
) : (ln
->score
<= max
))) {
6618 ln
= ln
->forward
[0];
6621 if (limit
== 0) break;
6624 addReplyBulk(c
,ele
);
6626 addReplyDouble(c
,ln
->score
);
6628 ln
= ln
->forward
[0];
6630 if (limit
> 0) limit
--;
6633 addReplyLongLong(c
,(long)rangelen
);
6635 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",
6636 withscores
? (rangelen
*2) : rangelen
);
6642 static void zrangebyscoreCommand(redisClient
*c
) {
6643 genericZrangebyscoreCommand(c
,0);
6646 static void zcountCommand(redisClient
*c
) {
6647 genericZrangebyscoreCommand(c
,1);
6650 static void zcardCommand(redisClient
*c
) {
6654 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
6655 checkType(c
,o
,REDIS_ZSET
)) return;
6658 addReplyUlong(c
,zs
->zsl
->length
);
6661 static void zscoreCommand(redisClient
*c
) {
6666 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6667 checkType(c
,o
,REDIS_ZSET
)) return;
6670 de
= dictFind(zs
->dict
,c
->argv
[2]);
6672 addReply(c
,shared
.nullbulk
);
6674 double *score
= dictGetEntryVal(de
);
6676 addReplyDouble(c
,*score
);
6680 static void zrankGenericCommand(redisClient
*c
, int reverse
) {
6688 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6689 checkType(c
,o
,REDIS_ZSET
)) return;
6693 de
= dictFind(zs
->dict
,c
->argv
[2]);
6695 addReply(c
,shared
.nullbulk
);
6699 score
= dictGetEntryVal(de
);
6700 rank
= zslGetRank(zsl
, *score
, c
->argv
[2]);
6703 addReplyLongLong(c
, zsl
->length
- rank
);
6705 addReplyLongLong(c
, rank
-1);
6708 addReply(c
,shared
.nullbulk
);
6712 static void zrankCommand(redisClient
*c
) {
6713 zrankGenericCommand(c
, 0);
6716 static void zrevrankCommand(redisClient
*c
) {
6717 zrankGenericCommand(c
, 1);
6720 /* ========================= Hashes utility functions ======================= */
6721 #define REDIS_HASH_KEY 1
6722 #define REDIS_HASH_VALUE 2
6724 /* Check the length of a number of objects to see if we need to convert a
6725 * zipmap to a real hash. Note that we only check string encoded objects
6726 * as their string length can be queried in constant time. */
6727 static void hashTryConversion(robj
*subject
, robj
**argv
, int start
, int end
) {
6729 if (subject
->encoding
!= REDIS_ENCODING_ZIPMAP
) return;
6731 for (i
= start
; i
<= end
; i
++) {
6732 if (argv
[i
]->encoding
== REDIS_ENCODING_RAW
&&
6733 sdslen(argv
[i
]->ptr
) > server
.hash_max_zipmap_value
)
6735 convertToRealHash(subject
);
6741 /* Encode given objects in-place when the hash uses a dict. */
6742 static void hashTryObjectEncoding(robj
*subject
, robj
**o1
, robj
**o2
) {
6743 if (subject
->encoding
== REDIS_ENCODING_HT
) {
6744 if (o1
) *o1
= tryObjectEncoding(*o1
);
6745 if (o2
) *o2
= tryObjectEncoding(*o2
);
6749 /* Get the value from a hash identified by key. Returns either a string
6750 * object or NULL if the value cannot be found. The refcount of the object
6751 * is always increased by 1 when the value was found. */
6752 static robj
*hashGet(robj
*o
, robj
*key
) {
6754 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6757 key
= getDecodedObject(key
);
6758 if (zipmapGet(o
->ptr
,key
->ptr
,sdslen(key
->ptr
),&v
,&vlen
)) {
6759 value
= createStringObject((char*)v
,vlen
);
6763 dictEntry
*de
= dictFind(o
->ptr
,key
);
6765 value
= dictGetEntryVal(de
);
6766 incrRefCount(value
);
6772 /* Test if the key exists in the given hash. Returns 1 if the key
6773 * exists and 0 when it doesn't. */
6774 static int hashExists(robj
*o
, robj
*key
) {
6775 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6776 key
= getDecodedObject(key
);
6777 if (zipmapExists(o
->ptr
,key
->ptr
,sdslen(key
->ptr
))) {
6783 if (dictFind(o
->ptr
,key
) != NULL
) {
6790 /* Add an element, discard the old if the key already exists.
6791 * Return 0 on insert and 1 on update. */
6792 static int hashSet(robj
*o
, robj
*key
, robj
*value
) {
6794 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6795 key
= getDecodedObject(key
);
6796 value
= getDecodedObject(value
);
6797 o
->ptr
= zipmapSet(o
->ptr
,
6798 key
->ptr
,sdslen(key
->ptr
),
6799 value
->ptr
,sdslen(value
->ptr
), &update
);
6801 decrRefCount(value
);
6803 /* Check if the zipmap needs to be upgraded to a real hash table */
6804 if (zipmapLen(o
->ptr
) > server
.hash_max_zipmap_entries
)
6805 convertToRealHash(o
);
6807 if (dictReplace(o
->ptr
,key
,value
)) {
6814 incrRefCount(value
);
6819 /* Delete an element from a hash.
6820 * Return 1 on deleted and 0 on not found. */
6821 static int hashDelete(robj
*o
, robj
*key
) {
6823 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6824 key
= getDecodedObject(key
);
6825 o
->ptr
= zipmapDel(o
->ptr
,key
->ptr
,sdslen(key
->ptr
), &deleted
);
6828 deleted
= dictDelete((dict
*)o
->ptr
,key
) == DICT_OK
;
6829 /* Always check if the dictionary needs a resize after a delete. */
6830 if (deleted
&& htNeedsResize(o
->ptr
)) dictResize(o
->ptr
);
6835 /* Return the number of elements in a hash. */
6836 static unsigned long hashLength(robj
*o
) {
6837 return (o
->encoding
== REDIS_ENCODING_ZIPMAP
) ?
6838 zipmapLen((unsigned char*)o
->ptr
) : dictSize((dict
*)o
->ptr
);
6841 /* Structure to hold hash iteration abstration. Note that iteration over
6842 * hashes involves both fields and values. Because it is possible that
6843 * not both are required, store pointers in the iterator to avoid
6844 * unnecessary memory allocation for fields/values. */
6848 unsigned char *zk
, *zv
;
6849 unsigned int zklen
, zvlen
;
6855 static hashIterator
*hashInitIterator(robj
*subject
) {
6856 hashIterator
*hi
= zmalloc(sizeof(hashIterator
));
6857 hi
->encoding
= subject
->encoding
;
6858 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6859 hi
->zi
= zipmapRewind(subject
->ptr
);
6860 } else if (hi
->encoding
== REDIS_ENCODING_HT
) {
6861 hi
->di
= dictGetIterator(subject
->ptr
);
6868 static void hashReleaseIterator(hashIterator
*hi
) {
6869 if (hi
->encoding
== REDIS_ENCODING_HT
) {
6870 dictReleaseIterator(hi
->di
);
6875 /* Move to the next entry in the hash. Return REDIS_OK when the next entry
6876 * could be found and REDIS_ERR when the iterator reaches the end. */
6877 static int hashNext(hashIterator
*hi
) {
6878 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6879 if ((hi
->zi
= zipmapNext(hi
->zi
, &hi
->zk
, &hi
->zklen
,
6880 &hi
->zv
, &hi
->zvlen
)) == NULL
) return REDIS_ERR
;
6882 if ((hi
->de
= dictNext(hi
->di
)) == NULL
) return REDIS_ERR
;
6887 /* Get key or value object at current iteration position.
6888 * This increases the refcount of the field object by 1. */
6889 static robj
*hashCurrent(hashIterator
*hi
, int what
) {
6891 if (hi
->encoding
== REDIS_ENCODING_ZIPMAP
) {
6892 if (what
& REDIS_HASH_KEY
) {
6893 o
= createStringObject((char*)hi
->zk
,hi
->zklen
);
6895 o
= createStringObject((char*)hi
->zv
,hi
->zvlen
);
6898 if (what
& REDIS_HASH_KEY
) {
6899 o
= dictGetEntryKey(hi
->de
);
6901 o
= dictGetEntryVal(hi
->de
);
6908 static robj
*hashLookupWriteOrCreate(redisClient
*c
, robj
*key
) {
6909 robj
*o
= lookupKeyWrite(c
->db
,key
);
6911 o
= createHashObject();
6914 if (o
->type
!= REDIS_HASH
) {
6915 addReply(c
,shared
.wrongtypeerr
);
6922 /* ============================= Hash commands ============================== */
6923 static void hsetCommand(redisClient
*c
) {
6927 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6928 hashTryConversion(o
,c
->argv
,2,3);
6929 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6930 update
= hashSet(o
,c
->argv
[2],c
->argv
[3]);
6931 addReply(c
, update
? shared
.czero
: shared
.cone
);
6935 static void hsetnxCommand(redisClient
*c
) {
6937 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6938 hashTryConversion(o
,c
->argv
,2,3);
6940 if (hashExists(o
, c
->argv
[2])) {
6941 addReply(c
, shared
.czero
);
6943 hashTryObjectEncoding(o
,&c
->argv
[2], &c
->argv
[3]);
6944 hashSet(o
,c
->argv
[2],c
->argv
[3]);
6945 addReply(c
, shared
.cone
);
6950 static void hmsetCommand(redisClient
*c
) {
6954 if ((c
->argc
% 2) == 1) {
6955 addReplySds(c
,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6959 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6960 hashTryConversion(o
,c
->argv
,2,c
->argc
-1);
6961 for (i
= 2; i
< c
->argc
; i
+= 2) {
6962 hashTryObjectEncoding(o
,&c
->argv
[i
], &c
->argv
[i
+1]);
6963 hashSet(o
,c
->argv
[i
],c
->argv
[i
+1]);
6965 addReply(c
, shared
.ok
);
6969 static void hincrbyCommand(redisClient
*c
) {
6970 long long value
, incr
;
6971 robj
*o
, *current
, *new;
6973 if (getLongLongFromObjectOrReply(c
,c
->argv
[3],&incr
,NULL
) != REDIS_OK
) return;
6974 if ((o
= hashLookupWriteOrCreate(c
,c
->argv
[1])) == NULL
) return;
6975 if ((current
= hashGet(o
,c
->argv
[2])) != NULL
) {
6976 if (getLongLongFromObjectOrReply(c
,current
,&value
,
6977 "hash value is not an integer") != REDIS_OK
) {
6978 decrRefCount(current
);
6981 decrRefCount(current
);
6987 new = createStringObjectFromLongLong(value
);
6988 hashTryObjectEncoding(o
,&c
->argv
[2],NULL
);
6989 hashSet(o
,c
->argv
[2],new);
6991 addReplyLongLong(c
,value
);
6995 static void hgetCommand(redisClient
*c
) {
6997 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.nullbulk
)) == NULL
||
6998 checkType(c
,o
,REDIS_HASH
)) return;
7000 if ((value
= hashGet(o
,c
->argv
[2])) != NULL
) {
7001 addReplyBulk(c
,value
);
7002 decrRefCount(value
);
7004 addReply(c
,shared
.nullbulk
);
7008 static void hmgetCommand(redisClient
*c
) {
7011 o
= lookupKeyRead(c
->db
,c
->argv
[1]);
7012 if (o
!= NULL
&& o
->type
!= REDIS_HASH
) {
7013 addReply(c
,shared
.wrongtypeerr
);
7016 /* Note the check for o != NULL happens inside the loop. This is
7017 * done because objects that cannot be found are considered to be
7018 * an empty hash. The reply should then be a series of NULLs. */
7019 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->argc
-2));
7020 for (i
= 2; i
< c
->argc
; i
++) {
7021 if (o
!= NULL
&& (value
= hashGet(o
,c
->argv
[i
])) != NULL
) {
7022 addReplyBulk(c
,value
);
7023 decrRefCount(value
);
7025 addReply(c
,shared
.nullbulk
);
7030 static void hdelCommand(redisClient
*c
) {
7032 if ((o
= lookupKeyWriteOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7033 checkType(c
,o
,REDIS_HASH
)) return;
7035 if (hashDelete(o
,c
->argv
[2])) {
7036 if (hashLength(o
) == 0) dbDelete(c
->db
,c
->argv
[1]);
7037 addReply(c
,shared
.cone
);
7040 addReply(c
,shared
.czero
);
7044 static void hlenCommand(redisClient
*c
) {
7046 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7047 checkType(c
,o
,REDIS_HASH
)) return;
7049 addReplyUlong(c
,hashLength(o
));
7052 static void genericHgetallCommand(redisClient
*c
, int flags
) {
7053 robj
*o
, *lenobj
, *obj
;
7054 unsigned long count
= 0;
7057 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.emptymultibulk
)) == NULL
7058 || checkType(c
,o
,REDIS_HASH
)) return;
7060 lenobj
= createObject(REDIS_STRING
,NULL
);
7062 decrRefCount(lenobj
);
7064 hi
= hashInitIterator(o
);
7065 while (hashNext(hi
) != REDIS_ERR
) {
7066 if (flags
& REDIS_HASH_KEY
) {
7067 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
7068 addReplyBulk(c
,obj
);
7072 if (flags
& REDIS_HASH_VALUE
) {
7073 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
7074 addReplyBulk(c
,obj
);
7079 hashReleaseIterator(hi
);
7081 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%lu\r\n",count
);
7084 static void hkeysCommand(redisClient
*c
) {
7085 genericHgetallCommand(c
,REDIS_HASH_KEY
);
7088 static void hvalsCommand(redisClient
*c
) {
7089 genericHgetallCommand(c
,REDIS_HASH_VALUE
);
7092 static void hgetallCommand(redisClient
*c
) {
7093 genericHgetallCommand(c
,REDIS_HASH_KEY
|REDIS_HASH_VALUE
);
7096 static void hexistsCommand(redisClient
*c
) {
7098 if ((o
= lookupKeyReadOrReply(c
,c
->argv
[1],shared
.czero
)) == NULL
||
7099 checkType(c
,o
,REDIS_HASH
)) return;
7101 addReply(c
, hashExists(o
,c
->argv
[2]) ? shared
.cone
: shared
.czero
);
7104 static void convertToRealHash(robj
*o
) {
7105 unsigned char *key
, *val
, *p
, *zm
= o
->ptr
;
7106 unsigned int klen
, vlen
;
7107 dict
*dict
= dictCreate(&hashDictType
,NULL
);
7109 assert(o
->type
== REDIS_HASH
&& o
->encoding
!= REDIS_ENCODING_HT
);
7110 p
= zipmapRewind(zm
);
7111 while((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) != NULL
) {
7112 robj
*keyobj
, *valobj
;
7114 keyobj
= createStringObject((char*)key
,klen
);
7115 valobj
= createStringObject((char*)val
,vlen
);
7116 keyobj
= tryObjectEncoding(keyobj
);
7117 valobj
= tryObjectEncoding(valobj
);
7118 dictAdd(dict
,keyobj
,valobj
);
7120 o
->encoding
= REDIS_ENCODING_HT
;
7125 /* ========================= Non type-specific commands ==================== */
7127 static void flushdbCommand(redisClient
*c
) {
7128 server
.dirty
+= dictSize(c
->db
->dict
);
7129 touchWatchedKeysOnFlush(c
->db
->id
);
7130 dictEmpty(c
->db
->dict
);
7131 dictEmpty(c
->db
->expires
);
7132 addReply(c
,shared
.ok
);
7135 static void flushallCommand(redisClient
*c
) {
7136 touchWatchedKeysOnFlush(-1);
7137 server
.dirty
+= emptyDb();
7138 addReply(c
,shared
.ok
);
7139 if (server
.bgsavechildpid
!= -1) {
7140 kill(server
.bgsavechildpid
,SIGKILL
);
7141 rdbRemoveTempFile(server
.bgsavechildpid
);
7143 rdbSave(server
.dbfilename
);
7147 static redisSortOperation
*createSortOperation(int type
, robj
*pattern
) {
7148 redisSortOperation
*so
= zmalloc(sizeof(*so
));
7150 so
->pattern
= pattern
;
7154 /* Return the value associated to the key with a name obtained
7155 * substituting the first occurence of '*' in 'pattern' with 'subst'.
7156 * The returned object will always have its refcount increased by 1
7157 * when it is non-NULL. */
7158 static robj
*lookupKeyByPattern(redisDb
*db
, robj
*pattern
, robj
*subst
) {
7161 robj keyobj
, fieldobj
, *o
;
7162 int prefixlen
, sublen
, postfixlen
, fieldlen
;
7163 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
7167 char buf
[REDIS_SORTKEY_MAX
+1];
7168 } keyname
, fieldname
;
7170 /* If the pattern is "#" return the substitution object itself in order
7171 * to implement the "SORT ... GET #" feature. */
7172 spat
= pattern
->ptr
;
7173 if (spat
[0] == '#' && spat
[1] == '\0') {
7174 incrRefCount(subst
);
7178 /* The substitution object may be specially encoded. If so we create
7179 * a decoded object on the fly. Otherwise getDecodedObject will just
7180 * increment the ref count, that we'll decrement later. */
7181 subst
= getDecodedObject(subst
);
7184 if (sdslen(spat
)+sdslen(ssub
)-1 > REDIS_SORTKEY_MAX
) return NULL
;
7185 p
= strchr(spat
,'*');
7187 decrRefCount(subst
);
7191 /* Find out if we're dealing with a hash dereference. */
7192 if ((f
= strstr(p
+1, "->")) != NULL
) {
7193 fieldlen
= sdslen(spat
)-(f
-spat
);
7194 /* this also copies \0 character */
7195 memcpy(fieldname
.buf
,f
+2,fieldlen
-1);
7196 fieldname
.len
= fieldlen
-2;
7202 sublen
= sdslen(ssub
);
7203 postfixlen
= sdslen(spat
)-(prefixlen
+1)-fieldlen
;
7204 memcpy(keyname
.buf
,spat
,prefixlen
);
7205 memcpy(keyname
.buf
+prefixlen
,ssub
,sublen
);
7206 memcpy(keyname
.buf
+prefixlen
+sublen
,p
+1,postfixlen
);
7207 keyname
.buf
[prefixlen
+sublen
+postfixlen
] = '\0';
7208 keyname
.len
= prefixlen
+sublen
+postfixlen
;
7209 decrRefCount(subst
);
7211 /* Lookup substituted key */
7212 initStaticStringObject(keyobj
,((char*)&keyname
)+(sizeof(long)*2));
7213 o
= lookupKeyRead(db
,&keyobj
);
7214 if (o
== NULL
) return NULL
;
7217 if (o
->type
!= REDIS_HASH
|| fieldname
.len
< 1) return NULL
;
7219 /* Retrieve value from hash by the field name. This operation
7220 * already increases the refcount of the returned object. */
7221 initStaticStringObject(fieldobj
,((char*)&fieldname
)+(sizeof(long)*2));
7222 o
= hashGet(o
, &fieldobj
);
7224 if (o
->type
!= REDIS_STRING
) return NULL
;
7226 /* Every object that this function returns needs to have its refcount
7227 * increased. sortCommand decreases it again. */
7234 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
7235 * the additional parameter is not standard but a BSD-specific we have to
7236 * pass sorting parameters via the global 'server' structure */
7237 static int sortCompare(const void *s1
, const void *s2
) {
7238 const redisSortObject
*so1
= s1
, *so2
= s2
;
7241 if (!server
.sort_alpha
) {
7242 /* Numeric sorting. Here it's trivial as we precomputed scores */
7243 if (so1
->u
.score
> so2
->u
.score
) {
7245 } else if (so1
->u
.score
< so2
->u
.score
) {
7251 /* Alphanumeric sorting */
7252 if (server
.sort_bypattern
) {
7253 if (!so1
->u
.cmpobj
|| !so2
->u
.cmpobj
) {
7254 /* At least one compare object is NULL */
7255 if (so1
->u
.cmpobj
== so2
->u
.cmpobj
)
7257 else if (so1
->u
.cmpobj
== NULL
)
7262 /* We have both the objects, use strcoll */
7263 cmp
= strcoll(so1
->u
.cmpobj
->ptr
,so2
->u
.cmpobj
->ptr
);
7266 /* Compare elements directly. */
7267 cmp
= compareStringObjects(so1
->obj
,so2
->obj
);
7270 return server
.sort_desc
? -cmp
: cmp
;
7273 /* The SORT command is the most complex command in Redis. Warning: this code
7274 * is optimized for speed and a bit less for readability */
7275 static void sortCommand(redisClient
*c
) {
7277 unsigned int outputlen
= 0;
7278 int desc
= 0, alpha
= 0;
7279 int limit_start
= 0, limit_count
= -1, start
, end
;
7280 int j
, dontsort
= 0, vectorlen
;
7281 int getop
= 0; /* GET operation counter */
7282 robj
*sortval
, *sortby
= NULL
, *storekey
= NULL
;
7283 redisSortObject
*vector
; /* Resulting vector to sort */
7285 /* Lookup the key to sort. It must be of the right types */
7286 sortval
= lookupKeyRead(c
->db
,c
->argv
[1]);
7287 if (sortval
== NULL
) {
7288 addReply(c
,shared
.emptymultibulk
);
7291 if (sortval
->type
!= REDIS_SET
&& sortval
->type
!= REDIS_LIST
&&
7292 sortval
->type
!= REDIS_ZSET
)
7294 addReply(c
,shared
.wrongtypeerr
);
7298 /* Create a list of operations to perform for every sorted element.
7299 * Operations can be GET/DEL/INCR/DECR */
7300 operations
= listCreate();
7301 listSetFreeMethod(operations
,zfree
);
7304 /* Now we need to protect sortval incrementing its count, in the future
7305 * SORT may have options able to overwrite/delete keys during the sorting
7306 * and the sorted key itself may get destroied */
7307 incrRefCount(sortval
);
7309 /* The SORT command has an SQL-alike syntax, parse it */
7310 while(j
< c
->argc
) {
7311 int leftargs
= c
->argc
-j
-1;
7312 if (!strcasecmp(c
->argv
[j
]->ptr
,"asc")) {
7314 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"desc")) {
7316 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"alpha")) {
7318 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"limit") && leftargs
>= 2) {
7319 limit_start
= atoi(c
->argv
[j
+1]->ptr
);
7320 limit_count
= atoi(c
->argv
[j
+2]->ptr
);
7322 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"store") && leftargs
>= 1) {
7323 storekey
= c
->argv
[j
+1];
7325 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"by") && leftargs
>= 1) {
7326 sortby
= c
->argv
[j
+1];
7327 /* If the BY pattern does not contain '*', i.e. it is constant,
7328 * we don't need to sort nor to lookup the weight keys. */
7329 if (strchr(c
->argv
[j
+1]->ptr
,'*') == NULL
) dontsort
= 1;
7331 } else if (!strcasecmp(c
->argv
[j
]->ptr
,"get") && leftargs
>= 1) {
7332 listAddNodeTail(operations
,createSortOperation(
7333 REDIS_SORT_GET
,c
->argv
[j
+1]));
7337 decrRefCount(sortval
);
7338 listRelease(operations
);
7339 addReply(c
,shared
.syntaxerr
);
7345 /* Load the sorting vector with all the objects to sort */
7346 switch(sortval
->type
) {
7347 case REDIS_LIST
: vectorlen
= lLength(sortval
); break;
7348 case REDIS_SET
: vectorlen
= dictSize((dict
*)sortval
->ptr
); break;
7349 case REDIS_ZSET
: vectorlen
= dictSize(((zset
*)sortval
->ptr
)->dict
); break;
7350 default: vectorlen
= 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
7352 vector
= zmalloc(sizeof(redisSortObject
)*vectorlen
);
7355 if (sortval
->type
== REDIS_LIST
) {
7356 lIterator
*li
= lInitIterator(sortval
,0,REDIS_TAIL
);
7358 while(lNext(li
,&entry
)) {
7359 vector
[j
].obj
= lGet(&entry
);
7360 vector
[j
].u
.score
= 0;
7361 vector
[j
].u
.cmpobj
= NULL
;
7364 lReleaseIterator(li
);
7370 if (sortval
->type
== REDIS_SET
) {
7373 zset
*zs
= sortval
->ptr
;
7377 di
= dictGetIterator(set
);
7378 while((setele
= dictNext(di
)) != NULL
) {
7379 vector
[j
].obj
= dictGetEntryKey(setele
);
7380 vector
[j
].u
.score
= 0;
7381 vector
[j
].u
.cmpobj
= NULL
;
7384 dictReleaseIterator(di
);
7386 redisAssert(j
== vectorlen
);
7388 /* Now it's time to load the right scores in the sorting vector */
7389 if (dontsort
== 0) {
7390 for (j
= 0; j
< vectorlen
; j
++) {
7393 /* lookup value to sort by */
7394 byval
= lookupKeyByPattern(c
->db
,sortby
,vector
[j
].obj
);
7395 if (!byval
) continue;
7397 /* use object itself to sort by */
7398 byval
= vector
[j
].obj
;
7402 if (sortby
) vector
[j
].u
.cmpobj
= getDecodedObject(byval
);
7404 if (byval
->encoding
== REDIS_ENCODING_RAW
) {
7405 vector
[j
].u
.score
= strtod(byval
->ptr
,NULL
);
7406 } else if (byval
->encoding
== REDIS_ENCODING_INT
) {
7407 /* Don't need to decode the object if it's
7408 * integer-encoded (the only encoding supported) so
7409 * far. We can just cast it */
7410 vector
[j
].u
.score
= (long)byval
->ptr
;
7412 redisAssert(1 != 1);
7416 /* when the object was retrieved using lookupKeyByPattern,
7417 * its refcount needs to be decreased. */
7419 decrRefCount(byval
);
7424 /* We are ready to sort the vector... perform a bit of sanity check
7425 * on the LIMIT option too. We'll use a partial version of quicksort. */
7426 start
= (limit_start
< 0) ? 0 : limit_start
;
7427 end
= (limit_count
< 0) ? vectorlen
-1 : start
+limit_count
-1;
7428 if (start
>= vectorlen
) {
7429 start
= vectorlen
-1;
7432 if (end
>= vectorlen
) end
= vectorlen
-1;
7434 if (dontsort
== 0) {
7435 server
.sort_desc
= desc
;
7436 server
.sort_alpha
= alpha
;
7437 server
.sort_bypattern
= sortby
? 1 : 0;
7438 if (sortby
&& (start
!= 0 || end
!= vectorlen
-1))
7439 pqsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
, start
,end
);
7441 qsort(vector
,vectorlen
,sizeof(redisSortObject
),sortCompare
);
7444 /* Send command output to the output buffer, performing the specified
7445 * GET/DEL/INCR/DECR operations if any. */
7446 outputlen
= getop
? getop
*(end
-start
+1) : end
-start
+1;
7447 if (storekey
== NULL
) {
7448 /* STORE option not specified, sent the sorting result to client */
7449 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",outputlen
));
7450 for (j
= start
; j
<= end
; j
++) {
7454 if (!getop
) addReplyBulk(c
,vector
[j
].obj
);
7455 listRewind(operations
,&li
);
7456 while((ln
= listNext(&li
))) {
7457 redisSortOperation
*sop
= ln
->value
;
7458 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7461 if (sop
->type
== REDIS_SORT_GET
) {
7463 addReply(c
,shared
.nullbulk
);
7465 addReplyBulk(c
,val
);
7469 redisAssert(sop
->type
== REDIS_SORT_GET
); /* always fails */
7474 robj
*sobj
= createZiplistObject();
7476 /* STORE option specified, set the sorting result as a List object */
7477 for (j
= start
; j
<= end
; j
++) {
7482 lPush(sobj
,vector
[j
].obj
,REDIS_TAIL
);
7484 listRewind(operations
,&li
);
7485 while((ln
= listNext(&li
))) {
7486 redisSortOperation
*sop
= ln
->value
;
7487 robj
*val
= lookupKeyByPattern(c
->db
,sop
->pattern
,
7490 if (sop
->type
== REDIS_SORT_GET
) {
7491 if (!val
) val
= createStringObject("",0);
7493 /* lPush does an incrRefCount, so we should take care
7494 * care of the incremented refcount caused by either
7495 * lookupKeyByPattern or createStringObject("",0) */
7496 lPush(sobj
,val
,REDIS_TAIL
);
7500 redisAssert(sop
->type
== REDIS_SORT_GET
);
7505 dbReplace(c
->db
,storekey
,sobj
);
7506 /* Note: we add 1 because the DB is dirty anyway since even if the
7507 * SORT result is empty a new key is set and maybe the old content
7509 server
.dirty
+= 1+outputlen
;
7510 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",outputlen
));
7514 if (sortval
->type
== REDIS_LIST
)
7515 for (j
= 0; j
< vectorlen
; j
++)
7516 decrRefCount(vector
[j
].obj
);
7517 decrRefCount(sortval
);
7518 listRelease(operations
);
7519 for (j
= 0; j
< vectorlen
; j
++) {
7520 if (alpha
&& vector
[j
].u
.cmpobj
)
7521 decrRefCount(vector
[j
].u
.cmpobj
);
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' must point to a buffer large enough for the result (at most 20
 * digits plus the unit character and the terminator). Values below 1024
 * are printed as an exact byte count; larger values are printed with two
 * decimals using binary units K, M, G, T and P. Values too large for P
 * fall back to the exact byte count, so 's' is always written (the
 * previous version left it untouched for n >= 1024^4). */
static void bytesToHuman(char *s, unsigned long long n) {
    static const char units[] = "KMGTP";
    unsigned long long unit = 1024ULL;
    int i;

    if (n < unit) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }
    for (i = 0; units[i] != '\0'; i++) {
        /* 'unit' is 1024^(i+1); this unit applies while n < 1024^(i+2). */
        if (n < unit*1024ULL) {
            sprintf(s,"%.2f%c",(double)n/(double)unit,units[i]);
            return;
        }
        unit *= 1024ULL;
    }
    /* Larger than the biggest supported unit: report plain bytes. */
    sprintf(s,"%lluB",n);
}
7547 /* Create the string returned by the INFO command. This is decoupled
7548 * by the INFO command itself as we need to report the same information
7549 * on memory corruption problems. */
7550 static sds
genRedisInfoString(void) {
7552 time_t uptime
= time(NULL
)-server
.stat_starttime
;
7556 bytesToHuman(hmem
,zmalloc_used_memory());
7557 info
= sdscatprintf(sdsempty(),
7558 "redis_version:%s\r\n"
7559 "redis_git_sha1:%s\r\n"
7560 "redis_git_dirty:%d\r\n"
7562 "multiplexing_api:%s\r\n"
7563 "process_id:%ld\r\n"
7564 "uptime_in_seconds:%ld\r\n"
7565 "uptime_in_days:%ld\r\n"
7566 "connected_clients:%d\r\n"
7567 "connected_slaves:%d\r\n"
7568 "blocked_clients:%d\r\n"
7569 "used_memory:%zu\r\n"
7570 "used_memory_human:%s\r\n"
7571 "changes_since_last_save:%lld\r\n"
7572 "bgsave_in_progress:%d\r\n"
7573 "last_save_time:%ld\r\n"
7574 "bgrewriteaof_in_progress:%d\r\n"
7575 "total_connections_received:%lld\r\n"
7576 "total_commands_processed:%lld\r\n"
7577 "expired_keys:%lld\r\n"
7578 "hash_max_zipmap_entries:%zu\r\n"
7579 "hash_max_zipmap_value:%zu\r\n"
7580 "pubsub_channels:%ld\r\n"
7581 "pubsub_patterns:%u\r\n"
7586 strtol(REDIS_GIT_DIRTY
,NULL
,10) > 0,
7587 (sizeof(long) == 8) ? "64" : "32",
7592 listLength(server
.clients
)-listLength(server
.slaves
),
7593 listLength(server
.slaves
),
7594 server
.blpop_blocked_clients
,
7595 zmalloc_used_memory(),
7598 server
.bgsavechildpid
!= -1,
7600 server
.bgrewritechildpid
!= -1,
7601 server
.stat_numconnections
,
7602 server
.stat_numcommands
,
7603 server
.stat_expiredkeys
,
7604 server
.hash_max_zipmap_entries
,
7605 server
.hash_max_zipmap_value
,
7606 dictSize(server
.pubsub_channels
),
7607 listLength(server
.pubsub_patterns
),
7608 server
.vm_enabled
!= 0,
7609 server
.masterhost
== NULL
? "master" : "slave"
7611 if (server
.masterhost
) {
7612 info
= sdscatprintf(info
,
7613 "master_host:%s\r\n"
7614 "master_port:%d\r\n"
7615 "master_link_status:%s\r\n"
7616 "master_last_io_seconds_ago:%d\r\n"
7619 (server
.replstate
== REDIS_REPL_CONNECTED
) ?
7621 server
.master
? ((int)(time(NULL
)-server
.master
->lastinteraction
)) : -1
7624 if (server
.vm_enabled
) {
7626 info
= sdscatprintf(info
,
7627 "vm_conf_max_memory:%llu\r\n"
7628 "vm_conf_page_size:%llu\r\n"
7629 "vm_conf_pages:%llu\r\n"
7630 "vm_stats_used_pages:%llu\r\n"
7631 "vm_stats_swapped_objects:%llu\r\n"
7632 "vm_stats_swappin_count:%llu\r\n"
7633 "vm_stats_swappout_count:%llu\r\n"
7634 "vm_stats_io_newjobs_len:%lu\r\n"
7635 "vm_stats_io_processing_len:%lu\r\n"
7636 "vm_stats_io_processed_len:%lu\r\n"
7637 "vm_stats_io_active_threads:%lu\r\n"
7638 "vm_stats_blocked_clients:%lu\r\n"
7639 ,(unsigned long long) server
.vm_max_memory
,
7640 (unsigned long long) server
.vm_page_size
,
7641 (unsigned long long) server
.vm_pages
,
7642 (unsigned long long) server
.vm_stats_used_pages
,
7643 (unsigned long long) server
.vm_stats_swapped_objects
,
7644 (unsigned long long) server
.vm_stats_swapins
,
7645 (unsigned long long) server
.vm_stats_swapouts
,
7646 (unsigned long) listLength(server
.io_newjobs
),
7647 (unsigned long) listLength(server
.io_processing
),
7648 (unsigned long) listLength(server
.io_processed
),
7649 (unsigned long) server
.io_active_threads
,
7650 (unsigned long) server
.vm_blocked_clients
7654 for (j
= 0; j
< server
.dbnum
; j
++) {
7655 long long keys
, vkeys
;
7657 keys
= dictSize(server
.db
[j
].dict
);
7658 vkeys
= dictSize(server
.db
[j
].expires
);
7659 if (keys
|| vkeys
) {
7660 info
= sdscatprintf(info
, "db%d:keys=%lld,expires=%lld\r\n",
7667 static void infoCommand(redisClient
*c
) {
7668 sds info
= genRedisInfoString();
7669 addReplySds(c
,sdscatprintf(sdsempty(),"$%lu\r\n",
7670 (unsigned long)sdslen(info
)));
7671 addReplySds(c
,info
);
7672 addReply(c
,shared
.crlf
);
7675 static void monitorCommand(redisClient
*c
) {
7676 /* ignore MONITOR if already slave or in monitor mode */
7677 if (c
->flags
& REDIS_SLAVE
) return;
7679 c
->flags
|= (REDIS_SLAVE
|REDIS_MONITOR
);
7681 listAddNodeTail(server
.monitors
,c
);
7682 addReply(c
,shared
.ok
);
7685 /* ================================= Expire ================================= */
7686 static int removeExpire(redisDb
*db
, robj
*key
) {
7687 if (dictDelete(db
->expires
,key
->ptr
) == DICT_OK
) {
7694 static int setExpire(redisDb
*db
, robj
*key
, time_t when
) {
7695 sds copy
= sdsdup(key
->ptr
);
7696 if (dictAdd(db
->expires
,copy
,(void*)when
) == DICT_ERR
) {
7704 /* Return the expire time of the specified key, or -1 if no expire
7705 * is associated with this key (i.e. the key is non volatile) */
7706 static time_t getExpire(redisDb
*db
, robj
*key
) {
7709 /* No expire? return ASAP */
7710 if (dictSize(db
->expires
) == 0 ||
7711 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return -1;
7713 return (time_t) dictGetEntryVal(de
);
7716 static int expireIfNeeded(redisDb
*db
, robj
*key
) {
7720 /* No expire? return ASAP */
7721 if (dictSize(db
->expires
) == 0 ||
7722 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
7724 /* Lookup the expire */
7725 when
= (time_t) dictGetEntryVal(de
);
7726 if (time(NULL
) <= when
) return 0;
7728 /* Delete the key */
7730 server
.stat_expiredkeys
++;
7734 static int deleteIfVolatile(redisDb
*db
, robj
*key
) {
7737 /* No expire? return ASAP */
7738 if (dictSize(db
->expires
) == 0 ||
7739 (de
= dictFind(db
->expires
,key
->ptr
)) == NULL
) return 0;
7741 /* Delete the key */
7743 server
.stat_expiredkeys
++;
7744 dictDelete(db
->expires
,key
->ptr
);
7745 return dictDelete(db
->dict
,key
->ptr
) == DICT_OK
;
7748 static void expireGenericCommand(redisClient
*c
, robj
*key
, robj
*param
, long offset
) {
7752 if (getLongFromObjectOrReply(c
, param
, &seconds
, NULL
) != REDIS_OK
) return;
7756 de
= dictFind(c
->db
->dict
,key
->ptr
);
7758 addReply(c
,shared
.czero
);
7762 if (dbDelete(c
->db
,key
)) server
.dirty
++;
7763 addReply(c
, shared
.cone
);
7766 time_t when
= time(NULL
)+seconds
;
7767 if (setExpire(c
->db
,key
,when
)) {
7768 addReply(c
,shared
.cone
);
7771 addReply(c
,shared
.czero
);
7777 static void expireCommand(redisClient
*c
) {
7778 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],0);
7781 static void expireatCommand(redisClient
*c
) {
7782 expireGenericCommand(c
,c
->argv
[1],c
->argv
[2],time(NULL
));
7785 static void ttlCommand(redisClient
*c
) {
7789 expire
= getExpire(c
->db
,c
->argv
[1]);
7791 ttl
= (int) (expire
-time(NULL
));
7792 if (ttl
< 0) ttl
= -1;
7794 addReplySds(c
,sdscatprintf(sdsempty(),":%d\r\n",ttl
));
7797 /* ================================ MULTI/EXEC ============================== */
7799 /* Client state initialization for MULTI/EXEC */
7800 static void initClientMultiState(redisClient
*c
) {
7801 c
->mstate
.commands
= NULL
;
7802 c
->mstate
.count
= 0;
7805 /* Release all the resources associated with MULTI/EXEC state */
7806 static void freeClientMultiState(redisClient
*c
) {
7809 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7811 multiCmd
*mc
= c
->mstate
.commands
+j
;
7813 for (i
= 0; i
< mc
->argc
; i
++)
7814 decrRefCount(mc
->argv
[i
]);
7817 zfree(c
->mstate
.commands
);
7820 /* Add a new command into the MULTI commands queue */
7821 static void queueMultiCommand(redisClient
*c
, struct redisCommand
*cmd
) {
7825 c
->mstate
.commands
= zrealloc(c
->mstate
.commands
,
7826 sizeof(multiCmd
)*(c
->mstate
.count
+1));
7827 mc
= c
->mstate
.commands
+c
->mstate
.count
;
7830 mc
->argv
= zmalloc(sizeof(robj
*)*c
->argc
);
7831 memcpy(mc
->argv
,c
->argv
,sizeof(robj
*)*c
->argc
);
7832 for (j
= 0; j
< c
->argc
; j
++)
7833 incrRefCount(mc
->argv
[j
]);
7837 static void multiCommand(redisClient
*c
) {
7838 if (c
->flags
& REDIS_MULTI
) {
7839 addReplySds(c
,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7842 c
->flags
|= REDIS_MULTI
;
7843 addReply(c
,shared
.ok
);
7846 static void discardCommand(redisClient
*c
) {
7847 if (!(c
->flags
& REDIS_MULTI
)) {
7848 addReplySds(c
,sdsnew("-ERR DISCARD without MULTI\r\n"));
7852 freeClientMultiState(c
);
7853 initClientMultiState(c
);
7854 c
->flags
&= (~REDIS_MULTI
);
7855 addReply(c
,shared
.ok
);
7858 /* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7859 * implementation for more information. */
7860 static void execCommandReplicateMulti(redisClient
*c
) {
7861 struct redisCommand
*cmd
;
7862 robj
*multistring
= createStringObject("MULTI",5);
7864 cmd
= lookupCommand("multi");
7865 if (server
.appendonly
)
7866 feedAppendOnlyFile(cmd
,c
->db
->id
,&multistring
,1);
7867 if (listLength(server
.slaves
))
7868 replicationFeedSlaves(server
.slaves
,c
->db
->id
,&multistring
,1);
7869 decrRefCount(multistring
);
7872 static void execCommand(redisClient
*c
) {
7877 if (!(c
->flags
& REDIS_MULTI
)) {
7878 addReplySds(c
,sdsnew("-ERR EXEC without MULTI\r\n"));
7882 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7883 * A failed EXEC will return a multi bulk nil object. */
7884 if (c
->flags
& REDIS_DIRTY_CAS
) {
7885 freeClientMultiState(c
);
7886 initClientMultiState(c
);
7887 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7889 addReply(c
,shared
.nullmultibulk
);
7893 /* Replicate a MULTI request now that we are sure the block is executed.
7894 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7895 * both the AOF and the replication link will have the same consistency
7896 * and atomicity guarantees. */
7897 execCommandReplicateMulti(c
);
7899 /* Exec all the queued commands */
7900 unwatchAllKeys(c
); /* Unwatch ASAP otherwise we'll waste CPU cycles */
7901 orig_argv
= c
->argv
;
7902 orig_argc
= c
->argc
;
7903 addReplySds(c
,sdscatprintf(sdsempty(),"*%d\r\n",c
->mstate
.count
));
7904 for (j
= 0; j
< c
->mstate
.count
; j
++) {
7905 c
->argc
= c
->mstate
.commands
[j
].argc
;
7906 c
->argv
= c
->mstate
.commands
[j
].argv
;
7907 call(c
,c
->mstate
.commands
[j
].cmd
);
7909 c
->argv
= orig_argv
;
7910 c
->argc
= orig_argc
;
7911 freeClientMultiState(c
);
7912 initClientMultiState(c
);
7913 c
->flags
&= ~(REDIS_MULTI
|REDIS_DIRTY_CAS
);
7914 /* Make sure the EXEC command is always replicated / AOF, since we
7915 * always send the MULTI command (we can't know beforehand if the
7916 * next operations will contain at least a modification to the DB). */
7920 /* =========================== Blocking Operations ========================= */
7922 /* Currently Redis blocking operations support is limited to list POP ops,
7923 * so the current implementation is not fully generic, but it is also not
7924 * completely specific so it will not require a rewrite to support new
7925 * kind of blocking operations in the future.
7927 * Still it's important to note that list blocking operations can be already
7928 * used as a notification mechanism in order to implement other blocking
7929 * operations at application level, so there must be a very strong evidence
7930 * of usefulness and generality before new blocking operations are implemented.
7932 * This is how the current blocking POP works, we use BLPOP as example:
7933 * - If the user calls BLPOP and the key exists and contains a non empty list
7934 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7935 * if there is no need to block.
7936 * - If instead BLPOP is called and the key does not exist or the list is
7937 * empty we need to block. In order to do so we remove the notification for
7938 * new data to read in the client socket (so that we'll not serve new
7939 * requests if the blocking request is not served). Also we put the client
7940 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
7941 * blocking for these keys.
7942 * - If a PUSH operation against a key with blocked clients waiting is
7943 * performed, we serve the first in the list: basically instead of pushing
7944 * the new element inside the list we return it to the (first / oldest)
7945 * blocking client, unblock the client, and remove it from the list.
7947 * The above comment and the source code should be enough in order to understand
7948 * the implementation and modify / fix it later.
7951 /* Set a client in blocking mode for the specified key, with the specified
7953 static void blockForKeys(redisClient
*c
, robj
**keys
, int numkeys
, time_t timeout
) {
7958 c
->blocking_keys
= zmalloc(sizeof(robj
*)*numkeys
);
7959 c
->blocking_keys_num
= numkeys
;
7960 c
->blockingto
= timeout
;
7961 for (j
= 0; j
< numkeys
; j
++) {
7962 /* Add the key in the client structure, to map clients -> keys */
7963 c
->blocking_keys
[j
] = keys
[j
];
7964 incrRefCount(keys
[j
]);
7966 /* And in the other "side", to map keys -> clients */
7967 de
= dictFind(c
->db
->blocking_keys
,keys
[j
]);
7971 /* For every key we take a list of clients blocked for it */
7973 retval
= dictAdd(c
->db
->blocking_keys
,keys
[j
],l
);
7974 incrRefCount(keys
[j
]);
7975 assert(retval
== DICT_OK
);
7977 l
= dictGetEntryVal(de
);
7979 listAddNodeTail(l
,c
);
7981 /* Mark the client as a blocked client */
7982 c
->flags
|= REDIS_BLOCKED
;
7983 server
.blpop_blocked_clients
++;
7986 /* Unblock a client that's waiting in a blocking operation such as BLPOP */
7987 static void unblockClientWaitingData(redisClient
*c
) {
7992 assert(c
->blocking_keys
!= NULL
);
7993 /* The client may wait for multiple keys, so unblock it for every key. */
7994 for (j
= 0; j
< c
->blocking_keys_num
; j
++) {
7995 /* Remove this client from the list of clients waiting for this key. */
7996 de
= dictFind(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
7998 l
= dictGetEntryVal(de
);
7999 listDelNode(l
,listSearchKey(l
,c
));
8000 /* If the list is empty we need to remove it to avoid wasting memory */
8001 if (listLength(l
) == 0)
8002 dictDelete(c
->db
->blocking_keys
,c
->blocking_keys
[j
]);
8003 decrRefCount(c
->blocking_keys
[j
]);
8005 /* Cleanup the client structure */
8006 zfree(c
->blocking_keys
);
8007 c
->blocking_keys
= NULL
;
8008 c
->flags
&= (~REDIS_BLOCKED
);
8009 server
.blpop_blocked_clients
--;
8010 /* We want to process data if there is some command waiting
8011 * in the input buffer. Note that this is safe even if
8012 * unblockClientWaitingData() gets called from freeClient() because
8013 * freeClient() will be smart enough to call this function
8014 * *after* c->querybuf was set to NULL. */
8015 if (c
->querybuf
&& sdslen(c
->querybuf
) > 0) processInputBuffer(c
);
8018 /* This should be called from any function PUSHing into lists.
8019 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
8020 * 'ele' is the element pushed.
8022 * If the function returns 0 there was no client waiting for a list push
8025 * If the function returns 1 there was a client waiting for a list push
8026 * against this key, the element was passed to this client thus it's not
8027 * needed to actually add it to the list and the caller should return asap. */
8028 static int handleClientsWaitingListPush(redisClient
*c
, robj
*key
, robj
*ele
) {
8029 struct dictEntry
*de
;
8030 redisClient
*receiver
;
8034 de
= dictFind(c
->db
->blocking_keys
,key
);
8035 if (de
== NULL
) return 0;
8036 l
= dictGetEntryVal(de
);
8039 receiver
= ln
->value
;
8041 addReplySds(receiver
,sdsnew("*2\r\n"));
8042 addReplyBulk(receiver
,key
);
8043 addReplyBulk(receiver
,ele
);
8044 unblockClientWaitingData(receiver
);
8048 /* Blocking RPOP/LPOP */
8049 static void blockingPopGenericCommand(redisClient
*c
, int where
) {
8054 for (j
= 1; j
< c
->argc
-1; j
++) {
8055 o
= lookupKeyWrite(c
->db
,c
->argv
[j
]);
8057 if (o
->type
!= REDIS_LIST
) {
8058 addReply(c
,shared
.wrongtypeerr
);
8061 list
*list
= o
->ptr
;
8062 if (listLength(list
) != 0) {
8063 /* If the list contains elements fall back to the usual
8064 * non-blocking POP operation */
8065 robj
*argv
[2], **orig_argv
;
8068 /* We need to alter the command arguments before to call
8069 * popGenericCommand() as the command takes a single key. */
8070 orig_argv
= c
->argv
;
8071 orig_argc
= c
->argc
;
8072 argv
[1] = c
->argv
[j
];
8076 /* Also the return value is different, we need to output
8077 * the multi bulk reply header and the key name. The
8078 * "real" command will add the last element (the value)
8079 * for us. If this sounds like a hack to you it's just
8080 * because it is... */
8081 addReplySds(c
,sdsnew("*2\r\n"));
8082 addReplyBulk(c
,argv
[1]);
8083 popGenericCommand(c
,where
);
8085 /* Fix the client structure with the original stuff */
8086 c
->argv
= orig_argv
;
8087 c
->argc
= orig_argc
;
8093 /* If the list is empty or the key does not exists we must block */
8094 timeout
= strtol(c
->argv
[c
->argc
-1]->ptr
,NULL
,10);
8095 if (timeout
> 0) timeout
+= time(NULL
);
8096 blockForKeys(c
,c
->argv
+1,c
->argc
-2,timeout
);
8099 static void blpopCommand(redisClient
*c
) {
8100 blockingPopGenericCommand(c
,REDIS_HEAD
);
8103 static void brpopCommand(redisClient
*c
) {
8104 blockingPopGenericCommand(c
,REDIS_TAIL
);
8107 /* =============================== Replication ============================= */
8109 static int syncWrite(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8110 ssize_t nwritten
, ret
= size
;
8111 time_t start
= time(NULL
);
8115 if (aeWait(fd
,AE_WRITABLE
,1000) & AE_WRITABLE
) {
8116 nwritten
= write(fd
,ptr
,size
);
8117 if (nwritten
== -1) return -1;
8121 if ((time(NULL
)-start
) > timeout
) {
8129 static int syncRead(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8130 ssize_t nread
, totread
= 0;
8131 time_t start
= time(NULL
);
8135 if (aeWait(fd
,AE_READABLE
,1000) & AE_READABLE
) {
8136 nread
= read(fd
,ptr
,size
);
8137 if (nread
== -1) return -1;
8142 if ((time(NULL
)-start
) > timeout
) {
8150 static int syncReadLine(int fd
, char *ptr
, ssize_t size
, int timeout
) {
8157 if (syncRead(fd
,&c
,1,timeout
) == -1) return -1;
8160 if (nread
&& *(ptr
-1) == '\r') *(ptr
-1) = '\0';
8171 static void syncCommand(redisClient
*c
) {
8172 /* ignore SYNC if already slave or in monitor mode */
8173 if (c
->flags
& REDIS_SLAVE
) return;
8175 /* SYNC can't be issued when the server has pending data to send to
8176 * the client about already issued commands. We need a fresh reply
8177 * buffer registering the differences between the BGSAVE and the current
8178 * dataset, so that we can copy to other slaves if needed. */
8179 if (listLength(c
->reply
) != 0) {
8180 addReplySds(c
,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
8184 redisLog(REDIS_NOTICE
,"Slave ask for synchronization");
8185 /* Here we need to check if there is a background saving operation
8186 * in progress, or if it is required to start one */
8187 if (server
.bgsavechildpid
!= -1) {
8188 /* Ok a background save is in progress. Let's check if it is a good
8189 * one for replication, i.e. if there is another slave that is
8190 * registering differences since the server forked to save */
8195 listRewind(server
.slaves
,&li
);
8196 while((ln
= listNext(&li
))) {
8198 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) break;
8201 /* Perfect, the server is already registering differences for
8202 * another slave. Set the right state, and copy the buffer. */
8203 listRelease(c
->reply
);
8204 c
->reply
= listDup(slave
->reply
);
8205 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8206 redisLog(REDIS_NOTICE
,"Waiting for end of BGSAVE for SYNC");
8208 /* No way, we need to wait for the next BGSAVE in order to
8209 * register differences */
8210 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8211 redisLog(REDIS_NOTICE
,"Waiting for next BGSAVE for SYNC");
8214 /* Ok we don't have a BGSAVE in progress, let's start one */
8215 redisLog(REDIS_NOTICE
,"Starting BGSAVE for SYNC");
8216 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8217 redisLog(REDIS_NOTICE
,"Replication failed, can't BGSAVE");
8218 addReplySds(c
,sdsnew("-ERR Unalbe to perform background save\r\n"));
8221 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8224 c
->flags
|= REDIS_SLAVE
;
8226 listAddNodeTail(server
.slaves
,c
);
8230 static void sendBulkToSlave(aeEventLoop
*el
, int fd
, void *privdata
, int mask
) {
8231 redisClient
*slave
= privdata
;
8233 REDIS_NOTUSED(mask
);
8234 char buf
[REDIS_IOBUF_LEN
];
8235 ssize_t nwritten
, buflen
;
8237 if (slave
->repldboff
== 0) {
8238 /* Write the bulk write count before to transfer the DB. In theory here
8239 * we don't know how much room there is in the output buffer of the
8240 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
8241 * operations) will never be smaller than the few bytes we need. */
8244 bulkcount
= sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
8246 if (write(fd
,bulkcount
,sdslen(bulkcount
)) != (signed)sdslen(bulkcount
))
8254 lseek(slave
->repldbfd
,slave
->repldboff
,SEEK_SET
);
8255 buflen
= read(slave
->repldbfd
,buf
,REDIS_IOBUF_LEN
);
8257 redisLog(REDIS_WARNING
,"Read error sending DB to slave: %s",
8258 (buflen
== 0) ? "premature EOF" : strerror(errno
));
8262 if ((nwritten
= write(fd
,buf
,buflen
)) == -1) {
8263 redisLog(REDIS_VERBOSE
,"Write error sending DB to slave: %s",
8268 slave
->repldboff
+= nwritten
;
8269 if (slave
->repldboff
== slave
->repldbsize
) {
8270 close(slave
->repldbfd
);
8271 slave
->repldbfd
= -1;
8272 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8273 slave
->replstate
= REDIS_REPL_ONLINE
;
8274 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
,
8275 sendReplyToClient
, slave
) == AE_ERR
) {
8279 addReplySds(slave
,sdsempty());
8280 redisLog(REDIS_NOTICE
,"Synchronization with slave succeeded");
8284 /* This function is called at the end of every background saving.
8285 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
8286 * otherwise REDIS_ERR is passed to the function.
8288 * The goal of this function is to handle slaves waiting for a successful
8289 * background saving in order to perform non-blocking synchronization. */
8290 static void updateSlavesWaitingBgsave(int bgsaveerr
) {
8292 int startbgsave
= 0;
8295 listRewind(server
.slaves
,&li
);
8296 while((ln
= listNext(&li
))) {
8297 redisClient
*slave
= ln
->value
;
8299 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
) {
8301 slave
->replstate
= REDIS_REPL_WAIT_BGSAVE_END
;
8302 } else if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_END
) {
8303 struct redis_stat buf
;
8305 if (bgsaveerr
!= REDIS_OK
) {
8307 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE child returned an error");
8310 if ((slave
->repldbfd
= open(server
.dbfilename
,O_RDONLY
)) == -1 ||
8311 redis_fstat(slave
->repldbfd
,&buf
) == -1) {
8313 redisLog(REDIS_WARNING
,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno
));
8316 slave
->repldboff
= 0;
8317 slave
->repldbsize
= buf
.st_size
;
8318 slave
->replstate
= REDIS_REPL_SEND_BULK
;
8319 aeDeleteFileEvent(server
.el
,slave
->fd
,AE_WRITABLE
);
8320 if (aeCreateFileEvent(server
.el
, slave
->fd
, AE_WRITABLE
, sendBulkToSlave
, slave
) == AE_ERR
) {
8327 if (rdbSaveBackground(server
.dbfilename
) != REDIS_OK
) {
8330 listRewind(server
.slaves
,&li
);
8331 redisLog(REDIS_WARNING
,"SYNC failed. BGSAVE failed");
8332 while((ln
= listNext(&li
))) {
8333 redisClient
*slave
= ln
->value
;
8335 if (slave
->replstate
== REDIS_REPL_WAIT_BGSAVE_START
)
8342 static int syncWithMaster(void) {
8343 char buf
[1024], tmpfile
[256], authcmd
[1024];
8345 int fd
= anetTcpConnect(NULL
,server
.masterhost
,server
.masterport
);
8346 int dfd
, maxtries
= 5;
8349 redisLog(REDIS_WARNING
,"Unable to connect to MASTER: %s",
8354 /* AUTH with the master if required. */
8355 if(server
.masterauth
) {
8356 snprintf(authcmd
, 1024, "AUTH %s\r\n", server
.masterauth
);
8357 if (syncWrite(fd
, authcmd
, strlen(server
.masterauth
)+7, 5) == -1) {
8359 redisLog(REDIS_WARNING
,"Unable to AUTH to MASTER: %s",
8363 /* Read the AUTH result. */
8364 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8366 redisLog(REDIS_WARNING
,"I/O error reading auth result from MASTER: %s",
8370 if (buf
[0] != '+') {
8372 redisLog(REDIS_WARNING
,"Cannot AUTH to MASTER, is the masterauth password correct?");
8377 /* Issue the SYNC command */
8378 if (syncWrite(fd
,"SYNC \r\n",7,5) == -1) {
8380 redisLog(REDIS_WARNING
,"I/O error writing to MASTER: %s",
8384 /* Read the bulk write count */
8385 if (syncReadLine(fd
,buf
,1024,3600) == -1) {
8387 redisLog(REDIS_WARNING
,"I/O error reading bulk count from MASTER: %s",
8391 if (buf
[0] != '$') {
8393 redisLog(REDIS_WARNING
,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8396 dumpsize
= strtol(buf
+1,NULL
,10);
8397 redisLog(REDIS_NOTICE
,"Receiving %ld bytes data dump from MASTER",dumpsize
);
8398 /* Read the bulk write data on a temp file */
8400 snprintf(tmpfile
,256,
8401 "temp-%d.%ld.rdb",(int)time(NULL
),(long int)getpid());
8402 dfd
= open(tmpfile
,O_CREAT
|O_WRONLY
|O_EXCL
,0644);
8403 if (dfd
!= -1) break;
8408 redisLog(REDIS_WARNING
,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno
));
8412 int nread
, nwritten
;
8414 nread
= read(fd
,buf
,(dumpsize
< 1024)?dumpsize
:1024);
8416 redisLog(REDIS_WARNING
,"I/O error trying to sync with MASTER: %s",
8422 nwritten
= write(dfd
,buf
,nread
);
8423 if (nwritten
== -1) {
8424 redisLog(REDIS_WARNING
,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno
));
8432 if (rename(tmpfile
,server
.dbfilename
) == -1) {
8433 redisLog(REDIS_WARNING
,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno
));
8439 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
8440 redisLog(REDIS_WARNING
,"Failed trying to load the MASTER synchronization DB from disk");
8444 server
.master
= createClient(fd
);
8445 server
.master
->flags
|= REDIS_MASTER
;
8446 server
.master
->authenticated
= 1;
8447 server
.replstate
= REDIS_REPL_CONNECTED
;
8451 static void slaveofCommand(redisClient
*c
) {
8452 if (!strcasecmp(c
->argv
[1]->ptr
,"no") &&
8453 !strcasecmp(c
->argv
[2]->ptr
,"one")) {
8454 if (server
.masterhost
) {
8455 sdsfree(server
.masterhost
);
8456 server
.masterhost
= NULL
;
8457 if (server
.master
) freeClient(server
.master
);
8458 server
.replstate
= REDIS_REPL_NONE
;
8459 redisLog(REDIS_NOTICE
,"MASTER MODE enabled (user request)");
8462 sdsfree(server
.masterhost
);
8463 server
.masterhost
= sdsdup(c
->argv
[1]->ptr
);
8464 server
.masterport
= atoi(c
->argv
[2]->ptr
);
8465 if (server
.master
) freeClient(server
.master
);
8466 server
.replstate
= REDIS_REPL_CONNECT
;
8467 redisLog(REDIS_NOTICE
,"SLAVE OF %s:%d enabled (user request)",
8468 server
.masterhost
, server
.masterport
);
8470 addReply(c
,shared
.ok
);
8473 /* ============================ Maxmemory directive ======================== */
8475 /* Try to free one object from the pre-allocated objects free list.
8476 * This is useful under low mem conditions as by default we take 1 million
8477 * free objects allocated. On success REDIS_OK is returned, otherwise
8479 static int tryFreeOneObjectFromFreelist(void) {
8482 if (server
.vm_enabled
) pthread_mutex_lock(&server
.obj_freelist_mutex
);
8483 if (listLength(server
.objfreelist
)) {
8484 listNode
*head
= listFirst(server
.objfreelist
);
8485 o
= listNodeValue(head
);
8486 listDelNode(server
.objfreelist
,head
);
8487 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8491 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.obj_freelist_mutex
);
8496 /* This function gets called when 'maxmemory' is set on the config file to limit
8497 * the max memory used by the server, and we are out of memory.
8498 * This function will try to, in order:
8500 * - Free objects from the free list
8501 * - Try to remove keys with an EXPIRE set
8503 * If it is not possible to free enough memory to reach used-memory < maxmemory
8504 * the server will start refusing commands that will enlarge even more the
8507 static void freeMemoryIfNeeded(void) {
8508 while (server
.maxmemory
&& zmalloc_used_memory() > server
.maxmemory
) {
8509 int j
, k
, freed
= 0;
8511 if (tryFreeOneObjectFromFreelist() == REDIS_OK
) continue;
8512 for (j
= 0; j
< server
.dbnum
; j
++) {
8514 robj
*minkey
= NULL
;
8515 struct dictEntry
*de
;
8517 if (dictSize(server
.db
[j
].expires
)) {
8519 /* From a sample of three keys drop the one nearest to
8520 * the natural expire */
8521 for (k
= 0; k
< 3; k
++) {
8524 de
= dictGetRandomKey(server
.db
[j
].expires
);
8525 t
= (time_t) dictGetEntryVal(de
);
8526 if (minttl
== -1 || t
< minttl
) {
8527 minkey
= dictGetEntryKey(de
);
8531 dbDelete(server
.db
+j
,minkey
);
8534 if (!freed
) return; /* nothing to free... */
8538 /* ============================== Append Only file ========================== */
8540 /* Called when the user switches from "appendonly yes" to "appendonly no"
8541 * at runtime using the CONFIG command. */
8542 static void stopAppendOnly(void) {
8543 flushAppendOnlyFile();
8544 aof_fsync(server
.appendfd
);
8545 close(server
.appendfd
);
8547 server
.appendfd
= -1;
8548 server
.appendseldb
= -1;
8549 server
.appendonly
= 0;
8550 /* rewrite operation in progress? kill it, wait child exit */
8551 if (server
.bgsavechildpid
!= -1) {
8554 if (kill(server
.bgsavechildpid
,SIGKILL
) != -1)
8555 wait3(&statloc
,0,NULL
);
8556 /* reset the buffer accumulating changes while the child saves */
8557 sdsfree(server
.bgrewritebuf
);
8558 server
.bgrewritebuf
= sdsempty();
8559 server
.bgsavechildpid
= -1;
8563 /* Called when the user switches from "appendonly no" to "appendonly yes"
8564 * at runtime using the CONFIG command. */
8565 static int startAppendOnly(void) {
8566 server
.appendonly
= 1;
8567 server
.lastfsync
= time(NULL
);
8568 server
.appendfd
= open(server
.appendfilename
,O_WRONLY
|O_APPEND
|O_CREAT
,0644);
8569 if (server
.appendfd
== -1) {
8570 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno
));
8573 if (rewriteAppendOnlyFileBackground() == REDIS_ERR
) {
8574 server
.appendonly
= 0;
8575 close(server
.appendfd
);
8576 redisLog(REDIS_WARNING
,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno
));
8582 /* Write the append only file buffer on disk.
8584 * Since we are required to write the AOF before replying to the client,
8585 * and the only way the client socket can get a write is when entering the
8586 * event loop, we accumulate all the AOF writes in a memory
8587 * buffer and write it on disk using this function just before entering
8588 * the event loop again. */
8589 static void flushAppendOnlyFile(void) {
8593 if (sdslen(server
.aofbuf
) == 0) return;
8595 /* We want to perform a single write. This should be guaranteed atomic
8596 * at least if the filesystem we are writing is a real physical one.
8597 * While this will save us against the server being killed I don't think
8598 * there is much to do about the whole server stopping for power problems
8600 nwritten
= write(server
.appendfd
,server
.aofbuf
,sdslen(server
.aofbuf
));
8601 if (nwritten
!= (signed)sdslen(server
.aofbuf
)) {
8602 /* Ooops, we are in troubles. The best thing to do for now is
8603 * aborting instead of giving the illusion that everything is
8604 * working as expected. */
8605 if (nwritten
== -1) {
8606 redisLog(REDIS_WARNING
,"Exiting on error writing to the append-only file: %s",strerror(errno
));
8608 redisLog(REDIS_WARNING
,"Exiting on short write while writing to the append-only file: %s",strerror(errno
));
8612 sdsfree(server
.aofbuf
);
8613 server
.aofbuf
= sdsempty();
8615 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8616 * children performing heavy I/O on disk. */
8617 if (server
.no_appendfsync_on_rewrite
&&
8618 (server
.bgrewritechildpid
!= -1 || server
.bgsavechildpid
!= -1))
8620 /* Fsync if needed */
8622 if (server
.appendfsync
== APPENDFSYNC_ALWAYS
||
8623 (server
.appendfsync
== APPENDFSYNC_EVERYSEC
&&
8624 now
-server
.lastfsync
> 1))
8626 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8627 * flushing metadata. */
8628 aof_fsync(server
.appendfd
); /* Let's try to get this data on the disk */
8629 server
.lastfsync
= now
;
8633 static sds
catAppendOnlyGenericCommand(sds buf
, int argc
, robj
**argv
) {
8635 buf
= sdscatprintf(buf
,"*%d\r\n",argc
);
8636 for (j
= 0; j
< argc
; j
++) {
8637 robj
*o
= getDecodedObject(argv
[j
]);
8638 buf
= sdscatprintf(buf
,"$%lu\r\n",(unsigned long)sdslen(o
->ptr
));
8639 buf
= sdscatlen(buf
,o
->ptr
,sdslen(o
->ptr
));
8640 buf
= sdscatlen(buf
,"\r\n",2);
8646 static sds
catAppendOnlyExpireAtCommand(sds buf
, robj
*key
, robj
*seconds
) {
8651 /* Make sure we can use strtol */
8652 seconds
= getDecodedObject(seconds
);
8653 when
= time(NULL
)+strtol(seconds
->ptr
,NULL
,10);
8654 decrRefCount(seconds
);
8656 argv
[0] = createStringObject("EXPIREAT",8);
8658 argv
[2] = createObject(REDIS_STRING
,
8659 sdscatprintf(sdsempty(),"%ld",when
));
8660 buf
= catAppendOnlyGenericCommand(buf
, argc
, argv
);
8661 decrRefCount(argv
[0]);
8662 decrRefCount(argv
[2]);
8666 static void feedAppendOnlyFile(struct redisCommand
*cmd
, int dictid
, robj
**argv
, int argc
) {
8667 sds buf
= sdsempty();
8670 /* The DB this command was targeting is not the same as the last command
8671 * we appended. Issuing a SELECT command is needed. */
8672 if (dictid
!= server
.appendseldb
) {
8675 snprintf(seldb
,sizeof(seldb
),"%d",dictid
);
8676 buf
= sdscatprintf(buf
,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
8677 (unsigned long)strlen(seldb
),seldb
);
8678 server
.appendseldb
= dictid
;
8681 if (cmd
->proc
== expireCommand
) {
8682 /* Translate EXPIRE into EXPIREAT */
8683 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8684 } else if (cmd
->proc
== setexCommand
) {
8685 /* Translate SETEX to SET and EXPIREAT */
8686 tmpargv
[0] = createStringObject("SET",3);
8687 tmpargv
[1] = argv
[1];
8688 tmpargv
[2] = argv
[3];
8689 buf
= catAppendOnlyGenericCommand(buf
,3,tmpargv
);
8690 decrRefCount(tmpargv
[0]);
8691 buf
= catAppendOnlyExpireAtCommand(buf
,argv
[1],argv
[2]);
8693 buf
= catAppendOnlyGenericCommand(buf
,argc
,argv
);
8696 /* Append to the AOF buffer. This will be flushed on disk just before
8697 * of re-entering the event loop, so before the client will get a
8698 * positive reply about the operation performed. */
8699 server
.aofbuf
= sdscatlen(server
.aofbuf
,buf
,sdslen(buf
));
8701 /* If a background append only file rewriting is in progress we want to
8702 * accumulate the differences between the child DB and the current one
8703 * in a buffer, so that when the child process will do its work we
8704 * can append the differences to the new append only file. */
8705 if (server
.bgrewritechildpid
!= -1)
8706 server
.bgrewritebuf
= sdscatlen(server
.bgrewritebuf
,buf
,sdslen(buf
));
8711 /* In Redis commands are always executed in the context of a client, so in
8712 * order to load the append only file we need to create a fake client.
 *
 * The visible setup gives the client an empty query buffer, an empty reply
 * list (whose free and dup methods are decrRefCount and
 * dupClientReplyValue) and an initialized MULTI state.
 * NOTE(review): the remaining field initializations and the return
 * statement are not visible in this excerpt. */
8713 static struct redisClient
*createFakeClient(void) {
8714 struct redisClient
*c
= zmalloc(sizeof(*c
));
8718 c
->querybuf
= sdsempty();
8722 /* We set the fake client as a slave waiting for the synchronization
8723 * so that Redis will not try to send replies to this client. */
8724 c
->replstate
= REDIS_REPL_WAIT_BGSAVE_START
;
8725 c
->reply
= listCreate();
8726 listSetFreeMethod(c
->reply
,decrRefCount
);
8727 listSetDupMethod(c
->reply
,dupClientReplyValue
);
8728 initClientMultiState(c
);
/* Release what a fake client owns: its query buffer, its reply list and its
 * MULTI state.
 * NOTE(review): the release of the client structure itself is not visible in
 * this excerpt. */
8732 static void freeFakeClient(struct redisClient
*c
) {
8733 sdsfree(c
->querybuf
);
8734 listRelease(c
->reply
);
8735 freeClientMultiState(c
);
8739 /* Replay the append log file. On success REDIS_OK is returned. On non fatal
8740 * error (the append only file is zero-length) REDIS_ERR is returned. On
8741 * fatal error an error message is logged and the program exits.
 *
 * Each command is read as "*<argc>" followed by argc "$<len>" bulk
 * arguments, looked up with lookupCommand() and executed in the context of
 * a fake client whose replies are discarded. AOF feeding is disabled while
 * loading and the previous server.appendonly value is restored at the end. */
8742 int loadAppendOnlyFile(char *filename
) {
8743 struct redisClient
*fakeClient
;
8744 FILE *fp
= fopen(filename
,"r");
8745 struct redis_stat sb
;
8746 int appendonly
= server
.appendonly
;
/* NOTE(review): fileno(fp) is evaluated here before any visible check that
 * fopen() succeeded; the "can't open" error is only logged further down.
 * If fopen() returned NULL, a null stream is passed to fileno(). The open
 * failure should be handled before this test. */
8748 if (redis_fstat(fileno(fp
),&sb
) != -1 && sb
.st_size
== 0)
8752 redisLog(REDIS_WARNING
,"Fatal error: can't open the append log file for reading: %s",strerror(errno
));
8756 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8757 * to the same file we're about to read. */
8758 server
.appendonly
= 0;
8760 fakeClient
= createFakeClient();
8767 struct redisCommand
*cmd
;
8770 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) {
/* Every command starts with a "*<argc>" multi-bulk header. */
8776 if (buf
[0] != '*') goto fmterr
;
8778 argv
= zmalloc(sizeof(robj
*)*argc
);
8779 for (j
= 0; j
< argc
; j
++) {
8780 if (fgets(buf
,sizeof(buf
),fp
) == NULL
) goto readerr
;
8781 if (buf
[0] != '$') goto fmterr
;
/* NOTE(review): 'len' comes straight from the file and is used unchecked
 * as the allocation size and the fread() size below. */
8782 len
= strtol(buf
+1,NULL
,10);
8783 argsds
= sdsnewlen(NULL
,len
);
8784 if (len
&& fread(argsds
,len
,1,fp
) == 0) goto fmterr
;
8785 argv
[j
] = createObject(REDIS_STRING
,argsds
);
8786 if (fread(buf
,2,1,fp
) == 0) goto fmterr
; /* discard CRLF */
8789 /* Command lookup */
8790 cmd
= lookupCommand(argv
[0]->ptr
);
8792 redisLog(REDIS_WARNING
,"Unknown command '%s' reading the append only file", argv
[0]->ptr
);
8795 /* Try object encoding */
8796 if (cmd
->flags
& REDIS_CMD_BULK
)
8797 argv
[argc
-1] = tryObjectEncoding(argv
[argc
-1]);
8798 /* Run the command in the context of a fake client */
8799 fakeClient
->argc
= argc
;
8800 fakeClient
->argv
= argv
;
8801 cmd
->proc(fakeClient
);
8802 /* Discard the reply objects list from the fake client */
8803 while(listLength(fakeClient
->reply
))
8804 listDelNode(fakeClient
->reply
,listFirst(fakeClient
->reply
));
8805 /* Clean up, ready for the next command */
8806 for (j
= 0; j
< argc
; j
++) decrRefCount(argv
[j
]);
8808 /* Handle swapping while loading big datasets when VM is on */
8810 if ((zmalloc_used_memory() - server
.vm_max_memory
) > 1024*1024*32)
8813 if (server
.vm_enabled
&& force_swapout
) {
8814 while (zmalloc_used_memory() > server
.vm_max_memory
) {
8815 if (vmSwapOneObjectBlocking() == REDIS_ERR
) break;
8820 /* This point can only be reached when EOF is reached without errors.
8821 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8822 if (fakeClient
->flags
& REDIS_MULTI
) goto readerr
;
8825 freeFakeClient(fakeClient
);
8826 server
.appendonly
= appendonly
;
8831 redisLog(REDIS_WARNING
,"Unexpected end of file reading the append only file");
8833 redisLog(REDIS_WARNING
,"Unrecoverable error reading the append only file: %s", strerror(errno
));
8837 redisLog(REDIS_WARNING
,"Bad file format reading the append only file");
8841 /* Write binary-safe string into a file in the bulkformat
8842 * $<count>\r\n<payload>\r\n
 *
 * 's' points to 'len' bytes of payload and 'fp' is the destination stream.
 * Returns 0 as soon as any fwrite() fails. The success return value is not
 * visible in this excerpt. */
8843 static int fwriteBulkString(FILE *fp
, char *s
, unsigned long len
) {
/* The length is formatted starting at cbuf+1; byte 0 is presumably reserved
 * for the '$' prefix (that store is not visible here). */
8847 clen
= 1+ll2string(cbuf
+1,sizeof(cbuf
)-1,len
);
8848 cbuf
[clen
++] = '\r';
8849 cbuf
[clen
++] = '\n';
8850 if (fwrite(cbuf
,clen
,1,fp
) == 0) return 0;
/* A zero-length payload is skipped: only the header and the CRLF are
 * written. */
8851 if (len
> 0 && fwrite(s
,len
,1,fp
) == 0) return 0;
8852 if (fwrite("\r\n",2,1,fp
) == 0) return 0;
8856 /* Write a double value in bulk format $<count>\r\n<payload>\r\n
 *
 * 'd' is formatted with "%.17g". Returns 0 as soon as any fwrite() fails.
 * The success return value is not visible in this excerpt. */
8857 static int fwriteBulkDouble(FILE *fp
, double d
) {
8858 char buf
[128], dbuf
[128];
/* dbuf already carries the trailing CRLF, hence the -2 when the payload
 * length is computed for the "$<count>" header. */
8860 snprintf(dbuf
,sizeof(dbuf
),"%.17g\r\n",d
);
8861 snprintf(buf
,sizeof(buf
),"$%lu\r\n",(unsigned long)strlen(dbuf
)-2);
8862 if (fwrite(buf
,strlen(buf
),1,fp
) == 0) return 0;
8863 if (fwrite(dbuf
,strlen(dbuf
),1,fp
) == 0) return 0;
8867 /* Write a long long value in bulk format $<count>\r\n<payload>\r\n
 *
 * Header and payload are formatted into a single buffer and written with
 * one fwrite(). Returns 0 if that write fails. The success return value is
 * not visible in this excerpt. */
8868 static int fwriteBulkLongLong(FILE *fp
, long long l
) {
8869 char bbuf
[128], lbuf
[128];
8870 unsigned int blen
, llen
;
/* NOTE(review): ll2string() is given a limit of 32 although lbuf holds 128
 * bytes. */
8871 llen
= ll2string(lbuf
,32,l
);
8872 blen
= snprintf(bbuf
,sizeof(bbuf
),"$%u\r\n%s\r\n",llen
,lbuf
);
8873 if (fwrite(bbuf
,blen
,1,fp
) == 0) return 0;
8877 /* Delegate writing an object to writing a bulk string or bulk long long.
 *
 * The return value is whatever the chosen helper returns (both return 0 on
 * a failed write). Any encoding other than INT or RAW reaches the
 * redisPanic() call. */
8878 static int fwriteBulkObject(FILE *fp
, robj
*obj
) {
8879 /* Avoid using getDecodedObject to help copy-on-write (we are often
8880 * in a child process when this function is called). */
8881 if (obj
->encoding
== REDIS_ENCODING_INT
) {
/* For INT-encoded objects the integer is read out of the pointer field
 * itself, hence the cast instead of a dereference. */
8882 return fwriteBulkLongLong(fp
,(long)obj
->ptr
);
8883 } else if (obj
->encoding
== REDIS_ENCODING_RAW
) {
8884 return fwriteBulkString(fp
,obj
->ptr
,sdslen(obj
->ptr
));
8886 redisPanic("Unknown string encoding");
8890 /* Write a sequence of commands able to fully rebuild the dataset into
8891 * "filename". Used both by REWRITEAOF and BGREWRITEAOF.
 *
 * The commands are first written to a temp file whose name contains the
 * pid; that file is rename(2)d over 'filename' once everything has been
 * written and synced. For every non-empty DB a SELECT is emitted, followed
 * by SET / RPUSH / SADD / ZADD / HSET commands for each key and an EXPIREAT
 * for keys that have an expire time that is not already in the past.
 *
 * All write helpers (fwrite, fwriteBulkString, fwriteBulkObject,
 * fwriteBulkLongLong, fwriteBulkDouble) signal failure by returning 0, and
 * every failed write jumps to 'werr', which logs the error and releases the
 * DB iterator. */
8892 static int rewriteAppendOnlyFile(char *filename
) {
8893 dictIterator
*di
= NULL
;
8898 time_t now
= time(NULL
);
8900 /* Note that we have to use a different temp name here compared to the
8901 * one used by rewriteAppendOnlyFileBackground() function. */
8902 snprintf(tmpfile
,256,"temp-rewriteaof-%d.aof", (int) getpid());
8903 fp
= fopen(tmpfile
,"w");
8905 redisLog(REDIS_WARNING
, "Failed rewriting the append only file: %s", strerror(errno
));
8908 for (j
= 0; j
< server
.dbnum
; j
++) {
8909 char selectcmd
[] = "*2\r\n$6\r\nSELECT\r\n";
8910 redisDb
*db
= server
.db
+j
;
8912 if (dictSize(d
) == 0) continue;
8913 di
= dictGetIterator(d
);
8919 /* SELECT the new DB */
8920 if (fwrite(selectcmd
,sizeof(selectcmd
)-1,1,fp
) == 0) goto werr
;
8921 if (fwriteBulkLongLong(fp
,j
) == 0) goto werr
;
8923 /* Iterate this DB writing every entry */
8924 while((de
= dictNext(di
)) != NULL
) {
8925 sds keystr
= dictGetEntryKey(de
);
8930 keystr
= dictGetEntryKey(de
);
8931 o
= dictGetEntryVal(de
);
8932 initStaticStringObject(key
,keystr
);
8933 /* If the value for this key is swapped, load a preview in memory.
8934 * We use a "swapped" flag to remember if we need to free the
8935 * value object instead to just increment the ref count anyway
8936 * in order to avoid copy-on-write of pages if we are forked() */
8937 if (!server
.vm_enabled
|| o
->storage
== REDIS_VM_MEMORY
||
8938 o
->storage
== REDIS_VM_SWAPPING
) {
8941 o
= vmPreviewObject(o
);
8944 expiretime
= getExpire(db
,&key
);
8946 /* Save the key and associated value */
8947 if (o
->type
== REDIS_STRING
) {
8948 /* Emit a SET command */
8949 char cmd
[]="*3\r\n$3\r\nSET\r\n";
8950 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8952 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8953 if (fwriteBulkObject(fp
,o
) == 0) goto werr
;
8954 } else if (o
->type
== REDIS_LIST
) {
8955 /* Emit the RPUSHes needed to rebuild the list */
8956 char cmd
[]="*3\r\n$5\r\nRPUSH\r\n";
8957 if (o
->encoding
== REDIS_ENCODING_ZIPLIST
) {
8958 unsigned char *zl
= o
->ptr
;
8959 unsigned char *p
= ziplistIndex(zl
,0);
8960 unsigned char *vstr
;
8964 while(ziplistGet(p
,&vstr
,&vlen
,&vlong
)) {
8965 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8966 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8968 if (fwriteBulkString(fp
,(char*)vstr
,vlen
) == 0)
8971 if (fwriteBulkLongLong(fp
,vlong
) == 0)
8974 p
= ziplistNext(zl
,p
);
8976 } else if (o
->encoding
== REDIS_ENCODING_LIST
) {
8977 list
*list
= o
->ptr
;
8981 listRewind(list
,&li
);
8982 while((ln
= listNext(&li
))) {
8983 robj
*eleobj
= listNodeValue(ln
);
8985 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
8986 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
8987 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
8990 redisPanic("Unknown list encoding");
8992 } else if (o
->type
== REDIS_SET
) {
8993 /* Emit the SADDs needed to rebuild the set */
8995 dictIterator
*di
= dictGetIterator(set
);
8998 while((de
= dictNext(di
)) != NULL
) {
8999 char cmd
[]="*3\r\n$4\r\nSADD\r\n";
9000 robj
*eleobj
= dictGetEntryKey(de
);
9002 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9003 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9004 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9006 dictReleaseIterator(di
);
9007 } else if (o
->type
== REDIS_ZSET
) {
9008 /* Emit the ZADDs needed to rebuild the sorted set */
9010 dictIterator
*di
= dictGetIterator(zs
->dict
);
9013 while((de
= dictNext(di
)) != NULL
) {
9014 char cmd
[]="*4\r\n$4\r\nZADD\r\n";
9015 robj
*eleobj
= dictGetEntryKey(de
);
9016 double *score
= dictGetEntryVal(de
);
9018 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9019 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9020 if (fwriteBulkDouble(fp
,*score
) == 0) goto werr
;
9021 if (fwriteBulkObject(fp
,eleobj
) == 0) goto werr
;
9023 dictReleaseIterator(di
);
9024 } else if (o
->type
== REDIS_HASH
) {
9025 char cmd
[]="*4\r\n$4\r\nHSET\r\n";
9027 /* Emit the HSETs needed to rebuild the hash */
9028 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9029 unsigned char *p
= zipmapRewind(o
->ptr
);
9030 unsigned char *field
, *val
;
9031 unsigned int flen
, vlen
;
9033 while((p
= zipmapNext(p
,&field
,&flen
,&val
,&vlen
)) != NULL
) {
9034 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9035 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
/* fwriteBulkString() reports failure with 0, never -1, so the two checks
 * below must test for 0 or write errors go unnoticed.
 * NOTE(review): the statements guarded by these two checks are not visible
 * in this excerpt; they should jump to werr like every other failed write. */
9036 if (fwriteBulkString(fp
,(char*)field
,flen
) == 0)
9038 if (fwriteBulkString(fp
,(char*)val
,vlen
) == 0)
9042 dictIterator
*di
= dictGetIterator(o
->ptr
);
9045 while((de
= dictNext(di
)) != NULL
) {
9046 robj
*field
= dictGetEntryKey(de
);
9047 robj
*val
= dictGetEntryVal(de
);
9049 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9050 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
/* fwriteBulkObject() also reports failure with 0. Route failures through
 * werr, as the SADD and ZADD loops do, instead of returning directly and
 * skipping the common error handling. */
9051 if (fwriteBulkObject(fp
,field
) == 0) goto werr
;
9052 if (fwriteBulkObject(fp
,val
) == 0) goto werr
;
9054 dictReleaseIterator(di
);
9057 redisPanic("Unknown object type");
9059 /* Save the expire time */
9060 if (expiretime
!= -1) {
9061 char cmd
[]="*3\r\n$8\r\nEXPIREAT\r\n";
9062 /* If this key is already expired skip it */
9063 if (expiretime
< now
) continue;
9064 if (fwrite(cmd
,sizeof(cmd
)-1,1,fp
) == 0) goto werr
;
9065 if (fwriteBulkObject(fp
,&key
) == 0) goto werr
;
9066 if (fwriteBulkLongLong(fp
,expiretime
) == 0) goto werr
;
9068 if (swapped
) decrRefCount(o
);
9070 dictReleaseIterator(di
);
9073 /* Make sure data will not remain on the OS's output buffers */
9075 aof_fsync(fileno(fp
));
9078 /* Use RENAME to make sure the DB file is changed atomically only
9079 * if the generate DB file is ok. */
9080 if (rename(tmpfile
,filename
) == -1) {
9081 redisLog(REDIS_WARNING
,"Error moving temp append only file on the final destination: %s", strerror(errno
));
9085 redisLog(REDIS_NOTICE
,"SYNC append only file rewrite performed");
9091 redisLog(REDIS_WARNING
,"Write error writing append only file on disk: %s", strerror(errno
));
9092 if (di
) dictReleaseIterator(di
);
9096 /* This is how rewriting of the append only file in background works:
9098 * 1) The user calls BGREWRITEAOF
9099 * 2) Redis calls this function, that forks():
9100 * 2a) the child rewrites the append only file in a temp file.
9101 * 2b) the parent accumulates differences in server.bgrewritebuf.
9102 * 3) When the child finishes '2a' it exits.
9103 * 4) The parent will trap the exit code, if it's OK, will append the
9104 * data accumulated into server.bgrewritebuf into the temp file, and
9105 * finally will rename(2) the temp file in the actual file name.
9106 * Then the new file is reopened as the new append only file. Profit!
9108 static int rewriteAppendOnlyFileBackground(void) {
9111 if (server
.bgrewritechildpid
!= -1) return REDIS_ERR
;
9112 if (server
.vm_enabled
) waitEmptyIOJobsQueue();
9113 if ((childpid
= fork()) == 0) {
9117 if (server
.vm_enabled
) vmReopenSwapFile();
9119 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
9120 if (rewriteAppendOnlyFile(tmpfile
) == REDIS_OK
) {
9127 if (childpid
== -1) {
9128 redisLog(REDIS_WARNING
,
9129 "Can't rewrite append only file in background: fork: %s",
9133 redisLog(REDIS_NOTICE
,
9134 "Background append only file rewriting started by pid %d",childpid
);
9135 server
.bgrewritechildpid
= childpid
;
9136 updateDictResizePolicy();
9137 /* We set appendseldb to -1 in order to force the next call to the
9138 * feedAppendOnlyFile() to issue a SELECT command, so the differences
9139 * accumulated by the parent into server.bgrewritebuf will start
9140 * with a SELECT statement and it will be safe to merge. */
9141 server
.appendseldb
= -1;
9144 return REDIS_OK
; /* unreached */
/* BGREWRITEAOF command handler.
 *
 * Replies with an error when a rewrite child is already recorded in
 * server.bgrewritechildpid. Otherwise it asks
 * rewriteAppendOnlyFileBackground() to start one and replies with a status
 * line on REDIS_OK; the shared error reply is also visible below. */
9147 static void bgrewriteaofCommand(redisClient
*c
) {
9148 if (server
.bgrewritechildpid
!= -1) {
9149 addReplySds(c
,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
9152 if (rewriteAppendOnlyFileBackground() == REDIS_OK
) {
9153 char *status
= "+Background append only file rewriting started\r\n";
9154 addReplySds(c
,sdsnew(status
));
9156 addReply(c
,shared
.err
);
/* Build the name of the temp file used by the background rewrite child with
 * pid 'childpid' (the same "temp-rewriteaof-bg-%d.aof" pattern used by
 * rewriteAppendOnlyFileBackground()).
 * NOTE(review): the removal call itself is not visible in this excerpt. */
9160 static void aofRemoveTempFile(pid_t childpid
) {
9163 snprintf(tmpfile
,256,"temp-rewriteaof-bg-%d.aof", (int) childpid
);
9167 /* Virtual Memory is composed mainly of two subsystems:
9168 * - Blocking Virtual Memory
9169 * - Threaded Virtual Memory I/O
9170 * The two parts are not fully decoupled, but functions are split among two
9171 * different sections of the source code (delimited by comments) in order to
9172 * make more clear what functionality is about the blocking VM and what about
9173 * the threaded (not blocking) VM.
9177 * Redis VM is a blocking VM (one that blocks reading swapped values from
9178 * disk into memory when a value swapped out is needed in memory) that is made
9179 * unblocking by trying to examine the command argument vector in order to
9180 * load in background values that will likely be needed in order to exec
9181 * the command. The command is executed only once all the relevant keys
9182 * are loaded into memory.
9184 * This is basically almost as simple as a blocking VM, but almost as parallel
9185 * as a fully non-blocking VM.
9188 /* =================== Virtual Memory - Blocking Side ====================== */
9190 /* Create a VM pointer object. This kind of object is used in place of
9191 * values in the key -> value hash table, for swapped out objects.
 *
 * The new object is allocated with zmalloc() and starts with type
 * REDIS_VMPOINTER and storage REDIS_VM_SWAPPED.
 * NOTE(review): how 'vtype' is stored and the return statement are not
 * visible in this excerpt. */
9192 static vmpointer
*createVmPointer(int vtype
) {
9193 vmpointer
*vp
= zmalloc(sizeof(vmpointer
));
9195 vp
->type
= REDIS_VMPOINTER
;
9196 vp
->storage
= REDIS_VM_SWAPPED
;
9201 static void vmInit(void) {
9207 if (server
.vm_max_threads
!= 0)
9208 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
9210 redisLog(REDIS_NOTICE
,"Using '%s' as swap file",server
.vm_swap_file
);
9211 /* Try to open the old swap file, otherwise create it */
9212 if ((server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b")) == NULL
) {
9213 server
.vm_fp
= fopen(server
.vm_swap_file
,"w+b");
9215 if (server
.vm_fp
== NULL
) {
9216 redisLog(REDIS_WARNING
,
9217 "Can't open the swap file: %s. Exiting.",
9221 server
.vm_fd
= fileno(server
.vm_fp
);
9222 /* Lock the swap file for writing, this is useful in order to avoid
9223 * another instance to use the same swap file for a config error. */
9224 fl
.l_type
= F_WRLCK
;
9225 fl
.l_whence
= SEEK_SET
;
9226 fl
.l_start
= fl
.l_len
= 0;
9227 if (fcntl(server
.vm_fd
,F_SETLK
,&fl
) == -1) {
9228 redisLog(REDIS_WARNING
,
9229 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server
.vm_swap_file
, strerror(errno
));
9233 server
.vm_next_page
= 0;
9234 server
.vm_near_pages
= 0;
9235 server
.vm_stats_used_pages
= 0;
9236 server
.vm_stats_swapped_objects
= 0;
9237 server
.vm_stats_swapouts
= 0;
9238 server
.vm_stats_swapins
= 0;
9239 totsize
= server
.vm_pages
*server
.vm_page_size
;
9240 redisLog(REDIS_NOTICE
,"Allocating %lld bytes of swap file",totsize
);
9241 if (ftruncate(server
.vm_fd
,totsize
) == -1) {
9242 redisLog(REDIS_WARNING
,"Can't ftruncate swap file: %s. Exiting.",
9246 redisLog(REDIS_NOTICE
,"Swap file allocated with success");
9248 server
.vm_bitmap
= zmalloc((server
.vm_pages
+7)/8);
9249 redisLog(REDIS_VERBOSE
,"Allocated %lld bytes page table for %lld pages",
9250 (long long) (server
.vm_pages
+7)/8, server
.vm_pages
);
9251 memset(server
.vm_bitmap
,0,(server
.vm_pages
+7)/8);
9253 /* Initialize threaded I/O (used by Virtual Memory) */
9254 server
.io_newjobs
= listCreate();
9255 server
.io_processing
= listCreate();
9256 server
.io_processed
= listCreate();
9257 server
.io_ready_clients
= listCreate();
9258 pthread_mutex_init(&server
.io_mutex
,NULL
);
9259 pthread_mutex_init(&server
.obj_freelist_mutex
,NULL
);
9260 pthread_mutex_init(&server
.io_swapfile_mutex
,NULL
);
9261 server
.io_active_threads
= 0;
9262 if (pipe(pipefds
) == -1) {
9263 redisLog(REDIS_WARNING
,"Unable to intialized VM: pipe(2): %s. Exiting."
9267 server
.io_ready_pipe_read
= pipefds
[0];
9268 server
.io_ready_pipe_write
= pipefds
[1];
9269 redisAssert(anetNonBlock(NULL
,server
.io_ready_pipe_read
) != ANET_ERR
);
9270 /* LZF requires a lot of stack */
9271 pthread_attr_init(&server
.io_threads_attr
);
9272 pthread_attr_getstacksize(&server
.io_threads_attr
, &stacksize
);
9273 while (stacksize
< REDIS_THREAD_STACK_SIZE
) stacksize
*= 2;
9274 pthread_attr_setstacksize(&server
.io_threads_attr
, stacksize
);
9275 /* Listen for events in the threaded I/O pipe */
9276 if (aeCreateFileEvent(server
.el
, server
.io_ready_pipe_read
, AE_READABLE
,
9277 vmThreadedIOCompletedJob
, NULL
) == AE_ERR
)
9278 oom("creating file event");
9281 /* Mark the page as used by setting its bit in server.vm_bitmap, where
 * each byte covers eight pages (byte index = page/8). The page must
 * currently be free: this is asserted before the bit is set.
 * NOTE(review): the definition of 'bit' is not visible in this excerpt. */
9282 static void vmMarkPageUsed(off_t page
) {
9283 off_t byte
= page
/8;
9285 redisAssert(vmFreePage(page
) == 1);
9286 server
.vm_bitmap
[byte
] |= 1<<bit
;
9289 /* Mark N contiguous pages as used, with 'page' being the first.
 * Each page goes through vmMarkPageUsed(); server.vm_stats_used_pages is
 * then increased by 'count' and the operation is logged at debug level. */
9290 static void vmMarkPagesUsed(off_t page
, off_t count
) {
9293 for (j
= 0; j
< count
; j
++)
9294 vmMarkPageUsed(page
+j
);
9295 server
.vm_stats_used_pages
+= count
;
9296 redisLog(REDIS_DEBUG
,"Mark USED pages: %lld pages at %lld\n",
9297 (long long)count
, (long long)page
);
9300 /* Mark the page as free by clearing its bit in server.vm_bitmap (byte
 * index = page/8). The page must currently be in use: this is asserted
 * before the bit is cleared.
 * NOTE(review): the definition of 'bit' is not visible in this excerpt. */
9301 static void vmMarkPageFree(off_t page
) {
9302 off_t byte
= page
/8;
9304 redisAssert(vmFreePage(page
) == 0);
9305 server
.vm_bitmap
[byte
] &= ~(1<<bit
);
9308 /* Mark N contiguous pages as free, with 'page' being the first.
 * Each page goes through vmMarkPageFree(); server.vm_stats_used_pages is
 * then decreased by 'count' and the operation is logged at debug level. */
9309 static void vmMarkPagesFree(off_t page
, off_t count
) {
9312 for (j
= 0; j
< count
; j
++)
9313 vmMarkPageFree(page
+j
);
9314 server
.vm_stats_used_pages
-= count
;
9315 redisLog(REDIS_DEBUG
,"Mark FREE pages: %lld pages at %lld\n",
9316 (long long)count
, (long long)page
);
9319 /* Test if the page is free: returns 1 when the page's bit in
 * server.vm_bitmap is clear, 0 when it is set.
 * NOTE(review): the definition of 'bit' is not visible in this excerpt. */
9320 static int vmFreePage(off_t page
) {
9321 off_t byte
= page
/8;
9323 return (server
.vm_bitmap
[byte
] & (1<<bit
)) == 0;
9326 /* Find N contiguous free pages storing the first page of the cluster in *first.
9327 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
9328 * REDIS_ERR is returned.
9330 * This function uses a simple algorithm: we try to allocate
9331 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9332 * again from the start of the swap file searching for free spaces.
9334 * If it looks pretty clear that there are no free pages near our offset
9335 * we try to find less populated places doing a forward jump of
9336 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9337 * without hurry, and then we jump again and so forth...
9339 * This function can be improved using a free list to avoid to guess
9340 * too much, since we could collect data about freed pages.
9342 * note: I implemented this function just after watching an episode of
9343 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9345 static int vmFindContiguousPages(off_t
*first
, off_t n
) {
9346 off_t base
, offset
= 0, since_jump
= 0, numfree
= 0;
9348 if (server
.vm_near_pages
== REDIS_VM_MAX_NEAR_PAGES
) {
9349 server
.vm_near_pages
= 0;
9350 server
.vm_next_page
= 0;
9352 server
.vm_near_pages
++; /* Yet another try for pages near to the old ones */
9353 base
= server
.vm_next_page
;
9355 while(offset
< server
.vm_pages
) {
9356 off_t
this = base
+offset
;
9358 /* If we overflow, restart from page zero */
9359 if (this >= server
.vm_pages
) {
9360 this -= server
.vm_pages
;
9362 /* Just overflowed, what we found on tail is no longer
9363 * interesting, as it's no longer contiguous. */
9367 if (vmFreePage(this)) {
9368 /* This is a free page */
9370 /* Already got N free pages? Return to the caller, with success */
9372 *first
= this-(n
-1);
9373 server
.vm_next_page
= this+1;
9374 redisLog(REDIS_DEBUG
, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n
, (long long) *first
);
9378 /* The current one is not a free page */
9382 /* Fast-forward if the current page is not free and we already
9383 * searched enough near this place. */
9385 if (!numfree
&& since_jump
>= REDIS_VM_MAX_RANDOM_JUMP
/4) {
9386 offset
+= random() % REDIS_VM_MAX_RANDOM_JUMP
;
9388 /* Note that even if we rewind after the jump, we don't need
9389 * to make sure numfree is set to zero as we only jump *if* it
9390 * is set to zero. */
9392 /* Otherwise just check the next page */
9399 /* Write the specified object at the specified page of the swap file.
 *
 * The stream is positioned at page*server.vm_page_size, the object is
 * serialized with rdbSaveObject() and the stream is flushed. When
 * server.vm_enabled is set the whole sequence runs under
 * server.io_swapfile_mutex, which is also released on the seek-failure
 * path.
 * NOTE(review): the results of rdbSaveObject() and fflush() are not checked
 * in the visible code. The return statements are not visible in this
 * excerpt. */
9400 static int vmWriteObjectOnSwap(robj
*o
, off_t page
) {
9401 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9402 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9403 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9404 redisLog(REDIS_WARNING
,
9405 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
9409 rdbSaveObject(server
.vm_fp
,o
);
9410 fflush(server
.vm_fp
);
9411 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9415 /* Transfer the 'val' object to disk.
9416 * A 'vmpointer' object containing all the information needed to load the
9417 * object back later is returned.
9419 * If we can't find enough contiguous empty pages to swap the object on disk,
9420 * or writing the object to the swap file fails, NULL is returned.
 *
 * 'val' must be in memory (REDIS_VM_MEMORY) with a reference count of 1;
 * both conditions are asserted. On success 'val' is released, the pages are
 * marked as used and the swap statistics are updated. */
9421 static vmpointer
*vmSwapObjectBlocking(robj
*val
) {
9422 off_t pages
= rdbSavedObjectPages(val
,NULL
);
9426 assert(val
->storage
== REDIS_VM_MEMORY
);
9427 assert(val
->refcount
== 1);
9428 if (vmFindContiguousPages(&page
,pages
) == REDIS_ERR
) return NULL
;
9429 if (vmWriteObjectOnSwap(val
,page
) == REDIS_ERR
) return NULL
;
9431 vp
= createVmPointer(val
->type
);
9433 vp
->usedpages
= pages
;
9434 decrRefCount(val
); /* Deallocate the object from memory. */
/* The pages are marked as used only after the object has been written. */
9435 vmMarkPagesUsed(page
,pages
);
9436 redisLog(REDIS_DEBUG
,"VM: object %p swapped out at %lld (%lld pages)",
9438 (unsigned long long) page
, (unsigned long long) pages
);
9439 server
.vm_stats_swapped_objects
++;
9440 server
.vm_stats_swapouts
++;
/* Read the object of type 'type' stored at 'page' of the swap file.
 *
 * The stream is positioned at page*server.vm_page_size and the object is
 * deserialized with rdbLoadObject(). When server.vm_enabled is set the
 * access runs under server.io_swapfile_mutex. Seek and load failures are
 * logged as unrecoverable.
 * NOTE(review): what follows those log calls, and the return statement, are
 * not visible in this excerpt. */
9444 static robj
*vmReadObjectFromSwap(off_t page
, int type
) {
9447 if (server
.vm_enabled
) pthread_mutex_lock(&server
.io_swapfile_mutex
);
9448 if (fseeko(server
.vm_fp
,page
*server
.vm_page_size
,SEEK_SET
) == -1) {
9449 redisLog(REDIS_WARNING
,
9450 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
9454 o
= rdbLoadObject(type
,server
.vm_fp
);
9456 redisLog(REDIS_WARNING
, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno
));
9459 if (server
.vm_enabled
) pthread_mutex_unlock(&server
.io_swapfile_mutex
);
9463 /* Load the specified object from swap to memory.
9464 * The newly allocated object is returned.
9466 * If preview is true the unserialized object is returned to the caller but
9467 * the pages are not marked as freed, nor is the vp object freed.
 *
 * 'vp' must be a REDIS_VMPOINTER whose storage is REDIS_VM_SWAPPED or
 * REDIS_VM_LOADING (asserted). server.vm_stats_swapins is incremented in
 * the visible code; the swapped-objects counter is decremented next to the
 * "loaded from disk" log line. */
9468 static robj
*vmGenericLoadObject(vmpointer
*vp
, int preview
) {
9471 redisAssert(vp
->type
== REDIS_VMPOINTER
&&
9472 (vp
->storage
== REDIS_VM_SWAPPED
|| vp
->storage
== REDIS_VM_LOADING
));
9473 val
= vmReadObjectFromSwap(vp
->page
,vp
->vtype
);
9475 redisLog(REDIS_DEBUG
, "VM: object %p loaded from disk", (void*)vp
);
9476 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9478 server
.vm_stats_swapped_objects
--;
9480 redisLog(REDIS_DEBUG
, "VM: object %p previewed from disk", (void*)vp
);
9482 server
.vm_stats_swapins
++;
9486 /* Plain object loading, from swap to memory.
9488 * 'o' is actually a vmpointer structure that will be freed by the call.
9489 * The return value is the loaded object (vmGenericLoadObject() is called
 * with preview set to 0). */
9490 static robj
*vmLoadObject(robj
*o
) {
9491 /* If we are loading the object in background, stop it, we
9492 * need to load this object synchronously ASAP. */
9493 if (o
->storage
== REDIS_VM_LOADING
)
9494 vmCancelThreadedIOJob(o
);
9495 return vmGenericLoadObject((vmpointer
*)o
,0);
9498 /* Just load the value from disk, without modifying the key.
9499 * This is useful when we want to perform some operation on the value
9500 * without really bringing it from swap to memory, like while saving the
9501 * dataset or rewriting the append only log.
 * Implemented as vmGenericLoadObject() with preview set to 1. */
9502 static robj
*vmPreviewObject(robj
*o
) {
9503 return vmGenericLoadObject((vmpointer
*)o
,1);
9506 /* How good a candidate is this object for swapping?
9507 * The better candidate it is, the greater the returned value.
9509 * Currently we try to perform a fast estimation of the object size in
9510 * memory, and combine it with aging information.
9512 * Basically swappability = idle-time * log(estimated size)
9514 * Bigger objects are preferred over smaller objects, but not
9515 * proportionally, this is why we use the logarithm. This algorithm is
9516 * just a first try and will probably be tuned later. */
9517 static double computeObjectSwappability(robj
*o
) {
9518 /* actual age can be >= minage, but not < minage. As we use wrapping
9519 * 21 bit clocks with minutes resolution for the LRU. */
9520 time_t minage
= abs(server
.lruclock
- o
->lru
);
9524 struct dictEntry
*de
;
9527 if (minage
<= 0) return 0;
9530 if (o
->encoding
!= REDIS_ENCODING_RAW
) {
9533 asize
= sdslen(o
->ptr
)+sizeof(*o
)+sizeof(long)*2;
9538 listNode
*ln
= listFirst(l
);
9540 asize
= sizeof(list
);
9542 robj
*ele
= ln
->value
;
9545 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9546 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9547 asize
+= (sizeof(listNode
)+elesize
)*listLength(l
);
9552 z
= (o
->type
== REDIS_ZSET
);
9553 d
= z
? ((zset
*)o
->ptr
)->dict
: o
->ptr
;
9555 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9556 if (z
) asize
+= sizeof(zset
)-sizeof(dict
);
9561 de
= dictGetRandomKey(d
);
9562 ele
= dictGetEntryKey(de
);
9563 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9564 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9565 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9566 if (z
) asize
+= sizeof(zskiplistNode
)*dictSize(d
);
9570 if (o
->encoding
== REDIS_ENCODING_ZIPMAP
) {
9571 unsigned char *p
= zipmapRewind((unsigned char*)o
->ptr
);
9572 unsigned int len
= zipmapLen((unsigned char*)o
->ptr
);
9573 unsigned int klen
, vlen
;
9574 unsigned char *key
, *val
;
9576 if ((p
= zipmapNext(p
,&key
,&klen
,&val
,&vlen
)) == NULL
) {
9580 asize
= len
*(klen
+vlen
+3);
9581 } else if (o
->encoding
== REDIS_ENCODING_HT
) {
9583 asize
= sizeof(dict
)+(sizeof(struct dictEntry
*)*dictSlots(d
));
9588 de
= dictGetRandomKey(d
);
9589 ele
= dictGetEntryKey(de
);
9590 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9591 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9592 ele
= dictGetEntryVal(de
);
9593 elesize
= (ele
->encoding
== REDIS_ENCODING_RAW
) ?
9594 (sizeof(*o
)+sdslen(ele
->ptr
)) : sizeof(*o
);
9595 asize
+= (sizeof(struct dictEntry
)+elesize
)*dictSize(d
);
9600 return (double)minage
*log(1+asize
);
9603 /* Try to swap an object that's a good candidate for swapping.
9604 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
9605 * to swap any object at all.
9607 * If 'usethreads' is true, Redis will try to swap the object in background
9608 * using I/O threads. */
9609 static int vmSwapOneObject(int usethreads
) {
9611 struct dictEntry
*best
= NULL
;
9612 double best_swappability
= 0;
9613 redisDb
*best_db
= NULL
;
9617 for (j
= 0; j
< server
.dbnum
; j
++) {
9618 redisDb
*db
= server
.db
+j
;
9619 /* Why maxtries is set to 100?
9620 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9621 * are swappable objects */
9624 if (dictSize(db
->dict
) == 0) continue;
9625 for (i
= 0; i
< 5; i
++) {
9627 double swappability
;
9629 if (maxtries
) maxtries
--;
9630 de
= dictGetRandomKey(db
->dict
);
9631 val
= dictGetEntryVal(de
);
9632 /* Only swap objects that are currently in memory.
9634 * Also don't swap shared objects: not a good idea in general and
9635 * we need to ensure that the main thread does not touch the
9636 * object while the I/O thread is using it, but we can't
9637 * control other keys without adding additional mutex. */
9638 if (val
->storage
!= REDIS_VM_MEMORY
|| val
->refcount
!= 1) {
9639 if (maxtries
) i
--; /* don't count this try */
9642 swappability
= computeObjectSwappability(val
);
9643 if (!best
|| swappability
> best_swappability
) {
9645 best_swappability
= swappability
;
9650 if (best
== NULL
) return REDIS_ERR
;
9651 key
= dictGetEntryKey(best
);
9652 val
= dictGetEntryVal(best
);
9654 redisLog(REDIS_DEBUG
,"Key with best swappability: %s, %f",
9655 key
, best_swappability
);
9659 robj
*keyobj
= createStringObject(key
,sdslen(key
));
9660 vmSwapObjectThreaded(keyobj
,val
,best_db
);
9661 decrRefCount(keyobj
);
9666 if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
9667 dictGetEntryVal(best
) = vp
;
/* Swap out one object: vmSwapOneObject() with 'usethreads' set to 0.
 * Returns whatever vmSwapOneObject() returns. */
9675 static int vmSwapOneObjectBlocking() {
9676 return vmSwapOneObject(0);
/* Swap out one object: vmSwapOneObject() with 'usethreads' set to 1.
 * Returns whatever vmSwapOneObject() returns. */
9679 static int vmSwapOneObjectThreaded() {
9680 return vmSwapOneObject(1);
9683 /* Return true if it's safe to swap out objects in a given moment.
9684 * Basically we don't want to swap objects out while there is a BGSAVE
9685 * or a BGREWRITEAOF running in background, that is, while either
 * server.bgsavechildpid or server.bgrewritechildpid is not -1. */
9686 static int vmCanSwapOut(void) {
9687 return (server
.bgsavechildpid
== -1 && server
.bgrewritechildpid
== -1);
9690 /* =================== Virtual Memory - Threaded I/O ======================= */
/* Release the objects referenced by an I/O job: its value (for
 * PREPARE_SWAP, DO_SWAP and LOAD jobs that carry one) and its key.
 * NOTE(review): the release of the job structure itself is not visible in
 * this excerpt. */
9692 static void freeIOJob(iojob
*j
) {
9693 if ((j
->type
== REDIS_IOJOB_PREPARE_SWAP
||
9694 j
->type
== REDIS_IOJOB_DO_SWAP
||
9695 j
->type
== REDIS_IOJOB_LOAD
) && j
->val
!= NULL
)
9697 /* We fix the storage type, otherwise decrRefCount() will try to
9698 * kill the I/O thread job (which no longer exists). */
9699 if (j
->val
->storage
== REDIS_VM_SWAPPING
)
9700 j
->val
->storage
= REDIS_VM_MEMORY
;
9701 decrRefCount(j
->val
);
9703 decrRefCount(j
->key
);
9707 /* Every time a thread finished a Job, it writes a byte into the write side
9708 * of an unix pipe in order to "awake" the main thread, and this function
9710 static void vmThreadedIOCompletedJob(aeEventLoop
*el
, int fd
, void *privdata
,
9714 int retval
, processed
= 0, toprocess
= -1, trytoswap
= 1;
9716 REDIS_NOTUSED(mask
);
9717 REDIS_NOTUSED(privdata
);
9719 /* For every byte we read in the read side of the pipe, there is one
9720 * I/O job completed to process. */
9721 while((retval
= read(fd
,buf
,1)) == 1) {
9724 struct dictEntry
*de
;
9726 redisLog(REDIS_DEBUG
,"Processing I/O completed job");
9728 /* Get the processed element (the oldest one) */
9730 assert(listLength(server
.io_processed
) != 0);
9731 if (toprocess
== -1) {
9732 toprocess
= (listLength(server
.io_processed
)*REDIS_MAX_COMPLETED_JOBS_PROCESSED
)/100;
9733 if (toprocess
<= 0) toprocess
= 1;
9735 ln
= listFirst(server
.io_processed
);
9737 listDelNode(server
.io_processed
,ln
);
9739 /* If this job is marked as canceled, just ignore it */
9744 /* Post process it in the main thread, as there are things we
9745 * can do just here to avoid race conditions and/or invasive locks */
9746 redisLog(REDIS_DEBUG
,"COMPLETED Job type: %d, ID %p, key: %s", j
->type
, (void*)j
->id
, (unsigned char*)j
->key
->ptr
);
9747 de
= dictFind(j
->db
->dict
,j
->key
->ptr
);
9748 redisAssert(de
!= NULL
);
9749 if (j
->type
== REDIS_IOJOB_LOAD
) {
9751 vmpointer
*vp
= dictGetEntryVal(de
);
9753 /* Key loaded, bring it at home */
9754 vmMarkPagesFree(vp
->page
,vp
->usedpages
);
9755 redisLog(REDIS_DEBUG
, "VM: object %s loaded from disk (threaded)",
9756 (unsigned char*) j
->key
->ptr
);
9757 server
.vm_stats_swapped_objects
--;
9758 server
.vm_stats_swapins
++;
9759 dictGetEntryVal(de
) = j
->val
;
9760 incrRefCount(j
->val
);
9762 /* Handle clients waiting for this key to be loaded. */
9763 handleClientsBlockedOnSwappedKey(db
,j
->key
);
9766 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9767 /* Now we know the amount of pages required to swap this object.
9768 * Let's find some space for it, and queue this task again
9769 * rebranded as REDIS_IOJOB_DO_SWAP. */
9770 if (!vmCanSwapOut() ||
9771 vmFindContiguousPages(&j
->page
,j
->pages
) == REDIS_ERR
)
9773 /* Ooops... no space or we can't swap as there is
9774 * a fork()ed Redis trying to save stuff on disk. */
9775 j
->val
->storage
= REDIS_VM_MEMORY
; /* undo operation */
9778 /* Note that we need to mark these pages as used now;
9779 * if the job is canceled, we'll mark them as freed
9781 vmMarkPagesUsed(j
->page
,j
->pages
);
9782 j
->type
= REDIS_IOJOB_DO_SWAP
;
9787 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9790 /* Key swapped. We can finally free some memory. */
9791 if (j
->val
->storage
!= REDIS_VM_SWAPPING
) {
9792 vmpointer
*vp
= (vmpointer
*) j
->id
;
9793 printf("storage: %d\n",vp
->storage
);
9794 printf("key->name: %s\n",(char*)j
->key
->ptr
);
9795 printf("val: %p\n",(void*)j
->val
);
9796 printf("val->type: %d\n",j
->val
->type
);
9797 printf("val->ptr: %s\n",(char*)j
->val
->ptr
);
9799 redisAssert(j
->val
->storage
== REDIS_VM_SWAPPING
);
9800 vp
= createVmPointer(j
->val
->type
);
9802 vp
->usedpages
= j
->pages
;
9803 dictGetEntryVal(de
) = vp
;
9804 /* Fix the storage otherwise decrRefCount will attempt to
9805 * remove the associated I/O job */
9806 j
->val
->storage
= REDIS_VM_MEMORY
;
9807 decrRefCount(j
->val
);
9808 redisLog(REDIS_DEBUG
,
9809 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
9810 (unsigned char*) j
->key
->ptr
,
9811 (unsigned long long) j
->page
, (unsigned long long) j
->pages
);
9812 server
.vm_stats_swapped_objects
++;
9813 server
.vm_stats_swapouts
++;
9815 /* Put a few more swap requests in queue if we are still
9817 if (trytoswap
&& vmCanSwapOut() &&
9818 zmalloc_used_memory() > server
.vm_max_memory
)
9823 more
= listLength(server
.io_newjobs
) <
9824 (unsigned) server
.vm_max_threads
;
9826 /* Don't waste CPU time if swappable objects are rare. */
9827 if (vmSwapOneObjectThreaded() == REDIS_ERR
) {
9835 if (processed
== toprocess
) return;
9837 if (retval
< 0 && errno
!= EAGAIN
) {
9838 redisLog(REDIS_WARNING
,
9839 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9844 static void lockThreadedIO(void) {
9845 pthread_mutex_lock(&server
.io_mutex
);
9848 static void unlockThreadedIO(void) {
9849 pthread_mutex_unlock(&server
.io_mutex
);
9852 /* Remove the specified object from the threaded I/O queue if still not
9853 * processed, otherwise make sure to flag it as canceled. */
9854 static void vmCancelThreadedIOJob(robj
*o
) {
9856 server
.io_newjobs
, /* 0 */
9857 server
.io_processing
, /* 1 */
9858 server
.io_processed
/* 2 */
9862 assert(o
->storage
== REDIS_VM_LOADING
|| o
->storage
== REDIS_VM_SWAPPING
);
9865 /* Search for a matching object in one of the queues */
9866 for (i
= 0; i
< 3; i
++) {
9870 listRewind(lists
[i
],&li
);
9871 while ((ln
= listNext(&li
)) != NULL
) {
9872 iojob
*job
= ln
->value
;
9874 if (job
->canceled
) continue; /* Skip this, already canceled. */
9876 redisLog(REDIS_DEBUG
,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
9877 (void*)job
, (char*)job
->key
->ptr
, job
->type
, i
);
9878 /* Mark the pages as free since the swap didn't happen
9879 * or happened but is now discarded. */
9880 if (i
!= 1 && job
->type
== REDIS_IOJOB_DO_SWAP
)
9881 vmMarkPagesFree(job
->page
,job
->pages
);
9882 /* Cancel the job. It depends on the list the job is
9885 case 0: /* io_newjobs */
9886 /* If the job has not been processed yet, the best thing to do
9887 * is to remove it from the queue altogether */
9889 listDelNode(lists
[i
],ln
);
9891 case 1: /* io_processing */
9892 /* Oh Shi- the thread is messing with the Job:
9894 * Probably it's accessing the object if this is a
9895 * PREPARE_SWAP or DO_SWAP job.
9896 * If it's a LOAD job it may be reading from disk and
9897 * if we don't wait for the job to terminate before we
9898 * cancel it, maybe in a few microseconds data can be
9899 * corrupted in these pages. So the short story is:
9901 * Better to wait for the job to move into the
9902 * next queue (processed)... */
9904 /* We try again and again until the job is completed. */
9906 /* But let's wait some time for the I/O thread
9907 * to finish with this job. After all this condition
9908 * should be very rare. */
9911 case 2: /* io_processed */
9912 /* The job was already processed, that's easy...
9913 * just mark it as canceled so that we'll ignore it
9914 * when processing completed jobs. */
9918 /* Finally we have to adjust the storage type of the object
9919 * in order to "UNDO" the operation. */
9920 if (o
->storage
== REDIS_VM_LOADING
)
9921 o
->storage
= REDIS_VM_SWAPPED
;
9922 else if (o
->storage
== REDIS_VM_SWAPPING
)
9923 o
->storage
= REDIS_VM_MEMORY
;
9925 redisLog(REDIS_DEBUG
,"*** DONE");
9931 printf("Not found: %p\n", (void*)o
);
9932 redisAssert(1 != 1); /* We should never reach this */
9935 static void *IOThreadEntryPoint(void *arg
) {
9940 pthread_detach(pthread_self());
9942 /* Get a new job to process */
9944 if (listLength(server
.io_newjobs
) == 0) {
9945 /* No new jobs in queue, exit. */
9946 redisLog(REDIS_DEBUG
,"Thread %ld exiting, nothing to do",
9947 (long) pthread_self());
9948 server
.io_active_threads
--;
9952 ln
= listFirst(server
.io_newjobs
);
9954 listDelNode(server
.io_newjobs
,ln
);
9955 /* Add the job in the processing queue */
9956 j
->thread
= pthread_self();
9957 listAddNodeTail(server
.io_processing
,j
);
9958 ln
= listLast(server
.io_processing
); /* We use ln later to remove it */
9960 redisLog(REDIS_DEBUG
,"Thread %ld got a new job (type %d): %p about key '%s'",
9961 (long) pthread_self(), j
->type
, (void*)j
, (char*)j
->key
->ptr
);
9963 /* Process the Job */
9964 if (j
->type
== REDIS_IOJOB_LOAD
) {
9965 vmpointer
*vp
= (vmpointer
*)j
->id
;
9966 j
->val
= vmReadObjectFromSwap(j
->page
,vp
->vtype
);
9967 } else if (j
->type
== REDIS_IOJOB_PREPARE_SWAP
) {
9968 FILE *fp
= fopen("/dev/null","w+");
9969 j
->pages
= rdbSavedObjectPages(j
->val
,fp
);
9971 } else if (j
->type
== REDIS_IOJOB_DO_SWAP
) {
9972 if (vmWriteObjectOnSwap(j
->val
,j
->page
) == REDIS_ERR
)
9976 /* Done: insert the job into the processed queue */
9977 redisLog(REDIS_DEBUG
,"Thread %ld completed the job: %p (key %s)",
9978 (long) pthread_self(), (void*)j
, (char*)j
->key
->ptr
);
9980 listDelNode(server
.io_processing
,ln
);
9981 listAddNodeTail(server
.io_processed
,j
);
9984 /* Signal the main thread there is new stuff to process */
9985 assert(write(server
.io_ready_pipe_write
,"x",1) == 1);
9987 return NULL
; /* never reached */
9990 static void spawnIOThread(void) {
9992 sigset_t mask
, omask
;
9996 sigaddset(&mask
,SIGCHLD
);
9997 sigaddset(&mask
,SIGHUP
);
9998 sigaddset(&mask
,SIGPIPE
);
9999 pthread_sigmask(SIG_SETMASK
, &mask
, &omask
);
10000 while ((err
= pthread_create(&thread
,&server
.io_threads_attr
,IOThreadEntryPoint
,NULL
)) != 0) {
10001 redisLog(REDIS_WARNING
,"Unable to spawn an I/O thread: %s",
10005 pthread_sigmask(SIG_SETMASK
, &omask
, NULL
);
10006 server
.io_active_threads
++;
10009 /* We need to wait for the last thread to exit before we are able to
10010 * fork() in order to BGSAVE or BGREWRITEAOF. */
10011 static void waitEmptyIOJobsQueue(void) {
10013 int io_processed_len
;
10016 if (listLength(server
.io_newjobs
) == 0 &&
10017 listLength(server
.io_processing
) == 0 &&
10018 server
.io_active_threads
== 0)
10020 unlockThreadedIO();
10023 /* While waiting for the jobs queue to become empty we post-process some
10024 * finished jobs, as I/O threads may be hanging trying to write to
10025 * the io_ready_pipe_write FD while there are so many pending jobs that
10026 * the write is blocking. */
10027 io_processed_len
= listLength(server
.io_processed
);
10028 unlockThreadedIO();
10029 if (io_processed_len
) {
10030 vmThreadedIOCompletedJob(NULL
,server
.io_ready_pipe_read
,NULL
,0);
10031 usleep(1000); /* 1 millisecond */
10033 usleep(10000); /* 10 milliseconds */
10038 static void vmReopenSwapFile(void) {
10039 /* Note: we don't close the old one as we are in the child process
10040 * and don't want to mess at all with the original file object. */
10041 server
.vm_fp
= fopen(server
.vm_swap_file
,"r+b");
10042 if (server
.vm_fp
== NULL
) {
10043 redisLog(REDIS_WARNING
,"Can't re-open the VM swap file: %s. Exiting.",
10044 server
.vm_swap_file
);
10047 server
.vm_fd
= fileno(server
.vm_fp
);
10050 /* This function must be called while threaded IO is locked */
10051 static void queueIOJob(iojob
*j
) {
10052 redisLog(REDIS_DEBUG
,"Queued IO Job %p type %d about key '%s'\n",
10053 (void*)j
, j
->type
, (char*)j
->key
->ptr
);
10054 listAddNodeTail(server
.io_newjobs
,j
);
10055 if (server
.io_active_threads
< server
.vm_max_threads
)
10059 static int vmSwapObjectThreaded(robj
*key
, robj
*val
, redisDb
*db
) {
10062 j
= zmalloc(sizeof(*j
));
10063 j
->type
= REDIS_IOJOB_PREPARE_SWAP
;
10067 j
->id
= j
->val
= val
;
10070 j
->thread
= (pthread_t
) -1;
10071 val
->storage
= REDIS_VM_SWAPPING
;
10075 unlockThreadedIO();
10079 /* ============ Virtual Memory - Blocking clients on missing keys =========== */
10081 /* This function makes the client 'c' wait for the key 'key' to be loaded.
10082 * If there is not already a job loading the key, it is created.
10083 * The key is added to the io_keys list in the client structure, and also
10084 * in the hash table mapping swapped keys to waiting clients, that is,
10085 * c->db->io_keys. */
10086 static int waitForSwappedKey(redisClient
*c
, robj
*key
) {
10087 struct dictEntry
*de
;
10091 /* If the key does not exist or is already in RAM we don't need to
10092 * block the client at all. */
10093 de
= dictFind(c
->db
->dict
,key
->ptr
);
10094 if (de
== NULL
) return 0;
10095 o
= dictGetEntryVal(de
);
10096 if (o
->storage
== REDIS_VM_MEMORY
) {
10098 } else if (o
->storage
== REDIS_VM_SWAPPING
) {
10099 /* We were swapping the key, undo it! */
10100 vmCancelThreadedIOJob(o
);
10104 /* OK: the key is either swapped, or being loaded just now. */
10106 /* Add the key to the list of keys this client is waiting for.
10107 * This maps clients to keys they are waiting for. */
10108 listAddNodeTail(c
->io_keys
,key
);
10111 /* Add the client to the swapped keys => clients waiting map. */
10112 de
= dictFind(c
->db
->io_keys
,key
);
10116 /* For every key we take a list of clients blocked for it */
10118 retval
= dictAdd(c
->db
->io_keys
,key
,l
);
10120 assert(retval
== DICT_OK
);
10122 l
= dictGetEntryVal(de
);
10124 listAddNodeTail(l
,c
);
10126 /* Are we already loading the key from disk? If not create a job */
10127 if (o
->storage
== REDIS_VM_SWAPPED
) {
10129 vmpointer
*vp
= (vmpointer
*)o
;
10131 o
->storage
= REDIS_VM_LOADING
;
10132 j
= zmalloc(sizeof(*j
));
10133 j
->type
= REDIS_IOJOB_LOAD
;
10138 j
->page
= vp
->page
;
10141 j
->thread
= (pthread_t
) -1;
10144 unlockThreadedIO();
10149 /* Preload keys for any command with first, last and step values for
10150 * the command keys prototype, as defined in the command table. */
10151 static void waitForMultipleSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10153 if (cmd
->vm_firstkey
== 0) return;
10154 last
= cmd
->vm_lastkey
;
10155 if (last
< 0) last
= argc
+last
;
10156 for (j
= cmd
->vm_firstkey
; j
<= last
; j
+= cmd
->vm_keystep
) {
10157 redisAssert(j
< argc
);
10158 waitForSwappedKey(c
,argv
[j
]);
10162 /* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
10163 * Note that the number of keys to preload is user-defined, so we need to
10164 * apply a sanity check against argc. */
10165 static void zunionInterBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10167 REDIS_NOTUSED(cmd
);
10169 num
= atoi(argv
[2]->ptr
);
10170 if (num
> (argc
-3)) return;
10171 for (i
= 0; i
< num
; i
++) {
10172 waitForSwappedKey(c
,argv
[3+i
]);
10176 /* Preload keys needed to execute the entire MULTI/EXEC block.
10178 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
10179 * and will block the client when any command requires a swapped out value. */
10180 static void execBlockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
, int argc
, robj
**argv
) {
10182 struct redisCommand
*mcmd
;
10184 REDIS_NOTUSED(cmd
);
10185 REDIS_NOTUSED(argc
);
10186 REDIS_NOTUSED(argv
);
10188 if (!(c
->flags
& REDIS_MULTI
)) return;
10189 for (i
= 0; i
< c
->mstate
.count
; i
++) {
10190 mcmd
= c
->mstate
.commands
[i
].cmd
;
10191 margc
= c
->mstate
.commands
[i
].argc
;
10192 margv
= c
->mstate
.commands
[i
].argv
;
10194 if (mcmd
->vm_preload_proc
!= NULL
) {
10195 mcmd
->vm_preload_proc(c
,mcmd
,margc
,margv
);
10197 waitForMultipleSwappedKeys(c
,mcmd
,margc
,margv
);
10202 /* Is this client attempting to run a command against swapped keys?
10203 * If so, block it ASAP, load the keys in background, then resume it.
10205 * The important idea about this function is that it can fail! If keys will
10206 * still be swapped when the client is resumed, this key lookups will
10207 * just block loading keys from disk. In practical terms this should only
10208 * happen with SORT BY command or if there is a bug in this function.
10210 * Return 1 if the client is marked as blocked, 0 if the client can
10211 * continue as the keys it is going to access appear to be in memory. */
10212 static int blockClientOnSwappedKeys(redisClient
*c
, struct redisCommand
*cmd
) {
10213 if (cmd
->vm_preload_proc
!= NULL
) {
10214 cmd
->vm_preload_proc(c
,cmd
,c
->argc
,c
->argv
);
10216 waitForMultipleSwappedKeys(c
,cmd
,c
->argc
,c
->argv
);
10219 /* If the client was blocked for at least one key, mark it as blocked. */
10220 if (listLength(c
->io_keys
)) {
10221 c
->flags
|= REDIS_IO_WAIT
;
10222 aeDeleteFileEvent(server
.el
,c
->fd
,AE_READABLE
);
10223 server
.vm_blocked_clients
++;
10230 /* Remove the 'key' from the list of blocked keys for a given client.
10232 * The function returns 1 when there are no longer blocking keys after
10233 * the current one was removed (and the client can be unblocked). */
10234 static int dontWaitForSwappedKey(redisClient
*c
, robj
*key
) {
10238 struct dictEntry
*de
;
10240 /* Remove the key from the list of keys this client is waiting for. */
10241 listRewind(c
->io_keys
,&li
);
10242 while ((ln
= listNext(&li
)) != NULL
) {
10243 if (equalStringObjects(ln
->value
,key
)) {
10244 listDelNode(c
->io_keys
,ln
);
10248 assert(ln
!= NULL
);
10250 /* Remove the client from the key => waiting clients map. */
10251 de
= dictFind(c
->db
->io_keys
,key
);
10252 assert(de
!= NULL
);
10253 l
= dictGetEntryVal(de
);
10254 ln
= listSearchKey(l
,c
);
10255 assert(ln
!= NULL
);
10257 if (listLength(l
) == 0)
10258 dictDelete(c
->db
->io_keys
,key
);
10260 return listLength(c
->io_keys
) == 0;
10263 /* Every time we know a key was loaded back in memory, we handle clients
10264 * waiting for this key if any. */
10265 static void handleClientsBlockedOnSwappedKey(redisDb
*db
, robj
*key
) {
10266 struct dictEntry
*de
;
10271 de
= dictFind(db
->io_keys
,key
);
10274 l
= dictGetEntryVal(de
);
10275 len
= listLength(l
);
10276 /* Note: we can't use something like while(listLength(l)) as the list
10277 * can be freed by the calling function when we remove the last element. */
10280 redisClient
*c
= ln
->value
;
10282 if (dontWaitForSwappedKey(c
,key
)) {
10283 /* Put the client in the list of clients ready to go as we
10284 * loaded all the keys about it. */
10285 listAddNodeTail(server
.io_ready_clients
,c
);
10290 /* =========================== Remote Configuration ========================= */
10292 static void configSetCommand(redisClient
*c
) {
10293 robj
*o
= getDecodedObject(c
->argv
[3]);
10296 if (!strcasecmp(c
->argv
[2]->ptr
,"dbfilename")) {
10297 zfree(server
.dbfilename
);
10298 server
.dbfilename
= zstrdup(o
->ptr
);
10299 } else if (!strcasecmp(c
->argv
[2]->ptr
,"requirepass")) {
10300 zfree(server
.requirepass
);
10301 server
.requirepass
= zstrdup(o
->ptr
);
10302 } else if (!strcasecmp(c
->argv
[2]->ptr
,"masterauth")) {
10303 zfree(server
.masterauth
);
10304 server
.masterauth
= zstrdup(o
->ptr
);
10305 } else if (!strcasecmp(c
->argv
[2]->ptr
,"maxmemory")) {
10306 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10307 ll
< 0) goto badfmt
;
10308 server
.maxmemory
= ll
;
10309 } else if (!strcasecmp(c
->argv
[2]->ptr
,"timeout")) {
10310 if (getLongLongFromObject(o
,&ll
) == REDIS_ERR
||
10311 ll
< 0 || ll
> LONG_MAX
) goto badfmt
;
10312 server
.maxidletime
= ll
;
10313 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendfsync")) {
10314 if (!strcasecmp(o
->ptr
,"no")) {
10315 server
.appendfsync
= APPENDFSYNC_NO
;
10316 } else if (!strcasecmp(o
->ptr
,"everysec")) {
10317 server
.appendfsync
= APPENDFSYNC_EVERYSEC
;
10318 } else if (!strcasecmp(o
->ptr
,"always")) {
10319 server
.appendfsync
= APPENDFSYNC_ALWAYS
;
10323 } else if (!strcasecmp(c
->argv
[2]->ptr
,"no-appendfsync-on-rewrite")) {
10324 int yn
= yesnotoi(o
->ptr
);
10326 if (yn
== -1) goto badfmt
;
10327 server
.no_appendfsync_on_rewrite
= yn
;
10328 } else if (!strcasecmp(c
->argv
[2]->ptr
,"appendonly")) {
10329 int old
= server
.appendonly
;
10330 int new = yesnotoi(o
->ptr
);
10332 if (new == -1) goto badfmt
;
10337 if (startAppendOnly() == REDIS_ERR
) {
10338 addReplySds(c
,sdscatprintf(sdsempty(),
10339 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10345 } else if (!strcasecmp(c
->argv
[2]->ptr
,"save")) {
10347 sds
*v
= sdssplitlen(o
->ptr
,sdslen(o
->ptr
)," ",1,&vlen
);
10349 /* Perform sanity check before setting the new config:
10350 * - Even number of args
10351 * - Seconds >= 1, changes >= 0 */
10353 sdsfreesplitres(v
,vlen
);
10356 for (j
= 0; j
< vlen
; j
++) {
10360 val
= strtoll(v
[j
], &eptr
, 10);
10361 if (eptr
[0] != '\0' ||
10362 ((j
& 1) == 0 && val
< 1) ||
10363 ((j
& 1) == 1 && val
< 0)) {
10364 sdsfreesplitres(v
,vlen
);
10368 /* Finally set the new config */
10369 resetServerSaveParams();
10370 for (j
= 0; j
< vlen
; j
+= 2) {
10374 seconds
= strtoll(v
[j
],NULL
,10);
10375 changes
= strtoll(v
[j
+1],NULL
,10);
10376 appendServerSaveParams(seconds
, changes
);
10378 sdsfreesplitres(v
,vlen
);
10380 addReplySds(c
,sdscatprintf(sdsempty(),
10381 "-ERR not supported CONFIG parameter %s\r\n",
10382 (char*)c
->argv
[2]->ptr
));
10387 addReply(c
,shared
.ok
);
10390 badfmt
: /* Bad format errors */
10391 addReplySds(c
,sdscatprintf(sdsempty(),
10392 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10394 (char*)c
->argv
[2]->ptr
));
10398 static void configGetCommand(redisClient
*c
) {
10399 robj
*o
= getDecodedObject(c
->argv
[2]);
10400 robj
*lenobj
= createObject(REDIS_STRING
,NULL
);
10401 char *pattern
= o
->ptr
;
10404 addReply(c
,lenobj
);
10405 decrRefCount(lenobj
);
10407 if (stringmatch(pattern
,"dbfilename",0)) {
10408 addReplyBulkCString(c
,"dbfilename");
10409 addReplyBulkCString(c
,server
.dbfilename
);
10412 if (stringmatch(pattern
,"requirepass",0)) {
10413 addReplyBulkCString(c
,"requirepass");
10414 addReplyBulkCString(c
,server
.requirepass
);
10417 if (stringmatch(pattern
,"masterauth",0)) {
10418 addReplyBulkCString(c
,"masterauth");
10419 addReplyBulkCString(c
,server
.masterauth
);
10422 if (stringmatch(pattern
,"maxmemory",0)) {
10425 ll2string(buf
,128,server
.maxmemory
);
10426 addReplyBulkCString(c
,"maxmemory");
10427 addReplyBulkCString(c
,buf
);
10430 if (stringmatch(pattern
,"timeout",0)) {
10433 ll2string(buf
,128,server
.maxidletime
);
10434 addReplyBulkCString(c
,"timeout");
10435 addReplyBulkCString(c
,buf
);
10438 if (stringmatch(pattern
,"appendonly",0)) {
10439 addReplyBulkCString(c
,"appendonly");
10440 addReplyBulkCString(c
,server
.appendonly
? "yes" : "no");
10443 if (stringmatch(pattern
,"no-appendfsync-on-rewrite",0)) {
10444 addReplyBulkCString(c
,"no-appendfsync-on-rewrite");
10445 addReplyBulkCString(c
,server
.no_appendfsync_on_rewrite
? "yes" : "no");
10448 if (stringmatch(pattern
,"appendfsync",0)) {
10451 switch(server
.appendfsync
) {
10452 case APPENDFSYNC_NO
: policy
= "no"; break;
10453 case APPENDFSYNC_EVERYSEC
: policy
= "everysec"; break;
10454 case APPENDFSYNC_ALWAYS
: policy
= "always"; break;
10455 default: policy
= "unknown"; break; /* too harmless to panic */
10457 addReplyBulkCString(c
,"appendfsync");
10458 addReplyBulkCString(c
,policy
);
10461 if (stringmatch(pattern
,"save",0)) {
10462 sds buf
= sdsempty();
10465 for (j
= 0; j
< server
.saveparamslen
; j
++) {
10466 buf
= sdscatprintf(buf
,"%ld %d",
10467 server
.saveparams
[j
].seconds
,
10468 server
.saveparams
[j
].changes
);
10469 if (j
!= server
.saveparamslen
-1)
10470 buf
= sdscatlen(buf
," ",1);
10472 addReplyBulkCString(c
,"save");
10473 addReplyBulkCString(c
,buf
);
10478 lenobj
->ptr
= sdscatprintf(sdsempty(),"*%d\r\n",matches
*2);
10481 static void configCommand(redisClient
*c
) {
10482 if (!strcasecmp(c
->argv
[1]->ptr
,"set")) {
10483 if (c
->argc
!= 4) goto badarity
;
10484 configSetCommand(c
);
10485 } else if (!strcasecmp(c
->argv
[1]->ptr
,"get")) {
10486 if (c
->argc
!= 3) goto badarity
;
10487 configGetCommand(c
);
10488 } else if (!strcasecmp(c
->argv
[1]->ptr
,"resetstat")) {
10489 if (c
->argc
!= 2) goto badarity
;
10490 server
.stat_numcommands
= 0;
10491 server
.stat_numconnections
= 0;
10492 server
.stat_expiredkeys
= 0;
10493 server
.stat_starttime
= time(NULL
);
10494 addReply(c
,shared
.ok
);
10496 addReplySds(c
,sdscatprintf(sdsempty(),
10497 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10502 addReplySds(c
,sdscatprintf(sdsempty(),
10503 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10504 (char*) c
->argv
[1]->ptr
));
10507 /* =========================== Pubsub implementation ======================== */
10509 static void freePubsubPattern(void *p
) {
10510 pubsubPattern
*pat
= p
;
10512 decrRefCount(pat
->pattern
);
10516 static int listMatchPubsubPattern(void *a
, void *b
) {
10517 pubsubPattern
*pa
= a
, *pb
= b
;
10519 return (pa
->client
== pb
->client
) &&
10520 (equalStringObjects(pa
->pattern
,pb
->pattern
));
10523 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10524 * 0 if the client was already subscribed to that channel. */
10525 static int pubsubSubscribeChannel(redisClient
*c
, robj
*channel
) {
10526 struct dictEntry
*de
;
10527 list
*clients
= NULL
;
10530 /* Add the channel to the client -> channels hash table */
10531 if (dictAdd(c
->pubsub_channels
,channel
,NULL
) == DICT_OK
) {
10533 incrRefCount(channel
);
10534 /* Add the client to the channel -> list of clients hash table */
10535 de
= dictFind(server
.pubsub_channels
,channel
);
10537 clients
= listCreate();
10538 dictAdd(server
.pubsub_channels
,channel
,clients
);
10539 incrRefCount(channel
);
10541 clients
= dictGetEntryVal(de
);
10543 listAddNodeTail(clients
,c
);
10545 /* Notify the client */
10546 addReply(c
,shared
.mbulk3
);
10547 addReply(c
,shared
.subscribebulk
);
10548 addReplyBulk(c
,channel
);
10549 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10553 /* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10554 * 0 if the client was not subscribed to the specified channel. */
10555 static int pubsubUnsubscribeChannel(redisClient
*c
, robj
*channel
, int notify
) {
10556 struct dictEntry
*de
;
10561 /* Remove the channel from the client -> channels hash table */
10562 incrRefCount(channel
); /* channel may be just a pointer to the same object
10563 we have in the hash tables. Protect it... */
10564 if (dictDelete(c
->pubsub_channels
,channel
) == DICT_OK
) {
10566 /* Remove the client from the channel -> clients list hash table */
10567 de
= dictFind(server
.pubsub_channels
,channel
);
10568 assert(de
!= NULL
);
10569 clients
= dictGetEntryVal(de
);
10570 ln
= listSearchKey(clients
,c
);
10571 assert(ln
!= NULL
);
10572 listDelNode(clients
,ln
);
10573 if (listLength(clients
) == 0) {
10574 /* Free the list and the associated hash entry altogether if this
10575 * was the last client, so that abusing Redis PUBSUB by creating
10576 * millions of channels does not leave empty entries behind. */
10577 dictDelete(server
.pubsub_channels
,channel
);
10580 /* Notify the client */
10582 addReply(c
,shared
.mbulk3
);
10583 addReply(c
,shared
.unsubscribebulk
);
10584 addReplyBulk(c
,channel
);
10585 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10586 listLength(c
->pubsub_patterns
));
10589 decrRefCount(channel
); /* it is finally safe to release it */
10593 /* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the client was already subscribed to that pattern. */
10594 static int pubsubSubscribePattern(redisClient
*c
, robj
*pattern
) {
10597 if (listSearchKey(c
->pubsub_patterns
,pattern
) == NULL
) {
10599 pubsubPattern
*pat
;
10600 listAddNodeTail(c
->pubsub_patterns
,pattern
);
10601 incrRefCount(pattern
);
10602 pat
= zmalloc(sizeof(*pat
));
10603 pat
->pattern
= getDecodedObject(pattern
);
10605 listAddNodeTail(server
.pubsub_patterns
,pat
);
10607 /* Notify the client */
10608 addReply(c
,shared
.mbulk3
);
10609 addReply(c
,shared
.psubscribebulk
);
10610 addReplyBulk(c
,pattern
);
10611 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+listLength(c
->pubsub_patterns
));
10615 /* Unsubscribe a client from a pattern. Returns 1 if the operation succeeded, or
10616 * 0 if the client was not subscribed to the specified pattern. */
10617 static int pubsubUnsubscribePattern(redisClient
*c
, robj
*pattern
, int notify
) {
10622 incrRefCount(pattern
); /* Protect the object. May be the same we remove */
10623 if ((ln
= listSearchKey(c
->pubsub_patterns
,pattern
)) != NULL
) {
10625 listDelNode(c
->pubsub_patterns
,ln
);
10627 pat
.pattern
= pattern
;
10628 ln
= listSearchKey(server
.pubsub_patterns
,&pat
);
10629 listDelNode(server
.pubsub_patterns
,ln
);
10631 /* Notify the client */
10633 addReply(c
,shared
.mbulk3
);
10634 addReply(c
,shared
.punsubscribebulk
);
10635 addReplyBulk(c
,pattern
);
10636 addReplyLongLong(c
,dictSize(c
->pubsub_channels
)+
10637 listLength(c
->pubsub_patterns
));
10639 decrRefCount(pattern
);
10643 /* Unsubscribe from all the channels. Return the number of channels the
10644 * client was subscribed from. */
10645 static int pubsubUnsubscribeAllChannels(redisClient
*c
, int notify
) {
10646 dictIterator
*di
= dictGetIterator(c
->pubsub_channels
);
10650 while((de
= dictNext(di
)) != NULL
) {
10651 robj
*channel
= dictGetEntryKey(de
);
10653 count
+= pubsubUnsubscribeChannel(c
,channel
,notify
);
10655 dictReleaseIterator(di
);
10659 /* Unsubscribe from all the patterns. Return the number of patterns the
10660 * client was subscribed from. */
10661 static int pubsubUnsubscribeAllPatterns(redisClient
*c
, int notify
) {
10666 listRewind(c
->pubsub_patterns
,&li
);
10667 while ((ln
= listNext(&li
)) != NULL
) {
10668 robj
*pattern
= ln
->value
;
10670 count
+= pubsubUnsubscribePattern(c
,pattern
,notify
);
10675 /* Publish a message */
10676 static int pubsubPublishMessage(robj
*channel
, robj
*message
) {
10678 struct dictEntry
*de
;
10682 /* Send to clients listening for that channel */
10683 de
= dictFind(server
.pubsub_channels
,channel
);
10685 list
*list
= dictGetEntryVal(de
);
10689 listRewind(list
,&li
);
10690 while ((ln
= listNext(&li
)) != NULL
) {
10691 redisClient
*c
= ln
->value
;
10693 addReply(c
,shared
.mbulk3
);
10694 addReply(c
,shared
.messagebulk
);
10695 addReplyBulk(c
,channel
);
10696 addReplyBulk(c
,message
);
10700 /* Send to clients listening to matching channels */
10701 if (listLength(server
.pubsub_patterns
)) {
10702 listRewind(server
.pubsub_patterns
,&li
);
10703 channel
= getDecodedObject(channel
);
10704 while ((ln
= listNext(&li
)) != NULL
) {
10705 pubsubPattern
*pat
= ln
->value
;
10707 if (stringmatchlen((char*)pat
->pattern
->ptr
,
10708 sdslen(pat
->pattern
->ptr
),
10709 (char*)channel
->ptr
,
10710 sdslen(channel
->ptr
),0)) {
10711 addReply(pat
->client
,shared
.mbulk4
);
10712 addReply(pat
->client
,shared
.pmessagebulk
);
10713 addReplyBulk(pat
->client
,pat
->pattern
);
10714 addReplyBulk(pat
->client
,channel
);
10715 addReplyBulk(pat
->client
,message
);
10719 decrRefCount(channel
);
10724 static void subscribeCommand(redisClient
*c
) {
10727 for (j
= 1; j
< c
->argc
; j
++)
10728 pubsubSubscribeChannel(c
,c
->argv
[j
]);
10731 static void unsubscribeCommand(redisClient
*c
) {
10732 if (c
->argc
== 1) {
10733 pubsubUnsubscribeAllChannels(c
,1);
10738 for (j
= 1; j
< c
->argc
; j
++)
10739 pubsubUnsubscribeChannel(c
,c
->argv
[j
],1);
10743 static void psubscribeCommand(redisClient
*c
) {
10746 for (j
= 1; j
< c
->argc
; j
++)
10747 pubsubSubscribePattern(c
,c
->argv
[j
]);
10750 static void punsubscribeCommand(redisClient
*c
) {
10751 if (c
->argc
== 1) {
10752 pubsubUnsubscribeAllPatterns(c
,1);
10757 for (j
= 1; j
< c
->argc
; j
++)
10758 pubsubUnsubscribePattern(c
,c
->argv
[j
],1);
10762 static void publishCommand(redisClient
*c
) {
10763 int receivers
= pubsubPublishMessage(c
->argv
[1],c
->argv
[2]);
10764 addReplyLongLong(c
,receivers
);
10767 /* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10769 * The implementation uses a per-DB hash table mapping keys to list of clients
10770 * WATCHing those keys, so that given a key that is going to be modified
10771 * we can mark all the associated clients as dirty.
10773 * Also every client contains a list of WATCHed keys so that it's possible to
10774 * un-watch such keys when the client is freed or when UNWATCH is called. */
10776 /* In the client->watched_keys list we need to use watchedKey structures
10777 * as in order to identify a key in Redis we need both the key name and the
10779 typedef struct watchedKey
{
10784 /* Watch for the specified key */
10785 static void watchForKey(redisClient
*c
, robj
*key
) {
10786 list
*clients
= NULL
;
10791 /* Check if we are already watching for this key */
10792 listRewind(c
->watched_keys
,&li
);
10793 while((ln
= listNext(&li
))) {
10794 wk
= listNodeValue(ln
);
10795 if (wk
->db
== c
->db
&& equalStringObjects(key
,wk
->key
))
10796 return; /* Key already watched */
10798 /* This key is not already watched in this DB. Let's add it */
10799 clients
= dictFetchValue(c
->db
->watched_keys
,key
);
10801 clients
= listCreate();
10802 dictAdd(c
->db
->watched_keys
,key
,clients
);
10805 listAddNodeTail(clients
,c
);
10806 /* Add the new key to the lits of keys watched by this client */
10807 wk
= zmalloc(sizeof(*wk
));
10811 listAddNodeTail(c
->watched_keys
,wk
);
10814 /* Unwatch all the keys watched by this client. To clean the EXEC dirty
10815 * flag is up to the caller. */
10816 static void unwatchAllKeys(redisClient
*c
) {
10820 if (listLength(c
->watched_keys
) == 0) return;
10821 listRewind(c
->watched_keys
,&li
);
10822 while((ln
= listNext(&li
))) {
10826 /* Lookup the watched key -> clients list and remove the client
10828 wk
= listNodeValue(ln
);
10829 clients
= dictFetchValue(wk
->db
->watched_keys
, wk
->key
);
10830 assert(clients
!= NULL
);
10831 listDelNode(clients
,listSearchKey(clients
,c
));
10832 /* Kill the entry at all if this was the only client */
10833 if (listLength(clients
) == 0)
10834 dictDelete(wk
->db
->watched_keys
, wk
->key
);
10835 /* Remove this watched key from the client->watched list */
10836 listDelNode(c
->watched_keys
,ln
);
10837 decrRefCount(wk
->key
);
10842 /* "Touch" a key, so that if this key is being WATCHed by some client the
10843 * next EXEC will fail. */
10844 static void touchWatchedKey(redisDb
*db
, robj
*key
) {
10849 if (dictSize(db
->watched_keys
) == 0) return;
10850 clients
= dictFetchValue(db
->watched_keys
, key
);
10851 if (!clients
) return;
10853 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10854 /* Check if we are already watching for this key */
10855 listRewind(clients
,&li
);
10856 while((ln
= listNext(&li
))) {
10857 redisClient
*c
= listNodeValue(ln
);
10859 c
->flags
|= REDIS_DIRTY_CAS
;
10863 /* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10864 * flush but will be deleted as effect of the flushing operation should
10865 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10866 * a FLUSHALL operation (all the DBs flushed). */
10867 static void touchWatchedKeysOnFlush(int dbid
) {
10871 /* For every client, check all the waited keys */
10872 listRewind(server
.clients
,&li1
);
10873 while((ln
= listNext(&li1
))) {
10874 redisClient
*c
= listNodeValue(ln
);
10875 listRewind(c
->watched_keys
,&li2
);
10876 while((ln
= listNext(&li2
))) {
10877 watchedKey
*wk
= listNodeValue(ln
);
10879 /* For every watched key matching the specified DB, if the
10880 * key exists, mark the client as dirty, as the key will be
10882 if (dbid
== -1 || wk
->db
->id
== dbid
) {
10883 if (dictFind(wk
->db
->dict
, wk
->key
->ptr
) != NULL
)
10884 c
->flags
|= REDIS_DIRTY_CAS
;
10890 static void watchCommand(redisClient
*c
) {
10893 if (c
->flags
& REDIS_MULTI
) {
10894 addReplySds(c
,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10897 for (j
= 1; j
< c
->argc
; j
++)
10898 watchForKey(c
,c
->argv
[j
]);
10899 addReply(c
,shared
.ok
);
10902 static void unwatchCommand(redisClient
*c
) {
10904 c
->flags
&= (~REDIS_DIRTY_CAS
);
10905 addReply(c
,shared
.ok
);
10908 /* ================================= Debugging ============================== */
10910 /* Compute the sha1 of string at 's' with 'len' bytes long.
10911 * The SHA1 is then xored againt the string pointed by digest.
10912 * Since xor is commutative, this operation is used in order to
10913 * "add" digests relative to unordered elements.
10915 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10916 static void xorDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10918 unsigned char hash
[20], *s
= ptr
;
10922 SHA1Update(&ctx
,s
,len
);
10923 SHA1Final(hash
,&ctx
);
10925 for (j
= 0; j
< 20; j
++)
10926 digest
[j
] ^= hash
[j
];
10929 static void xorObjectDigest(unsigned char *digest
, robj
*o
) {
10930 o
= getDecodedObject(o
);
10931 xorDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10935 /* This function instead of just computing the SHA1 and xoring it
10936 * against diget, also perform the digest of "digest" itself and
10937 * replace the old value with the new one.
10939 * So the final digest will be:
10941 * digest = SHA1(digest xor SHA1(data))
10943 * This function is used every time we want to preserve the order so
10944 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10946 * Also note that mixdigest("foo") followed by mixdigest("bar")
10947 * will lead to a different digest compared to "fo", "obar".
10949 static void mixDigest(unsigned char *digest
, void *ptr
, size_t len
) {
10953 xorDigest(digest
,s
,len
);
10955 SHA1Update(&ctx
,digest
,20);
10956 SHA1Final(digest
,&ctx
);
10959 static void mixObjectDigest(unsigned char *digest
, robj
*o
) {
10960 o
= getDecodedObject(o
);
10961 mixDigest(digest
,o
->ptr
,sdslen(o
->ptr
));
10965 /* Compute the dataset digest. Since keys, sets elements, hashes elements
10966 * are not ordered, we use a trick: every aggregate digest is the xor
10967 * of the digests of their elements. This way the order will not change
10968 * the result. For list instead we use a feedback entering the output digest
10969 * as input in order to ensure that a different ordered list will result in
10970 * a different digest. */
10971 static void computeDatasetDigest(unsigned char *final
) {
10972 unsigned char digest
[20];
10974 dictIterator
*di
= NULL
;
10979 memset(final
,0,20); /* Start with a clean result */
10981 for (j
= 0; j
< server
.dbnum
; j
++) {
10982 redisDb
*db
= server
.db
+j
;
10984 if (dictSize(db
->dict
) == 0) continue;
10985 di
= dictGetIterator(db
->dict
);
10987 /* hash the DB id, so the same dataset moved in a different
10988 * DB will lead to a different digest */
10990 mixDigest(final
,&aux
,sizeof(aux
));
10992 /* Iterate this DB writing every entry */
10993 while((de
= dictNext(di
)) != NULL
) {
10998 memset(digest
,0,20); /* This key-val digest */
10999 key
= dictGetEntryKey(de
);
11000 keyobj
= createStringObject(key
,sdslen(key
));
11002 mixDigest(digest
,key
,sdslen(key
));
11004 /* Make sure the key is loaded if VM is active */
11005 o
= lookupKeyRead(db
,keyobj
);
11007 aux
= htonl(o
->type
);
11008 mixDigest(digest
,&aux
,sizeof(aux
));
11009 expiretime
= getExpire(db
,keyobj
);
11011 /* Save the key and associated value */
11012 if (o
->type
== REDIS_STRING
) {
11013 mixObjectDigest(digest
,o
);
11014 } else if (o
->type
== REDIS_LIST
) {
11015 lIterator
*li
= lInitIterator(o
,0,REDIS_TAIL
);
11017 while(lNext(li
,&entry
)) {
11018 robj
*eleobj
= lGet(&entry
);
11019 mixObjectDigest(digest
,eleobj
);
11020 decrRefCount(eleobj
);
11022 lReleaseIterator(li
);
11023 } else if (o
->type
== REDIS_SET
) {
11024 dict
*set
= o
->ptr
;
11025 dictIterator
*di
= dictGetIterator(set
);
11028 while((de
= dictNext(di
)) != NULL
) {
11029 robj
*eleobj
= dictGetEntryKey(de
);
11031 xorObjectDigest(digest
,eleobj
);
11033 dictReleaseIterator(di
);
11034 } else if (o
->type
== REDIS_ZSET
) {
11036 dictIterator
*di
= dictGetIterator(zs
->dict
);
11039 while((de
= dictNext(di
)) != NULL
) {
11040 robj
*eleobj
= dictGetEntryKey(de
);
11041 double *score
= dictGetEntryVal(de
);
11042 unsigned char eledigest
[20];
11044 snprintf(buf
,sizeof(buf
),"%.17g",*score
);
11045 memset(eledigest
,0,20);
11046 mixObjectDigest(eledigest
,eleobj
);
11047 mixDigest(eledigest
,buf
,strlen(buf
));
11048 xorDigest(digest
,eledigest
,20);
11050 dictReleaseIterator(di
);
11051 } else if (o
->type
== REDIS_HASH
) {
11055 hi
= hashInitIterator(o
);
11056 while (hashNext(hi
) != REDIS_ERR
) {
11057 unsigned char eledigest
[20];
11059 memset(eledigest
,0,20);
11060 obj
= hashCurrent(hi
,REDIS_HASH_KEY
);
11061 mixObjectDigest(eledigest
,obj
);
11063 obj
= hashCurrent(hi
,REDIS_HASH_VALUE
);
11064 mixObjectDigest(eledigest
,obj
);
11066 xorDigest(digest
,eledigest
,20);
11068 hashReleaseIterator(hi
);
11070 redisPanic("Unknown object type");
11072 /* If the key has an expire, add it to the mix */
11073 if (expiretime
!= -1) xorDigest(digest
,"!!expire!!",10);
11074 /* We can finally xor the key-val digest to the final digest */
11075 xorDigest(final
,digest
,20);
11076 decrRefCount(keyobj
);
11078 dictReleaseIterator(di
);
11082 static void debugCommand(redisClient
*c
) {
11083 if (!strcasecmp(c
->argv
[1]->ptr
,"segfault")) {
11084 *((char*)-1) = 'x';
11085 } else if (!strcasecmp(c
->argv
[1]->ptr
,"reload")) {
11086 if (rdbSave(server
.dbfilename
) != REDIS_OK
) {
11087 addReply(c
,shared
.err
);
11091 if (rdbLoad(server
.dbfilename
) != REDIS_OK
) {
11092 addReply(c
,shared
.err
);
11095 redisLog(REDIS_WARNING
,"DB reloaded by DEBUG RELOAD");
11096 addReply(c
,shared
.ok
);
11097 } else if (!strcasecmp(c
->argv
[1]->ptr
,"loadaof")) {
11099 if (loadAppendOnlyFile(server
.appendfilename
) != REDIS_OK
) {
11100 addReply(c
,shared
.err
);
11103 redisLog(REDIS_WARNING
,"Append Only File loaded by DEBUG LOADAOF");
11104 addReply(c
,shared
.ok
);
11105 } else if (!strcasecmp(c
->argv
[1]->ptr
,"object") && c
->argc
== 3) {
11106 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11110 addReply(c
,shared
.nokeyerr
);
11113 val
= dictGetEntryVal(de
);
11114 if (!server
.vm_enabled
|| (val
->storage
== REDIS_VM_MEMORY
||
11115 val
->storage
== REDIS_VM_SWAPPING
)) {
11119 if (val
->encoding
< (sizeof(strencoding
)/sizeof(char*))) {
11120 strenc
= strencoding
[val
->encoding
];
11122 snprintf(buf
,64,"unknown encoding %d\n", val
->encoding
);
11125 addReplySds(c
,sdscatprintf(sdsempty(),
11126 "+Value at:%p refcount:%d "
11127 "encoding:%s serializedlength:%lld\r\n",
11128 (void*)val
, val
->refcount
,
11129 strenc
, (long long) rdbSavedObjectLen(val
,NULL
)));
11131 vmpointer
*vp
= (vmpointer
*) val
;
11132 addReplySds(c
,sdscatprintf(sdsempty(),
11133 "+Value swapped at: page %llu "
11134 "using %llu pages\r\n",
11135 (unsigned long long) vp
->page
,
11136 (unsigned long long) vp
->usedpages
));
11138 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapin") && c
->argc
== 3) {
11139 lookupKeyRead(c
->db
,c
->argv
[2]);
11140 addReply(c
,shared
.ok
);
11141 } else if (!strcasecmp(c
->argv
[1]->ptr
,"swapout") && c
->argc
== 3) {
11142 dictEntry
*de
= dictFind(c
->db
->dict
,c
->argv
[2]->ptr
);
11146 if (!server
.vm_enabled
) {
11147 addReplySds(c
,sdsnew("-ERR Virtual Memory is disabled\r\n"));
11151 addReply(c
,shared
.nokeyerr
);
11154 val
= dictGetEntryVal(de
);
11156 if (val
->storage
!= REDIS_VM_MEMORY
) {
11157 addReplySds(c
,sdsnew("-ERR This key is not in memory\r\n"));
11158 } else if (val
->refcount
!= 1) {
11159 addReplySds(c
,sdsnew("-ERR Object is shared\r\n"));
11160 } else if ((vp
= vmSwapObjectBlocking(val
)) != NULL
) {
11161 dictGetEntryVal(de
) = vp
;
11162 addReply(c
,shared
.ok
);
11164 addReply(c
,shared
.err
);
11166 } else if (!strcasecmp(c
->argv
[1]->ptr
,"populate") && c
->argc
== 3) {
11171 if (getLongFromObjectOrReply(c
, c
->argv
[2], &keys
, NULL
) != REDIS_OK
)
11173 for (j
= 0; j
< keys
; j
++) {
11174 snprintf(buf
,sizeof(buf
),"key:%lu",j
);
11175 key
= createStringObject(buf
,strlen(buf
));
11176 if (lookupKeyRead(c
->db
,key
) != NULL
) {
11180 snprintf(buf
,sizeof(buf
),"value:%lu",j
);
11181 val
= createStringObject(buf
,strlen(buf
));
11182 dbAdd(c
->db
,key
,val
);
11185 addReply(c
,shared
.ok
);
11186 } else if (!strcasecmp(c
->argv
[1]->ptr
,"digest") && c
->argc
== 2) {
11187 unsigned char digest
[20];
11188 sds d
= sdsnew("+");
11191 computeDatasetDigest(digest
);
11192 for (j
= 0; j
< 20; j
++)
11193 d
= sdscatprintf(d
, "%02x",digest
[j
]);
11195 d
= sdscatlen(d
,"\r\n",2);
11198 addReplySds(c
,sdsnew(
11199 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
11203 static void _redisAssert(char *estr
, char *file
, int line
) {
11204 redisLog(REDIS_WARNING
,"=== ASSERTION FAILED ===");
11205 redisLog(REDIS_WARNING
,"==> %s:%d '%s' is not true",file
,line
,estr
);
11206 #ifdef HAVE_BACKTRACE
11207 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11208 *((char*)-1) = 'x';
11212 static void _redisPanic(char *msg
, char *file
, int line
) {
11213 redisLog(REDIS_WARNING
,"!!! Software Failure. Press left mouse button to continue");
11214 redisLog(REDIS_WARNING
,"Guru Meditation: %s #%s:%d",msg
,file
,line
);
11215 #ifdef HAVE_BACKTRACE
11216 redisLog(REDIS_WARNING
,"(forcing SIGSEGV in order to print the stack trace)");
11217 *((char*)-1) = 'x';
11221 /* =================================== Main! ================================ */
/* Return the integer stored in /proc/sys/vm/overcommit_memory, or -1 if the
 * file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char buf[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (!fp) return -1;
    if (fgets(buf,64,fp) != NULL)
        value = atoi(buf);
    fclose(fp);
    return value;
}
11238 void linuxOvercommitMemoryWarning(void) {
11239 if (linuxOvercommitMemoryValue() == 0) {
11240 redisLog(REDIS_WARNING
,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
11243 #endif /* __linux__ */
11245 static void daemonize(void) {
11249 if (fork() != 0) exit(0); /* parent exits */
11250 setsid(); /* create a new session */
11252 /* Every output goes to /dev/null. If Redis is daemonized but
11253 * the 'logfile' is set to 'stdout' in the configuration file
11254 * it will not log at all. */
11255 if ((fd
= open("/dev/null", O_RDWR
, 0)) != -1) {
11256 dup2(fd
, STDIN_FILENO
);
11257 dup2(fd
, STDOUT_FILENO
);
11258 dup2(fd
, STDERR_FILENO
);
11259 if (fd
> STDERR_FILENO
) close(fd
);
11261 /* Try to write the pid file */
11262 fp
= fopen(server
.pidfile
,"w");
11264 fprintf(fp
,"%d\n",getpid());
11269 static void version() {
11270 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION
,
11271 REDIS_GIT_SHA1
, atoi(REDIS_GIT_DIRTY
) > 0);
/* Print the command line synopsis on stderr and exit with status 1.
 * The second line is padded so that it lines up with the first one. */
static void usage() {
    fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
    fprintf(stderr,"       ./redis-server - (read config from stdin)\n");
    exit(1);
}
11281 int main(int argc
, char **argv
) {
11284 initServerConfig();
11285 sortCommandTable();
11287 if (strcmp(argv
[1], "-v") == 0 ||
11288 strcmp(argv
[1], "--version") == 0) version();
11289 if (strcmp(argv
[1], "--help") == 0) usage();
11290 resetServerSaveParams();
11291 loadServerConfig(argv
[1]);
11292 } else if ((argc
> 2)) {
11295 redisLog(REDIS_WARNING
,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
11297 if (server
.daemonize
) daemonize();
11299 redisLog(REDIS_NOTICE
,"Server started, Redis version " REDIS_VERSION
);
11301 linuxOvercommitMemoryWarning();
11303 start
= time(NULL
);
11304 if (server
.appendonly
) {
11305 if (loadAppendOnlyFile(server
.appendfilename
) == REDIS_OK
)
11306 redisLog(REDIS_NOTICE
,"DB loaded from append only file: %ld seconds",time(NULL
)-start
);
11308 if (rdbLoad(server
.dbfilename
) == REDIS_OK
)
11309 redisLog(REDIS_NOTICE
,"DB loaded from disk: %ld seconds",time(NULL
)-start
);
11311 redisLog(REDIS_NOTICE
,"The server is now ready to accept connections on port %d", server
.port
);
11312 aeSetBeforeSleepProc(server
.el
,beforeSleep
);
11314 aeDeleteEventLoop(server
.el
);
11318 /* ============================= Backtrace support ========================= */
11320 #ifdef HAVE_BACKTRACE
/* Forward declaration: the function is defined after the signal handling
 * code that uses it. */
static char *findFuncName(void *pointer, unsigned long *offset);
11323 static void *getMcontextEip(ucontext_t
*uc
) {
11324 #if defined(__FreeBSD__)
11325 return (void*) uc
->uc_mcontext
.mc_eip
;
11326 #elif defined(__dietlibc__)
11327 return (void*) uc
->uc_mcontext
.eip
;
11328 #elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
11330 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11332 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11334 #elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
11335 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
11336 return (void*) uc
->uc_mcontext
->__ss
.__rip
;
11338 return (void*) uc
->uc_mcontext
->__ss
.__eip
;
11340 #elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
11341 return (void*) uc
->uc_mcontext
.gregs
[REG_EIP
]; /* Linux 32/64 bit */
11342 #elif defined(__ia64__) /* Linux IA64 */
11343 return (void*) uc
->uc_mcontext
.sc_ip
;
11349 static void segvHandler(int sig
, siginfo_t
*info
, void *secret
) {
11351 char **messages
= NULL
;
11352 int i
, trace_size
= 0;
11353 unsigned long offset
=0;
11354 ucontext_t
*uc
= (ucontext_t
*) secret
;
11356 REDIS_NOTUSED(info
);
11358 redisLog(REDIS_WARNING
,
11359 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION
, sig
);
11360 infostring
= genRedisInfoString();
11361 redisLog(REDIS_WARNING
, "%s",infostring
);
11362 /* It's not safe to sdsfree() the returned string under memory
11363 * corruption conditions. Let it leak as we are going to abort */
11365 trace_size
= backtrace(trace
, 100);
11366 /* overwrite sigaction with caller's address */
11367 if (getMcontextEip(uc
) != NULL
) {
11368 trace
[1] = getMcontextEip(uc
);
11370 messages
= backtrace_symbols(trace
, trace_size
);
11372 for (i
=1; i
<trace_size
; ++i
) {
11373 char *fn
= findFuncName(trace
[i
], &offset
), *p
;
11375 p
= strchr(messages
[i
],'+');
11376 if (!fn
|| (p
&& ((unsigned long)strtol(p
+1,NULL
,10)) < offset
)) {
11377 redisLog(REDIS_WARNING
,"%s", messages
[i
]);
11379 redisLog(REDIS_WARNING
,"%d redis-server %p %s + %d", i
, trace
[i
], fn
, (unsigned int)offset
);
11382 /* free(messages); Don't call free() with possibly corrupted memory. */
11386 static void sigtermHandler(int sig
) {
11387 REDIS_NOTUSED(sig
);
11389 redisLog(REDIS_WARNING
,"SIGTERM received, scheduling shutting down...");
11390 server
.shutdown_asap
= 1;
11393 static void setupSigSegvAction(void) {
11394 struct sigaction act
;
11396 sigemptyset (&act
.sa_mask
);
11397 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11398 * is used. Otherwise, sa_handler is used */
11399 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
| SA_SIGINFO
;
11400 act
.sa_sigaction
= segvHandler
;
11401 sigaction (SIGSEGV
, &act
, NULL
);
11402 sigaction (SIGBUS
, &act
, NULL
);
11403 sigaction (SIGFPE
, &act
, NULL
);
11404 sigaction (SIGILL
, &act
, NULL
);
11405 sigaction (SIGBUS
, &act
, NULL
);
11407 act
.sa_flags
= SA_NODEFER
| SA_ONSTACK
| SA_RESETHAND
;
11408 act
.sa_handler
= sigtermHandler
;
11409 sigaction (SIGTERM
, &act
, NULL
);
11413 #include "staticsymbols.h"
11414 /* This function try to convert a pointer into a function name. It's used in
11415 * oreder to provide a backtrace under segmentation fault that's able to
11416 * display functions declared as static (otherwise the backtrace is useless). */
11417 static char *findFuncName(void *pointer
, unsigned long *offset
){
11419 unsigned long off
, minoff
= 0;
11421 /* Try to match against the Symbol with the smallest offset */
11422 for (i
=0; symsTable
[i
].pointer
; i
++) {
11423 unsigned long lp
= (unsigned long) pointer
;
11425 if (lp
!= (unsigned long)-1 && lp
>= symsTable
[i
].pointer
) {
11426 off
=lp
-symsTable
[i
].pointer
;
11427 if (ret
< 0 || off
< minoff
) {
11433 if (ret
== -1) return NULL
;
11435 return symsTable
[ret
].name
;
11437 #else /* HAVE_BACKTRACE */
/* Backtrace support is not compiled in: there is no handler to install. */
static void setupSigSegvAction(void) {
}
11440 #endif /* HAVE_BACKTRACE */