]> git.saurik.com Git - redis.git/blame - redis.c
test restored
[redis.git] / redis.c
CommitLineData
/*
 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
29
/* Version string of this Redis source tree. */
#define REDIS_VERSION "2.1.1"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
40#include <signal.h>
fbf9bcdb 41
42#ifdef HAVE_BACKTRACE
c9468bcf 43#include <execinfo.h>
44#include <ucontext.h>
fbf9bcdb 45#endif /* HAVE_BACKTRACE */
46
ed9b544e 47#include <sys/wait.h>
48#include <errno.h>
49#include <assert.h>
50#include <ctype.h>
51#include <stdarg.h>
52#include <inttypes.h>
53#include <arpa/inet.h>
54#include <sys/stat.h>
55#include <fcntl.h>
56#include <sys/time.h>
57#include <sys/resource.h>
2895e862 58#include <sys/uio.h>
f78fd11b 59#include <limits.h>
fb82e75c 60#include <float.h>
a7866db6 61#include <math.h>
92f8e882 62#include <pthread.h>
0bc1b2f6 63
64#if defined(__sun)
5043dff3 65#include "solarisfixes.h"
66#endif
ed9b544e 67
c9468bcf 68#include "redis.h"
ed9b544e 69#include "ae.h" /* Event driven programming library */
70#include "sds.h" /* Dynamic safe strings */
71#include "anet.h" /* Networking the easy way */
72#include "dict.h" /* Hash tables */
73#include "adlist.h" /* Linked lists */
74#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 75#include "lzf.h" /* LZF compression library */
76#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ba798261 77#include "zipmap.h" /* Compact dictionary-alike data structure */
78#include "sha1.h" /* SHA1 is used for DEBUG DIGEST */
5436146c 79#include "release.h" /* Release and/or git repository information */
ed9b544e 80
81/* Error codes */
82#define REDIS_OK 0
83#define REDIS_ERR -1
84
/* Static server configuration */
#define REDIS_SERVERPORT            6379    /* TCP port */
#define REDIS_MAXIDLETIME           (60*5)  /* default client timeout */
#define REDIS_IOBUF_LEN             1024
#define REDIS_LOADBUF_LEN           1024
#define REDIS_STATIC_ARGS           8
#define REDIS_DEFAULT_DBNUM         16
#define REDIS_CONFIGLINE_MAX        1024
#define REDIS_OBJFREELIST_MAX       1000000 /* Max number of objects to cache */
#define REDIS_MAX_SYNC_TIME         60      /* Slave can't take more to sync */
#define REDIS_EXPIRELOOKUPS_PER_CRON 10     /* lookup 10 expires per loop */
#define REDIS_MAX_WRITE_PER_EVENT   (1024*64)
#define REDIS_REQUEST_MAX_SIZE      (1024*1024*256) /* max bytes in inline command */

/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
#define REDIS_WRITEV_THRESHOLD      3
/* Max number of iovecs used for each writev call */
#define REDIS_WRITEV_IOVEC_COUNT    256
ed9b544e 103
/* Hash table parameters */
#define REDIS_HT_MINFILL        10      /* Minimal hash table fill 10% */

/* Command flags */
#define REDIS_CMD_BULK          1       /* Bulk write command */
#define REDIS_CMD_INLINE        2       /* Inline command */
/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
   this flag will return an error when the 'maxmemory' option is set in the
   config file and the server is using more than maxmemory bytes of memory.
   In short these commands are denied on low memory conditions. */
#define REDIS_CMD_DENYOOM       4
#define REDIS_CMD_FORCE_REPLICATION 8   /* Force replication even if dirty is 0 */
ed9b544e 116
/* Object types */
#define REDIS_STRING    0
#define REDIS_LIST      1
#define REDIS_SET       2
#define REDIS_ZSET      3
#define REDIS_HASH      4
#define REDIS_VMPOINTER 8
f78fd11b 124
/* Objects encoding. Some kind of objects like Strings and Hashes can be
 * internally represented in multiple ways. The 'encoding' field of the object
 * is set to one of these values for this object. */
#define REDIS_ENCODING_RAW      0   /* Raw representation */
#define REDIS_ENCODING_INT      1   /* Encoded as integer */
#define REDIS_ENCODING_ZIPMAP   2   /* Encoded as zipmap */
#define REDIS_ENCODING_HT       3   /* Encoded as an hash table */

/* Human readable encoding names, indexed by the REDIS_ENCODING_* value:
 * keep the order in sync with the defines above. */
static char *strencoding[] = {
    "raw", "int", "zipmap", "hashtable"
};
136
/* Object types only used for dumping to disk */
#define REDIS_EXPIRETIME    253
#define REDIS_SELECTDB      254
#define REDIS_EOF           255

/* Defines related to the dump file format. To store 32 bits lengths for short
 * keys requires a lot of space, so we check the most significant 2 bits of
 * the first byte to interpret the length:
 *
 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
 * 11|000000 this means: specially encoded object will follow. The six bits
 *           number specify the kind of object that follows.
 *           See the REDIS_RDB_ENC_* defines.
 *
 * Lengths up to 63 are stored using a single byte, most DB keys, and many
 * values, will fit inside. */
#define REDIS_RDB_6BITLEN   0
#define REDIS_RDB_14BITLEN  1
#define REDIS_RDB_32BITLEN  2
#define REDIS_RDB_ENCVAL    3
#define REDIS_RDB_LENERR    UINT_MAX
160
/* When a length of a string object stored on disk has the first two bits
 * set, the remaining six bits specify a special encoding for the object
 * accordingly to the following defines: */
#define REDIS_RDB_ENC_INT8  0   /* 8 bit signed integer */
#define REDIS_RDB_ENC_INT16 1   /* 16 bit signed integer */
#define REDIS_RDB_ENC_INT32 2   /* 32 bit signed integer */
#define REDIS_RDB_ENC_LZF   3   /* string compressed with LZF */
a4d1ba9a 168
/* Virtual memory object->where field. */
#define REDIS_VM_MEMORY     0   /* The object is on memory */
#define REDIS_VM_SWAPPED    1   /* The object is on disk */
#define REDIS_VM_SWAPPING   2   /* Redis is swapping this object on disk */
#define REDIS_VM_LOADING    3   /* Redis is loading this object from disk */

/* Virtual memory static configuration stuff.
 * Check vmFindContiguousPages() to know more about these magic numbers. */
#define REDIS_VM_MAX_NEAR_PAGES     65536
#define REDIS_VM_MAX_RANDOM_JUMP    4096
#define REDIS_VM_MAX_THREADS        32
#define REDIS_THREAD_STACK_SIZE     (1024*1024*4)
/* The following is the *percentage* of completed I/O jobs to process when the
 * handler is called. While Virtual Memory I/O operations are performed by
 * threads, these operations must be processed by the main thread when
 * completed in order to take effect. */
#define REDIS_MAX_COMPLETED_JOBS_PROCESSED 1
06224fec 186
/* Client flags (bit mask values, see redisClient.flags) */
#define REDIS_SLAVE     1   /* This client is a slave server */
#define REDIS_MASTER    2   /* This client is a master server */
#define REDIS_MONITOR   4   /* This client is a slave monitor, see MONITOR */
#define REDIS_MULTI     8   /* This client is in a MULTI context */
#define REDIS_BLOCKED   16  /* The client is waiting in a blocking operation */
#define REDIS_IO_WAIT   32  /* The client is waiting for Virtual Memory I/O */
#define REDIS_DIRTY_CAS 64  /* Watched keys modified. EXEC will fail. */
ed9b544e 195
/* Slave replication state - slave side */
#define REDIS_REPL_NONE         0   /* No active replication */
#define REDIS_REPL_CONNECT      1   /* Must connect to master */
#define REDIS_REPL_CONNECTED    2   /* Connected to master */

/* Slave replication state - from the point of view of master
 * Note that in SEND_BULK and ONLINE state the slave receives new updates
 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
 * to start the next background saving in order to send updates to it. */
#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
#define REDIS_REPL_WAIT_BGSAVE_END   4 /* master waits bgsave to start bulk DB transmission */
#define REDIS_REPL_SEND_BULK         5 /* master is sending the bulk DB */
#define REDIS_REPL_ONLINE            6 /* bulk DB already transmitted, receive updates */
209
/* List related stuff */
#define REDIS_HEAD  0
#define REDIS_TAIL  1

/* Sort operations */
#define REDIS_SORT_GET      0
#define REDIS_SORT_ASC      1
#define REDIS_SORT_DESC     2
#define REDIS_SORTKEY_MAX   1024
219
/* Log levels, in increasing numeric order */
#define REDIS_DEBUG     0
#define REDIS_VERBOSE   1
#define REDIS_NOTICE    2
#define REDIS_WARNING   3
ed9b544e 225
/* Anti-warning macro: evaluates V and discards the result. The argument is
 * parenthesized so that any expression can be passed safely. */
#define REDIS_NOTUSED(V) ((void)(V))

#define ZSKIPLIST_MAXLEVEL 32   /* Should be enough for 2^32 elements */
#define ZSKIPLIST_P 0.25        /* Skiplist P = 1/4 */
ed9b544e 231
/* Append only defines */
#define APPENDFSYNC_NO          0
#define APPENDFSYNC_ALWAYS      1
#define APPENDFSYNC_EVERYSEC    2

/* Hashes related defaults */
#define REDIS_HASH_MAX_ZIPMAP_ENTRIES   64
#define REDIS_HASH_MAX_ZIPMAP_VALUE     512
240
/* We can print the stacktrace, so our assert is defined this way.
 *
 * redisAssert(e): when 'e' is false, calls _redisAssert() with the
 * stringified expression, file and line, then terminates with _exit(1).
 * redisPanic(msg): calls _redisPanic() with the stringified argument, file
 * and line, then terminates with _exit(1).
 *
 * Both expansions are fully parenthesized so that they behave as a single
 * expression wherever they are used. */
#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e,__FILE__,__LINE__),_exit(1)))
#define redisPanic(_e) (_redisPanic(#_e,__FILE__,__LINE__),_exit(1))
static void _redisAssert(char *estr, char *file, int line);
static void _redisPanic(char *msg, char *file, int line);
dfc5e96c 246
/*================================= Data types ============================== */

/* A redis object, that is a type able to hold a string / list / set */

/* The actual Redis Object.
 * The four bit fields add up to 32 bits (4 + 2 + 4 + 22). */
typedef struct redisObject {
    unsigned type:4;
    unsigned storage:2;     /* REDIS_VM_MEMORY or REDIS_VM_SWAPPING */
    unsigned encoding:4;
    unsigned lru:22;        /* lru time (relative to server.lruclock) */
    int refcount;
    void *ptr;
    /* VM fields, these are only allocated if VM is active, otherwise the
     * object allocation function will just allocate
     * sizeof(redisObject) minus sizeof(redisObjectVM), so using
     * Redis without VM active will not have any overhead.
     *
     * NOTE(review): no VM fields are declared after 'ptr' in this struct, so
     * the paragraph above looks stale -- confirm and drop it if so. */
} robj;
264
/* The VM pointer structure - identifies an object in the swap file.
 *
 * This object is stored in place of the value
 * object in the main key->value hash table representing a database.
 * Note that the first fields (type, storage) are the same as the redisObject
 * structure so that vmPointer structures can be accessed even when casted
 * as redisObject structures.
 *
 * This is useful as we don't know if a value object is or not on disk, but we
 * are always able to read obj->storage to check this. For vmPointer
 * structures "type" is set to REDIS_VMPOINTER (even if without this field
 * it is still possible to check the kind of object from the value of
 * 'storage'). */
typedef struct vmPointer {
    unsigned type:4;
    unsigned storage:2;     /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
    unsigned notused:26;
    unsigned int vtype;     /* type of the object stored in the swap file */
    off_t page;             /* the page at which the object is stored on disk */
    off_t usedpages;        /* number of pages used on disk */
} vmpointer;
285
/* Macro used to initialize a Redis object allocated on the stack.
 * Note that this macro is taken near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85 introduced exactly in this way.
 *
 * _var is the robj variable (not a pointer) to set up, _ptr the value stored
 * in its 'ptr' field. The object is set as a raw encoded string with a
 * reference count of 1, stored in memory. The 'lru' field is left untouched.
 *
 * The expansion intentionally has no trailing semicolon: the caller supplies
 * it, so the macro behaves as a single statement even inside an unbraced
 * if/else. */
#define initStaticStringObject(_var,_ptr) do { \
    (_var).refcount = 1; \
    (_var).type = REDIS_STRING; \
    (_var).encoding = REDIS_ENCODING_RAW; \
    (_var).ptr = (_ptr); \
    (_var).storage = REDIS_VM_MEMORY; \
} while(0)
297
3305306f 298typedef struct redisDb {
4409877e 299 dict *dict; /* The keyspace for this DB */
300 dict *expires; /* Timeout of keys with a timeout set */
37ab76c9 301 dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
d5d55fc3 302 dict *io_keys; /* Keys with clients waiting for VM I/O */
37ab76c9 303 dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
3305306f 304 int id;
305} redisDb;
306
6e469882 307/* Client MULTI/EXEC state */
308typedef struct multiCmd {
309 robj **argv;
310 int argc;
311 struct redisCommand *cmd;
312} multiCmd;
313
314typedef struct multiState {
315 multiCmd *commands; /* Array of MULTI commands */
316 int count; /* Total number of MULTI commands */
317} multiState;
318
ed9b544e 319/* With multiplexing we need to take per-clinet state.
320 * Clients are taken in a liked list. */
321typedef struct redisClient {
322 int fd;
3305306f 323 redisDb *db;
ed9b544e 324 int dictid;
325 sds querybuf;
e8a74421 326 robj **argv, **mbargv;
327 int argc, mbargc;
40d224a9 328 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 329 int multibulk; /* multi bulk command format active */
ed9b544e 330 list *reply;
331 int sentlen;
332 time_t lastinteraction; /* time of the last interaction, used for timeout */
d5d55fc3 333 int flags; /* REDIS_SLAVE | REDIS_MONITOR | REDIS_MULTI ... */
40d224a9 334 int slaveseldb; /* slave selected db, if this client is a slave */
335 int authenticated; /* when requirepass is non-NULL */
336 int replstate; /* replication state if this is a slave */
337 int repldbfd; /* replication DB file descriptor */
6e469882 338 long repldboff; /* replication DB file offset */
40d224a9 339 off_t repldbsize; /* replication DB file size */
6e469882 340 multiState mstate; /* MULTI/EXEC state */
37ab76c9 341 robj **blocking_keys; /* The key we are waiting to terminate a blocking
4409877e 342 * operation such as BLPOP. Otherwise NULL. */
37ab76c9 343 int blocking_keys_num; /* Number of blocking keys */
4409877e 344 time_t blockingto; /* Blocking operation timeout. If UNIX current time
345 * is >= blockingto then the operation timed out. */
92f8e882 346 list *io_keys; /* Keys this client is waiting to be loaded from the
347 * swap file in order to continue. */
37ab76c9 348 list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
ffc6b7f8 349 dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
350 list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
ed9b544e 351} redisClient;
352
/* One save point: a (seconds, changes) pair. An array of these is kept in
 * redisServer.saveparams. */
struct saveparam {
    time_t seconds;
    int changes;
};
357
358/* Global server state structure */
359struct redisServer {
360 int port;
361 int fd;
3305306f 362 redisDb *db;
ed9b544e 363 long long dirty; /* changes to DB from the last save */
364 list *clients;
87eca727 365 list *slaves, *monitors;
ed9b544e 366 char neterr[ANET_ERR_LEN];
367 aeEventLoop *el;
368 int cronloops; /* number of times the cron function run */
369 list *objfreelist; /* A list of freed objects to avoid malloc() */
370 time_t lastsave; /* Unix time of last save succeeede */
ed9b544e 371 /* Fields used only for stats */
372 time_t stat_starttime; /* server start time */
373 long long stat_numcommands; /* number of processed commands */
374 long long stat_numconnections; /* number of connections received */
2a6a2ed1 375 long long stat_expiredkeys; /* number of expired keys */
ed9b544e 376 /* Configuration */
377 int verbosity;
378 int glueoutputbuf;
379 int maxidletime;
380 int dbnum;
381 int daemonize;
44b38ef4 382 int appendonly;
48f0308a 383 int appendfsync;
38db9171 384 int no_appendfsync_on_rewrite;
fab43727 385 int shutdown_asap;
48f0308a 386 time_t lastfsync;
44b38ef4 387 int appendfd;
388 int appendseldb;
ed329fcf 389 char *pidfile;
9f3c422c 390 pid_t bgsavechildpid;
9d65a1bb 391 pid_t bgrewritechildpid;
392 sds bgrewritebuf; /* buffer taken by parent during oppend only rewrite */
28ed1f33 393 sds aofbuf; /* AOF buffer, written before entering the event loop */
ed9b544e 394 struct saveparam *saveparams;
395 int saveparamslen;
396 char *logfile;
397 char *bindaddr;
398 char *dbfilename;
44b38ef4 399 char *appendfilename;
abcb223e 400 char *requirepass;
121f70cf 401 int rdbcompression;
8ca3e9d1 402 int activerehashing;
ed9b544e 403 /* Replication related */
404 int isslave;
d0ccebcf 405 char *masterauth;
ed9b544e 406 char *masterhost;
407 int masterport;
40d224a9 408 redisClient *master; /* client that is master for this slave */
ed9b544e 409 int replstate;
285add55 410 unsigned int maxclients;
4ef8de8a 411 unsigned long long maxmemory;
d5d55fc3 412 unsigned int blpop_blocked_clients;
413 unsigned int vm_blocked_clients;
ed9b544e 414 /* Sort parameters - qsort_r() is only available under BSD so we
415 * have to take this state global, in order to pass it to sortCompare() */
416 int sort_desc;
417 int sort_alpha;
418 int sort_bypattern;
75680a3c 419 /* Virtual memory configuration */
420 int vm_enabled;
054e426d 421 char *vm_swap_file;
75680a3c 422 off_t vm_page_size;
423 off_t vm_pages;
4ef8de8a 424 unsigned long long vm_max_memory;
cbba7dd7 425 /* Hashes config */
426 size_t hash_max_zipmap_entries;
427 size_t hash_max_zipmap_value;
75680a3c 428 /* Virtual memory state */
429 FILE *vm_fp;
430 int vm_fd;
431 off_t vm_next_page; /* Next probably empty page */
432 off_t vm_near_pages; /* Number of pages allocated sequentially */
06224fec 433 unsigned char *vm_bitmap; /* Bitmap of free/used pages */
3a66edc7 434 time_t unixtime; /* Unix time sampled every second. */
92f8e882 435 /* Virtual memory I/O threads stuff */
92f8e882 436 /* An I/O thread process an element taken from the io_jobs queue and
996cb5f7 437 * put the result of the operation in the io_done list. While the
438 * job is being processed, it's put on io_processing queue. */
439 list *io_newjobs; /* List of VM I/O jobs yet to be processed */
440 list *io_processing; /* List of VM I/O jobs being processed */
441 list *io_processed; /* List of VM I/O jobs already processed */
d5d55fc3 442 list *io_ready_clients; /* Clients ready to be unblocked. All keys loaded */
996cb5f7 443 pthread_mutex_t io_mutex; /* lock to access io_jobs/io_done/io_thread_job */
a5819310 444 pthread_mutex_t obj_freelist_mutex; /* safe redis objects creation/free */
445 pthread_mutex_t io_swapfile_mutex; /* So we can lseek + write */
bcaa7a4f 446 pthread_attr_t io_threads_attr; /* attributes for threads creation */
92f8e882 447 int io_active_threads; /* Number of running I/O threads */
448 int vm_max_threads; /* Max number of I/O threads running at the same time */
996cb5f7 449 /* Our main thread is blocked on the event loop, locking for sockets ready
450 * to be read or written, so when a threaded I/O operation is ready to be
451 * processed by the main thread, the I/O thread will use a unix pipe to
452 * awake the main thread. The followings are the two pipe FDs. */
453 int io_ready_pipe_read;
454 int io_ready_pipe_write;
7d98e08c 455 /* Virtual memory stats */
456 unsigned long long vm_stats_used_pages;
457 unsigned long long vm_stats_swapped_objects;
458 unsigned long long vm_stats_swapouts;
459 unsigned long long vm_stats_swapins;
befec3cd 460 /* Pubsub */
ffc6b7f8 461 dict *pubsub_channels; /* Map channels to list of subscribed clients */
462 list *pubsub_patterns; /* A list of pubsub_patterns */
befec3cd 463 /* Misc */
b9bc0eef 464 FILE *devnull;
560db612 465 unsigned lruclock:22; /* clock incrementing every minute, for LRU */
466 unsigned lruclock_padding:10;
ed9b544e 467};
468
ffc6b7f8 469typedef struct pubsubPattern {
470 redisClient *client;
471 robj *pattern;
472} pubsubPattern;
473
ed9b544e 474typedef void redisCommandProc(redisClient *c);
ca1788b5 475typedef void redisVmPreloadProc(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
ed9b544e 476struct redisCommand {
477 char *name;
478 redisCommandProc *proc;
479 int arity;
480 int flags;
76583ea4
PN
481 /* Use a function to determine which keys need to be loaded
482 * in the background prior to executing this command. Takes precedence
483 * over vm_firstkey and others, ignored when NULL */
ca1788b5 484 redisVmPreloadProc *vm_preload_proc;
7c775e09 485 /* What keys should be loaded in background when calling this command? */
486 int vm_firstkey; /* The first argument that's a key (0 = no keys) */
487 int vm_lastkey; /* THe last argument that's a key */
488 int vm_keystep; /* The step between first and last key */
ed9b544e 489};
490
/* Associates a function name with an address stored as an integer. */
struct redisFunctionSym {
    char *name;
    unsigned long pointer;
};
495
ed9b544e 496typedef struct _redisSortObject {
497 robj *obj;
498 union {
499 double score;
500 robj *cmpobj;
501 } u;
502} redisSortObject;
503
504typedef struct _redisSortOperation {
505 int type;
506 robj *pattern;
507} redisSortOperation;
508
6b47e12e 509/* ZSETs use a specialized version of Skiplists */
510
511typedef struct zskiplistNode {
512 struct zskiplistNode **forward;
e3870fab 513 struct zskiplistNode *backward;
912b9165 514 unsigned int *span;
6b47e12e 515 double score;
516 robj *obj;
517} zskiplistNode;
518
/* Skiplist container: header and tail nodes, element count, level. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail;
    unsigned long length;
    int level;
} zskiplist;
524
1812e024 525typedef struct zset {
526 dict *dict;
6b47e12e 527 zskiplist *zsl;
1812e024 528} zset;
529
6b47e12e 530/* Our shared "common" objects */
531
05df7621 532#define REDIS_SHARED_INTEGERS 10000
ed9b544e 533struct sharedObjectsStruct {
c937aa89 534 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
6e469882 535 *colon, *nullbulk, *nullmultibulk, *queued,
c937aa89 536 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
537 *outofrangeerr, *plus,
ed9b544e 538 *select0, *select1, *select2, *select3, *select4,
befec3cd 539 *select5, *select6, *select7, *select8, *select9,
c8d0ea0e 540 *messagebulk, *pmessagebulk, *subscribebulk, *unsubscribebulk, *mbulk3,
541 *mbulk4, *psubscribebulk, *punsubscribebulk,
542 *integers[REDIS_SHARED_INTEGERS];
ed9b544e 543} shared;
544
/* Global vars that are actually used as constants. The following double
 * values are used for double on-disk serialization, and are initialized
 * at runtime to avoid strange compiler optimizations. */

static double R_Zero, R_PosInf, R_NegInf, R_Nan;
550
92f8e882 551/* VM threaded I/O request message */
b9bc0eef 552#define REDIS_IOJOB_LOAD 0 /* Load from disk to memory */
553#define REDIS_IOJOB_PREPARE_SWAP 1 /* Compute needed pages */
554#define REDIS_IOJOB_DO_SWAP 2 /* Swap from memory to disk */
d5d55fc3 555typedef struct iojob {
996cb5f7 556 int type; /* Request type, REDIS_IOJOB_* */
b9bc0eef 557 redisDb *db;/* Redis database */
92f8e882 558 robj *key; /* This I/O request is about swapping this key */
560db612 559 robj *id; /* Unique identifier of this job:
560 this is the object to swap for REDIS_IOREQ_*_SWAP, or the
561 vmpointer objct for REDIS_IOREQ_LOAD. */
b9bc0eef 562 robj *val; /* the value to swap for REDIS_IOREQ_*_SWAP, otherwise this
92f8e882 563 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
564 off_t page; /* Swap page where to read/write the object */
248ea310 565 off_t pages; /* Swap pages needed to save object. PREPARE_SWAP return val */
996cb5f7 566 int canceled; /* True if this command was canceled by blocking side of VM */
567 pthread_t thread; /* ID of the thread processing this entry */
568} iojob;
92f8e882 569
ed9b544e 570/*================================ Prototypes =============================== */
571
572static void freeStringObject(robj *o);
573static void freeListObject(robj *o);
574static void freeSetObject(robj *o);
575static void decrRefCount(void *o);
576static robj *createObject(int type, void *ptr);
577static void freeClient(redisClient *c);
f78fd11b 578static int rdbLoad(char *filename);
ed9b544e 579static void addReply(redisClient *c, robj *obj);
580static void addReplySds(redisClient *c, sds s);
581static void incrRefCount(robj *o);
f78fd11b 582static int rdbSaveBackground(char *filename);
ed9b544e 583static robj *createStringObject(char *ptr, size_t len);
4ef8de8a 584static robj *dupStringObject(robj *o);
248ea310 585static void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
dd142b9c 586static void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc);
28ed1f33 587static void flushAppendOnlyFile(void);
44b38ef4 588static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 589static int syncWithMaster(void);
05df7621 590static robj *tryObjectEncoding(robj *o);
9d65a1bb 591static robj *getDecodedObject(robj *o);
3305306f 592static int removeExpire(redisDb *db, robj *key);
593static int expireIfNeeded(redisDb *db, robj *key);
594static int deleteIfVolatile(redisDb *db, robj *key);
09241813 595static int dbDelete(redisDb *db, robj *key);
bb32ede5 596static time_t getExpire(redisDb *db, robj *key);
597static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 598static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 599static void freeMemoryIfNeeded(void);
de96dbfe 600static int processCommand(redisClient *c);
56906eef 601static void setupSigSegvAction(void);
a3b21203 602static void rdbRemoveTempFile(pid_t childpid);
9d65a1bb 603static void aofRemoveTempFile(pid_t childpid);
0ea663ea 604static size_t stringObjectLen(robj *o);
638e42ac 605static void processInputBuffer(redisClient *c);
6b47e12e 606static zskiplist *zslCreate(void);
fd8ccf44 607static void zslFree(zskiplist *zsl);
2b59cfdf 608static void zslInsert(zskiplist *zsl, double score, robj *obj);
2895e862 609static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
6e469882 610static void initClientMultiState(redisClient *c);
611static void freeClientMultiState(redisClient *c);
612static void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
b0d8747d 613static void unblockClientWaitingData(redisClient *c);
4409877e 614static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
75680a3c 615static void vmInit(void);
a35ddf12 616static void vmMarkPagesFree(off_t page, off_t count);
560db612 617static robj *vmLoadObject(robj *o);
618static robj *vmPreviewObject(robj *o);
a69a0c9c 619static int vmSwapOneObjectBlocking(void);
620static int vmSwapOneObjectThreaded(void);
7e69548d 621static int vmCanSwapOut(void);
a5819310 622static int tryFreeOneObjectFromFreelist(void);
996cb5f7 623static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
624static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata, int mask);
625static void vmCancelThreadedIOJob(robj *o);
b9bc0eef 626static void lockThreadedIO(void);
627static void unlockThreadedIO(void);
628static int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db);
629static void freeIOJob(iojob *j);
630static void queueIOJob(iojob *j);
a5819310 631static int vmWriteObjectOnSwap(robj *o, off_t page);
632static robj *vmReadObjectFromSwap(off_t page, int type);
054e426d 633static void waitEmptyIOJobsQueue(void);
634static void vmReopenSwapFile(void);
970e10bb 635static int vmFreePage(off_t page);
ca1788b5 636static void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
3805e04f 637static void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv);
0a6f3f0f 638static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd);
d5d55fc3 639static int dontWaitForSwappedKey(redisClient *c, robj *key);
640static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key);
641static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
642static struct redisCommand *lookupCommand(char *name);
643static void call(redisClient *c, struct redisCommand *cmd);
644static void resetClient(redisClient *c);
ada386b2 645static void convertToRealHash(robj *o);
ffc6b7f8 646static int pubsubUnsubscribeAllChannels(redisClient *c, int notify);
647static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify);
648static void freePubsubPattern(void *p);
649static int listMatchPubsubPattern(void *a, void *b);
650static int compareStringObjects(robj *a, robj *b);
bf028098 651static int equalStringObjects(robj *a, robj *b);
befec3cd 652static void usage();
8f63ddca 653static int rewriteAppendOnlyFileBackground(void);
560db612 654static vmpointer *vmSwapObjectBlocking(robj *val);
fab43727 655static int prepareForShutdown();
37ab76c9 656static void touchWatchedKey(redisDb *db, robj *key);
9b30e1a2 657static void touchWatchedKeysOnFlush(int dbid);
37ab76c9 658static void unwatchAllKeys(redisClient *c);
ed9b544e 659
abcb223e 660static void authCommand(redisClient *c);
ed9b544e 661static void pingCommand(redisClient *c);
662static void echoCommand(redisClient *c);
663static void setCommand(redisClient *c);
664static void setnxCommand(redisClient *c);
526d00a5 665static void setexCommand(redisClient *c);
ed9b544e 666static void getCommand(redisClient *c);
667static void delCommand(redisClient *c);
668static void existsCommand(redisClient *c);
669static void incrCommand(redisClient *c);
670static void decrCommand(redisClient *c);
671static void incrbyCommand(redisClient *c);
672static void decrbyCommand(redisClient *c);
673static void selectCommand(redisClient *c);
674static void randomkeyCommand(redisClient *c);
675static void keysCommand(redisClient *c);
676static void dbsizeCommand(redisClient *c);
677static void lastsaveCommand(redisClient *c);
678static void saveCommand(redisClient *c);
679static void bgsaveCommand(redisClient *c);
9d65a1bb 680static void bgrewriteaofCommand(redisClient *c);
ed9b544e 681static void shutdownCommand(redisClient *c);
682static void moveCommand(redisClient *c);
683static void renameCommand(redisClient *c);
684static void renamenxCommand(redisClient *c);
685static void lpushCommand(redisClient *c);
686static void rpushCommand(redisClient *c);
687static void lpopCommand(redisClient *c);
688static void rpopCommand(redisClient *c);
689static void llenCommand(redisClient *c);
690static void lindexCommand(redisClient *c);
691static void lrangeCommand(redisClient *c);
692static void ltrimCommand(redisClient *c);
693static void typeCommand(redisClient *c);
694static void lsetCommand(redisClient *c);
695static void saddCommand(redisClient *c);
696static void sremCommand(redisClient *c);
a4460ef4 697static void smoveCommand(redisClient *c);
ed9b544e 698static void sismemberCommand(redisClient *c);
699static void scardCommand(redisClient *c);
12fea928 700static void spopCommand(redisClient *c);
2abb95a9 701static void srandmemberCommand(redisClient *c);
ed9b544e 702static void sinterCommand(redisClient *c);
703static void sinterstoreCommand(redisClient *c);
40d224a9 704static void sunionCommand(redisClient *c);
705static void sunionstoreCommand(redisClient *c);
f4f56e1d 706static void sdiffCommand(redisClient *c);
707static void sdiffstoreCommand(redisClient *c);
ed9b544e 708static void syncCommand(redisClient *c);
709static void flushdbCommand(redisClient *c);
710static void flushallCommand(redisClient *c);
711static void sortCommand(redisClient *c);
712static void lremCommand(redisClient *c);
0f5f7e9a 713static void rpoplpushcommand(redisClient *c);
ed9b544e 714static void infoCommand(redisClient *c);
70003d28 715static void mgetCommand(redisClient *c);
87eca727 716static void monitorCommand(redisClient *c);
3305306f 717static void expireCommand(redisClient *c);
802e8373 718static void expireatCommand(redisClient *c);
f6b141c5 719static void getsetCommand(redisClient *c);
fd88489a 720static void ttlCommand(redisClient *c);
321b0e13 721static void slaveofCommand(redisClient *c);
7f957c92 722static void debugCommand(redisClient *c);
f6b141c5 723static void msetCommand(redisClient *c);
724static void msetnxCommand(redisClient *c);
fd8ccf44 725static void zaddCommand(redisClient *c);
7db723ad 726static void zincrbyCommand(redisClient *c);
cc812361 727static void zrangeCommand(redisClient *c);
50c55df5 728static void zrangebyscoreCommand(redisClient *c);
f44dd428 729static void zcountCommand(redisClient *c);
e3870fab 730static void zrevrangeCommand(redisClient *c);
3c41331e 731static void zcardCommand(redisClient *c);
1b7106e7 732static void zremCommand(redisClient *c);
6e333bbe 733static void zscoreCommand(redisClient *c);
1807985b 734static void zremrangebyscoreCommand(redisClient *c);
6e469882 735static void multiCommand(redisClient *c);
736static void execCommand(redisClient *c);
18b6cb76 737static void discardCommand(redisClient *c);
4409877e 738static void blpopCommand(redisClient *c);
739static void brpopCommand(redisClient *c);
4b00bebd 740static void appendCommand(redisClient *c);
39191553 741static void substrCommand(redisClient *c);
69d95c3e 742static void zrankCommand(redisClient *c);
798d9e55 743static void zrevrankCommand(redisClient *c);
978c2c94 744static void hsetCommand(redisClient *c);
1f1c7695 745static void hsetnxCommand(redisClient *c);
978c2c94 746static void hgetCommand(redisClient *c);
09aeb579
PN
747static void hmsetCommand(redisClient *c);
748static void hmgetCommand(redisClient *c);
07efaf74 749static void hdelCommand(redisClient *c);
92b27fe9 750static void hlenCommand(redisClient *c);
9212eafd 751static void zremrangebyrankCommand(redisClient *c);
5d373da9 752static void zunionstoreCommand(redisClient *c);
753static void zinterstoreCommand(redisClient *c);
78409a0f 754static void hkeysCommand(redisClient *c);
755static void hvalsCommand(redisClient *c);
756static void hgetallCommand(redisClient *c);
a86f14b1 757static void hexistsCommand(redisClient *c);
500ece7c 758static void configCommand(redisClient *c);
01426b05 759static void hincrbyCommand(redisClient *c);
befec3cd 760static void subscribeCommand(redisClient *c);
761static void unsubscribeCommand(redisClient *c);
ffc6b7f8 762static void psubscribeCommand(redisClient *c);
763static void punsubscribeCommand(redisClient *c);
befec3cd 764static void publishCommand(redisClient *c);
37ab76c9 765static void watchCommand(redisClient *c);
766static void unwatchCommand(redisClient *c);
f6b141c5 767
ed9b544e 768/*================================= Globals ================================= */
769
770/* Global vars */
771static struct redisServer server; /* server global state */
1a132bbc 772static struct redisCommand *commandTable;
1a132bbc 773static struct redisCommand readonlyCommandTable[] = {
76583ea4
PN
774 {"get",getCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
775 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
776 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
526d00a5 777 {"setex",setexCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,0,0,0},
76583ea4
PN
778 {"append",appendCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
779 {"substr",substrCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
780 {"del",delCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
781 {"exists",existsCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
782 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
783 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
784 {"mget",mgetCommand,-2,REDIS_CMD_INLINE,NULL,1,-1,1},
785 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
786 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
787 {"rpop",rpopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
788 {"lpop",lpopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
789 {"brpop",brpopCommand,-3,REDIS_CMD_INLINE,NULL,1,1,1},
790 {"blpop",blpopCommand,-3,REDIS_CMD_INLINE,NULL,1,1,1},
791 {"llen",llenCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
792 {"lindex",lindexCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
793 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
794 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
795 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
796 {"lrem",lremCommand,4,REDIS_CMD_BULK,NULL,1,1,1},
797 {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,2,1},
798 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
799 {"srem",sremCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
800 {"smove",smoveCommand,4,REDIS_CMD_BULK,NULL,1,2,1},
801 {"sismember",sismemberCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
802 {"scard",scardCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
803 {"spop",spopCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
804 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
805 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
806 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
807 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
808 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
809 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,-1,1},
810 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,2,-1,1},
811 {"smembers",sinterCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
812 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
813 {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
814 {"zrem",zremCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
815 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
816 {"zremrangebyrank",zremrangebyrankCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
5d373da9 817 {"zunionstore",zunionstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0},
818 {"zinterstore",zinterstoreCommand,-4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,zunionInterBlockClientOnSwappedKeys,0,0,0},
76583ea4
PN
819 {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
820 {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
821 {"zcount",zcountCommand,4,REDIS_CMD_INLINE,NULL,1,1,1},
822 {"zrevrange",zrevrangeCommand,-4,REDIS_CMD_INLINE,NULL,1,1,1},
823 {"zcard",zcardCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
824 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
825 {"zrank",zrankCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
826 {"zrevrank",zrevrankCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
827 {"hset",hsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
1f1c7695 828 {"hsetnx",hsetnxCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
09aeb579 829 {"hget",hgetCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
d33278d1 830 {"hmset",hmsetCommand,-4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
09aeb579 831 {"hmget",hmgetCommand,-3,REDIS_CMD_BULK,NULL,1,1,1},
01426b05 832 {"hincrby",hincrbyCommand,4,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
76583ea4
PN
833 {"hdel",hdelCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
834 {"hlen",hlenCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
835 {"hkeys",hkeysCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
836 {"hvals",hvalsCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
837 {"hgetall",hgetallCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
4583c4f0 838 {"hexists",hexistsCommand,3,REDIS_CMD_BULK,NULL,1,1,1},
76583ea4
PN
839 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
840 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
841 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,1,1},
842 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,-1,2},
843 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM,NULL,1,-1,2},
844 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
845 {"select",selectCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
846 {"move",moveCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
847 {"rename",renameCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
848 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE,NULL,1,1,1},
849 {"expire",expireCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
850 {"expireat",expireatCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
851 {"keys",keysCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
852 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
853 {"auth",authCommand,2,REDIS_CMD_INLINE,NULL,0,0,0},
854 {"ping",pingCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
855 {"echo",echoCommand,2,REDIS_CMD_BULK,NULL,0,0,0},
856 {"save",saveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
857 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
858 {"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
859 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
860 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
861 {"type",typeCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
862 {"multi",multiCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
3805e04f 863 {"exec",execCommand,1,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,execBlockClientOnSwappedKeys,0,0,0},
76583ea4
PN
864 {"discard",discardCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
865 {"sync",syncCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
866 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
867 {"flushall",flushallCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
868 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM,NULL,1,1,1},
869 {"info",infoCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
870 {"monitor",monitorCommand,1,REDIS_CMD_INLINE,NULL,0,0,0},
871 {"ttl",ttlCommand,2,REDIS_CMD_INLINE,NULL,1,1,1},
872 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE,NULL,0,0,0},
873 {"debug",debugCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
500ece7c 874 {"config",configCommand,-2,REDIS_CMD_BULK,NULL,0,0,0},
befec3cd 875 {"subscribe",subscribeCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
876 {"unsubscribe",unsubscribeCommand,-1,REDIS_CMD_INLINE,NULL,0,0,0},
ffc6b7f8 877 {"psubscribe",psubscribeCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
878 {"punsubscribe",punsubscribeCommand,-1,REDIS_CMD_INLINE,NULL,0,0,0},
4005fef1 879 {"publish",publishCommand,3,REDIS_CMD_BULK|REDIS_CMD_FORCE_REPLICATION,NULL,0,0,0},
37ab76c9 880 {"watch",watchCommand,-2,REDIS_CMD_INLINE,NULL,0,0,0},
d55d5c5d 881 {"unwatch",unwatchCommand,1,REDIS_CMD_INLINE,NULL,0,0,0}
ed9b544e 882};
bcfc686d 883
ed9b544e 884/*============================ Utility functions ============================ */
885
/* Glob-style pattern matching over length-delimited buffers.
 *
 * Supported syntax:
 *   *        matches any sequence of characters, including the empty one
 *   ?        matches exactly one character
 *   [abc]    matches one character out of the listed set
 *   [^abc]   matches one character NOT in the listed set
 *   [a-z]    matches one character in the inclusive range (bounds may be
 *            given in either order)
 *   \x       matches the character x literally (also inside [...])
 *
 * 'pattern'/'patternLen' and 'string'/'stringLen' are the two buffers; no
 * byte at or past the given lengths is ever read, so neither buffer has to
 * be NUL terminated. When 'nocase' is non-zero literal characters and
 * ranges are compared through tolower(); as before, a backslash-escaped
 * character inside [...] is always compared exactly.
 *
 * Returns 1 if the whole string matches the whole pattern, 0 otherwise. */
static int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    /* Every case below consumes subject characters, so the loop only runs
     * while there is something left on both sides. This keeps stringLen
     * from going negative (an empty subject used to match "[^a]*"). */
    while (patternLen && stringLen) {
        switch (pattern[0]) {
        case '*':
            /* Collapse runs of '*' without looking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try the rest of the pattern at every subject position. */
            while (stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            pattern++;
            patternLen--;
            not = patternLen && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back so the common
                     * "advance the pattern" code below lands exactly on
                     * the end of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    /* Go through unsigned char so that bytes >= 0x80 are
                     * valid arguments for tolower() and order sanely. */
                    int start = (unsigned char)pattern[0];
                    int end = (unsigned char)pattern[2];
                    int c = (unsigned char)string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            /* A trailing backslash stands for itself. */
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
    }
    /* Subject exhausted: a run of trailing '*' can still match nothing. */
    if (stringLen == 0) {
        while (patternLen && *pattern == '*') {
            pattern++;
            patternLen--;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
1008
/* Convenience wrapper around stringmatchlen() for NUL-terminated C strings:
 * both lengths are taken with strlen(). Returns 1 on match, 0 otherwise. */
static int stringmatch(const char *pattern, const char *string, int nocase) {
    int plen = strlen(pattern);
    int slen = strlen(string);

    return stringmatchlen(pattern,plen,string,slen,nocase);
}
1012
/* Convert a string representing an amount of memory into the number of
 * bytes, so for instance memtoll("1gb") will return 1073741824 that is
 * (1024*1024*1024).
 *
 * The string is an optional '-', a run of decimal digits and an optional
 * case-insensitive unit:
 *   (none) or "b" -> 1
 *   "k"  -> 1000             "kb" -> 1024
 *   "m"  -> 1000*1000        "mb" -> 1024*1024
 *   "g"  -> 1000*1000*1000   "gb" -> 1024*1024*1024
 *
 * If 'err' is not NULL, *err is set to 1 on a parsing error and to 0
 * otherwise. Errors are:
 *   - an unknown unit: the number is returned as if no unit was given;
 *   - a numeric part that does not fit the internal buffer: LLONG_MAX is
 *     returned;
 *   - a result that does not fit a long long: the value saturates to
 *     LLONG_MAX or LLONG_MIN. */
static long long memtoll(const char *p, int *err) {
    const char *u;
    char buf[128];
    long long mul; /* unit multiplier */
    long long val;
    unsigned int digits;

    if (err) *err = 0;
    /* Search the first non digit character. */
    u = p;
    if (*u == '-') u++;
    /* isdigit() needs a value representable as unsigned char. */
    while(*u && isdigit((unsigned char)*u)) u++;
    if (*u == '\0' || !strcasecmp(u,"b")) {
        mul = 1;
    } else if (!strcasecmp(u,"k")) {
        mul = 1000;
    } else if (!strcasecmp(u,"kb")) {
        mul = 1024;
    } else if (!strcasecmp(u,"m")) {
        mul = 1000*1000;
    } else if (!strcasecmp(u,"mb")) {
        mul = 1024*1024;
    } else if (!strcasecmp(u,"g")) {
        mul = 1000LL*1000*1000;
    } else if (!strcasecmp(u,"gb")) {
        mul = 1024LL*1024*1024;
    } else {
        if (err) *err = 1;
        mul = 1;
    }
    digits = u-p;
    if (digits >= sizeof(buf)) {
        if (err) *err = 1;
        return LLONG_MAX;
    }
    memcpy(buf,p,digits);
    buf[digits] = '\0';
    val = strtoll(buf,NULL,10);
    /* Signed overflow is undefined behaviour: check before multiplying. */
    if (val > LLONG_MAX/mul) {
        if (err) *err = 1;
        return LLONG_MAX;
    }
    if (val < LLONG_MIN/mul) {
        if (err) *err = 1;
        return LLONG_MIN;
    }
    return val*mul;
}
1059
/* Convert a long long into its decimal string representation.
 *
 * 's' is the destination buffer and 'len' its total size in bytes. The
 * output is always NUL terminated when len > 0; if the buffer is too small
 * the text is truncated to the first len-1 characters.
 *
 * Returns the number of characters stored in 's', not counting the NUL
 * terminator (0 when len is 0, in which case 's' is left untouched). */
static int ll2string(char *s, size_t len, long long value) {
    char buf[32], *p;
    unsigned long long v;
    size_t l;

    if (len == 0) return 0;
    /* Negate in unsigned arithmetic: -value overflows for LLONG_MIN. */
    v = (value < 0) ? 0ULL - (unsigned long long)value
                    : (unsigned long long)value;
    p = buf+31; /* point to the last character */
    /* Digits are produced from the least significant one, right to left. */
    do {
        *p-- = '0'+(v%10);
        v /= 10;
    } while(v);
    if (value < 0) *p-- = '-';
    p++;
    l = 32-(p-buf);
    if (l+1 > len) l = len-1; /* Make sure it fits, including the nul term */
    memcpy(s,p,l);
    s[l] = '\0';
    return (int)l;
}
1083
56906eef 1084static void redisLog(int level, const char *fmt, ...) {
ed9b544e 1085 va_list ap;
1086 FILE *fp;
1087
1088 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
1089 if (!fp) return;
1090
1091 va_start(ap, fmt);
1092 if (level >= server.verbosity) {
6766f45e 1093 char *c = ".-*#";
1904ecc1 1094 char buf[64];
1095 time_t now;
1096
1097 now = time(NULL);
6c9385e0 1098 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
054e426d 1099 fprintf(fp,"[%d] %s %c ",(int)getpid(),buf,c[level]);
ed9b544e 1100 vfprintf(fp, fmt, ap);
1101 fprintf(fp,"\n");
1102 fflush(fp);
1103 }
1104 va_end(ap);
1105
1106 if (server.logfile) fclose(fp);
1107}
1108
1109/*====================== Hash table type implementation ==================== */
1110
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and Redis objects as values (objects can hold SDS strings,
 * lists, sets). */
1114
/* Dictionary destructor callback that releases 'val' with zfree().
 * 'privdata' is not used. */
static void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    zfree(val);
}
1120
4409877e 1121static void dictListDestructor(void *privdata, void *val)
1122{
1123 DICT_NOTUSED(privdata);
1124 listRelease((list*)val);
1125}
1126
09241813 1127static int dictSdsKeyCompare(void *privdata, const void *key1,
ed9b544e 1128 const void *key2)
1129{
1130 int l1,l2;
1131 DICT_NOTUSED(privdata);
1132
1133 l1 = sdslen((sds)key1);
1134 l2 = sdslen((sds)key2);
1135 if (l1 != l2) return 0;
1136 return memcmp(key1, key2, l1) == 0;
1137}
1138
/* Dictionary destructor callback for Redis objects: drops one reference
 * with decrRefCount(). NULL is accepted and ignored, because the values of
 * swapped out keys are set to NULL. 'privdata' is not used. */
static void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    if (val != NULL) {
        decrRefCount(val);
    }
}
1146
/* Dictionary destructor callback for sds strings: releases 'val' with
 * sdsfree(). 'privdata' is not used. */
static void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    sdsfree(val);
}
1153
942a3961 1154static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 1155 const void *key2)
1156{
1157 const robj *o1 = key1, *o2 = key2;
09241813 1158 return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
ed9b544e 1159}
1160
942a3961 1161static unsigned int dictObjHash(const void *key) {
ed9b544e 1162 const robj *o = key;
1163 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
1164}
1165
/* Dictionary hash callback for keys that are sds strings: hashes all the
 * sdslen() bytes of the key. */
static unsigned int dictSdsHash(const void *key) {
    unsigned char *bytes = (unsigned char*)key;

    return dictGenHashFunction(bytes, sdslen((char*)key));
}
1169
942a3961 1170static int dictEncObjKeyCompare(void *privdata, const void *key1,
1171 const void *key2)
1172{
9d65a1bb 1173 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
1174 int cmp;
942a3961 1175
2a1198b4 1176 if (o1->encoding == REDIS_ENCODING_INT &&
dc05abde 1177 o2->encoding == REDIS_ENCODING_INT)
1178 return o1->ptr == o2->ptr;
2a1198b4 1179
9d65a1bb 1180 o1 = getDecodedObject(o1);
1181 o2 = getDecodedObject(o2);
09241813 1182 cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
9d65a1bb 1183 decrRefCount(o1);
1184 decrRefCount(o2);
1185 return cmp;
942a3961 1186}
1187
1188static unsigned int dictEncObjHash(const void *key) {
9d65a1bb 1189 robj *o = (robj*) key;
942a3961 1190
ed9e4966 1191 if (o->encoding == REDIS_ENCODING_RAW) {
1192 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
1193 } else {
1194 if (o->encoding == REDIS_ENCODING_INT) {
1195 char buf[32];
1196 int len;
1197
ee14da56 1198 len = ll2string(buf,32,(long)o->ptr);
ed9e4966 1199 return dictGenHashFunction((unsigned char*)buf, len);
1200 } else {
1201 unsigned int hash;
1202
1203 o = getDecodedObject(o);
1204 hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
1205 decrRefCount(o);
1206 return hash;
1207 }
1208 }
942a3961 1209}
1210
09241813 1211/* Sets type */
ed9b544e 1212static dictType setDictType = {
942a3961 1213 dictEncObjHash, /* hash function */
ed9b544e 1214 NULL, /* key dup */
1215 NULL, /* val dup */
942a3961 1216 dictEncObjKeyCompare, /* key compare */
ed9b544e 1217 dictRedisObjectDestructor, /* key destructor */
1218 NULL /* val destructor */
1219};
1220
f2d9f50f 1221/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
1812e024 1222static dictType zsetDictType = {
1223 dictEncObjHash, /* hash function */
1224 NULL, /* key dup */
1225 NULL, /* val dup */
1226 dictEncObjKeyCompare, /* key compare */
1227 dictRedisObjectDestructor, /* key destructor */
da0a1620 1228 dictVanillaFree /* val destructor of malloc(sizeof(double)) */
1812e024 1229};
1230
09241813 1231/* Db->dict, keys are sds strings, vals are Redis objects. */
5234952b 1232static dictType dbDictType = {
09241813 1233 dictSdsHash, /* hash function */
ed9b544e 1234 NULL, /* key dup */
1235 NULL, /* val dup */
09241813 1236 dictSdsKeyCompare, /* key compare */
1237 dictSdsDestructor, /* key destructor */
ed9b544e 1238 dictRedisObjectDestructor /* val destructor */
1239};
1240
f2d9f50f 1241/* Db->expires */
1242static dictType keyptrDictType = {
09241813 1243 dictSdsHash, /* hash function */
f2d9f50f 1244 NULL, /* key dup */
1245 NULL, /* val dup */
09241813 1246 dictSdsKeyCompare, /* key compare */
1247 dictSdsDestructor, /* key destructor */
f2d9f50f 1248 NULL /* val destructor */
1249};
1250
5234952b 1251/* Hash type hash table (note that small hashes are represented with zimpaps) */
1252static dictType hashDictType = {
1253 dictEncObjHash, /* hash function */
1254 NULL, /* key dup */
1255 NULL, /* val dup */
1256 dictEncObjKeyCompare, /* key compare */
1257 dictRedisObjectDestructor, /* key destructor */
1258 dictRedisObjectDestructor /* val destructor */
1259};
1260
4409877e 1261/* Keylist hash table type has unencoded redis objects as keys and
d5d55fc3 1262 * lists as values. It's used for blocking operations (BLPOP) and to
1263 * map swapped keys to a list of clients waiting for this keys to be loaded. */
4409877e 1264static dictType keylistDictType = {
1265 dictObjHash, /* hash function */
1266 NULL, /* key dup */
1267 NULL, /* val dup */
1268 dictObjKeyCompare, /* key compare */
1269 dictRedisObjectDestructor, /* key destructor */
1270 dictListDestructor /* val destructor */
1271};
1272
42ab0172
AO
1273static void version();
1274
ed9b544e 1275/* ========================= Random utility functions ======================= */
1276
1277/* Redis generally does not try to recover from out of memory conditions
1278 * when allocating objects or strings, it is not clear if it will be possible
1279 * to report this condition to the client since the networking layer itself
1280 * is based on heap allocation for send buffers, so we simply abort.
1281 * At least the code will be simpler to read... */
1282static void oom(const char *msg) {
71c54b21 1283 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
ed9b544e 1284 sleep(1);
1285 abort();
1286}
1287
1288/* ====================== Redis server networking stuff ===================== */
56906eef 1289static void closeTimedoutClients(void) {
ed9b544e 1290 redisClient *c;
ed9b544e 1291 listNode *ln;
1292 time_t now = time(NULL);
c7df85a4 1293 listIter li;
ed9b544e 1294
c7df85a4 1295 listRewind(server.clients,&li);
1296 while ((ln = listNext(&li)) != NULL) {
ed9b544e 1297 c = listNodeValue(ln);
f86a74e9 1298 if (server.maxidletime &&
1299 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 1300 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
ffc6b7f8 1301 dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */
1302 listLength(c->pubsub_patterns) == 0 &&
d6cc8867 1303 (now - c->lastinteraction > server.maxidletime))
f86a74e9 1304 {
f870935d 1305 redisLog(REDIS_VERBOSE,"Closing idle client");
ed9b544e 1306 freeClient(c);
f86a74e9 1307 } else if (c->flags & REDIS_BLOCKED) {
58d976b8 1308 if (c->blockingto != 0 && c->blockingto < now) {
b177fd30 1309 addReply(c,shared.nullmultibulk);
b0d8747d 1310 unblockClientWaitingData(c);
f86a74e9 1311 }
ed9b544e 1312 }
1313 }
ed9b544e 1314}
1315
12fea928 1316static int htNeedsResize(dict *dict) {
1317 long long size, used;
1318
1319 size = dictSlots(dict);
1320 used = dictSize(dict);
1321 return (size && used && size > DICT_HT_INITIAL_SIZE &&
1322 (used*100/size < REDIS_HT_MINFILL));
1323}
1324
0bc03378 1325/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1326 * we resize the hash table to save memory */
56906eef 1327static void tryResizeHashTables(void) {
0bc03378 1328 int j;
1329
1330 for (j = 0; j < server.dbnum; j++) {
5413c40d 1331 if (htNeedsResize(server.db[j].dict))
0bc03378 1332 dictResize(server.db[j].dict);
12fea928 1333 if (htNeedsResize(server.db[j].expires))
1334 dictResize(server.db[j].expires);
0bc03378 1335 }
1336}
1337
8ca3e9d1 1338/* Our hash table implementation performs rehashing incrementally while
1339 * we write/read from the hash table. Still if the server is idle, the hash
1340 * table will use two tables for a long time. So we try to use 1 millisecond
1341 * of CPU time at every serverCron() loop in order to rehash some key. */
1342static void incrementallyRehash(void) {
1343 int j;
1344
1345 for (j = 0; j < server.dbnum; j++) {
1346 if (dictIsRehashing(server.db[j].dict)) {
1347 dictRehashMilliseconds(server.db[j].dict,1);
1348 break; /* already used our millisecond for this loop... */
1349 }
1350 }
1351}
1352
9d65a1bb 1353/* A background saving child (BGSAVE) terminated its work. Handle this. */
1354void backgroundSaveDoneHandler(int statloc) {
1355 int exitcode = WEXITSTATUS(statloc);
1356 int bysignal = WIFSIGNALED(statloc);
1357
1358 if (!bysignal && exitcode == 0) {
1359 redisLog(REDIS_NOTICE,
1360 "Background saving terminated with success");
1361 server.dirty = 0;
1362 server.lastsave = time(NULL);
1363 } else if (!bysignal && exitcode != 0) {
1364 redisLog(REDIS_WARNING, "Background saving error");
1365 } else {
1366 redisLog(REDIS_WARNING,
454eea7c 1367 "Background saving terminated by signal %d", WTERMSIG(statloc));
9d65a1bb 1368 rdbRemoveTempFile(server.bgsavechildpid);
1369 }
1370 server.bgsavechildpid = -1;
1371 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1372 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1373 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
1374}
1375
1376/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1377 * Handle this. */
1378void backgroundRewriteDoneHandler(int statloc) {
1379 int exitcode = WEXITSTATUS(statloc);
1380 int bysignal = WIFSIGNALED(statloc);
1381
1382 if (!bysignal && exitcode == 0) {
1383 int fd;
1384 char tmpfile[256];
1385
1386 redisLog(REDIS_NOTICE,
1387 "Background append only file rewriting terminated with success");
1388 /* Now it's time to flush the differences accumulated by the parent */
1389 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
1390 fd = open(tmpfile,O_WRONLY|O_APPEND);
1391 if (fd == -1) {
1392 redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
1393 goto cleanup;
1394 }
1395 /* Flush our data... */
1396 if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
1397 (signed) sdslen(server.bgrewritebuf)) {
1398 redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
1399 close(fd);
1400 goto cleanup;
1401 }
b32627cd 1402 redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
9d65a1bb 1403 /* Now our work is to rename the temp file into the stable file. And
1404 * switch the file descriptor used by the server for append only. */
1405 if (rename(tmpfile,server.appendfilename) == -1) {
1406 redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
1407 close(fd);
1408 goto cleanup;
1409 }
1410 /* Mission completed... almost */
1411 redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
1412 if (server.appendfd != -1) {
1413 /* If append only is actually enabled... */
1414 close(server.appendfd);
1415 server.appendfd = fd;
d5d23dab 1416 if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(fd);
85a83172 1417 server.appendseldb = -1; /* Make sure it will issue SELECT */
9d65a1bb 1418 redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
1419 } else {
1420 /* If append only is disabled we just generate a dump in this
1421 * format. Why not? */
1422 close(fd);
1423 }
1424 } else if (!bysignal && exitcode != 0) {
1425 redisLog(REDIS_WARNING, "Background append only file rewriting error");
1426 } else {
1427 redisLog(REDIS_WARNING,
454eea7c 1428 "Background append only file rewriting terminated by signal %d",
1429 WTERMSIG(statloc));
9d65a1bb 1430 }
1431cleanup:
1432 sdsfree(server.bgrewritebuf);
1433 server.bgrewritebuf = sdsempty();
1434 aofRemoveTempFile(server.bgrewritechildpid);
1435 server.bgrewritechildpid = -1;
1436}
1437
884d4b39 1438/* This function is called once a background process of some kind terminates,
1439 * as we want to avoid resizing the hash tables when there is a child in order
1440 * to play well with copy-on-write (otherwise when a resize happens lots of
1441 * memory pages are copied). The goal of this function is to update the ability
1442 * for dict.c to resize the hash tables accordingly to the fact we have o not
1443 * running childs. */
1444static void updateDictResizePolicy(void) {
1445 if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1)
1446 dictEnableResize();
1447 else
1448 dictDisableResize();
1449}
1450
56906eef 1451static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 1452 int j, loops = server.cronloops++;
ed9b544e 1453 REDIS_NOTUSED(eventLoop);
1454 REDIS_NOTUSED(id);
1455 REDIS_NOTUSED(clientData);
1456
3a66edc7 1457 /* We take a cached value of the unix time in the global state because
1458 * with virtual memory and aging there is to store the current time
1459 * in objects at every object access, and accuracy is not needed.
1460 * To access a global var is faster than calling time(NULL) */
1461 server.unixtime = time(NULL);
560db612 1462 /* We have just 21 bits per object for LRU information.
1463 * So we use an (eventually wrapping) LRU clock with minutes resolution.
1464 *
1465 * When we need to select what object to swap, we compute the minimum
1466 * time distance between the current lruclock and the object last access
1467 * lruclock info. Even if clocks will wrap on overflow, there is
1468 * the interesting property that we are sure that at least
1469 * ABS(A-B) minutes passed between current time and timestamp B.
1470 *
1471 * This is not precise but we don't need at all precision, but just
1472 * something statistically reasonable.
1473 */
1474 server.lruclock = (time(NULL)/60)&((1<<21)-1);
3a66edc7 1475
fab43727 1476 /* We received a SIGTERM, shutting down here in a safe way, as it is
1477 * not ok doing so inside the signal handler. */
1478 if (server.shutdown_asap) {
1479 if (prepareForShutdown() == REDIS_OK) exit(0);
1480 redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1481 }
1482
0bc03378 1483 /* Show some info about non-empty databases */
ed9b544e 1484 for (j = 0; j < server.dbnum; j++) {
dec423d9 1485 long long size, used, vkeys;
94754ccc 1486
3305306f 1487 size = dictSlots(server.db[j].dict);
1488 used = dictSize(server.db[j].dict);
94754ccc 1489 vkeys = dictSize(server.db[j].expires);
1763929f 1490 if (!(loops % 50) && (used || vkeys)) {
f870935d 1491 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 1492 /* dictPrintStats(server.dict); */
ed9b544e 1493 }
ed9b544e 1494 }
1495
0bc03378 1496 /* We don't want to resize the hash tables while a bacground saving
1497 * is in progress: the saving child is created using fork() that is
1498 * implemented with a copy-on-write semantic in most modern systems, so
1499 * if we resize the HT while there is the saving child at work actually
1500 * a lot of memory movements in the parent will cause a lot of pages
1501 * copied. */
8ca3e9d1 1502 if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1) {
1503 if (!(loops % 10)) tryResizeHashTables();
1504 if (server.activerehashing) incrementallyRehash();
884d4b39 1505 }
0bc03378 1506
ed9b544e 1507 /* Show information about connected clients */
1763929f 1508 if (!(loops % 50)) {
bdcb92f2 1509 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use",
ed9b544e 1510 listLength(server.clients)-listLength(server.slaves),
1511 listLength(server.slaves),
bdcb92f2 1512 zmalloc_used_memory());
ed9b544e 1513 }
1514
1515 /* Close connections of timedout clients */
1763929f 1516 if ((server.maxidletime && !(loops % 100)) || server.blpop_blocked_clients)
ed9b544e 1517 closeTimedoutClients();
1518
9d65a1bb 1519 /* Check if a background saving or AOF rewrite in progress terminated */
1520 if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
ed9b544e 1521 int statloc;
9d65a1bb 1522 pid_t pid;
1523
1524 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
1525 if (pid == server.bgsavechildpid) {
1526 backgroundSaveDoneHandler(statloc);
ed9b544e 1527 } else {
9d65a1bb 1528 backgroundRewriteDoneHandler(statloc);
ed9b544e 1529 }
884d4b39 1530 updateDictResizePolicy();
ed9b544e 1531 }
1532 } else {
1533 /* If there is not a background saving in progress check if
1534 * we have to save now */
1535 time_t now = time(NULL);
1536 for (j = 0; j < server.saveparamslen; j++) {
1537 struct saveparam *sp = server.saveparams+j;
1538
1539 if (server.dirty >= sp->changes &&
1540 now-server.lastsave > sp->seconds) {
1541 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
1542 sp->changes, sp->seconds);
f78fd11b 1543 rdbSaveBackground(server.dbfilename);
ed9b544e 1544 break;
1545 }
1546 }
1547 }
94754ccc 1548
f2324293 1549 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1550 * will use few CPU cycles if there are few expiring keys, otherwise
1551 * it will get more aggressive to avoid that too much memory is used by
1552 * keys that can be removed from the keyspace. */
94754ccc 1553 for (j = 0; j < server.dbnum; j++) {
f2324293 1554 int expired;
94754ccc 1555 redisDb *db = server.db+j;
94754ccc 1556
f2324293 1557 /* Continue to expire if at the end of the cycle more than 25%
1558 * of the keys were expired. */
1559 do {
4ef8de8a 1560 long num = dictSize(db->expires);
94754ccc 1561 time_t now = time(NULL);
1562
f2324293 1563 expired = 0;
94754ccc 1564 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
1565 num = REDIS_EXPIRELOOKUPS_PER_CRON;
1566 while (num--) {
1567 dictEntry *de;
1568 time_t t;
1569
1570 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
1571 t = (time_t) dictGetEntryVal(de);
1572 if (now > t) {
09241813 1573 sds key = dictGetEntryKey(de);
1574 robj *keyobj = createStringObject(key,sdslen(key));
1575
1576 dbDelete(db,keyobj);
1577 decrRefCount(keyobj);
f2324293 1578 expired++;
2a6a2ed1 1579 server.stat_expiredkeys++;
94754ccc 1580 }
1581 }
f2324293 1582 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
94754ccc 1583 }
1584
4ef8de8a 1585 /* Swap a few keys on disk if we are over the memory limit and VM
f870935d 1586 * is enbled. Try to free objects from the free list first. */
7e69548d 1587 if (vmCanSwapOut()) {
1588 while (server.vm_enabled && zmalloc_used_memory() >
f870935d 1589 server.vm_max_memory)
1590 {
72e9fd40 1591 int retval;
1592
a5819310 1593 if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
72e9fd40 1594 retval = (server.vm_max_threads == 0) ?
1595 vmSwapOneObjectBlocking() :
1596 vmSwapOneObjectThreaded();
1763929f 1597 if (retval == REDIS_ERR && !(loops % 300) &&
72e9fd40 1598 zmalloc_used_memory() >
1599 (server.vm_max_memory+server.vm_max_memory/10))
1600 {
1601 redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
7e69548d 1602 }
72e9fd40 1603 /* Note that when using threade I/O we free just one object,
1604 * because anyway when the I/O thread in charge to swap this
1605 * object out will finish, the handler of completed jobs
1606 * will try to swap more objects if we are still out of memory. */
1607 if (retval == REDIS_ERR || server.vm_max_threads > 0) break;
4ef8de8a 1608 }
1609 }
1610
ed9b544e 1611 /* Check if we should connect to a MASTER */
1763929f 1612 if (server.replstate == REDIS_REPL_CONNECT && !(loops % 10)) {
ed9b544e 1613 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
1614 if (syncWithMaster() == REDIS_OK) {
1615 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
8f63ddca 1616 if (server.appendonly) rewriteAppendOnlyFileBackground();
ed9b544e 1617 }
1618 }
1763929f 1619 return 100;
ed9b544e 1620}
1621
d5d55fc3 1622/* This function gets called every time Redis is entering the
1623 * main loop of the event driven library, that is, before to sleep
1624 * for ready file descriptors. */
1625static void beforeSleep(struct aeEventLoop *eventLoop) {
1626 REDIS_NOTUSED(eventLoop);
1627
28ed1f33 1628 /* Awake clients that got all the swapped keys they requested */
d5d55fc3 1629 if (server.vm_enabled && listLength(server.io_ready_clients)) {
1630 listIter li;
1631 listNode *ln;
1632
1633 listRewind(server.io_ready_clients,&li);
1634 while((ln = listNext(&li))) {
1635 redisClient *c = ln->value;
1636 struct redisCommand *cmd;
1637
1638 /* Resume the client. */
1639 listDelNode(server.io_ready_clients,ln);
1640 c->flags &= (~REDIS_IO_WAIT);
1641 server.vm_blocked_clients--;
1642 aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1643 readQueryFromClient, c);
1644 cmd = lookupCommand(c->argv[0]->ptr);
1645 assert(cmd != NULL);
1646 call(c,cmd);
1647 resetClient(c);
1648 /* There may be more data to process in the input buffer. */
1649 if (c->querybuf && sdslen(c->querybuf) > 0)
1650 processInputBuffer(c);
1651 }
1652 }
28ed1f33 1653 /* Write the AOF buffer on disk */
1654 flushAppendOnlyFile();
d5d55fc3 1655}
1656
ed9b544e 1657static void createSharedObjects(void) {
05df7621 1658 int j;
1659
ed9b544e 1660 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1661 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1662 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 1663 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1664 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1665 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1666 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1667 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1668 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 1669 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
6e469882 1670 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
ed9b544e 1671 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1672 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 1673 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1674 "-ERR no such key\r\n"));
ed9b544e 1675 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1676 "-ERR syntax error\r\n"));
c937aa89 1677 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1678 "-ERR source and destination objects are the same\r\n"));
1679 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1680 "-ERR index out of range\r\n"));
ed9b544e 1681 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1682 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1683 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1684 shared.select0 = createStringObject("select 0\r\n",10);
1685 shared.select1 = createStringObject("select 1\r\n",10);
1686 shared.select2 = createStringObject("select 2\r\n",10);
1687 shared.select3 = createStringObject("select 3\r\n",10);
1688 shared.select4 = createStringObject("select 4\r\n",10);
1689 shared.select5 = createStringObject("select 5\r\n",10);
1690 shared.select6 = createStringObject("select 6\r\n",10);
1691 shared.select7 = createStringObject("select 7\r\n",10);
1692 shared.select8 = createStringObject("select 8\r\n",10);
1693 shared.select9 = createStringObject("select 9\r\n",10);
befec3cd 1694 shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
c8d0ea0e 1695 shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
befec3cd 1696 shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
fc46bb71 1697 shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
ffc6b7f8 1698 shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1699 shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
befec3cd 1700 shared.mbulk3 = createStringObject("*3\r\n",4);
c8d0ea0e 1701 shared.mbulk4 = createStringObject("*4\r\n",4);
05df7621 1702 for (j = 0; j < REDIS_SHARED_INTEGERS; j++) {
1703 shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j);
1704 shared.integers[j]->encoding = REDIS_ENCODING_INT;
1705 }
ed9b544e 1706}
1707
1708static void appendServerSaveParams(time_t seconds, int changes) {
1709 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1710 server.saveparams[server.saveparamslen].seconds = seconds;
1711 server.saveparams[server.saveparamslen].changes = changes;
1712 server.saveparamslen++;
1713}
1714
bcfc686d 1715static void resetServerSaveParams() {
ed9b544e 1716 zfree(server.saveparams);
1717 server.saveparams = NULL;
1718 server.saveparamslen = 0;
1719}
1720
1721static void initServerConfig() {
1722 server.dbnum = REDIS_DEFAULT_DBNUM;
1723 server.port = REDIS_SERVERPORT;
f870935d 1724 server.verbosity = REDIS_VERBOSE;
ed9b544e 1725 server.maxidletime = REDIS_MAXIDLETIME;
1726 server.saveparams = NULL;
1727 server.logfile = NULL; /* NULL = log on standard output */
1728 server.bindaddr = NULL;
1729 server.glueoutputbuf = 1;
1730 server.daemonize = 0;
44b38ef4 1731 server.appendonly = 0;
1b677732 1732 server.appendfsync = APPENDFSYNC_EVERYSEC;
38db9171 1733 server.no_appendfsync_on_rewrite = 0;
48f0308a 1734 server.lastfsync = time(NULL);
44b38ef4 1735 server.appendfd = -1;
1736 server.appendseldb = -1; /* Make sure the first time will not match */
500ece7c 1737 server.pidfile = zstrdup("/var/run/redis.pid");
1738 server.dbfilename = zstrdup("dump.rdb");
1739 server.appendfilename = zstrdup("appendonly.aof");
abcb223e 1740 server.requirepass = NULL;
b0553789 1741 server.rdbcompression = 1;
8ca3e9d1 1742 server.activerehashing = 1;
285add55 1743 server.maxclients = 0;
d5d55fc3 1744 server.blpop_blocked_clients = 0;
3fd78bcd 1745 server.maxmemory = 0;
75680a3c 1746 server.vm_enabled = 0;
054e426d 1747 server.vm_swap_file = zstrdup("/tmp/redis-%p.vm");
75680a3c 1748 server.vm_page_size = 256; /* 256 bytes per page */
1749 server.vm_pages = 1024*1024*100; /* 104 millions of pages */
1750 server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
92f8e882 1751 server.vm_max_threads = 4;
d5d55fc3 1752 server.vm_blocked_clients = 0;
cbba7dd7 1753 server.hash_max_zipmap_entries = REDIS_HASH_MAX_ZIPMAP_ENTRIES;
1754 server.hash_max_zipmap_value = REDIS_HASH_MAX_ZIPMAP_VALUE;
fab43727 1755 server.shutdown_asap = 0;
75680a3c 1756
bcfc686d 1757 resetServerSaveParams();
ed9b544e 1758
1759 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1760 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1761 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1762 /* Replication related */
1763 server.isslave = 0;
d0ccebcf 1764 server.masterauth = NULL;
ed9b544e 1765 server.masterhost = NULL;
1766 server.masterport = 6379;
1767 server.master = NULL;
1768 server.replstate = REDIS_REPL_NONE;
a7866db6 1769
1770 /* Double constants initialization */
1771 R_Zero = 0.0;
1772 R_PosInf = 1.0/R_Zero;
1773 R_NegInf = -1.0/R_Zero;
1774 R_Nan = R_Zero/R_Zero;
ed9b544e 1775}
1776
1777static void initServer() {
1778 int j;
1779
1780 signal(SIGHUP, SIG_IGN);
1781 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1782 setupSigSegvAction();
ed9b544e 1783
b9bc0eef 1784 server.devnull = fopen("/dev/null","w");
1785 if (server.devnull == NULL) {
1786 redisLog(REDIS_WARNING, "Can't open /dev/null: %s", server.neterr);
1787 exit(1);
1788 }
ed9b544e 1789 server.clients = listCreate();
1790 server.slaves = listCreate();
87eca727 1791 server.monitors = listCreate();
ed9b544e 1792 server.objfreelist = listCreate();
1793 createSharedObjects();
1794 server.el = aeCreateEventLoop();
3305306f 1795 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
ed9b544e 1796 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1797 if (server.fd == -1) {
1798 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1799 exit(1);
1800 }
3305306f 1801 for (j = 0; j < server.dbnum; j++) {
5234952b 1802 server.db[j].dict = dictCreate(&dbDictType,NULL);
f2d9f50f 1803 server.db[j].expires = dictCreate(&keyptrDictType,NULL);
37ab76c9 1804 server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1805 server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
d5d55fc3 1806 if (server.vm_enabled)
1807 server.db[j].io_keys = dictCreate(&keylistDictType,NULL);
3305306f 1808 server.db[j].id = j;
1809 }
ffc6b7f8 1810 server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1811 server.pubsub_patterns = listCreate();
1812 listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1813 listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
ed9b544e 1814 server.cronloops = 0;
9f3c422c 1815 server.bgsavechildpid = -1;
9d65a1bb 1816 server.bgrewritechildpid = -1;
1817 server.bgrewritebuf = sdsempty();
28ed1f33 1818 server.aofbuf = sdsempty();
ed9b544e 1819 server.lastsave = time(NULL);
1820 server.dirty = 0;
ed9b544e 1821 server.stat_numcommands = 0;
1822 server.stat_numconnections = 0;
2a6a2ed1 1823 server.stat_expiredkeys = 0;
ed9b544e 1824 server.stat_starttime = time(NULL);
3a66edc7 1825 server.unixtime = time(NULL);
d8f8b666 1826 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
996cb5f7 1827 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
1828 acceptHandler, NULL) == AE_ERR) oom("creating file event");
44b38ef4 1829
1830 if (server.appendonly) {
3bb225d6 1831 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
44b38ef4 1832 if (server.appendfd == -1) {
1833 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1834 strerror(errno));
1835 exit(1);
1836 }
1837 }
75680a3c 1838
1839 if (server.vm_enabled) vmInit();
ed9b544e 1840}
1841
1842/* Empty the whole database */
ca37e9cd 1843static long long emptyDb() {
ed9b544e 1844 int j;
ca37e9cd 1845 long long removed = 0;
ed9b544e 1846
3305306f 1847 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1848 removed += dictSize(server.db[j].dict);
3305306f 1849 dictEmpty(server.db[j].dict);
1850 dictEmpty(server.db[j].expires);
1851 }
ca37e9cd 1852 return removed;
ed9b544e 1853}
1854
/* Convert a "yes" / "no" string (compared ignoring case) into 1 / 0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1860
ed9b544e 1861/* I agree, this is a very rudimental way to load a configuration...
1862 will improve later if the config gets more complex */
1863static void loadServerConfig(char *filename) {
c9a111ac 1864 FILE *fp;
ed9b544e 1865 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1866 int linenum = 0;
1867 sds line = NULL;
c9a111ac 1868
1869 if (filename[0] == '-' && filename[1] == '\0')
1870 fp = stdin;
1871 else {
1872 if ((fp = fopen(filename,"r")) == NULL) {
9a22de82 1873 redisLog(REDIS_WARNING, "Fatal error, can't open config file '%s'", filename);
c9a111ac 1874 exit(1);
1875 }
ed9b544e 1876 }
c9a111ac 1877
ed9b544e 1878 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1879 sds *argv;
1880 int argc, j;
1881
1882 linenum++;
1883 line = sdsnew(buf);
1884 line = sdstrim(line," \t\r\n");
1885
1886 /* Skip comments and blank lines*/
1887 if (line[0] == '#' || line[0] == '\0') {
1888 sdsfree(line);
1889 continue;
1890 }
1891
1892 /* Split into arguments */
1893 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1894 sdstolower(argv[0]);
1895
1896 /* Execute config directives */
bb0b03a3 1897 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1898 server.maxidletime = atoi(argv[1]);
0150db36 1899 if (server.maxidletime < 0) {
ed9b544e 1900 err = "Invalid timeout value"; goto loaderr;
1901 }
bb0b03a3 1902 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1903 server.port = atoi(argv[1]);
1904 if (server.port < 1 || server.port > 65535) {
1905 err = "Invalid port"; goto loaderr;
1906 }
bb0b03a3 1907 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1908 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1909 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1910 int seconds = atoi(argv[1]);
1911 int changes = atoi(argv[2]);
1912 if (seconds < 1 || changes < 0) {
1913 err = "Invalid save parameters"; goto loaderr;
1914 }
1915 appendServerSaveParams(seconds,changes);
bb0b03a3 1916 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1917 if (chdir(argv[1]) == -1) {
1918 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1919 argv[1], strerror(errno));
1920 exit(1);
1921 }
bb0b03a3 1922 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1923 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
f870935d 1924 else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
bb0b03a3 1925 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1926 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1927 else {
1928 err = "Invalid log level. Must be one of debug, notice, warning";
1929 goto loaderr;
1930 }
bb0b03a3 1931 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1932 FILE *logfp;
ed9b544e 1933
1934 server.logfile = zstrdup(argv[1]);
bb0b03a3 1935 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1936 zfree(server.logfile);
1937 server.logfile = NULL;
1938 }
1939 if (server.logfile) {
1940 /* Test if we are able to open the file. The server will not
1941 * be able to abort just for this problem later... */
c9a111ac 1942 logfp = fopen(server.logfile,"a");
1943 if (logfp == NULL) {
ed9b544e 1944 err = sdscatprintf(sdsempty(),
1945 "Can't open the log file: %s", strerror(errno));
1946 goto loaderr;
1947 }
c9a111ac 1948 fclose(logfp);
ed9b544e 1949 }
bb0b03a3 1950 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1951 server.dbnum = atoi(argv[1]);
1952 if (server.dbnum < 1) {
1953 err = "Invalid number of databases"; goto loaderr;
1954 }
b3f83f12
JZ
1955 } else if (!strcasecmp(argv[0],"include") && argc == 2) {
1956 loadServerConfig(argv[1]);
285add55 1957 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1958 server.maxclients = atoi(argv[1]);
3fd78bcd 1959 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
2b619329 1960 server.maxmemory = memtoll(argv[1],NULL);
bb0b03a3 1961 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1962 server.masterhost = sdsnew(argv[1]);
1963 server.masterport = atoi(argv[2]);
1964 server.replstate = REDIS_REPL_CONNECT;
d0ccebcf 1965 } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
1966 server.masterauth = zstrdup(argv[1]);
bb0b03a3 1967 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1968 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1969 err = "argument must be 'yes' or 'no'"; goto loaderr;
1970 }
121f70cf 1971 } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
1972 if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
8ca3e9d1 1973 err = "argument must be 'yes' or 'no'"; goto loaderr;
1974 }
1975 } else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) {
1976 if ((server.activerehashing = yesnotoi(argv[1])) == -1) {
121f70cf 1977 err = "argument must be 'yes' or 'no'"; goto loaderr;
1978 }
bb0b03a3 1979 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1980 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1981 err = "argument must be 'yes' or 'no'"; goto loaderr;
1982 }
44b38ef4 1983 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1984 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1985 err = "argument must be 'yes' or 'no'"; goto loaderr;
1986 }
f3b52411
PN
1987 } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) {
1988 zfree(server.appendfilename);
1989 server.appendfilename = zstrdup(argv[1]);
38db9171 1990 } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite")
1991 && argc == 2) {
1992 if ((server.no_appendfsync_on_rewrite= yesnotoi(argv[1])) == -1) {
1993 err = "argument must be 'yes' or 'no'"; goto loaderr;
1994 }
48f0308a 1995 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1766c6da 1996 if (!strcasecmp(argv[1],"no")) {
48f0308a 1997 server.appendfsync = APPENDFSYNC_NO;
1766c6da 1998 } else if (!strcasecmp(argv[1],"always")) {
48f0308a 1999 server.appendfsync = APPENDFSYNC_ALWAYS;
1766c6da 2000 } else if (!strcasecmp(argv[1],"everysec")) {
48f0308a 2001 server.appendfsync = APPENDFSYNC_EVERYSEC;
2002 } else {
2003 err = "argument must be 'no', 'always' or 'everysec'";
2004 goto loaderr;
2005 }
bb0b03a3 2006 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
054e426d 2007 server.requirepass = zstrdup(argv[1]);
bb0b03a3 2008 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
500ece7c 2009 zfree(server.pidfile);
054e426d 2010 server.pidfile = zstrdup(argv[1]);
bb0b03a3 2011 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
500ece7c 2012 zfree(server.dbfilename);
054e426d 2013 server.dbfilename = zstrdup(argv[1]);
75680a3c 2014 } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
2015 if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
2016 err = "argument must be 'yes' or 'no'"; goto loaderr;
2017 }
054e426d 2018 } else if (!strcasecmp(argv[0],"vm-swap-file") && argc == 2) {
fefed597 2019 zfree(server.vm_swap_file);
054e426d 2020 server.vm_swap_file = zstrdup(argv[1]);
4ef8de8a 2021 } else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
2b619329 2022 server.vm_max_memory = memtoll(argv[1],NULL);
4ef8de8a 2023 } else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
2b619329 2024 server.vm_page_size = memtoll(argv[1], NULL);
4ef8de8a 2025 } else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
2b619329 2026 server.vm_pages = memtoll(argv[1], NULL);
92f8e882 2027 } else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
2028 server.vm_max_threads = strtoll(argv[1], NULL, 10);
cbba7dd7 2029 } else if (!strcasecmp(argv[0],"hash-max-zipmap-entries") && argc == 2){
2b619329 2030 server.hash_max_zipmap_entries = memtoll(argv[1], NULL);
cbba7dd7 2031 } else if (!strcasecmp(argv[0],"hash-max-zipmap-value") && argc == 2){
2b619329 2032 server.hash_max_zipmap_value = memtoll(argv[1], NULL);
ed9b544e 2033 } else {
2034 err = "Bad directive or wrong number of arguments"; goto loaderr;
2035 }
2036 for (j = 0; j < argc; j++)
2037 sdsfree(argv[j]);
2038 zfree(argv);
2039 sdsfree(line);
2040 }
c9a111ac 2041 if (fp != stdin) fclose(fp);
ed9b544e 2042 return;
2043
2044loaderr:
2045 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
2046 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
2047 fprintf(stderr, ">>> '%s'\n", line);
2048 fprintf(stderr, "%s\n", err);
2049 exit(1);
2050}
2051
2052static void freeClientArgv(redisClient *c) {
2053 int j;
2054
2055 for (j = 0; j < c->argc; j++)
2056 decrRefCount(c->argv[j]);
e8a74421 2057 for (j = 0; j < c->mbargc; j++)
2058 decrRefCount(c->mbargv[j]);
ed9b544e 2059 c->argc = 0;
e8a74421 2060 c->mbargc = 0;
ed9b544e 2061}
2062
2063static void freeClient(redisClient *c) {
2064 listNode *ln;
2065
4409877e 2066 /* Note that if the client we are freeing is blocked into a blocking
b0d8747d 2067 * call, we have to set querybuf to NULL *before* to call
2068 * unblockClientWaitingData() to avoid processInputBuffer() will get
2069 * called. Also it is important to remove the file events after
2070 * this, because this call adds the READABLE event. */
4409877e 2071 sdsfree(c->querybuf);
2072 c->querybuf = NULL;
2073 if (c->flags & REDIS_BLOCKED)
b0d8747d 2074 unblockClientWaitingData(c);
4409877e 2075
37ab76c9 2076 /* UNWATCH all the keys */
2077 unwatchAllKeys(c);
2078 listRelease(c->watched_keys);
ffc6b7f8 2079 /* Unsubscribe from all the pubsub channels */
2080 pubsubUnsubscribeAllChannels(c,0);
2081 pubsubUnsubscribeAllPatterns(c,0);
2082 dictRelease(c->pubsub_channels);
2083 listRelease(c->pubsub_patterns);
befec3cd 2084 /* Obvious cleanup */
ed9b544e 2085 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
2086 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
ed9b544e 2087 listRelease(c->reply);
2088 freeClientArgv(c);
2089 close(c->fd);
92f8e882 2090 /* Remove from the list of clients */
ed9b544e 2091 ln = listSearchKey(server.clients,c);
dfc5e96c 2092 redisAssert(ln != NULL);
ed9b544e 2093 listDelNode(server.clients,ln);
37ab76c9 2094 /* Remove from the list of clients that are now ready to be restarted
2095 * after waiting for swapped keys */
d5d55fc3 2096 if (c->flags & REDIS_IO_WAIT && listLength(c->io_keys) == 0) {
2097 ln = listSearchKey(server.io_ready_clients,c);
2098 if (ln) {
2099 listDelNode(server.io_ready_clients,ln);
2100 server.vm_blocked_clients--;
2101 }
2102 }
37ab76c9 2103 /* Remove from the list of clients waiting for swapped keys */
d5d55fc3 2104 while (server.vm_enabled && listLength(c->io_keys)) {
2105 ln = listFirst(c->io_keys);
2106 dontWaitForSwappedKey(c,ln->value);
92f8e882 2107 }
b3e3d0d7 2108 listRelease(c->io_keys);
befec3cd 2109 /* Master/slave cleanup */
ed9b544e 2110 if (c->flags & REDIS_SLAVE) {
6208b3a7 2111 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
2112 close(c->repldbfd);
87eca727 2113 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
2114 ln = listSearchKey(l,c);
dfc5e96c 2115 redisAssert(ln != NULL);
87eca727 2116 listDelNode(l,ln);
ed9b544e 2117 }
2118 if (c->flags & REDIS_MASTER) {
2119 server.master = NULL;
2120 server.replstate = REDIS_REPL_CONNECT;
2121 }
befec3cd 2122 /* Release memory */
93ea3759 2123 zfree(c->argv);
e8a74421 2124 zfree(c->mbargv);
6e469882 2125 freeClientMultiState(c);
ed9b544e 2126 zfree(c);
2127}
2128
cc30e368 2129#define GLUEREPLY_UP_TO (1024)
ed9b544e 2130static void glueReplyBuffersIfNeeded(redisClient *c) {
c28b42ac 2131 int copylen = 0;
2132 char buf[GLUEREPLY_UP_TO];
6208b3a7 2133 listNode *ln;
c7df85a4 2134 listIter li;
ed9b544e 2135 robj *o;
2136
c7df85a4 2137 listRewind(c->reply,&li);
2138 while((ln = listNext(&li))) {
c28b42ac 2139 int objlen;
2140
ed9b544e 2141 o = ln->value;
c28b42ac 2142 objlen = sdslen(o->ptr);
2143 if (copylen + objlen <= GLUEREPLY_UP_TO) {
2144 memcpy(buf+copylen,o->ptr,objlen);
2145 copylen += objlen;
ed9b544e 2146 listDelNode(c->reply,ln);
c28b42ac 2147 } else {
2148 if (copylen == 0) return;
2149 break;
ed9b544e 2150 }
ed9b544e 2151 }
c28b42ac 2152 /* Now the output buffer is empty, add the new single element */
2153 o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
2154 listAddNodeHead(c->reply,o);
ed9b544e 2155}
2156
2157static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
2158 redisClient *c = privdata;
2159 int nwritten = 0, totwritten = 0, objlen;
2160 robj *o;
2161 REDIS_NOTUSED(el);
2162 REDIS_NOTUSED(mask);
2163
2895e862 2164 /* Use writev() if we have enough buffers to send */
7ea870c0 2165 if (!server.glueoutputbuf &&
e0a62c7f 2166 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
7ea870c0 2167 !(c->flags & REDIS_MASTER))
2895e862 2168 {
2169 sendReplyToClientWritev(el, fd, privdata, mask);
2170 return;
2171 }
2895e862 2172
ed9b544e 2173 while(listLength(c->reply)) {
c28b42ac 2174 if (server.glueoutputbuf && listLength(c->reply) > 1)
2175 glueReplyBuffersIfNeeded(c);
2176
ed9b544e 2177 o = listNodeValue(listFirst(c->reply));
2178 objlen = sdslen(o->ptr);
2179
2180 if (objlen == 0) {
2181 listDelNode(c->reply,listFirst(c->reply));
2182 continue;
2183 }
2184
2185 if (c->flags & REDIS_MASTER) {
6f376729 2186 /* Don't reply to a master */
ed9b544e 2187 nwritten = objlen - c->sentlen;
2188 } else {
a4d1ba9a 2189 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 2190 if (nwritten <= 0) break;
2191 }
2192 c->sentlen += nwritten;
2193 totwritten += nwritten;
2194 /* If we fully sent the object on head go to the next one */
2195 if (c->sentlen == objlen) {
2196 listDelNode(c->reply,listFirst(c->reply));
2197 c->sentlen = 0;
2198 }
6f376729 2199 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
12f9d551 2200 * bytes, in a single threaded server it's a good idea to serve
6f376729 2201 * other clients as well, even if a very large request comes from
2202 * super fast link that is always able to accept data (in real world
12f9d551 2203 * scenario think about 'KEYS *' against the loopback interfae) */
6f376729 2204 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 2205 }
2206 if (nwritten == -1) {
2207 if (errno == EAGAIN) {
2208 nwritten = 0;
2209 } else {
f870935d 2210 redisLog(REDIS_VERBOSE,
ed9b544e 2211 "Error writing to client: %s", strerror(errno));
2212 freeClient(c);
2213 return;
2214 }
2215 }
2216 if (totwritten > 0) c->lastinteraction = time(NULL);
2217 if (listLength(c->reply) == 0) {
2218 c->sentlen = 0;
2219 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
2220 }
2221}
2222
2895e862 2223static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
2224{
2225 redisClient *c = privdata;
2226 int nwritten = 0, totwritten = 0, objlen, willwrite;
2227 robj *o;
2228 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
2229 int offset, ion = 0;
2230 REDIS_NOTUSED(el);
2231 REDIS_NOTUSED(mask);
2232
2233 listNode *node;
2234 while (listLength(c->reply)) {
2235 offset = c->sentlen;
2236 ion = 0;
2237 willwrite = 0;
2238
2239 /* fill-in the iov[] array */
2240 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
2241 o = listNodeValue(node);
2242 objlen = sdslen(o->ptr);
2243
e0a62c7f 2244 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
2895e862 2245 break;
2246
2247 if(ion == REDIS_WRITEV_IOVEC_COUNT)
2248 break; /* no more iovecs */
2249
2250 iov[ion].iov_base = ((char*)o->ptr) + offset;
2251 iov[ion].iov_len = objlen - offset;
2252 willwrite += objlen - offset;
2253 offset = 0; /* just for the first item */
2254 ion++;
2255 }
2256
2257 if(willwrite == 0)
2258 break;
2259
2260 /* write all collected blocks at once */
2261 if((nwritten = writev(fd, iov, ion)) < 0) {
2262 if (errno != EAGAIN) {
f870935d 2263 redisLog(REDIS_VERBOSE,
2895e862 2264 "Error writing to client: %s", strerror(errno));
2265 freeClient(c);
2266 return;
2267 }
2268 break;
2269 }
2270
2271 totwritten += nwritten;
2272 offset = c->sentlen;
2273
2274 /* remove written robjs from c->reply */
2275 while (nwritten && listLength(c->reply)) {
2276 o = listNodeValue(listFirst(c->reply));
2277 objlen = sdslen(o->ptr);
2278
2279 if(nwritten >= objlen - offset) {
2280 listDelNode(c->reply, listFirst(c->reply));
2281 nwritten -= objlen - offset;
2282 c->sentlen = 0;
2283 } else {
2284 /* partial write */
2285 c->sentlen += nwritten;
2286 break;
2287 }
2288 offset = 0;
2289 }
2290 }
2291
e0a62c7f 2292 if (totwritten > 0)
2895e862 2293 c->lastinteraction = time(NULL);
2294
2295 if (listLength(c->reply) == 0) {
2296 c->sentlen = 0;
2297 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
2298 }
2299}
2300
1a132bbc
PN
2301static int qsortRedisCommands(const void *r1, const void *r2) {
2302 return strcasecmp(
2303 ((struct redisCommand*)r1)->name,
2304 ((struct redisCommand*)r2)->name);
2305}
2306
2307static void sortCommandTable() {
1a132bbc
PN
2308 /* Copy and sort the read-only version of the command table */
2309 commandTable = (struct redisCommand*)malloc(sizeof(readonlyCommandTable));
2310 memcpy(commandTable,readonlyCommandTable,sizeof(readonlyCommandTable));
d55d5c5d 2311 qsort(commandTable,
2312 sizeof(readonlyCommandTable)/sizeof(struct redisCommand),
2313 sizeof(struct redisCommand),qsortRedisCommands);
1a132bbc
PN
2314}
2315
ed9b544e 2316static struct redisCommand *lookupCommand(char *name) {
1a132bbc
PN
2317 struct redisCommand tmp = {name,NULL,0,0,NULL,0,0,0};
2318 return bsearch(
2319 &tmp,
2320 commandTable,
d55d5c5d 2321 sizeof(readonlyCommandTable)/sizeof(struct redisCommand),
1a132bbc
PN
2322 sizeof(struct redisCommand),
2323 qsortRedisCommands);
ed9b544e 2324}
2325
2326/* resetClient prepare the client to process the next command */
2327static void resetClient(redisClient *c) {
2328 freeClientArgv(c);
2329 c->bulklen = -1;
e8a74421 2330 c->multibulk = 0;
ed9b544e 2331}
2332
6e469882 2333/* Call() is the core of Redis execution of a command */
2334static void call(redisClient *c, struct redisCommand *cmd) {
2335 long long dirty;
2336
2337 dirty = server.dirty;
2338 cmd->proc(c);
4005fef1 2339 dirty = server.dirty-dirty;
2340
2341 if (server.appendonly && dirty)
6e469882 2342 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
4005fef1 2343 if ((dirty || cmd->flags & REDIS_CMD_FORCE_REPLICATION) &&
2344 listLength(server.slaves))
248ea310 2345 replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc);
6e469882 2346 if (listLength(server.monitors))
dd142b9c 2347 replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc);
6e469882 2348 server.stat_numcommands++;
2349}
2350
ed9b544e 2351/* If this function gets called we already read a whole
2352 * command, argments are in the client argv/argc fields.
2353 * processCommand() execute the command or prepare the
2354 * server for a bulk read from the client.
2355 *
2356 * If 1 is returned the client is still alive and valid and
2357 * and other operations can be performed by the caller. Otherwise
2358 * if 0 is returned the client was destroied (i.e. after QUIT). */
2359static int processCommand(redisClient *c) {
2360 struct redisCommand *cmd;
ed9b544e 2361
3fd78bcd 2362 /* Free some memory if needed (maxmemory setting) */
2363 if (server.maxmemory) freeMemoryIfNeeded();
2364
e8a74421 2365 /* Handle the multi bulk command type. This is an alternative protocol
2366 * supported by Redis in order to receive commands that are composed of
2367 * multiple binary-safe "bulk" arguments. The latency of processing is
2368 * a bit higher but this allows things like multi-sets, so if this
2369 * protocol is used only for MSET and similar commands this is a big win. */
2370 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
2371 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
2372 if (c->multibulk <= 0) {
2373 resetClient(c);
2374 return 1;
2375 } else {
2376 decrRefCount(c->argv[c->argc-1]);
2377 c->argc--;
2378 return 1;
2379 }
2380 } else if (c->multibulk) {
2381 if (c->bulklen == -1) {
2382 if (((char*)c->argv[0]->ptr)[0] != '$') {
2383 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
2384 resetClient(c);
2385 return 1;
2386 } else {
2387 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
2388 decrRefCount(c->argv[0]);
2389 if (bulklen < 0 || bulklen > 1024*1024*1024) {
2390 c->argc--;
2391 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
2392 resetClient(c);
2393 return 1;
2394 }
2395 c->argc--;
2396 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
2397 return 1;
2398 }
2399 } else {
2400 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
2401 c->mbargv[c->mbargc] = c->argv[0];
2402 c->mbargc++;
2403 c->argc--;
2404 c->multibulk--;
2405 if (c->multibulk == 0) {
2406 robj **auxargv;
2407 int auxargc;
2408
2409 /* Here we need to swap the multi-bulk argc/argv with the
2410 * normal argc/argv of the client structure. */
2411 auxargv = c->argv;
2412 c->argv = c->mbargv;
2413 c->mbargv = auxargv;
2414
2415 auxargc = c->argc;
2416 c->argc = c->mbargc;
2417 c->mbargc = auxargc;
2418
2419 /* We need to set bulklen to something different than -1
2420 * in order for the code below to process the command without
2421 * to try to read the last argument of a bulk command as
2422 * a special argument. */
2423 c->bulklen = 0;
2424 /* continue below and process the command */
2425 } else {
2426 c->bulklen = -1;
2427 return 1;
2428 }
2429 }
2430 }
2431 /* -- end of multi bulk commands processing -- */
2432
ed9b544e 2433 /* The QUIT command is handled as a special case. Normal command
2434 * procs are unable to close the client connection safely */
bb0b03a3 2435 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 2436 freeClient(c);
2437 return 0;
2438 }
d5d55fc3 2439
2440 /* Now lookup the command and check ASAP about trivial error conditions
2441 * such wrong arity, bad command name and so forth. */
ed9b544e 2442 cmd = lookupCommand(c->argv[0]->ptr);
2443 if (!cmd) {
2c14807b 2444 addReplySds(c,
2445 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
2446 (char*)c->argv[0]->ptr));
ed9b544e 2447 resetClient(c);
2448 return 1;
2449 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
2450 (c->argc < -cmd->arity)) {
454d4e43 2451 addReplySds(c,
2452 sdscatprintf(sdsempty(),
2453 "-ERR wrong number of arguments for '%s' command\r\n",
2454 cmd->name));
ed9b544e 2455 resetClient(c);
2456 return 1;
2457 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
d5d55fc3 2458 /* This is a bulk command, we have to read the last argument yet. */
ed9b544e 2459 int bulklen = atoi(c->argv[c->argc-1]->ptr);
2460
2461 decrRefCount(c->argv[c->argc-1]);
2462 if (bulklen < 0 || bulklen > 1024*1024*1024) {
2463 c->argc--;
2464 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
2465 resetClient(c);
2466 return 1;
2467 }
2468 c->argc--;
2469 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
2470 /* It is possible that the bulk read is already in the
8d0490e7 2471 * buffer. Check this condition and handle it accordingly.
2472 * This is just a fast path, alternative to call processInputBuffer().
2473 * It's a good idea since the code is small and this condition
2474 * happens most of the times. */
ed9b544e 2475 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
2476 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
2477 c->argc++;
2478 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
2479 } else {
d5d55fc3 2480 /* Otherwise return... there is to read the last argument
2481 * from the socket. */
ed9b544e 2482 return 1;
2483 }
2484 }
942a3961 2485 /* Let's try to encode the bulk object to save space. */
2486 if (cmd->flags & REDIS_CMD_BULK)
05df7621 2487 c->argv[c->argc-1] = tryObjectEncoding(c->argv[c->argc-1]);
942a3961 2488
e63943a4 2489 /* Check if the user is authenticated */
2490 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
2491 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
2492 resetClient(c);
2493 return 1;
2494 }
2495
b61a28fe 2496 /* Handle the maxmemory directive */
2497 if (server.maxmemory && (cmd->flags & REDIS_CMD_DENYOOM) &&
2498 zmalloc_used_memory() > server.maxmemory)
2499 {
2500 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
2501 resetClient(c);
2502 return 1;
2503 }
2504
d6cc8867 2505 /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
e6cca5db 2506 if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0)
2507 &&
ffc6b7f8 2508 cmd->proc != subscribeCommand && cmd->proc != unsubscribeCommand &&
2509 cmd->proc != psubscribeCommand && cmd->proc != punsubscribeCommand) {
2510 addReplySds(c,sdsnew("-ERR only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context\r\n"));
d6cc8867 2511 resetClient(c);
2512 return 1;
2513 }
2514
ed9b544e 2515 /* Exec the command */
6531c94d 2516 if (c->flags & REDIS_MULTI &&
2517 cmd->proc != execCommand && cmd->proc != discardCommand &&
2518 cmd->proc != multiCommand && cmd->proc != watchCommand)
2519 {
6e469882 2520 queueMultiCommand(c,cmd);
2521 addReply(c,shared.queued);
2522 } else {
d5d55fc3 2523 if (server.vm_enabled && server.vm_max_threads > 0 &&
0a6f3f0f 2524 blockClientOnSwappedKeys(c,cmd)) return 1;
6e469882 2525 call(c,cmd);
2526 }
ed9b544e 2527
2528 /* Prepare the client for the next command */
ed9b544e 2529 resetClient(c);
2530 return 1;
2531}
2532
248ea310 2533static void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
6208b3a7 2534 listNode *ln;
c7df85a4 2535 listIter li;
ed9b544e 2536 int outc = 0, j;
93ea3759 2537 robj **outv;
248ea310 2538 /* We need 1+(ARGS*3) objects since commands are using the new protocol
2539 * and we one 1 object for the first "*<count>\r\n" multibulk count, then
2540 * for every additional object we have "$<count>\r\n" + object + "\r\n". */
2541 robj *static_outv[REDIS_STATIC_ARGS*3+1];
2542 robj *lenobj;
93ea3759 2543
2544 if (argc <= REDIS_STATIC_ARGS) {
2545 outv = static_outv;
2546 } else {
248ea310 2547 outv = zmalloc(sizeof(robj*)*(argc*3+1));
93ea3759 2548 }
248ea310 2549
2550 lenobj = createObject(REDIS_STRING,
2551 sdscatprintf(sdsempty(), "*%d\r\n", argc));
2552 lenobj->refcount = 0;
2553 outv[outc++] = lenobj;
ed9b544e 2554 for (j = 0; j < argc; j++) {
248ea310 2555 lenobj = createObject(REDIS_STRING,
2556 sdscatprintf(sdsempty(),"$%lu\r\n",
2557 (unsigned long) stringObjectLen(argv[j])));
2558 lenobj->refcount = 0;
2559 outv[outc++] = lenobj;
ed9b544e 2560 outv[outc++] = argv[j];
248ea310 2561 outv[outc++] = shared.crlf;
ed9b544e 2562 }
ed9b544e 2563
40d224a9 2564 /* Increment all the refcounts at start and decrement at end in order to
2565 * be sure to free objects if there is no slave in a replication state
2566 * able to be feed with commands */
2567 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
c7df85a4 2568 listRewind(slaves,&li);
2569 while((ln = listNext(&li))) {
ed9b544e 2570 redisClient *slave = ln->value;
40d224a9 2571
2572 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 2573 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 2574
2575 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 2576 if (slave->slaveseldb != dictid) {
2577 robj *selectcmd;
2578
2579 switch(dictid) {
2580 case 0: selectcmd = shared.select0; break;
2581 case 1: selectcmd = shared.select1; break;
2582 case 2: selectcmd = shared.select2; break;
2583 case 3: selectcmd = shared.select3; break;
2584 case 4: selectcmd = shared.select4; break;
2585 case 5: selectcmd = shared.select5; break;
2586 case 6: selectcmd = shared.select6; break;
2587 case 7: selectcmd = shared.select7; break;
2588 case 8: selectcmd = shared.select8; break;
2589 case 9: selectcmd = shared.select9; break;
2590 default:
2591 selectcmd = createObject(REDIS_STRING,
2592 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
2593 selectcmd->refcount = 0;
2594 break;
2595 }
2596 addReply(slave,selectcmd);
2597 slave->slaveseldb = dictid;
2598 }
2599 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 2600 }
40d224a9 2601 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 2602 if (outv != static_outv) zfree(outv);
ed9b544e 2603}
2604
dd142b9c 2605static sds sdscatrepr(sds s, char *p, size_t len) {
2606 s = sdscatlen(s,"\"",1);
2607 while(len--) {
2608 switch(*p) {
2609 case '\\':
2610 case '"':
2611 s = sdscatprintf(s,"\\%c",*p);
2612 break;
2613 case '\n': s = sdscatlen(s,"\\n",1); break;
2614 case '\r': s = sdscatlen(s,"\\r",1); break;
2615 case '\t': s = sdscatlen(s,"\\t",1); break;
2616 case '\a': s = sdscatlen(s,"\\a",1); break;
2617 case '\b': s = sdscatlen(s,"\\b",1); break;
2618 default:
2619 if (isprint(*p))
2620 s = sdscatprintf(s,"%c",*p);
2621 else
2622 s = sdscatprintf(s,"\\x%02x",(unsigned char)*p);
2623 break;
2624 }
2625 p++;
2626 }
2627 return sdscatlen(s,"\"",1);
2628}
2629
2630static void replicationFeedMonitors(list *monitors, int dictid, robj **argv, int argc) {
2631 listNode *ln;
2632 listIter li;
2633 int j;
2634 sds cmdrepr = sdsnew("+");
2635 robj *cmdobj;
2636 struct timeval tv;
2637
2638 gettimeofday(&tv,NULL);
2639 cmdrepr = sdscatprintf(cmdrepr,"%ld.%ld ",(long)tv.tv_sec,(long)tv.tv_usec);
2640 if (dictid != 0) cmdrepr = sdscatprintf(cmdrepr,"(db %d) ", dictid);
2641
2642 for (j = 0; j < argc; j++) {
2643 if (argv[j]->encoding == REDIS_ENCODING_INT) {
2644 cmdrepr = sdscatprintf(cmdrepr, "%ld", (long)argv[j]->ptr);
2645 } else {
2646 cmdrepr = sdscatrepr(cmdrepr,(char*)argv[j]->ptr,
2647 sdslen(argv[j]->ptr));
2648 }
2649 if (j != argc-1)
2650 cmdrepr = sdscatlen(cmdrepr," ",1);
2651 }
2652 cmdrepr = sdscatlen(cmdrepr,"\r\n",2);
2653 cmdobj = createObject(REDIS_STRING,cmdrepr);
2654
2655 listRewind(monitors,&li);
2656 while((ln = listNext(&li))) {
2657 redisClient *monitor = ln->value;
2658 addReply(monitor,cmdobj);
2659 }
2660 decrRefCount(cmdobj);
2661}
2662
638e42ac 2663static void processInputBuffer(redisClient *c) {
ed9b544e 2664again:
4409877e 2665 /* Before to process the input buffer, make sure the client is not
2666 * waitig for a blocking operation such as BLPOP. Note that the first
2667 * iteration the client is never blocked, otherwise the processInputBuffer
2668 * would not be called at all, but after the execution of the first commands
2669 * in the input buffer the client may be blocked, and the "goto again"
2670 * will try to reiterate. The following line will make it return asap. */
92f8e882 2671 if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return;
ed9b544e 2672 if (c->bulklen == -1) {
2673 /* Read the first line of the query */
2674 char *p = strchr(c->querybuf,'\n');
2675 size_t querylen;
644fafa3 2676
ed9b544e 2677 if (p) {
2678 sds query, *argv;
2679 int argc, j;
e0a62c7f 2680
ed9b544e 2681 query = c->querybuf;
2682 c->querybuf = sdsempty();
2683 querylen = 1+(p-(query));
2684 if (sdslen(query) > querylen) {
2685 /* leave data after the first line of the query in the buffer */
2686 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
2687 }
2688 *p = '\0'; /* remove "\n" */
2689 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
2690 sdsupdatelen(query);
2691
2692 /* Now we can split the query in arguments */
ed9b544e 2693 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 2694 sdsfree(query);
2695
2696 if (c->argv) zfree(c->argv);
2697 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 2698
2699 for (j = 0; j < argc; j++) {
ed9b544e 2700 if (sdslen(argv[j])) {
2701 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
2702 c->argc++;
2703 } else {
2704 sdsfree(argv[j]);
2705 }
2706 }
2707 zfree(argv);
7c49733c 2708 if (c->argc) {
2709 /* Execute the command. If the client is still valid
2710 * after processCommand() return and there is something
2711 * on the query buffer try to process the next command. */
2712 if (processCommand(c) && sdslen(c->querybuf)) goto again;
2713 } else {
2714 /* Nothing to process, argc == 0. Just process the query
2715 * buffer if it's not empty or return to the caller */
2716 if (sdslen(c->querybuf)) goto again;
2717 }
ed9b544e 2718 return;
644fafa3 2719 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
f870935d 2720 redisLog(REDIS_VERBOSE, "Client protocol error");
ed9b544e 2721 freeClient(c);
2722 return;
2723 }
2724 } else {
2725 /* Bulk read handling. Note that if we are at this point
2726 the client already sent a command terminated with a newline,
2727 we are reading the bulk data that is actually the last
2728 argument of the command. */
2729 int qbl = sdslen(c->querybuf);
2730
2731 if (c->bulklen <= qbl) {
2732 /* Copy everything but the final CRLF as final argument */
2733 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
2734 c->argc++;
2735 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 2736 /* Process the command. If the client is still valid after
2737 * the processing and there is more data in the buffer
2738 * try to parse it. */
2739 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 2740 return;
2741 }
2742 }
2743}
2744
638e42ac 2745static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
2746 redisClient *c = (redisClient*) privdata;
2747 char buf[REDIS_IOBUF_LEN];
2748 int nread;
2749 REDIS_NOTUSED(el);
2750 REDIS_NOTUSED(mask);
2751
2752 nread = read(fd, buf, REDIS_IOBUF_LEN);
2753 if (nread == -1) {
2754 if (errno == EAGAIN) {
2755 nread = 0;
2756 } else {
f870935d 2757 redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
638e42ac 2758 freeClient(c);
2759 return;
2760 }
2761 } else if (nread == 0) {
f870935d 2762 redisLog(REDIS_VERBOSE, "Client closed connection");
638e42ac 2763 freeClient(c);
2764 return;
2765 }
2766 if (nread) {
2767 c->querybuf = sdscatlen(c->querybuf, buf, nread);
2768 c->lastinteraction = time(NULL);
2769 } else {
2770 return;
2771 }
168ac5c6 2772 processInputBuffer(c);
638e42ac 2773}
2774
ed9b544e 2775static int selectDb(redisClient *c, int id) {
2776 if (id < 0 || id >= server.dbnum)
2777 return REDIS_ERR;
3305306f 2778 c->db = &server.db[id];
ed9b544e 2779 return REDIS_OK;
2780}
2781
40d224a9 2782static void *dupClientReplyValue(void *o) {
2783 incrRefCount((robj*)o);
12d090d2 2784 return o;
40d224a9 2785}
2786
/* List match method: forwards the two values to equalStringObjects() and
 * returns its result. */
static int listMatchObjects(void *a, void *b) {
    int same = equalStringObjects(a,b);

    return same;
}
2790
ed9b544e 2791static redisClient *createClient(int fd) {
2792 redisClient *c = zmalloc(sizeof(*c));
2793
2794 anetNonBlock(NULL,fd);
2795 anetTcpNoDelay(NULL,fd);
2796 if (!c) return NULL;
2797 selectDb(c,0);
2798 c->fd = fd;
2799 c->querybuf = sdsempty();
2800 c->argc = 0;
93ea3759 2801 c->argv = NULL;
ed9b544e 2802 c->bulklen = -1;
e8a74421 2803 c->multibulk = 0;
2804 c->mbargc = 0;
2805 c->mbargv = NULL;
ed9b544e 2806 c->sentlen = 0;
2807 c->flags = 0;
2808 c->lastinteraction = time(NULL);
abcb223e 2809 c->authenticated = 0;
40d224a9 2810 c->replstate = REDIS_REPL_NONE;
6b47e12e 2811 c->reply = listCreate();
ed9b544e 2812 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 2813 listSetDupMethod(c->reply,dupClientReplyValue);
37ab76c9 2814 c->blocking_keys = NULL;
2815 c->blocking_keys_num = 0;
92f8e882 2816 c->io_keys = listCreate();
87c68815 2817 c->watched_keys = listCreate();
92f8e882 2818 listSetFreeMethod(c->io_keys,decrRefCount);
ffc6b7f8 2819 c->pubsub_channels = dictCreate(&setDictType,NULL);
2820 c->pubsub_patterns = listCreate();
2821 listSetFreeMethod(c->pubsub_patterns,decrRefCount);
2822 listSetMatchMethod(c->pubsub_patterns,listMatchObjects);
ed9b544e 2823 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
266373b2 2824 readQueryFromClient, c) == AE_ERR) {
ed9b544e 2825 freeClient(c);
2826 return NULL;
2827 }
6b47e12e 2828 listAddNodeTail(server.clients,c);
6e469882 2829 initClientMultiState(c);
ed9b544e 2830 return c;
2831}
2832
2833static void addReply(redisClient *c, robj *obj) {
2834 if (listLength(c->reply) == 0 &&
6208b3a7 2835 (c->replstate == REDIS_REPL_NONE ||
2836 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 2837 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
266373b2 2838 sendReplyToClient, c) == AE_ERR) return;
e3cadb8a 2839
2840 if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) {
2841 obj = dupStringObject(obj);
2842 obj->refcount = 0; /* getDecodedObject() will increment the refcount */
2843 }
9d65a1bb 2844 listAddNodeTail(c->reply,getDecodedObject(obj));
ed9b544e 2845}
2846
2847static void addReplySds(redisClient *c, sds s) {
2848 robj *o = createObject(REDIS_STRING,s);
2849 addReply(c,o);
2850 decrRefCount(o);
2851}
2852
e2665397 2853static void addReplyDouble(redisClient *c, double d) {
2854 char buf[128];
2855
2856 snprintf(buf,sizeof(buf),"%.17g",d);
682ac724 2857 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
83c6a618 2858 (unsigned long) strlen(buf),buf));
e2665397 2859}
2860
aa7c2934
PN
2861static void addReplyLongLong(redisClient *c, long long ll) {
2862 char buf[128];
2863 size_t len;
2864
2865 if (ll == 0) {
2866 addReply(c,shared.czero);
2867 return;
2868 } else if (ll == 1) {
2869 addReply(c,shared.cone);
2870 return;
2871 }
482b672d 2872 buf[0] = ':';
2873 len = ll2string(buf+1,sizeof(buf)-1,ll);
2874 buf[len+1] = '\r';
2875 buf[len+2] = '\n';
2876 addReplySds(c,sdsnewlen(buf,len+3));
aa7c2934
PN
2877}
2878
92b27fe9 2879static void addReplyUlong(redisClient *c, unsigned long ul) {
2880 char buf[128];
2881 size_t len;
2882
dd88747b 2883 if (ul == 0) {
2884 addReply(c,shared.czero);
2885 return;
2886 } else if (ul == 1) {
2887 addReply(c,shared.cone);
2888 return;
2889 }
92b27fe9 2890 len = snprintf(buf,sizeof(buf),":%lu\r\n",ul);
2891 addReplySds(c,sdsnewlen(buf,len));
2892}
2893
942a3961 2894static void addReplyBulkLen(redisClient *c, robj *obj) {
482b672d 2895 size_t len, intlen;
2896 char buf[128];
942a3961 2897
2898 if (obj->encoding == REDIS_ENCODING_RAW) {
2899 len = sdslen(obj->ptr);
2900 } else {
2901 long n = (long)obj->ptr;
2902
e054afda 2903 /* Compute how many bytes will take this integer as a radix 10 string */
942a3961 2904 len = 1;
2905 if (n < 0) {
2906 len++;
2907 n = -n;
2908 }
2909 while((n = n/10) != 0) {
2910 len++;
2911 }
2912 }
482b672d 2913 buf[0] = '$';
2914 intlen = ll2string(buf+1,sizeof(buf)-1,(long long)len);
2915 buf[intlen+1] = '\r';
2916 buf[intlen+2] = '\n';
2917 addReplySds(c,sdsnewlen(buf,intlen+3));
942a3961 2918}
2919
dd88747b 2920static void addReplyBulk(redisClient *c, robj *obj) {
2921 addReplyBulkLen(c,obj);
2922 addReply(c,obj);
2923 addReply(c,shared.crlf);
2924}
2925
09241813 2926static void addReplyBulkSds(redisClient *c, sds s) {
2927 robj *o = createStringObject(s, sdslen(s));
2928 addReplyBulk(c,o);
2929 decrRefCount(o);
2930}
2931
500ece7c 2932/* In the CONFIG command we need to add vanilla C string as bulk replies */
2933static void addReplyBulkCString(redisClient *c, char *s) {
2934 if (s == NULL) {
2935 addReply(c,shared.nullbulk);
2936 } else {
2937 robj *o = createStringObject(s,strlen(s));
2938 addReplyBulk(c,o);
2939 decrRefCount(o);
2940 }
2941}
2942
ed9b544e 2943static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
2944 int cport, cfd;
2945 char cip[128];
285add55 2946 redisClient *c;
ed9b544e 2947 REDIS_NOTUSED(el);
2948 REDIS_NOTUSED(mask);
2949 REDIS_NOTUSED(privdata);
2950
2951 cfd = anetAccept(server.neterr, fd, cip, &cport);
2952 if (cfd == AE_ERR) {
f870935d 2953 redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
ed9b544e 2954 return;
2955 }
f870935d 2956 redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
285add55 2957 if ((c = createClient(cfd)) == NULL) {
ed9b544e 2958 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
2959 close(cfd); /* May be already closed, just ingore errors */
2960 return;
2961 }
285add55 2962 /* If maxclient directive is set and this is one client more... close the
2963 * connection. Note that we create the client instead to check before
2964 * for this condition, since now the socket is already set in nonblocking
2965 * mode and we can send an error for free using the Kernel I/O */
2966 if (server.maxclients && listLength(server.clients) > server.maxclients) {
2967 char *err = "-ERR max number of clients reached\r\n";
2968
2969 /* That's a best effort error message, don't check write errors */
fee803ba 2970 if (write(c->fd,err,strlen(err)) == -1) {
2971 /* Nothing to do, Just to avoid the warning... */
2972 }
285add55 2973 freeClient(c);
2974 return;
2975 }
ed9b544e 2976 server.stat_numconnections++;
2977}
2978
2979/* ======================= Redis objects implementation ===================== */
2980
2981static robj *createObject(int type, void *ptr) {
2982 robj *o;
2983
a5819310 2984 if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
ed9b544e 2985 if (listLength(server.objfreelist)) {
2986 listNode *head = listFirst(server.objfreelist);
2987 o = listNodeValue(head);
2988 listDelNode(server.objfreelist,head);
a5819310 2989 if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
ed9b544e 2990 } else {
560db612 2991 if (server.vm_enabled)
a5819310 2992 pthread_mutex_unlock(&server.obj_freelist_mutex);
560db612 2993 o = zmalloc(sizeof(*o));
ed9b544e 2994 }
ed9b544e 2995 o->type = type;
942a3961 2996 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 2997 o->ptr = ptr;
2998 o->refcount = 1;
3a66edc7 2999 if (server.vm_enabled) {
1064ef87 3000 /* Note that this code may run in the context of an I/O thread
560db612 3001 * and accessing server.lruclock in theory is an error
1064ef87 3002 * (no locks). But in practice this is safe, and even if we read
560db612 3003 * garbage Redis will not fail. */
3004 o->lru = server.lruclock;
3a66edc7 3005 o->storage = REDIS_VM_MEMORY;
3006 }
ed9b544e 3007 return o;
3008}
3009
3010static robj *createStringObject(char *ptr, size_t len) {
3011 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
3012}
3013
3f973463
PN
3014static robj *createStringObjectFromLongLong(long long value) {
3015 robj *o;
3016 if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
3017 incrRefCount(shared.integers[value]);
3018 o = shared.integers[value];
3019 } else {
3f973463 3020 if (value >= LONG_MIN && value <= LONG_MAX) {
10dea8dc 3021 o = createObject(REDIS_STRING, NULL);
3f973463
PN
3022 o->encoding = REDIS_ENCODING_INT;
3023 o->ptr = (void*)((long)value);
3024 } else {
ee14da56 3025 o = createObject(REDIS_STRING,sdsfromlonglong(value));
3f973463
PN
3026 }
3027 }
3028 return o;
3029}
3030
4ef8de8a 3031static robj *dupStringObject(robj *o) {
b9bc0eef 3032 assert(o->encoding == REDIS_ENCODING_RAW);
4ef8de8a 3033 return createStringObject(o->ptr,sdslen(o->ptr));
3034}
3035
ed9b544e 3036static robj *createListObject(void) {
3037 list *l = listCreate();
3038
ed9b544e 3039 listSetFreeMethod(l,decrRefCount);
3040 return createObject(REDIS_LIST,l);
3041}
3042
3043static robj *createSetObject(void) {
3044 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 3045 return createObject(REDIS_SET,d);
3046}
3047
5234952b 3048static robj *createHashObject(void) {
3049 /* All the Hashes start as zipmaps. Will be automatically converted
3050 * into hash tables if there are enough elements or big elements
3051 * inside. */
3052 unsigned char *zm = zipmapNew();
3053 robj *o = createObject(REDIS_HASH,zm);
3054 o->encoding = REDIS_ENCODING_ZIPMAP;
3055 return o;
3056}
3057
1812e024 3058static robj *createZsetObject(void) {
6b47e12e 3059 zset *zs = zmalloc(sizeof(*zs));
3060
3061 zs->dict = dictCreate(&zsetDictType,NULL);
3062 zs->zsl = zslCreate();
3063 return createObject(REDIS_ZSET,zs);
1812e024 3064}
3065
ed9b544e 3066static void freeStringObject(robj *o) {
942a3961 3067 if (o->encoding == REDIS_ENCODING_RAW) {
3068 sdsfree(o->ptr);
3069 }
ed9b544e 3070}
3071
3072static void freeListObject(robj *o) {
3073 listRelease((list*) o->ptr);
3074}
3075
3076static void freeSetObject(robj *o) {
3077 dictRelease((dict*) o->ptr);
3078}
3079
fd8ccf44 3080static void freeZsetObject(robj *o) {
3081 zset *zs = o->ptr;
3082
3083 dictRelease(zs->dict);
3084 zslFree(zs->zsl);
3085 zfree(zs);
3086}
3087
ed9b544e 3088static void freeHashObject(robj *o) {
cbba7dd7 3089 switch (o->encoding) {
3090 case REDIS_ENCODING_HT:
3091 dictRelease((dict*) o->ptr);
3092 break;
3093 case REDIS_ENCODING_ZIPMAP:
3094 zfree(o->ptr);
3095 break;
3096 default:
f83c6cb5 3097 redisPanic("Unknown hash encoding type");
cbba7dd7 3098 break;
3099 }
ed9b544e 3100}
3101
3102static void incrRefCount(robj *o) {
3103 o->refcount++;
3104}
3105
3106static void decrRefCount(void *obj) {
3107 robj *o = obj;
94754ccc 3108
560db612 3109 /* Object is a swapped out value, or in the process of being loaded. */
996cb5f7 3110 if (server.vm_enabled &&
3111 (o->storage == REDIS_VM_SWAPPED || o->storage == REDIS_VM_LOADING))
3112 {
560db612 3113 vmpointer *vp = obj;
3114 if (o->storage == REDIS_VM_LOADING) vmCancelThreadedIOJob(o);
3115 vmMarkPagesFree(vp->page,vp->usedpages);
7d98e08c 3116 server.vm_stats_swapped_objects--;
560db612 3117 zfree(vp);
a35ddf12 3118 return;
3119 }
560db612 3120
3121 if (o->refcount <= 0) redisPanic("decrRefCount against refcount <= 0");
e4ed181d 3122 /* Object is in memory, or in the process of being swapped out.
3123 *
3124 * If the object is being swapped out, abort the operation on
3125 * decrRefCount even if the refcount does not drop to 0: the object
3126 * is referenced at least two times, as value of the key AND as
3127 * job->val in the iojob. So if we don't invalidate the iojob, when it is
3128 * done but the relevant key was removed in the meantime, the
3129 * complete jobs handler will not find the key about the job and the
3130 * assert will fail. */
3131 if (server.vm_enabled && o->storage == REDIS_VM_SWAPPING)
3132 vmCancelThreadedIOJob(o);
ed9b544e 3133 if (--(o->refcount) == 0) {
3134 switch(o->type) {
3135 case REDIS_STRING: freeStringObject(o); break;
3136 case REDIS_LIST: freeListObject(o); break;
3137 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 3138 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 3139 case REDIS_HASH: freeHashObject(o); break;
f83c6cb5 3140 default: redisPanic("Unknown object type"); break;
ed9b544e 3141 }
a5819310 3142 if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
ed9b544e 3143 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
3144 !listAddNodeHead(server.objfreelist,o))
3145 zfree(o);
a5819310 3146 if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
ed9b544e 3147 }
3148}
3149
92b27fe9 3150static int checkType(redisClient *c, robj *o, int type) {
3151 if (o->type != type) {
3152 addReply(c,shared.wrongtypeerr);
3153 return 1;
3154 }
3155 return 0;
3156}
3157
724a51b1 3158/* Check if the nul-terminated string 's' can be represented by a long
3159 * (that is, is a number that fits into long without any other space or
3160 * character before or after the digits).
3161 *
3162 * If so, the function returns REDIS_OK and *longval is set to the value
3163 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 3164static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 3165 char buf[32], *endptr;
3166 long value;
3167 int slen;
e0a62c7f 3168
724a51b1 3169 value = strtol(s, &endptr, 10);
3170 if (endptr[0] != '\0') return REDIS_ERR;
ee14da56 3171 slen = ll2string(buf,32,value);
724a51b1 3172
3173 /* If the number converted back into a string is not identical
3174 * then it's not possible to encode the string as integer */
f69f2cba 3175 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 3176 if (longval) *longval = value;
3177 return REDIS_OK;
3178}
3179
942a3961 3180/* Try to encode a string object in order to save space */
05df7621 3181static robj *tryObjectEncoding(robj *o) {
942a3961 3182 long value;
942a3961 3183 sds s = o->ptr;
3305306f 3184
942a3961 3185 if (o->encoding != REDIS_ENCODING_RAW)
05df7621 3186 return o; /* Already encoded */
3305306f 3187
05df7621 3188 /* It's not safe to encode shared objects: shared objects can be shared
942a3961 3189 * everywhere in the "object space" of Redis. Encoded objects can only
3190 * appear as "values" (and not, for instance, as keys) */
05df7621 3191 if (o->refcount > 1) return o;
3305306f 3192
942a3961 3193 /* Currently we try to encode only strings */
dfc5e96c 3194 redisAssert(o->type == REDIS_STRING);
94754ccc 3195
724a51b1 3196 /* Check if we can represent this string as a long integer */
05df7621 3197 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return o;
942a3961 3198
3199 /* Ok, this object can be encoded */
05df7621 3200 if (value >= 0 && value < REDIS_SHARED_INTEGERS) {
3201 decrRefCount(o);
3202 incrRefCount(shared.integers[value]);
3203 return shared.integers[value];
3204 } else {
3205 o->encoding = REDIS_ENCODING_INT;
3206 sdsfree(o->ptr);
3207 o->ptr = (void*) value;
3208 return o;
3209 }
942a3961 3210}
3211
9d65a1bb 3212/* Get a decoded version of an encoded object (returned as a new object).
3213 * If the object is already raw-encoded just increment the ref count. */
3214static robj *getDecodedObject(robj *o) {
942a3961 3215 robj *dec;
e0a62c7f 3216
9d65a1bb 3217 if (o->encoding == REDIS_ENCODING_RAW) {
3218 incrRefCount(o);
3219 return o;
3220 }
942a3961 3221 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
3222 char buf[32];
3223
ee14da56 3224 ll2string(buf,32,(long)o->ptr);
942a3961 3225 dec = createStringObject(buf,strlen(buf));
3226 return dec;
3227 } else {
08ee9b57 3228 redisPanic("Unknown encoding type");
942a3961 3229 }
3305306f 3230}
3231
d7f43c08 3232/* Compare two string objects via strcmp() or alike.
3233 * Note that the objects may be integer-encoded. In such a case we
ee14da56 3234 * use ll2string() to get a string representation of the numbers on the stack
1fd9bc8a 3235 * and compare the strings, it's much faster than calling getDecodedObject().
3236 *
3237 * Important note: if objects are not integer encoded, but binary-safe strings,
3238 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
3239 * binary safe. */
724a51b1 3240static int compareStringObjects(robj *a, robj *b) {
dfc5e96c 3241 redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 3242 char bufa[128], bufb[128], *astr, *bstr;
3243 int bothsds = 1;
724a51b1 3244
e197b441 3245 if (a == b) return 0;
d7f43c08 3246 if (a->encoding != REDIS_ENCODING_RAW) {
ee14da56 3247 ll2string(bufa,sizeof(bufa),(long) a->ptr);
d7f43c08 3248 astr = bufa;
3249 bothsds = 0;
724a51b1 3250 } else {
d7f43c08 3251 astr = a->ptr;
724a51b1 3252 }
d7f43c08 3253 if (b->encoding != REDIS_ENCODING_RAW) {
ee14da56 3254 ll2string(bufb,sizeof(bufb),(long) b->ptr);
d7f43c08 3255 bstr = bufb;
3256 bothsds = 0;
3257 } else {
3258 bstr = b->ptr;
3259 }
3260 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 3261}
3262
bf028098 3263/* Equal string objects return 1 if the two objects are the same from the
3264 * point of view of a string comparison, otherwise 0 is returned. Note that
3265 * this function is faster then checking for (compareStringObject(a,b) == 0)
3266 * because it can perform some more optimization. */
3267static int equalStringObjects(robj *a, robj *b) {
3268 if (a->encoding != REDIS_ENCODING_RAW && b->encoding != REDIS_ENCODING_RAW){
3269 return a->ptr == b->ptr;
3270 } else {
3271 return compareStringObjects(a,b) == 0;
3272 }
3273}
3274
0ea663ea 3275static size_t stringObjectLen(robj *o) {
dfc5e96c 3276 redisAssert(o->type == REDIS_STRING);
0ea663ea 3277 if (o->encoding == REDIS_ENCODING_RAW) {
3278 return sdslen(o->ptr);
3279 } else {
3280 char buf[32];
3281
ee14da56 3282 return ll2string(buf,32,(long)o->ptr);
0ea663ea 3283 }
3284}
3285
bd79a6bd
PN
3286static int getDoubleFromObject(robj *o, double *target) {
3287 double value;
682c73e8 3288 char *eptr;
bbe025e0 3289
bd79a6bd
PN
3290 if (o == NULL) {
3291 value = 0;
3292 } else {
3293 redisAssert(o->type == REDIS_STRING);
3294 if (o->encoding == REDIS_ENCODING_RAW) {
3295 value = strtod(o->ptr, &eptr);
682c73e8 3296 if (eptr[0] != '\0') return REDIS_ERR;
bd79a6bd
PN
3297 } else if (o->encoding == REDIS_ENCODING_INT) {
3298 value = (long)o->ptr;
3299 } else {
946342c1 3300 redisPanic("Unknown string encoding");
bd79a6bd
PN
3301 }
3302 }
3303
bd79a6bd
PN
3304 *target = value;
3305 return REDIS_OK;
3306}
bbe025e0 3307
bd79a6bd
PN
3308static int getDoubleFromObjectOrReply(redisClient *c, robj *o, double *target, const char *msg) {
3309 double value;
3310 if (getDoubleFromObject(o, &value) != REDIS_OK) {
3311 if (msg != NULL) {
3312 addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
3313 } else {
3314 addReplySds(c, sdsnew("-ERR value is not a double\r\n"));
3315 }
bbe025e0
AM
3316 return REDIS_ERR;
3317 }
3318
bd79a6bd 3319 *target = value;
bbe025e0
AM
3320 return REDIS_OK;
3321}
3322
bd79a6bd
PN
3323static int getLongLongFromObject(robj *o, long long *target) {
3324 long long value;
682c73e8 3325 char *eptr;
bbe025e0 3326
bd79a6bd
PN
3327 if (o == NULL) {
3328 value = 0;
3329 } else {
3330 redisAssert(o->type == REDIS_STRING);
3331 if (o->encoding == REDIS_ENCODING_RAW) {
3332 value = strtoll(o->ptr, &eptr, 10);
682c73e8 3333 if (eptr[0] != '\0') return REDIS_ERR;
bd79a6bd
PN
3334 } else if (o->encoding == REDIS_ENCODING_INT) {
3335 value = (long)o->ptr;
3336 } else {
946342c1 3337 redisPanic("Unknown string encoding");
bd79a6bd
PN
3338 }
3339 }
3340
bd79a6bd
PN
3341 *target = value;
3342 return REDIS_OK;
3343}
bbe025e0 3344
bd79a6bd
PN
3345static int getLongLongFromObjectOrReply(redisClient *c, robj *o, long long *target, const char *msg) {
3346 long long value;
3347 if (getLongLongFromObject(o, &value) != REDIS_OK) {
3348 if (msg != NULL) {
3349 addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
3350 } else {
3351 addReplySds(c, sdsnew("-ERR value is not an integer\r\n"));
3352 }
bbe025e0
AM
3353 return REDIS_ERR;
3354 }
3355
bd79a6bd 3356 *target = value;
bbe025e0
AM
3357 return REDIS_OK;
3358}
3359
bd79a6bd
PN
3360static int getLongFromObjectOrReply(redisClient *c, robj *o, long *target, const char *msg) {
3361 long long value;
bbe025e0 3362
bd79a6bd
PN
3363 if (getLongLongFromObjectOrReply(c, o, &value, msg) != REDIS_OK) return REDIS_ERR;
3364 if (value < LONG_MIN || value > LONG_MAX) {
3365 if (msg != NULL) {
3366 addReplySds(c, sdscatprintf(sdsempty(), "-ERR %s\r\n", msg));
3367 } else {
3368 addReplySds(c, sdsnew("-ERR value is out of range\r\n"));
3369 }
bbe025e0
AM
3370 return REDIS_ERR;
3371 }
3372
bd79a6bd 3373 *target = value;
bbe025e0
AM
3374 return REDIS_OK;
3375}
3376
612e4de8 3377/* =========================== Keyspace access API ========================== */
3378
3379static robj *lookupKey(redisDb *db, robj *key) {
09241813 3380 dictEntry *de = dictFind(db->dict,key->ptr);
612e4de8 3381 if (de) {
612e4de8 3382 robj *val = dictGetEntryVal(de);
3383
3384 if (server.vm_enabled) {
3385 if (val->storage == REDIS_VM_MEMORY ||
3386 val->storage == REDIS_VM_SWAPPING)
3387 {
3388 /* If we were swapping the object out, cancel the operation */
3389 if (val->storage == REDIS_VM_SWAPPING)
3390 vmCancelThreadedIOJob(val);
09241813 3391 /* Update the access time for the aging algorithm. */
612e4de8 3392 val->lru = server.lruclock;
3393 } else {
3394 int notify = (val->storage == REDIS_VM_LOADING);
3395
3396 /* Our value was swapped on disk. Bring it at home. */
3397 redisAssert(val->type == REDIS_VMPOINTER);
3398 val = vmLoadObject(val);
3399 dictGetEntryVal(de) = val;
3400
3401 /* Clients blocked by the VM subsystem may be waiting for
3402 * this key... */
3403 if (notify) handleClientsBlockedOnSwappedKey(db,key);
3404 }
3405 }
3406 return val;
3407 } else {
3408 return NULL;
3409 }
3410}
3411
3412static robj *lookupKeyRead(redisDb *db, robj *key) {
3413 expireIfNeeded(db,key);
3414 return lookupKey(db,key);
3415}
3416
3417static robj *lookupKeyWrite(redisDb *db, robj *key) {
3418 deleteIfVolatile(db,key);
3419 touchWatchedKey(db,key);
3420 return lookupKey(db,key);
3421}
3422
3423static robj *lookupKeyReadOrReply(redisClient *c, robj *key, robj *reply) {
3424 robj *o = lookupKeyRead(c->db, key);
3425 if (!o) addReply(c,reply);
3426 return o;
3427}
3428
3429static robj *lookupKeyWriteOrReply(redisClient *c, robj *key, robj *reply) {
3430 robj *o = lookupKeyWrite(c->db, key);
3431 if (!o) addReply(c,reply);
3432 return o;
3433}
3434
09241813 3435/* Add the key to the DB. If the key already exists REDIS_ERR is returned,
3436 * otherwise REDIS_OK is returned, and the caller should increment the
3437 * refcount of 'val'. */
3438static int dbAdd(redisDb *db, robj *key, robj *val) {
3439 /* Perform a lookup before adding the key, as we need to copy the
3440 * key value. */
3441 if (dictFind(db->dict, key->ptr) != NULL) {
3442 return REDIS_ERR;
3443 } else {
3444 sds copy = sdsdup(key->ptr);
3445 dictAdd(db->dict, copy, val);
3446 return REDIS_OK;
3447 }
3448}
3449
3450/* If the key does not exist, this is just like dbAdd(). Otherwise
3451 * the value associated to the key is replaced with the new one.
3452 *
3453 * On update (key already existed) 0 is returned. Otherwise 1. */
3454static int dbReplace(redisDb *db, robj *key, robj *val) {
3455 if (dictFind(db->dict,key->ptr) == NULL) {
3456 sds copy = sdsdup(key->ptr);
3457 dictAdd(db->dict, copy, val);
3458 return 1;
3459 } else {
3460 dictReplace(db->dict, key->ptr, val);
3461 return 0;
3462 }
3463}
3464
3465static int dbExists(redisDb *db, robj *key) {
3466 return dictFind(db->dict,key->ptr) != NULL;
3467}
3468
3469/* Return a random key, in form of a Redis object.
3470 * If there are no keys, NULL is returned.
3471 *
3472 * The function makes sure to return keys not already expired. */
3473static robj *dbRandomKey(redisDb *db) {
3474 struct dictEntry *de;
3475
3476 while(1) {
3477 sds key;
3478 robj *keyobj;
3479
3480 de = dictGetRandomKey(db->dict);
3481 if (de == NULL) return NULL;
3482
3483 key = dictGetEntryKey(de);
3484 keyobj = createStringObject(key,sdslen(key));
3485 if (dictFind(db->expires,key)) {
3486 if (expireIfNeeded(db,keyobj)) {
3487 decrRefCount(keyobj);
3488 continue; /* search for another key. This expired. */
3489 }
3490 }
3491 return keyobj;
3492 }
3493}
3494
3495/* Delete a key, value, and associated expiration entry if any, from the DB */
3496static int dbDelete(redisDb *db, robj *key) {
612e4de8 3497 int retval;
3498
09241813 3499 if (dictSize(db->expires)) dictDelete(db->expires,key->ptr);
3500 retval = dictDelete(db->dict,key->ptr);
612e4de8 3501
3502 return retval == DICT_OK;
3503}
3504
06233c45 3505/*============================ RDB saving/loading =========================== */
ed9b544e 3506
/* Write the single byte 'type' to 'fp'. Returns 0 on success, -1 on write
 * error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
3511
/* Write the time 't' to 'fp' as a 32 bit signed integer in host byte
 * order. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t secs = (int32_t) t;

    return (fwrite(&secs,4,1,fp) == 0) ? -1 : 0;
}
3517
e3566d4b 3518/* check rdbLoadLen() comments for more info */
f78fd11b 3519static int rdbSaveLen(FILE *fp, uint32_t len) {
3520 unsigned char buf[2];
3521
3522 if (len < (1<<6)) {
3523 /* Save a 6 bit len */
10c43610 3524 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 3525 if (fwrite(buf,1,1,fp) == 0) return -1;
3526 } else if (len < (1<<14)) {
3527 /* Save a 14 bit len */
10c43610 3528 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 3529 buf[1] = len&0xFF;
17be1a4a 3530 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 3531 } else {
3532 /* Save a 32 bit len */
10c43610 3533 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 3534 if (fwrite(buf,1,1,fp) == 0) return -1;
3535 len = htonl(len);
3536 if (fwrite(&len,4,1,fp) == 0) return -1;
3537 }
3538 return 0;
3539}
3540
32a66513 3541/* Encode 'value' as an integer if possible (if integer will fit the
3542 * supported range). If the function sucessful encoded the integer
3543 * then the (up to 5 bytes) encoded representation is written in the
3544 * string pointed by 'enc' and the length is returned. Otherwise
3545 * 0 is returned. */
3546static int rdbEncodeInteger(long long value, unsigned char *enc) {
e3566d4b 3547 /* Finally check if it fits in our ranges */
3548 if (value >= -(1<<7) && value <= (1<<7)-1) {
3549 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
3550 enc[1] = value&0xFF;
3551 return 2;
3552 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
3553 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
3554 enc[1] = value&0xFF;
3555 enc[2] = (value>>8)&0xFF;
3556 return 3;
3557 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
3558 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
3559 enc[1] = value&0xFF;
3560 enc[2] = (value>>8)&0xFF;
3561 enc[3] = (value>>16)&0xFF;
3562 enc[4] = (value>>24)&0xFF;
3563 return 5;
3564 } else {
3565 return 0;
3566 }
3567}
3568
/* String objects in the form "2391" "-100" without any space and with a
 * range of values that can fit in an 8, 16 or 32 bit signed value can be
 * encoded as integers to save space.
 *
 * 's' points to 'len' bytes of data. On success the encoded form is written
 * into 'enc' and its length is returned, otherwise 0 is returned. */
static int rdbTryIntegerEncoding(char *s, size_t len, unsigned char *enc) {
    long long value;
    char *endptr, buf[32], copy[32];

    /* The input is described by a pointer and a length (it is called this
     * way for fields stored inside zipmaps), so a nul terminator right
     * after the data can't be relied upon: parse a terminated private copy
     * instead. No integer that can be encoded has an empty or a 32+ chars
     * long representation, so those lengths are rejected immediately. */
    if (len == 0 || len >= sizeof(copy)) return 0;
    memcpy(copy,s,len);
    copy[len] = '\0';

    /* Check if it's possible to encode this value as a number */
    value = strtoll(copy, &endptr, 10);
    if (endptr[0] != '\0') return 0;
    ll2string(buf,32,value);

    /* If the number converted back into a string is not identical
     * then it's not possible to encode the string as integer */
    if (strlen(buf) != len || memcmp(buf,s,len)) return 0;

    return rdbEncodeInteger(value,enc);
}
3587
b1befe6a 3588static int rdbSaveLzfStringObject(FILE *fp, unsigned char *s, size_t len) {
3589 size_t comprlen, outlen;
774e3047 3590 unsigned char byte;
3591 void *out;
3592
3593 /* We require at least four bytes compression for this to be worth it */
b1befe6a 3594 if (len <= 4) return 0;
3595 outlen = len-4;
3a2694c4 3596 if ((out = zmalloc(outlen+1)) == NULL) return 0;
b1befe6a 3597 comprlen = lzf_compress(s, len, out, outlen);
774e3047 3598 if (comprlen == 0) {
88e85998 3599 zfree(out);
774e3047 3600 return 0;
3601 }
3602 /* Data compressed! Let's save it on disk */
3603 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
3604 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
3605 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
b1befe6a 3606 if (rdbSaveLen(fp,len) == -1) goto writeerr;
774e3047 3607 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 3608 zfree(out);
774e3047 3609 return comprlen;
3610
3611writeerr:
88e85998 3612 zfree(out);
774e3047 3613 return -1;
3614}
3615
e3566d4b 3616/* Save a string objet as [len][data] on disk. If the object is a string
3617 * representation of an integer value we try to safe it in a special form */
b1befe6a 3618static int rdbSaveRawString(FILE *fp, unsigned char *s, size_t len) {
e3566d4b 3619 int enclen;
10c43610 3620
774e3047 3621 /* Try integer encoding */
e3566d4b 3622 if (len <= 11) {
3623 unsigned char buf[5];
b1befe6a 3624 if ((enclen = rdbTryIntegerEncoding((char*)s,len,buf)) > 0) {
e3566d4b 3625 if (fwrite(buf,enclen,1,fp) == 0) return -1;
3626 return 0;
3627 }
3628 }
774e3047 3629
3630 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 3631 * aaaaaaaaaaaaaaaaaa so skip it */
121f70cf 3632 if (server.rdbcompression && len > 20) {
774e3047 3633 int retval;
3634
b1befe6a 3635 retval = rdbSaveLzfStringObject(fp,s,len);
774e3047 3636 if (retval == -1) return -1;
3637 if (retval > 0) return 0;
3638 /* retval == 0 means data can't be compressed, save the old way */
3639 }
3640
3641 /* Store verbatim */
10c43610 3642 if (rdbSaveLen(fp,len) == -1) return -1;
b1befe6a 3643 if (len && fwrite(s,len,1,fp) == 0) return -1;
10c43610 3644 return 0;
3645}
3646
942a3961 3647/* Like rdbSaveStringObjectRaw() but handle encoded objects */
3648static int rdbSaveStringObject(FILE *fp, robj *obj) {
3649 int retval;
942a3961 3650
32a66513 3651 /* Avoid to decode the object, then encode it again, if the
3652 * object is alrady integer encoded. */
3653 if (obj->encoding == REDIS_ENCODING_INT) {
3654 long val = (long) obj->ptr;
3655 unsigned char buf[5];
3656 int enclen;
3657
3658 if ((enclen = rdbEncodeInteger(val,buf)) > 0) {
3659 if (fwrite(buf,enclen,1,fp) == 0) return -1;
3660 return 0;
3661 }
3662 /* otherwise... fall throught and continue with the usual
3663 * code path. */
3664 }
3665
f2d9f50f 3666 /* Avoid incr/decr ref count business when possible.
3667 * This plays well with copy-on-write given that we are probably
3668 * in a child process (BGSAVE). Also this makes sure key objects
3669 * of swapped objects are not incRefCount-ed (an assert does not allow
3670 * this in order to avoid bugs) */
3671 if (obj->encoding != REDIS_ENCODING_RAW) {
996cb5f7 3672 obj = getDecodedObject(obj);
b1befe6a 3673 retval = rdbSaveRawString(fp,obj->ptr,sdslen(obj->ptr));
996cb5f7 3674 decrRefCount(obj);
3675 } else {
b1befe6a 3676 retval = rdbSaveRawString(fp,obj->ptr,sdslen(obj->ptr));
996cb5f7 3677 }
9d65a1bb 3678 return retval;
942a3961 3679}
3680
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
#if (DBL_MANT_DIG >= 52) && (LLONG_MAX == 0x7fffffffffffffffLL)
        /* Check if the float is in a safe range to be casted into a
         * long long. We are assuming that long long is 64 bit here.
         * Also we are assuming that there are no implementations around where
         * double has precision < 52 bit.
         *
         * Under this assumptions we test if a double is inside an interval
         * where casting to long long is safe. Then using two castings we
         * make sure the decimal part is zero. If all this is true we use
         * integer printing function that is much faster. */
        double min = -4503599627370495; /* -((2^52)-1) */
        double max = 4503599627370496; /* 2^52 */
        if (val > min && val < max && val == ((double)((long long)val)))
            /* The text starts at buf+1, so one byte less is available. */
            ll2string((char*)buf+1,sizeof(buf)-1,(long long)val);
        else
#endif
            snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
3723
06233c45 3724/* Save a Redis object. */
3725static int rdbSaveObject(FILE *fp, robj *o) {
3726 if (o->type == REDIS_STRING) {
3727 /* Save a string value */
3728 if (rdbSaveStringObject(fp,o) == -1) return -1;
3729 } else if (o->type == REDIS_LIST) {
3730 /* Save a list value */
3731 list *list = o->ptr;
c7df85a4 3732 listIter li;
06233c45 3733 listNode *ln;
3734
06233c45 3735 if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
c7df85a4 3736 listRewind(list,&li);
3737 while((ln = listNext(&li))) {
06233c45 3738 robj *eleobj = listNodeValue(ln);
3739
3740 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
3741 }
3742 } else if (o->type == REDIS_SET) {
3743 /* Save a set value */
3744 dict *set = o->ptr;
3745 dictIterator *di = dictGetIterator(set);
3746 dictEntry *de;
3747
3748 if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
3749 while((de = dictNext(di)) != NULL) {
3750 robj *eleobj = dictGetEntryKey(de);
3751
3752 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
3753 }
3754 dictReleaseIterator(di);
3755 } else if (o->type == REDIS_ZSET) {
3756 /* Save a set value */
3757 zset *zs = o->ptr;
3758 dictIterator *di = dictGetIterator(zs->dict);
3759 dictEntry *de;
3760
3761 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) return -1;
3762 while((de = dictNext(di)) != NULL) {
3763 robj *eleobj = dictGetEntryKey(de);
3764 double *score = dictGetEntryVal(de);
3765
3766 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
3767 if (rdbSaveDoubleValue(fp,*score) == -1) return -1;
3768 }
3769 dictReleaseIterator(di);
b1befe6a 3770 } else if (o->type == REDIS_HASH) {
3771 /* Save a hash value */
3772 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
3773 unsigned char *p = zipmapRewind(o->ptr);
3774 unsigned int count = zipmapLen(o->ptr);
3775 unsigned char *key, *val;
3776 unsigned int klen, vlen;
3777
3778 if (rdbSaveLen(fp,count) == -1) return -1;
3779 while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) {
3780 if (rdbSaveRawString(fp,key,klen) == -1) return -1;
3781 if (rdbSaveRawString(fp,val,vlen) == -1) return -1;
3782 }
3783 } else {
3784 dictIterator *di = dictGetIterator(o->ptr);
3785 dictEntry *de;
3786
3787 if (rdbSaveLen(fp,dictSize((dict*)o->ptr)) == -1) return -1;
3788 while((de = dictNext(di)) != NULL) {
3789 robj *key = dictGetEntryKey(de);
3790 robj *val = dictGetEntryVal(de);
3791
3792 if (rdbSaveStringObject(fp,key) == -1) return -1;
3793 if (rdbSaveStringObject(fp,val) == -1) return -1;
3794 }
3795 dictReleaseIterator(di);
3796 }
06233c45 3797 } else {
f83c6cb5 3798 redisPanic("Unknown object type");
06233c45 3799 }
3800 return 0;
3801}
3802
3803/* Return the length the object will have on disk if saved with
3804 * the rdbSaveObject() function. Currently we use a trick to get
3805 * this length with very little changes to the code. In the future
3806 * we could switch to a faster solution. */
b9bc0eef 3807static off_t rdbSavedObjectLen(robj *o, FILE *fp) {
3808 if (fp == NULL) fp = server.devnull;
06233c45 3809 rewind(fp);
3810 assert(rdbSaveObject(fp,o) != 1);
3811 return ftello(fp);
3812}
3813
06224fec 3814/* Return the number of pages required to save this object in the swap file */
b9bc0eef 3815static off_t rdbSavedObjectPages(robj *o, FILE *fp) {
3816 off_t bytes = rdbSavedObjectLen(o,fp);
e0a62c7f 3817
06224fec 3818 return (bytes+(server.vm_page_size-1))/server.vm_page_size;
3819}
3820
ed9b544e 3821/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 3822static int rdbSave(char *filename) {
ed9b544e 3823 dictIterator *di = NULL;
3824 dictEntry *de;
ed9b544e 3825 FILE *fp;
3826 char tmpfile[256];
3827 int j;
bb32ede5 3828 time_t now = time(NULL);
ed9b544e 3829
2316bb3b 3830 /* Wait for I/O therads to terminate, just in case this is a
3831 * foreground-saving, to avoid seeking the swap file descriptor at the
3832 * same time. */
3833 if (server.vm_enabled)
3834 waitEmptyIOJobsQueue();
3835
a3b21203 3836 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 3837 fp = fopen(tmpfile,"w");
3838 if (!fp) {
3839 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
3840 return REDIS_ERR;
3841 }
f78fd11b 3842 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 3843 for (j = 0; j < server.dbnum; j++) {
bb32ede5 3844 redisDb *db = server.db+j;
3845 dict *d = db->dict;
3305306f 3846 if (dictSize(d) == 0) continue;
ed9b544e 3847 di = dictGetIterator(d);
3848 if (!di) {
3849 fclose(fp);
3850 return REDIS_ERR;
3851 }
3852
3853 /* Write the SELECT DB opcode */
f78fd11b 3854 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
3855 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 3856
3857 /* Iterate this DB writing every entry */
3858 while((de = dictNext(di)) != NULL) {
09241813 3859 sds keystr = dictGetEntryKey(de);
3860 robj key, *o = dictGetEntryVal(de);
3861 time_t expiretime;
3862
3863 initStaticStringObject(key,keystr);
3864 expiretime = getExpire(db,&key);
bb32ede5 3865
3866 /* Save the expire time */
3867 if (expiretime != -1) {
3868 /* If this key is already expired skip it */
3869 if (expiretime < now) continue;
3870 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
3871 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
3872 }
7e69548d 3873 /* Save the key and associated value. This requires special
3874 * handling if the value is swapped out. */
560db612 3875 if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY ||
3876 o->storage == REDIS_VM_SWAPPING) {
7e69548d 3877 /* Save type, key, value */
3878 if (rdbSaveType(fp,o->type) == -1) goto werr;
09241813 3879 if (rdbSaveStringObject(fp,&key) == -1) goto werr;
7e69548d 3880 if (rdbSaveObject(fp,o) == -1) goto werr;
3881 } else {
996cb5f7 3882 /* REDIS_VM_SWAPPED or REDIS_VM_LOADING */
b9bc0eef 3883 robj *po;
7e69548d 3884 /* Get a preview of the object in memory */
560db612 3885 po = vmPreviewObject(o);
7e69548d 3886 /* Save type, key, value */
560db612 3887 if (rdbSaveType(fp,po->type) == -1) goto werr;
09241813 3888 if (rdbSaveStringObject(fp,&key) == -1) goto werr;
7e69548d 3889 if (rdbSaveObject(fp,po) == -1) goto werr;
3890 /* Remove the loaded object from memory */
3891 decrRefCount(po);
7e69548d 3892 }
ed9b544e 3893 }
3894 dictReleaseIterator(di);
3895 }
3896 /* EOF opcode */
f78fd11b 3897 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
3898
3899 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 3900 fflush(fp);
3901 fsync(fileno(fp));
3902 fclose(fp);
e0a62c7f 3903
ed9b544e 3904 /* Use RENAME to make sure the DB file is changed atomically only
3905 * if the generate DB file is ok. */
3906 if (rename(tmpfile,filename) == -1) {
325d1eb4 3907 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 3908 unlink(tmpfile);
3909 return REDIS_ERR;
3910 }
3911 redisLog(REDIS_NOTICE,"DB saved on disk");
3912 server.dirty = 0;
3913 server.lastsave = time(NULL);
3914 return REDIS_OK;
3915
3916werr:
3917 fclose(fp);
3918 unlink(tmpfile);
3919 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
3920 if (di) dictReleaseIterator(di);
3921 return REDIS_ERR;
3922}
3923
f78fd11b 3924static int rdbSaveBackground(char *filename) {
ed9b544e 3925 pid_t childpid;
3926
9d65a1bb 3927 if (server.bgsavechildpid != -1) return REDIS_ERR;
054e426d 3928 if (server.vm_enabled) waitEmptyIOJobsQueue();
ed9b544e 3929 if ((childpid = fork()) == 0) {
3930 /* Child */
054e426d 3931 if (server.vm_enabled) vmReopenSwapFile();
ed9b544e 3932 close(server.fd);
f78fd11b 3933 if (rdbSave(filename) == REDIS_OK) {
478c2c6f 3934 _exit(0);
ed9b544e 3935 } else {
478c2c6f 3936 _exit(1);
ed9b544e 3937 }
3938 } else {
3939 /* Parent */
5a7c647e 3940 if (childpid == -1) {
3941 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
3942 strerror(errno));
3943 return REDIS_ERR;
3944 }
ed9b544e 3945 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
9f3c422c 3946 server.bgsavechildpid = childpid;
884d4b39 3947 updateDictResizePolicy();
ed9b544e 3948 return REDIS_OK;
3949 }
3950 return REDIS_OK; /* unreached */
3951}
3952
/* Remove the temporary dump file "temp-<pid>.rdb" that belongs to the
 * process 'childpid'. Errors from unlink() are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
3959
/* Read a single type byte from 'fp'. Returns the byte (0..255), or -1 if
 * nothing could be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    return (fread(&type,1,1,fp) == 0) ? -1 : type;
}
3965
/* Read a time stored as a 32 bit signed integer in host byte order from
 * 'fp'. Returns the time, or -1 if it could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t secs;

    if (fread(&secs,4,1,fp) == 0) return -1;
    return (time_t) secs;
}
3971
e3566d4b 3972/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3973 * of this file for a description of how this are stored on disk.
3974 *
3975 * isencoded is set to 1 if the readed length is not actually a length but
3976 * an "encoding type", check the above comments for more info */
c78a8ccc 3977static uint32_t rdbLoadLen(FILE *fp, int *isencoded) {
f78fd11b 3978 unsigned char buf[2];
3979 uint32_t len;
c78a8ccc 3980 int type;
f78fd11b 3981
e3566d4b 3982 if (isencoded) *isencoded = 0;
c78a8ccc 3983 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
3984 type = (buf[0]&0xC0)>>6;
3985 if (type == REDIS_RDB_6BITLEN) {
3986 /* Read a 6 bit len */
3987 return buf[0]&0x3F;
3988 } else if (type == REDIS_RDB_ENCVAL) {
3989 /* Read a 6 bit len encoding type */
3990 if (isencoded) *isencoded = 1;
3991 return buf[0]&0x3F;
3992 } else if (type == REDIS_RDB_14BITLEN) {
3993 /* Read a 14 bit len */
3994 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
3995 return ((buf[0]&0x3F)<<8)|buf[1];
3996 } else {
3997 /* Read a 32 bit len */
f78fd11b 3998 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
3999 return ntohl(len);
f78fd11b 4000 }
f78fd11b 4001}
4002
ad30aa60 4003/* Load an integer-encoded object from file 'fp', with the specified
4004 * encoding type 'enctype'. If encode is true the function may return
4005 * an integer-encoded object as reply, otherwise the returned object
4006 * will always be encoded as a raw string. */
4007static robj *rdbLoadIntegerObject(FILE *fp, int enctype, int encode) {
e3566d4b 4008 unsigned char enc[4];
4009 long long val;
4010
4011 if (enctype == REDIS_RDB_ENC_INT8) {
4012 if (fread(enc,1,1,fp) == 0) return NULL;
4013 val = (signed char)enc[0];
4014 } else if (enctype == REDIS_RDB_ENC_INT16) {
4015 uint16_t v;
4016 if (fread(enc,2,1,fp) == 0) return NULL;
4017 v = enc[0]|(enc[1]<<8);
4018 val = (int16_t)v;
4019 } else if (enctype == REDIS_RDB_ENC_INT32) {
4020 uint32_t v;
4021 if (fread(enc,4,1,fp) == 0) return NULL;
4022 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
4023 val = (int32_t)v;
4024 } else {
4025 val = 0; /* anti-warning */
f83c6cb5 4026 redisPanic("Unknown RDB integer encoding type");
e3566d4b 4027 }
ad30aa60 4028 if (encode)
4029 return createStringObjectFromLongLong(val);
4030 else
4031 return createObject(REDIS_STRING,sdsfromlonglong(val));
e3566d4b 4032}
4033
c78a8ccc 4034static robj *rdbLoadLzfStringObject(FILE*fp) {
88e85998 4035 unsigned int len, clen;
4036 unsigned char *c = NULL;
4037 sds val = NULL;
4038
c78a8ccc 4039 if ((clen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
4040 if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
88e85998 4041 if ((c = zmalloc(clen)) == NULL) goto err;
4042 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
4043 if (fread(c,clen,1,fp) == 0) goto err;
4044 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 4045 zfree(c);
88e85998 4046 return createObject(REDIS_STRING,val);
4047err:
4048 zfree(c);
4049 sdsfree(val);
4050 return NULL;
4051}
4052
ad30aa60 4053static robj *rdbGenericLoadStringObject(FILE*fp, int encode) {
e3566d4b 4054 int isencoded;
4055 uint32_t len;
f78fd11b 4056 sds val;
4057
c78a8ccc 4058 len = rdbLoadLen(fp,&isencoded);
e3566d4b 4059 if (isencoded) {
4060 switch(len) {
4061 case REDIS_RDB_ENC_INT8:
4062 case REDIS_RDB_ENC_INT16:
4063 case REDIS_RDB_ENC_INT32:
ad30aa60 4064 return rdbLoadIntegerObject(fp,len,encode);
88e85998 4065 case REDIS_RDB_ENC_LZF:
bdcb92f2 4066 return rdbLoadLzfStringObject(fp);
e3566d4b 4067 default:
f83c6cb5 4068 redisPanic("Unknown RDB encoding type");
e3566d4b 4069 }
4070 }
4071
f78fd11b 4072 if (len == REDIS_RDB_LENERR) return NULL;
4073 val = sdsnewlen(NULL,len);
4074 if (len && fread(val,len,1,fp) == 0) {
4075 sdsfree(val);
4076 return NULL;
4077 }
bdcb92f2 4078 return createObject(REDIS_STRING,val);
f78fd11b 4079}
4080
ad30aa60 4081static robj *rdbLoadStringObject(FILE *fp) {
4082 return rdbGenericLoadStringObject(fp,0);
4083}
4084
4085static robj *rdbLoadEncodedStringObject(FILE *fp) {
4086 return rdbGenericLoadStringObject(fp,1);
4087}
4088
a7866db6 4089/* For information about double serialization check rdbSaveDoubleValue() */
4090static int rdbLoadDoubleValue(FILE *fp, double *val) {
4091 char buf[128];
4092 unsigned char len;
4093
4094 if (fread(&len,1,1,fp) == 0) return -1;
4095 switch(len) {
4096 case 255: *val = R_NegInf; return 0;
4097 case 254: *val = R_PosInf; return 0;
4098 case 253: *val = R_Nan; return 0;
4099 default:
4100 if (fread(buf,len,1,fp) == 0) return -1;
231d758e 4101 buf[len] = '\0';
a7866db6 4102 sscanf(buf, "%lg", val);
4103 return 0;
4104 }
4105}
4106
c78a8ccc 4107/* Load a Redis object of the specified type from the specified file.
4108 * On success a newly allocated object is returned, otherwise NULL. */
4109static robj *rdbLoadObject(int type, FILE *fp) {
4110 robj *o;
4111
bcd11906 4112 redisLog(REDIS_DEBUG,"LOADING OBJECT %d (at %d)\n",type,ftell(fp));
c78a8ccc 4113 if (type == REDIS_STRING) {
4114 /* Read string value */
ad30aa60 4115 if ((o = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
05df7621 4116 o = tryObjectEncoding(o);
c78a8ccc 4117 } else if (type == REDIS_LIST || type == REDIS_SET) {
4118 /* Read list/set value */
4119 uint32_t listlen;
4120
4121 if ((listlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
4122 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
3c68de9b 4123 /* It's faster to expand the dict to the right size asap in order
4124 * to avoid rehashing */
4125 if (type == REDIS_SET && listlen > DICT_HT_INITIAL_SIZE)
4126 dictExpand(o->ptr,listlen);
c78a8ccc 4127 /* Load every single element of the list/set */
4128 while(listlen--) {
4129 robj *ele;
4130
ad30aa60 4131 if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
05df7621 4132 ele = tryObjectEncoding(ele);
c78a8ccc 4133 if (type == REDIS_LIST) {
4134 listAddNodeTail((list*)o->ptr,ele);
4135 } else {
4136 dictAdd((dict*)o->ptr,ele,NULL);
4137 }
4138 }
4139 } else if (type == REDIS_ZSET) {
4140 /* Read list/set value */
ada386b2 4141 size_t zsetlen;
c78a8ccc 4142 zset *zs;
4143
4144 if ((zsetlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
4145 o = createZsetObject();
4146 zs = o->ptr;
4147 /* Load every single element of the list/set */
4148 while(zsetlen--) {
4149 robj *ele;
4150 double *score = zmalloc(sizeof(double));
4151
ad30aa60 4152 if ((ele = rdbLoadEncodedStringObject(fp)) == NULL) return NULL;
05df7621 4153 ele = tryObjectEncoding(ele);
c78a8ccc 4154 if (rdbLoadDoubleValue(fp,score) == -1) return NULL;
4155 dictAdd(zs->dict,ele,score);
4156 zslInsert(zs->zsl,*score,ele);
4157 incrRefCount(ele); /* added to skiplist */
4158 }
ada386b2 4159 } else if (type == REDIS_HASH) {
4160 size_t hashlen;
4161
4162 if ((hashlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
4163 o = createHashObject();
4164 /* Too many entries? Use an hash table. */
4165 if (hashlen > server.hash_max_zipmap_entries)
4166 convertToRealHash(o);
4167 /* Load every key/value, then set it into the zipmap or hash
4168 * table, as needed. */
4169 while(hashlen--) {
4170 robj *key, *val;
4171
4172 if ((key = rdbLoadStringObject(fp)) == NULL) return NULL;
4173 if ((val = rdbLoadStringObject(fp)) == NULL) return NULL;
4174 /* If we are using a zipmap and there are too big values
4175 * the object is converted to real hash table encoding. */
4176 if (o->encoding != REDIS_ENCODING_HT &&
4177 (sdslen(key->ptr) > server.hash_max_zipmap_value ||
4178 sdslen(val->ptr) > server.hash_max_zipmap_value))
4179 {
4180 convertToRealHash(o);
4181 }
4182
4183 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
4184 unsigned char *zm = o->ptr;
4185
4186 zm = zipmapSet(zm,key->ptr,sdslen(key->ptr),
4187 val->ptr,sdslen(val->ptr),NULL);
4188 o->ptr = zm;
4189 decrRefCount(key);
4190 decrRefCount(val);
4191 } else {
05df7621 4192 key = tryObjectEncoding(key);
4193 val = tryObjectEncoding(val);
ada386b2 4194 dictAdd((dict*)o->ptr,key,val);
ada386b2 4195 }
4196 }
c78a8ccc 4197 } else {
f83c6cb5 4198 redisPanic("Unknown object type");
c78a8ccc 4199 }
4200 return o;
4201}
4202
f78fd11b 4203static int rdbLoad(char *filename) {
ed9b544e 4204 FILE *fp;
f78fd11b 4205 uint32_t dbid;
bb32ede5 4206 int type, retval, rdbver;
585af7e2 4207 int swap_all_values = 0;
bb32ede5 4208 redisDb *db = server.db+0;
f78fd11b 4209 char buf[1024];
242a64f3 4210 time_t expiretime, now = time(NULL);
bb32ede5 4211
ed9b544e 4212 fp = fopen(filename,"r");
4213 if (!fp) return REDIS_ERR;
4214 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 4215 buf[9] = '\0';
4216 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 4217 fclose(fp);
4218 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
4219 return REDIS_ERR;
4220 }
f78fd11b 4221 rdbver = atoi(buf+5);
c78a8ccc 4222 if (rdbver != 1) {
f78fd11b 4223 fclose(fp);
4224 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
4225 return REDIS_ERR;
4226 }
ed9b544e 4227 while(1) {
585af7e2 4228 robj *key, *val;
7e02fe32 4229 int force_swapout;
ed9b544e 4230
585af7e2 4231 expiretime = -1;
ed9b544e 4232 /* Read type. */
f78fd11b 4233 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 4234 if (type == REDIS_EXPIRETIME) {
4235 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
4236 /* We read the time so we need to read the object type again */
4237 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
4238 }
ed9b544e 4239 if (type == REDIS_EOF) break;
4240 /* Handle SELECT DB opcode as a special case */
4241 if (type == REDIS_SELECTDB) {
c78a8ccc 4242 if ((dbid = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR)
e3566d4b 4243 goto eoferr;
ed9b544e 4244 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 4245 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 4246 exit(1);
4247 }
bb32ede5 4248 db = server.db+dbid;
ed9b544e 4249 continue;
4250 }
4251 /* Read key */
585af7e2 4252 if ((key = rdbLoadStringObject(fp)) == NULL) goto eoferr;
c78a8ccc 4253 /* Read value */
585af7e2 4254 if ((val = rdbLoadObject(type,fp)) == NULL) goto eoferr;
89e689c5 4255 /* Check if the key already expired */
4256 if (expiretime != -1 && expiretime < now) {
4257 decrRefCount(key);
4258 decrRefCount(val);
4259 continue;
4260 }
ed9b544e 4261 /* Add the new object in the hash table */
09241813 4262 retval = dbAdd(db,key,val);
4263 if (retval == REDIS_ERR) {
585af7e2 4264 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", key->ptr);
ed9b544e 4265 exit(1);
4266 }
bb32ede5 4267 /* Set the expire time if needed */
89e689c5 4268 if (expiretime != -1) setExpire(db,key,expiretime);
242a64f3 4269
b492cf00 4270 /* Handle swapping while loading big datasets when VM is on */
242a64f3 4271
4272 /* If we detecter we are hopeless about fitting something in memory
4273 * we just swap every new key on disk. Directly...
4274 * Note that's important to check for this condition before resorting
4275 * to random sampling, otherwise we may try to swap already
4276 * swapped keys. */
585af7e2 4277 if (swap_all_values) {
09241813 4278 dictEntry *de = dictFind(db->dict,key->ptr);
242a64f3 4279
4280 /* de may be NULL since the key already expired */
4281 if (de) {
560db612 4282 vmpointer *vp;
585af7e2 4283 val = dictGetEntryVal(de);
242a64f3 4284
560db612 4285 if (val->refcount == 1 &&
4286 (vp = vmSwapObjectBlocking(val)) != NULL)
4287 dictGetEntryVal(de) = vp;
242a64f3 4288 }
09241813 4289 decrRefCount(key);
242a64f3 4290 continue;
4291 }
09241813 4292 decrRefCount(key);
242a64f3 4293
a89b7013 4294 /* Flush data on disk once 32 MB of additional RAM are used... */
7e02fe32 4295 force_swapout = 0;
4296 if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32)
4297 force_swapout = 1;
4298
242a64f3 4299 /* If we have still some hope of having some value fitting memory
4300 * then we try random sampling. */
7e02fe32 4301 if (!swap_all_values && server.vm_enabled && force_swapout) {
b492cf00 4302 while (zmalloc_used_memory() > server.vm_max_memory) {
a69a0c9c 4303 if (vmSwapOneObjectBlocking() == REDIS_ERR) break;
b492cf00 4304 }
242a64f3 4305 if (zmalloc_used_memory() > server.vm_max_memory)
585af7e2 4306 swap_all_values = 1; /* We are already using too much mem */
b492cf00 4307 }
ed9b544e 4308 }
4309 fclose(fp);
4310 return REDIS_OK;
4311
4312eoferr: /* unexpected end of file is handled here with a fatal exit */
f80dff62 4313 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
ed9b544e 4314 exit(1);
4315 return REDIS_ERR; /* Just to avoid warning */
4316}
4317
b58ba105 4318/*================================== Shutdown =============================== */
fab43727 4319static int prepareForShutdown() {
b58ba105
AM
4320 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
4321 /* Kill the saving child if there is a background saving in progress.
4322 We want to avoid race conditions, for instance our saving child may
4323 overwrite the synchronous saving did by SHUTDOWN. */
4324 if (server.bgsavechildpid != -1) {
4325 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
4326 kill(server.bgsavechildpid,SIGKILL);
4327 rdbRemoveTempFile(server.bgsavechildpid);
4328 }
4329 if (server.appendonly) {
4330 /* Append only file: fsync() the AOF and exit */
b0bd87f6 4331 aof_fsync(server.appendfd);
b58ba105 4332 if (server.vm_enabled) unlink(server.vm_swap_file);
b58ba105
AM
4333 } else {
4334 /* Snapshotting. Perform a SYNC SAVE and exit */
4335 if (rdbSave(server.dbfilename) == REDIS_OK) {
4336 if (server.daemonize)
4337 unlink(server.pidfile);
4338 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
b58ba105
AM
4339 } else {
4340 /* Ooops.. error saving! The best we can do is to continue
4341 * operating. Note that if there was a background saving process,
4342 * in the next cron() Redis will be notified that the background
4343 * saving aborted, handling special stuff like slaves pending for
4344 * synchronization... */
4345 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
fab43727 4346 return REDIS_ERR;
b58ba105
AM
4347 }
4348 }
8513a757 4349 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
fab43727 4350 return REDIS_OK;
b58ba105
AM
4351}
4352
ed9b544e 4353/*================================== Commands =============================== */
4354
abcb223e 4355static void authCommand(redisClient *c) {
2e77c2ee 4356 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
4357 c->authenticated = 1;
4358 addReply(c,shared.ok);
4359 } else {
4360 c->authenticated = 0;
fa4c0aba 4361 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
4362 }
4363}
4364
ed9b544e 4365static void pingCommand(redisClient *c) {
4366 addReply(c,shared.pong);
4367}
4368
4369static void echoCommand(redisClient *c) {
dd88747b 4370 addReplyBulk(c,c->argv[1]);
ed9b544e 4371}
4372
4373/*=================================== Strings =============================== */
4374
526d00a5 4375static void setGenericCommand(redisClient *c, int nx, robj *key, robj *val, robj *expire) {
ed9b544e 4376 int retval;
10ce1276 4377 long seconds = 0; /* initialized to avoid an harmness warning */
ed9b544e 4378
526d00a5 4379 if (expire) {
4380 if (getLongFromObjectOrReply(c, expire, &seconds, NULL) != REDIS_OK)
4381 return;
4382 if (seconds <= 0) {
4383 addReplySds(c,sdsnew("-ERR invalid expire time in SETEX\r\n"));
4384 return;
4385 }
4386 }
4387
37ab76c9 4388 touchWatchedKey(c->db,key);
526d00a5 4389 if (nx) deleteIfVolatile(c->db,key);
09241813 4390 retval = dbAdd(c->db,key,val);
4391 if (retval == REDIS_ERR) {
ed9b544e 4392 if (!nx) {
09241813 4393 dbReplace(c->db,key,val);
526d00a5 4394 incrRefCount(val);
ed9b544e 4395 } else {
c937aa89 4396 addReply(c,shared.czero);
ed9b544e 4397 return;
4398 }
4399 } else {
526d00a5 4400 incrRefCount(val);
ed9b544e 4401 }
4402 server.dirty++;
526d00a5 4403 removeExpire(c->db,key);
4404 if (expire) setExpire(c->db,key,time(NULL)+seconds);
c937aa89 4405 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 4406}
4407
4408static void setCommand(redisClient *c) {
526d00a5 4409 setGenericCommand(c,0,c->argv[1],c->argv[2],NULL);
ed9b544e 4410}
4411
4412static void setnxCommand(redisClient *c) {
526d00a5 4413 setGenericCommand(c,1,c->argv[1],c->argv[2],NULL);
4414}
4415
4416static void setexCommand(redisClient *c) {
4417 setGenericCommand(c,0,c->argv[1],c->argv[3],c->argv[2]);
ed9b544e 4418}
4419
322fc7d8 4420static int getGenericCommand(redisClient *c) {
dd88747b 4421 robj *o;
e0a62c7f 4422
dd88747b 4423 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL)
322fc7d8 4424 return REDIS_OK;
dd88747b 4425
4426 if (o->type != REDIS_STRING) {
4427 addReply(c,shared.wrongtypeerr);
4428 return REDIS_ERR;
ed9b544e 4429 } else {
dd88747b 4430 addReplyBulk(c,o);
4431 return REDIS_OK;
ed9b544e 4432 }
4433}
4434
322fc7d8 4435static void getCommand(redisClient *c) {
4436 getGenericCommand(c);
4437}
4438
f6b141c5 4439static void getsetCommand(redisClient *c) {
322fc7d8 4440 if (getGenericCommand(c) == REDIS_ERR) return;
09241813 4441 dbReplace(c->db,c->argv[1],c->argv[2]);
a431eb74 4442 incrRefCount(c->argv[2]);
4443 server.dirty++;
4444 removeExpire(c->db,c->argv[1]);
4445}
4446
70003d28 4447static void mgetCommand(redisClient *c) {
70003d28 4448 int j;
e0a62c7f 4449
c937aa89 4450 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 4451 for (j = 1; j < c->argc; j++) {
3305306f 4452 robj *o = lookupKeyRead(c->db,c->argv[j]);
4453 if (o == NULL) {
c937aa89 4454 addReply(c,shared.nullbulk);
70003d28 4455 } else {
70003d28 4456 if (o->type != REDIS_STRING) {
c937aa89 4457 addReply(c,shared.nullbulk);
70003d28 4458 } else {
dd88747b 4459 addReplyBulk(c,o);
70003d28 4460 }
4461 }
4462 }
4463}
4464
6c446631 4465static void msetGenericCommand(redisClient *c, int nx) {
906573e7 4466 int j, busykeys = 0;
6c446631 4467
4468 if ((c->argc % 2) == 0) {
454d4e43 4469 addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
6c446631 4470 return;
4471 }
4472 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4473 * set nothing at all if at least one already key exists. */
4474 if (nx) {
4475 for (j = 1; j < c->argc; j += 2) {
906573e7 4476 if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
4477 busykeys++;
6c446631 4478 }
4479 }
4480 }
906573e7 4481 if (busykeys) {
4482 addReply(c, shared.czero);
4483 return;
4484 }
6c446631 4485
4486 for (j = 1; j < c->argc; j += 2) {
05df7621 4487 c->argv[j+1] = tryObjectEncoding(c->argv[j+1]);
09241813 4488 dbReplace(c->db,c->argv[j],c->argv[j+1]);
4489 incrRefCount(c->argv[j+1]);
6c446631 4490 removeExpire(c->db,c->argv[j]);
4491 }
4492 server.dirty += (c->argc-1)/2;
4493 addReply(c, nx ? shared.cone : shared.ok);
4494}
4495
4496static void msetCommand(redisClient *c) {
4497 msetGenericCommand(c,0);
4498}
4499
4500static void msetnxCommand(redisClient *c) {
4501 msetGenericCommand(c,1);
4502}
4503
d68ed120 4504static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 4505 long long value;
ed9b544e 4506 robj *o;
e0a62c7f 4507
3305306f 4508 o = lookupKeyWrite(c->db,c->argv[1]);
6485f293
PN
4509 if (o != NULL && checkType(c,o,REDIS_STRING)) return;
4510 if (getLongLongFromObjectOrReply(c,o,&value,NULL) != REDIS_OK) return;
ed9b544e 4511
4512 value += incr;
d6f4c262 4513 o = createStringObjectFromLongLong(value);
09241813 4514 dbReplace(c->db,c->argv[1],o);
ed9b544e 4515 server.dirty++;
c937aa89 4516 addReply(c,shared.colon);
ed9b544e 4517 addReply(c,o);
4518 addReply(c,shared.crlf);
4519}
4520
4521static void incrCommand(redisClient *c) {
a4d1ba9a 4522 incrDecrCommand(c,1);
ed9b544e 4523}
4524
4525static void decrCommand(redisClient *c) {
a4d1ba9a 4526 incrDecrCommand(c,-1);
ed9b544e 4527}
4528
4529static void incrbyCommand(redisClient *c) {
bbe025e0
AM
4530 long long incr;
4531
bd79a6bd 4532 if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
a4d1ba9a 4533 incrDecrCommand(c,incr);
ed9b544e 4534}
4535
4536static void decrbyCommand(redisClient *c) {
bbe025e0
AM
4537 long long incr;
4538
bd79a6bd 4539 if (getLongLongFromObjectOrReply(c, c->argv[2], &incr, NULL) != REDIS_OK) return;
a4d1ba9a 4540 incrDecrCommand(c,-incr);
ed9b544e 4541}
4542
4b00bebd 4543static void appendCommand(redisClient *c) {
4544 int retval;
4545 size_t totlen;
4546 robj *o;
4547
4548 o = lookupKeyWrite(c->db,c->argv[1]);
4549 if (o == NULL) {
4550 /* Create the key */
09241813 4551 retval = dbAdd(c->db,c->argv[1],c->argv[2]);
4b00bebd 4552 incrRefCount(c->argv[2]);
4553 totlen = stringObjectLen(c->argv[2]);
4554 } else {
4b00bebd 4555 if (o->type != REDIS_STRING) {
4556 addReply(c,shared.wrongtypeerr);
4557 return;
4558 }
4559 /* If the object is specially encoded or shared we have to make
4560 * a copy */
4561 if (o->refcount != 1 || o->encoding != REDIS_ENCODING_RAW) {
4562 robj *decoded = getDecodedObject(o);
4563
4564 o = createStringObject(decoded->ptr, sdslen(decoded->ptr));
4565 decrRefCount(decoded);
09241813 4566 dbReplace(c->db,c->argv[1],o);
4b00bebd 4567 }
4568 /* APPEND! */
4569 if (c->argv[2]->encoding == REDIS_ENCODING_RAW) {
4570 o->ptr = sdscatlen(o->ptr,
4571 c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
4572 } else {
4573 o->ptr = sdscatprintf(o->ptr, "%ld",
4574 (unsigned long) c->argv[2]->ptr);
4575 }
4576 totlen = sdslen(o->ptr);
4577 }
4578 server.dirty++;
4579 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",(unsigned long)totlen));
4580}
4581
39191553 4582static void substrCommand(redisClient *c) {
4583 robj *o;
4584 long start = atoi(c->argv[2]->ptr);
4585 long end = atoi(c->argv[3]->ptr);
dd88747b 4586 size_t rangelen, strlen;
4587 sds range;
39191553 4588
dd88747b 4589 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
4590 checkType(c,o,REDIS_STRING)) return;
39191553 4591
dd88747b 4592 o = getDecodedObject(o);
4593 strlen = sdslen(o->ptr);
8fe7fad7 4594
dd88747b 4595 /* convert negative indexes */
4596 if (start < 0) start = strlen+start;
4597 if (end < 0) end = strlen+end;
4598 if (start < 0) start = 0;
4599 if (end < 0) end = 0;
39191553 4600
dd88747b 4601 /* indexes sanity checks */
4602 if (start > end || (size_t)start >= strlen) {
4603 /* Out of range start or start > end result in null reply */
4604 addReply(c,shared.nullbulk);
4605 decrRefCount(o);
4606 return;
39191553 4607 }
dd88747b 4608 if ((size_t)end >= strlen) end = strlen-1;
4609 rangelen = (end-start)+1;
4610
4611 /* Return the result */
4612 addReplySds(c,sdscatprintf(sdsempty(),"$%zu\r\n",rangelen));
4613 range = sdsnewlen((char*)o->ptr+start,rangelen);
4614 addReplySds(c,range);
4615 addReply(c,shared.crlf);
4616 decrRefCount(o);
39191553 4617}
4618
ed9b544e 4619/* ========================= Type agnostic commands ========================= */
4620
4621static void delCommand(redisClient *c) {
5109cdff 4622 int deleted = 0, j;
4623
4624 for (j = 1; j < c->argc; j++) {
09241813 4625 if (dbDelete(c->db,c->argv[j])) {
37ab76c9 4626 touchWatchedKey(c->db,c->argv[j]);
5109cdff 4627 server.dirty++;
4628 deleted++;
4629 }
4630 }
482b672d 4631 addReplyLongLong(c,deleted);
ed9b544e 4632}
4633
4634static void existsCommand(redisClient *c) {
f4f06efc 4635 expireIfNeeded(c->db,c->argv[1]);
09241813 4636 if (dbExists(c->db,c->argv[1])) {
f4f06efc
PN
4637 addReply(c, shared.cone);
4638 } else {
4639 addReply(c, shared.czero);
4640 }
ed9b544e 4641}
4642
4643static void selectCommand(redisClient *c) {
4644 int id = atoi(c->argv[1]->ptr);
e0a62c7f 4645
ed9b544e 4646 if (selectDb(c,id) == REDIS_ERR) {
774e3047 4647 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 4648 } else {
4649 addReply(c,shared.ok);
4650 }
4651}
4652
4653static void randomkeyCommand(redisClient *c) {
dc4be23e 4654 robj *key;
e0a62c7f 4655
09241813 4656 if ((key = dbRandomKey(c->db)) == NULL) {
dc4be23e 4657 addReply(c,shared.nullbulk);
4658 return;
4659 }
4660
09241813 4661 addReplyBulk(c,key);
4662 decrRefCount(key);
ed9b544e 4663}
4664
4665static void keysCommand(redisClient *c) {
4666 dictIterator *di;
4667 dictEntry *de;
4668 sds pattern = c->argv[1]->ptr;
4669 int plen = sdslen(pattern);
a3f9eec2 4670 unsigned long numkeys = 0;
ed9b544e 4671 robj *lenobj = createObject(REDIS_STRING,NULL);
4672
3305306f 4673 di = dictGetIterator(c->db->dict);
ed9b544e 4674 addReply(c,lenobj);
4675 decrRefCount(lenobj);
4676 while((de = dictNext(di)) != NULL) {
09241813 4677 sds key = dictGetEntryKey(de);
4678 robj *keyobj;
3305306f 4679
ed9b544e 4680 if ((pattern[0] == '*' && pattern[1] == '\0') ||
4681 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
09241813 4682 keyobj = createStringObject(key,sdslen(key));
3305306f 4683 if (expireIfNeeded(c->db,keyobj) == 0) {
dd88747b 4684 addReplyBulk(c,keyobj);
3305306f 4685 numkeys++;
3305306f 4686 }
09241813 4687 decrRefCount(keyobj);
ed9b544e 4688 }
4689 }
4690 dictReleaseIterator(di);
a3f9eec2 4691 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",numkeys);
ed9b544e 4692}
4693
4694static void dbsizeCommand(redisClient *c) {
4695 addReplySds(c,
3305306f 4696 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 4697}
4698
4699static void lastsaveCommand(redisClient *c) {
4700 addReplySds(c,
c937aa89 4701 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 4702}
4703
4704static void typeCommand(redisClient *c) {
3305306f 4705 robj *o;
ed9b544e 4706 char *type;
3305306f 4707
4708 o = lookupKeyRead(c->db,c->argv[1]);
4709 if (o == NULL) {
c937aa89 4710 type = "+none";
ed9b544e 4711 } else {
ed9b544e 4712 switch(o->type) {
c937aa89 4713 case REDIS_STRING: type = "+string"; break;
4714 case REDIS_LIST: type = "+list"; break;
4715 case REDIS_SET: type = "+set"; break;
412a8bce 4716 case REDIS_ZSET: type = "+zset"; break;
ada386b2 4717 case REDIS_HASH: type = "+hash"; break;
4718 default: type = "+unknown"; break;
ed9b544e 4719 }
4720 }
4721 addReplySds(c,sdsnew(type));
4722 addReply(c,shared.crlf);
4723}
4724
4725static void saveCommand(redisClient *c) {
9d65a1bb 4726 if (server.bgsavechildpid != -1) {
05557f6d 4727 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
4728 return;
4729 }
f78fd11b 4730 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 4731 addReply(c,shared.ok);
4732 } else {
4733 addReply(c,shared.err);
4734 }
4735}
4736
4737static void bgsaveCommand(redisClient *c) {
9d65a1bb 4738 if (server.bgsavechildpid != -1) {
ed9b544e 4739 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
4740 return;
4741 }
f78fd11b 4742 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
49b99ab4 4743 char *status = "+Background saving started\r\n";
4744 addReplySds(c,sdsnew(status));
ed9b544e 4745 } else {
4746 addReply(c,shared.err);
4747 }
4748}
4749
4750static void shutdownCommand(redisClient *c) {
fab43727 4751 if (prepareForShutdown() == REDIS_OK)
4752 exit(0);
4753 addReplySds(c, sdsnew("-ERR Errors trying to SHUTDOWN. Check logs.\r\n"));
ed9b544e 4754}
4755
4756static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 4757 robj *o;
4758
4759 /* To use the same key as src and dst is probably an error */
4760 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 4761 addReply(c,shared.sameobjecterr);
ed9b544e 4762 return;
4763 }
4764
dd88747b 4765 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL)
ed9b544e 4766 return;
dd88747b 4767
ed9b544e 4768 incrRefCount(o);
3305306f 4769 deleteIfVolatile(c->db,c->argv[2]);
09241813 4770 if (dbAdd(c->db,c->argv[2],o) == REDIS_ERR) {
ed9b544e 4771 if (nx) {
4772 decrRefCount(o);
c937aa89 4773 addReply(c,shared.czero);
ed9b544e 4774 return;
4775 }
09241813 4776 dbReplace(c->db,c->argv[2],o);
ed9b544e 4777 }
09241813 4778 dbDelete(c->db,c->argv[1]);
b167f877 4779 touchWatchedKey(c->db,c->argv[2]);
ed9b544e 4780 server.dirty++;
c937aa89 4781 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 4782}
4783
4784static void renameCommand(redisClient *c) {
4785 renameGenericCommand(c,0);
4786}
4787
4788static void renamenxCommand(redisClient *c) {
4789 renameGenericCommand(c,1);
4790}
4791
4792static void moveCommand(redisClient *c) {
3305306f 4793 robj *o;
4794 redisDb *src, *dst;
ed9b544e 4795 int srcid;
4796
4797 /* Obtain source and target DB pointers */
3305306f 4798 src = c->db;
4799 srcid = c->db->id;
ed9b544e 4800 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 4801 addReply(c,shared.outofrangeerr);
ed9b544e 4802 return;
4803 }
3305306f 4804 dst = c->db;
4805 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 4806
4807 /* If the user is moving using as target the same
4808 * DB as the source DB it is probably an error. */
4809 if (src == dst) {
c937aa89 4810 addReply(c,shared.sameobjecterr);
ed9b544e 4811 return;
4812 }
4813
4814 /* Check if the element exists and get a reference */
3305306f 4815 o = lookupKeyWrite(c->db,c->argv[1]);
4816 if (!o) {
c937aa89 4817 addReply(c,shared.czero);
ed9b544e 4818 return;
4819 }
4820
4821 /* Try to add the element to the target DB */
3305306f 4822 deleteIfVolatile(dst,c->argv[1]);
09241813 4823 if (dbAdd(dst,c->argv[1],o) == REDIS_ERR) {
c937aa89 4824 addReply(c,shared.czero);
ed9b544e 4825 return;
4826 }
ed9b544e 4827 incrRefCount(o);
4828
4829 /* OK! key moved, free the entry in the source DB */
09241813 4830 dbDelete(src,c->argv[1]);
ed9b544e 4831 server.dirty++;
c937aa89 4832 addReply(c,shared.cone);
ed9b544e 4833}
4834
4835/* =================================== Lists ================================ */
4836static void pushGenericCommand(redisClient *c, int where) {
4837 robj *lobj;
ed9b544e 4838 list *list;
3305306f 4839
4840 lobj = lookupKeyWrite(c->db,c->argv[1]);
4841 if (lobj == NULL) {
95242ab5 4842 if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
520b5a33 4843 addReply(c,shared.cone);
95242ab5 4844 return;
4845 }
ed9b544e 4846 lobj = createListObject();
4847 list = lobj->ptr;
4848 if (where == REDIS_HEAD) {
6b47e12e 4849 listAddNodeHead(list,c->argv[2]);
ed9b544e 4850 } else {
6b47e12e 4851 listAddNodeTail(list,c->argv[2]);
ed9b544e 4852 }
ed9b544e 4853 incrRefCount(c->argv[2]);
09241813 4854 dbAdd(c->db,c->argv[1],lobj);
ed9b544e 4855 } else {
ed9b544e 4856 if (lobj->type != REDIS_LIST) {
4857 addReply(c,shared.wrongtypeerr);
4858 return;
4859 }
95242ab5 4860 if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
520b5a33 4861 addReply(c,shared.cone);
95242ab5 4862 return;
4863 }
ed9b544e 4864 list = lobj->ptr;
4865 if (where == REDIS_HEAD) {
6b47e12e 4866 listAddNodeHead(list,c->argv[2]);
ed9b544e 4867 } else {
6b47e12e 4868 listAddNodeTail(list,c->argv[2]);
ed9b544e 4869 }
4870 incrRefCount(c->argv[2]);
4871 }
4872 server.dirty++;
482b672d 4873 addReplyLongLong(c,listLength(list));
ed9b544e 4874}
4875
4876static void lpushCommand(redisClient *c) {
4877 pushGenericCommand(c,REDIS_HEAD);
4878}
4879
4880static void rpushCommand(redisClient *c) {
4881 pushGenericCommand(c,REDIS_TAIL);
4882}
4883
4884static void llenCommand(redisClient *c) {
3305306f 4885 robj *o;
ed9b544e 4886 list *l;
dd88747b 4887
4888 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
4889 checkType(c,o,REDIS_LIST)) return;
e0a62c7f 4890
dd88747b 4891 l = o->ptr;
4892 addReplyUlong(c,listLength(l));
ed9b544e 4893}
4894
4895static void lindexCommand(redisClient *c) {
3305306f 4896 robj *o;
ed9b544e 4897 int index = atoi(c->argv[2]->ptr);
dd88747b 4898 list *list;
4899 listNode *ln;
4900
4901 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
4902 checkType(c,o,REDIS_LIST)) return;
4903 list = o->ptr;
4904
4905 ln = listIndex(list, index);
4906 if (ln == NULL) {
c937aa89 4907 addReply(c,shared.nullbulk);
ed9b544e 4908 } else {
dd88747b 4909 robj *ele = listNodeValue(ln);
4910 addReplyBulk(c,ele);
ed9b544e 4911 }
4912}
4913
4914static void lsetCommand(redisClient *c) {
3305306f 4915 robj *o;
ed9b544e 4916 int index = atoi(c->argv[2]->ptr);
dd88747b 4917 list *list;
4918 listNode *ln;
4919
4920 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nokeyerr)) == NULL ||
4921 checkType(c,o,REDIS_LIST)) return;
4922 list = o->ptr;
4923
4924 ln = listIndex(list, index);
4925 if (ln == NULL) {
4926 addReply(c,shared.outofrangeerr);
ed9b544e 4927 } else {
dd88747b 4928 robj *ele = listNodeValue(ln);
ed9b544e 4929
dd88747b 4930 decrRefCount(ele);
4931 listNodeValue(ln) = c->argv[3];
4932 incrRefCount(c->argv[3]);
4933 addReply(c,shared.ok);
4934 server.dirty++;
ed9b544e 4935 }
4936}
4937
4938static void popGenericCommand(redisClient *c, int where) {
3305306f 4939 robj *o;
dd88747b 4940 list *list;
4941 listNode *ln;
3305306f 4942
dd88747b 4943 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
4944 checkType(c,o,REDIS_LIST)) return;
4945 list = o->ptr;
ed9b544e 4946
dd88747b 4947 if (where == REDIS_HEAD)
4948 ln = listFirst(list);
4949 else
4950 ln = listLast(list);
ed9b544e 4951
dd88747b 4952 if (ln == NULL) {
4953 addReply(c,shared.nullbulk);
4954 } else {
4955 robj *ele = listNodeValue(ln);
4956 addReplyBulk(c,ele);
4957 listDelNode(list,ln);
09241813 4958 if (listLength(list) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 4959 server.dirty++;
ed9b544e 4960 }
4961}
4962
4963static void lpopCommand(redisClient *c) {
4964 popGenericCommand(c,REDIS_HEAD);
4965}
4966
4967static void rpopCommand(redisClient *c) {
4968 popGenericCommand(c,REDIS_TAIL);
4969}
4970
4971static void lrangeCommand(redisClient *c) {
3305306f 4972 robj *o;
ed9b544e 4973 int start = atoi(c->argv[2]->ptr);
4974 int end = atoi(c->argv[3]->ptr);
dd88747b 4975 int llen;
4976 int rangelen, j;
4977 list *list;
4978 listNode *ln;
4979 robj *ele;
4980
4e27f268 4981 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
4982 || checkType(c,o,REDIS_LIST)) return;
dd88747b 4983 list = o->ptr;
4984 llen = listLength(list);
4985
4986 /* convert negative indexes */
4987 if (start < 0) start = llen+start;
4988 if (end < 0) end = llen+end;
4989 if (start < 0) start = 0;
4990 if (end < 0) end = 0;
4991
4992 /* indexes sanity checks */
4993 if (start > end || start >= llen) {
4994 /* Out of range start or start > end result in empty list */
4995 addReply(c,shared.emptymultibulk);
4996 return;
4997 }
4998 if (end >= llen) end = llen-1;
4999 rangelen = (end-start)+1;
3305306f 5000
dd88747b 5001 /* Return the result in form of a multi-bulk reply */
5002 ln = listIndex(list, start);
5003 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
5004 for (j = 0; j < rangelen; j++) {
5005 ele = listNodeValue(ln);
5006 addReplyBulk(c,ele);
5007 ln = ln->next;
ed9b544e 5008 }
5009}
5010
5011static void ltrimCommand(redisClient *c) {
3305306f 5012 robj *o;
ed9b544e 5013 int start = atoi(c->argv[2]->ptr);
5014 int end = atoi(c->argv[3]->ptr);
dd88747b 5015 int llen;
5016 int j, ltrim, rtrim;
5017 list *list;
5018 listNode *ln;
5019
5020 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.ok)) == NULL ||
5021 checkType(c,o,REDIS_LIST)) return;
5022 list = o->ptr;
5023 llen = listLength(list);
5024
5025 /* convert negative indexes */
5026 if (start < 0) start = llen+start;
5027 if (end < 0) end = llen+end;
5028 if (start < 0) start = 0;
5029 if (end < 0) end = 0;
5030
5031 /* indexes sanity checks */
5032 if (start > end || start >= llen) {
5033 /* Out of range start or start > end result in empty list */
5034 ltrim = llen;
5035 rtrim = 0;
ed9b544e 5036 } else {
dd88747b 5037 if (end >= llen) end = llen-1;
5038 ltrim = start;
5039 rtrim = llen-end-1;
5040 }
ed9b544e 5041
dd88747b 5042 /* Remove list elements to perform the trim */
5043 for (j = 0; j < ltrim; j++) {
5044 ln = listFirst(list);
5045 listDelNode(list,ln);
5046 }
5047 for (j = 0; j < rtrim; j++) {
5048 ln = listLast(list);
5049 listDelNode(list,ln);
ed9b544e 5050 }
09241813 5051 if (listLength(list) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5052 server.dirty++;
5053 addReply(c,shared.ok);
ed9b544e 5054}
5055
5056static void lremCommand(redisClient *c) {
3305306f 5057 robj *o;
dd88747b 5058 list *list;
5059 listNode *ln, *next;
5060 int toremove = atoi(c->argv[2]->ptr);
5061 int removed = 0;
5062 int fromtail = 0;
a4d1ba9a 5063
dd88747b 5064 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
5065 checkType(c,o,REDIS_LIST)) return;
5066 list = o->ptr;
5067
5068 if (toremove < 0) {
5069 toremove = -toremove;
5070 fromtail = 1;
5071 }
5072 ln = fromtail ? list->tail : list->head;
5073 while (ln) {
5074 robj *ele = listNodeValue(ln);
5075
5076 next = fromtail ? ln->prev : ln->next;
bf028098 5077 if (equalStringObjects(ele,c->argv[3])) {
dd88747b 5078 listDelNode(list,ln);
5079 server.dirty++;
5080 removed++;
5081 if (toremove && removed == toremove) break;
ed9b544e 5082 }
dd88747b 5083 ln = next;
ed9b544e 5084 }
09241813 5085 if (listLength(list) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5086 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 5087}
5088
12f9d551 5089/* This is the semantic of this command:
0f5f7e9a 5090 * RPOPLPUSH srclist dstlist:
12f9d551 5091 * IF LLEN(srclist) > 0
5092 * element = RPOP srclist
5093 * LPUSH dstlist element
5094 * RETURN element
5095 * ELSE
5096 * RETURN nil
5097 * END
5098 * END
5099 *
5100 * The idea is to be able to get an element from a list in a reliable way
5101 * since the element is not just returned but pushed against another list
5102 * as well. This command was originally proposed by Ezra Zygmuntowicz.
5103 */
0f5f7e9a 5104static void rpoplpushcommand(redisClient *c) {
12f9d551 5105 robj *sobj;
dd88747b 5106 list *srclist;
5107 listNode *ln;
5108
5109 if ((sobj = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
5110 checkType(c,sobj,REDIS_LIST)) return;
5111 srclist = sobj->ptr;
5112 ln = listLast(srclist);
12f9d551 5113
dd88747b 5114 if (ln == NULL) {
12f9d551 5115 addReply(c,shared.nullbulk);
5116 } else {
dd88747b 5117 robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
5118 robj *ele = listNodeValue(ln);
5119 list *dstlist;
e20fb74f 5120
dd88747b 5121 if (dobj && dobj->type != REDIS_LIST) {
5122 addReply(c,shared.wrongtypeerr);
5123 return;
5124 }
12f9d551 5125
dd88747b 5126 /* Add the element to the target list (unless it's directly
5127 * passed to some BLPOP-ing client */
5128 if (!handleClientsWaitingListPush(c,c->argv[2],ele)) {
5129 if (dobj == NULL) {
5130 /* Create the list if the key does not exist */
5131 dobj = createListObject();
09241813 5132 dbAdd(c->db,c->argv[2],dobj);
12f9d551 5133 }
dd88747b 5134 dstlist = dobj->ptr;
5135 listAddNodeHead(dstlist,ele);
5136 incrRefCount(ele);
12f9d551 5137 }
dd88747b 5138
5139 /* Send the element to the client as reply as well */
5140 addReplyBulk(c,ele);
5141
5142 /* Finally remove the element from the source list */
5143 listDelNode(srclist,ln);
09241813 5144 if (listLength(srclist) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5145 server.dirty++;
12f9d551 5146 }
5147}
5148
ed9b544e 5149/* ==================================== Sets ================================ */
5150
5151static void saddCommand(redisClient *c) {
ed9b544e 5152 robj *set;
5153
3305306f 5154 set = lookupKeyWrite(c->db,c->argv[1]);
5155 if (set == NULL) {
ed9b544e 5156 set = createSetObject();
09241813 5157 dbAdd(c->db,c->argv[1],set);
ed9b544e 5158 } else {
ed9b544e 5159 if (set->type != REDIS_SET) {
c937aa89 5160 addReply(c,shared.wrongtypeerr);
ed9b544e 5161 return;
5162 }
5163 }
5164 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
5165 incrRefCount(c->argv[2]);
5166 server.dirty++;
c937aa89 5167 addReply(c,shared.cone);
ed9b544e 5168 } else {
c937aa89 5169 addReply(c,shared.czero);
ed9b544e 5170 }
5171}
5172
5173static void sremCommand(redisClient *c) {
3305306f 5174 robj *set;
ed9b544e 5175
dd88747b 5176 if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
5177 checkType(c,set,REDIS_SET)) return;
5178
5179 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
5180 server.dirty++;
5181 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
09241813 5182 if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5183 addReply(c,shared.cone);
ed9b544e 5184 } else {
dd88747b 5185 addReply(c,shared.czero);
ed9b544e 5186 }
5187}
5188
a4460ef4 5189static void smoveCommand(redisClient *c) {
5190 robj *srcset, *dstset;
5191
5192 srcset = lookupKeyWrite(c->db,c->argv[1]);
5193 dstset = lookupKeyWrite(c->db,c->argv[2]);
5194
5195 /* If the source key does not exist return 0, if it's of the wrong type
5196 * raise an error */
5197 if (srcset == NULL || srcset->type != REDIS_SET) {
5198 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
5199 return;
5200 }
5201 /* Error if the destination key is not a set as well */
5202 if (dstset && dstset->type != REDIS_SET) {
5203 addReply(c,shared.wrongtypeerr);
5204 return;
5205 }
5206 /* Remove the element from the source set */
5207 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
5208 /* Key not found in the src set! return zero */
5209 addReply(c,shared.czero);
5210 return;
5211 }
3ea27d37 5212 if (dictSize((dict*)srcset->ptr) == 0 && srcset != dstset)
09241813 5213 dbDelete(c->db,c->argv[1]);
a4460ef4 5214 server.dirty++;
5215 /* Add the element to the destination set */
5216 if (!dstset) {
5217 dstset = createSetObject();
09241813 5218 dbAdd(c->db,c->argv[2],dstset);
a4460ef4 5219 }
5220 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
5221 incrRefCount(c->argv[3]);
5222 addReply(c,shared.cone);
5223}
5224
ed9b544e 5225static void sismemberCommand(redisClient *c) {
3305306f 5226 robj *set;
ed9b544e 5227
dd88747b 5228 if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
5229 checkType(c,set,REDIS_SET)) return;
5230
5231 if (dictFind(set->ptr,c->argv[2]))
5232 addReply(c,shared.cone);
5233 else
c937aa89 5234 addReply(c,shared.czero);
ed9b544e 5235}
5236
5237static void scardCommand(redisClient *c) {
3305306f 5238 robj *o;
ed9b544e 5239 dict *s;
dd88747b 5240
5241 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
5242 checkType(c,o,REDIS_SET)) return;
e0a62c7f 5243
dd88747b 5244 s = o->ptr;
5245 addReplyUlong(c,dictSize(s));
ed9b544e 5246}
5247
12fea928 5248static void spopCommand(redisClient *c) {
5249 robj *set;
5250 dictEntry *de;
5251
dd88747b 5252 if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
5253 checkType(c,set,REDIS_SET)) return;
5254
5255 de = dictGetRandomKey(set->ptr);
5256 if (de == NULL) {
12fea928 5257 addReply(c,shared.nullbulk);
5258 } else {
dd88747b 5259 robj *ele = dictGetEntryKey(de);
12fea928 5260
dd88747b 5261 addReplyBulk(c,ele);
5262 dictDelete(set->ptr,ele);
5263 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
09241813 5264 if (dictSize((dict*)set->ptr) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5265 server.dirty++;
12fea928 5266 }
5267}
5268
2abb95a9 5269static void srandmemberCommand(redisClient *c) {
5270 robj *set;
5271 dictEntry *de;
5272
dd88747b 5273 if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
5274 checkType(c,set,REDIS_SET)) return;
5275
5276 de = dictGetRandomKey(set->ptr);
5277 if (de == NULL) {
2abb95a9 5278 addReply(c,shared.nullbulk);
5279 } else {
dd88747b 5280 robj *ele = dictGetEntryKey(de);
2abb95a9 5281
dd88747b 5282 addReplyBulk(c,ele);
2abb95a9 5283 }
5284}
5285
ed9b544e 5286static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
5287 dict **d1 = (void*) s1, **d2 = (void*) s2;
5288
3305306f 5289 return dictSize(*d1)-dictSize(*d2);
ed9b544e 5290}
5291
682ac724 5292static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) {
ed9b544e 5293 dict **dv = zmalloc(sizeof(dict*)*setsnum);
5294 dictIterator *di;
5295 dictEntry *de;
5296 robj *lenobj = NULL, *dstset = NULL;
682ac724 5297 unsigned long j, cardinality = 0;
ed9b544e 5298
ed9b544e 5299 for (j = 0; j < setsnum; j++) {
5300 robj *setobj;
3305306f 5301
5302 setobj = dstkey ?
5303 lookupKeyWrite(c->db,setskeys[j]) :
5304 lookupKeyRead(c->db,setskeys[j]);
5305 if (!setobj) {
ed9b544e 5306 zfree(dv);
5faa6025 5307 if (dstkey) {
09241813 5308 if (dbDelete(c->db,dstkey))
fdcaae84 5309 server.dirty++;
0d36ded0 5310 addReply(c,shared.czero);
5faa6025 5311 } else {
4e27f268 5312 addReply(c,shared.emptymultibulk);
5faa6025 5313 }
ed9b544e 5314 return;
5315 }
ed9b544e 5316 if (setobj->type != REDIS_SET) {
5317 zfree(dv);
c937aa89 5318 addReply(c,shared.wrongtypeerr);
ed9b544e 5319 return;
5320 }
5321 dv[j] = setobj->ptr;
5322 }
5323 /* Sort sets from the smallest to largest, this will improve our
5324 * algorithm's performace */
5325 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
5326
5327 /* The first thing we should output is the total number of elements...
5328 * since this is a multi-bulk write, but at this stage we don't know
5329 * the intersection set size, so we use a trick, append an empty object
5330 * to the output list and save the pointer to later modify it with the
5331 * right length */
5332 if (!dstkey) {
5333 lenobj = createObject(REDIS_STRING,NULL);
5334 addReply(c,lenobj);
5335 decrRefCount(lenobj);
5336 } else {
5337 /* If we have a target key where to store the resulting set
5338 * create this key with an empty set inside */
5339 dstset = createSetObject();
ed9b544e 5340 }
5341
5342 /* Iterate all the elements of the first (smallest) set, and test
5343 * the element against all the other sets, if at least one set does
5344 * not include the element it is discarded */
5345 di = dictGetIterator(dv[0]);
ed9b544e 5346
5347 while((de = dictNext(di)) != NULL) {
5348 robj *ele;
5349
5350 for (j = 1; j < setsnum; j++)
5351 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
5352 if (j != setsnum)
5353 continue; /* at least one set does not contain the member */
5354 ele = dictGetEntryKey(de);
5355 if (!dstkey) {
dd88747b 5356 addReplyBulk(c,ele);
ed9b544e 5357 cardinality++;
5358 } else {
5359 dictAdd(dstset->ptr,ele,NULL);
5360 incrRefCount(ele);
5361 }
5362 }
5363 dictReleaseIterator(di);
5364
83cdfe18 5365 if (dstkey) {
3ea27d37 5366 /* Store the resulting set into the target, if the intersection
5367 * is not an empty set. */
09241813 5368 dbDelete(c->db,dstkey);
3ea27d37 5369 if (dictSize((dict*)dstset->ptr) > 0) {
09241813 5370 dbAdd(c->db,dstkey,dstset);
482b672d 5371 addReplyLongLong(c,dictSize((dict*)dstset->ptr));
3ea27d37 5372 } else {
5373 decrRefCount(dstset);
d36c4e97 5374 addReply(c,shared.czero);
3ea27d37 5375 }
40d224a9 5376 server.dirty++;
d36c4e97 5377 } else {
5378 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality);
40d224a9 5379 }
ed9b544e 5380 zfree(dv);
5381}
5382
5383static void sinterCommand(redisClient *c) {
5384 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
5385}
5386
5387static void sinterstoreCommand(redisClient *c) {
5388 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
5389}
5390
/* Operation selectors passed as the 'op' argument of the generic set
 * combination commands (see sunionDiffGenericCommand). */
#define REDIS_OP_UNION  0
#define REDIS_OP_DIFF   1
#define REDIS_OP_INTER  2
f4f56e1d 5394
5395static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 5396 dict **dv = zmalloc(sizeof(dict*)*setsnum);
5397 dictIterator *di;
5398 dictEntry *de;
f4f56e1d 5399 robj *dstset = NULL;
40d224a9 5400 int j, cardinality = 0;
5401
40d224a9 5402 for (j = 0; j < setsnum; j++) {
5403 robj *setobj;
5404
5405 setobj = dstkey ?
5406 lookupKeyWrite(c->db,setskeys[j]) :
5407 lookupKeyRead(c->db,setskeys[j]);
5408 if (!setobj) {
5409 dv[j] = NULL;
5410 continue;
5411 }
5412 if (setobj->type != REDIS_SET) {
5413 zfree(dv);
5414 addReply(c,shared.wrongtypeerr);
5415 return;
5416 }
5417 dv[j] = setobj->ptr;
5418 }
5419
5420 /* We need a temp set object to store our union. If the dstkey
5421 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
5422 * this set object will be the resulting object to set into the target key*/
5423 dstset = createSetObject();
5424
40d224a9 5425 /* Iterate all the elements of all the sets, add every element a single
5426 * time to the result set */
5427 for (j = 0; j < setsnum; j++) {
51829ed3 5428 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 5429 if (!dv[j]) continue; /* non existing keys are like empty sets */
5430
5431 di = dictGetIterator(dv[j]);
40d224a9 5432
5433 while((de = dictNext(di)) != NULL) {
5434 robj *ele;
5435
5436 /* dictAdd will not add the same element multiple times */
5437 ele = dictGetEntryKey(de);
f4f56e1d 5438 if (op == REDIS_OP_UNION || j == 0) {
5439 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
5440 incrRefCount(ele);
40d224a9 5441 cardinality++;
5442 }
f4f56e1d 5443 } else if (op == REDIS_OP_DIFF) {
5444 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
5445 cardinality--;
5446 }
40d224a9 5447 }
5448 }
5449 dictReleaseIterator(di);
51829ed3 5450
d36c4e97 5451 /* result set is empty? Exit asap. */
5452 if (op == REDIS_OP_DIFF && cardinality == 0) break;
40d224a9 5453 }
5454
f4f56e1d 5455 /* Output the content of the resulting set, if not in STORE mode */
5456 if (!dstkey) {
5457 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
5458 di = dictGetIterator(dstset->ptr);
f4f56e1d 5459 while((de = dictNext(di)) != NULL) {
5460 robj *ele;
5461
5462 ele = dictGetEntryKey(de);
dd88747b 5463 addReplyBulk(c,ele);
f4f56e1d 5464 }
5465 dictReleaseIterator(di);
d36c4e97 5466 decrRefCount(dstset);
83cdfe18
AG
5467 } else {
5468 /* If we have a target key where to store the resulting set
5469 * create this key with the result set inside */
09241813 5470 dbDelete(c->db,dstkey);
3ea27d37 5471 if (dictSize((dict*)dstset->ptr) > 0) {
09241813 5472 dbAdd(c->db,dstkey,dstset);
482b672d 5473 addReplyLongLong(c,dictSize((dict*)dstset->ptr));
3ea27d37 5474 } else {
5475 decrRefCount(dstset);
d36c4e97 5476 addReply(c,shared.czero);
3ea27d37 5477 }
40d224a9 5478 server.dirty++;
5479 }
5480 zfree(dv);
5481}
5482
5483static void sunionCommand(redisClient *c) {
f4f56e1d 5484 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 5485}
5486
5487static void sunionstoreCommand(redisClient *c) {
f4f56e1d 5488 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
5489}
5490
5491static void sdiffCommand(redisClient *c) {
5492 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
5493}
5494
5495static void sdiffstoreCommand(redisClient *c) {
5496 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 5497}
5498
6b47e12e 5499/* ==================================== ZSets =============================== */
5500
5501/* ZSETs are ordered sets using two data structures to hold the same elements
5502 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
5503 * data structure.
5504 *
5505 * The elements are added to a hash table mapping Redis objects to scores.
5506 * At the same time the elements are added to a skip list mapping scores
5507 * to Redis objects (so objects are sorted by scores in this "view"). */
5508
5509/* This skiplist implementation is almost a C translation of the original
5510 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
5511 * Alternative to Balanced Trees", modified in three ways:
5512 * a) this implementation allows for repeated values.
5513 * b) the comparison is not just by key (our 'score') but by satellite data.
5514 * c) there is a back pointer, so it's a doubly linked list with the back
5515 * pointers being only at "level 1". This allows to traverse the list
5516 * from tail to head, useful for ZREVRANGE. */
5517
5518static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
5519 zskiplistNode *zn = zmalloc(sizeof(*zn));
5520
5521 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
2f4dd7e0 5522 if (level > 1)
2b37892e 5523 zn->span = zmalloc(sizeof(unsigned int) * (level - 1));
2f4dd7e0 5524 else
5525 zn->span = NULL;
6b47e12e 5526 zn->score = score;
5527 zn->obj = obj;
5528 return zn;
5529}
5530
5531static zskiplist *zslCreate(void) {
5532 int j;
5533 zskiplist *zsl;
e0a62c7f 5534
6b47e12e 5535 zsl = zmalloc(sizeof(*zsl));
5536 zsl->level = 1;
cc812361 5537 zsl->length = 0;
6b47e12e 5538 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
69d95c3e 5539 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
6b47e12e 5540 zsl->header->forward[j] = NULL;
94e543b5 5541
5542 /* span has space for ZSKIPLIST_MAXLEVEL-1 elements */
5543 if (j < ZSKIPLIST_MAXLEVEL-1)
5544 zsl->header->span[j] = 0;
69d95c3e 5545 }
e3870fab 5546 zsl->header->backward = NULL;
5547 zsl->tail = NULL;
6b47e12e 5548 return zsl;
5549}
5550
fd8ccf44 5551static void zslFreeNode(zskiplistNode *node) {
5552 decrRefCount(node->obj);
ad807e6f 5553 zfree(node->forward);
69d95c3e 5554 zfree(node->span);
fd8ccf44 5555 zfree(node);
5556}
5557
5558static void zslFree(zskiplist *zsl) {
ad807e6f 5559 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 5560
ad807e6f 5561 zfree(zsl->header->forward);
69d95c3e 5562 zfree(zsl->header->span);
ad807e6f 5563 zfree(zsl->header);
fd8ccf44 5564 while(node) {
599379dd 5565 next = node->forward[0];
fd8ccf44 5566 zslFreeNode(node);
5567 node = next;
5568 }
ad807e6f 5569 zfree(zsl);
fd8ccf44 5570}
5571
6b47e12e 5572static int zslRandomLevel(void) {
5573 int level = 1;
5574 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
5575 level += 1;
10c2baa5 5576 return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
6b47e12e 5577}
5578
5579static void zslInsert(zskiplist *zsl, double score, robj *obj) {
5580 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
2b37892e 5581 unsigned int rank[ZSKIPLIST_MAXLEVEL];
6b47e12e 5582 int i, level;
5583
5584 x = zsl->header;
5585 for (i = zsl->level-1; i >= 0; i--) {
2b37892e
PN
5586 /* store rank that is crossed to reach the insert position */
5587 rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];
69d95c3e 5588
9d60e6e4 5589 while (x->forward[i] &&
5590 (x->forward[i]->score < score ||
5591 (x->forward[i]->score == score &&
69d95c3e 5592 compareStringObjects(x->forward[i]->obj,obj) < 0))) {
a50ea45c 5593 rank[i] += i > 0 ? x->span[i-1] : 1;
6b47e12e 5594 x = x->forward[i];
69d95c3e 5595 }
6b47e12e 5596 update[i] = x;
5597 }
6b47e12e 5598 /* we assume the key is not already inside, since we allow duplicated
5599 * scores, and the re-insertion of score and redis object should never
5600 * happpen since the caller of zslInsert() should test in the hash table
5601 * if the element is already inside or not. */
5602 level = zslRandomLevel();
5603 if (level > zsl->level) {
69d95c3e 5604 for (i = zsl->level; i < level; i++) {
2b37892e 5605 rank[i] = 0;
6b47e12e 5606 update[i] = zsl->header;
2b37892e 5607 update[i]->span[i-1] = zsl->length;
69d95c3e 5608 }
6b47e12e 5609 zsl->level = level;
5610 }
5611 x = zslCreateNode(level,score,obj);
5612 for (i = 0; i < level; i++) {
5613 x->forward[i] = update[i]->forward[i];
5614 update[i]->forward[i] = x;
69d95c3e
PN
5615
5616 /* update span covered by update[i] as x is inserted here */
2b37892e
PN
5617 if (i > 0) {
5618 x->span[i-1] = update[i]->span[i-1] - (rank[0] - rank[i]);
5619 update[i]->span[i-1] = (rank[0] - rank[i]) + 1;
5620 }
6b47e12e 5621 }
69d95c3e
PN
5622
5623 /* increment span for untouched levels */
5624 for (i = level; i < zsl->level; i++) {
2b37892e 5625 update[i]->span[i-1]++;
69d95c3e
PN
5626 }
5627
bb975144 5628 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 5629 if (x->forward[0])
5630 x->forward[0]->backward = x;
5631 else
5632 zsl->tail = x;
cc812361 5633 zsl->length++;
6b47e12e 5634}
5635
84105336
PN
5636/* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */
5637void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
5638 int i;
5639 for (i = 0; i < zsl->level; i++) {
5640 if (update[i]->forward[i] == x) {
5641 if (i > 0) {
5642 update[i]->span[i-1] += x->span[i-1] - 1;
5643 }
5644 update[i]->forward[i] = x->forward[i];
5645 } else {
5646 /* invariant: i > 0, because update[0]->forward[0]
5647 * is always equal to x */
5648 update[i]->span[i-1] -= 1;
5649 }
5650 }
5651 if (x->forward[0]) {
5652 x->forward[0]->backward = x->backward;
5653 } else {
5654 zsl->tail = x->backward;
5655 }
5656 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
5657 zsl->level--;
5658 zsl->length--;
5659}
5660
50c55df5 5661/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 5662static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 5663 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
5664 int i;
5665
5666 x = zsl->header;
5667 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 5668 while (x->forward[i] &&
5669 (x->forward[i]->score < score ||
5670 (x->forward[i]->score == score &&
5671 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 5672 x = x->forward[i];
5673 update[i] = x;
5674 }
5675 /* We may have multiple elements with the same score, what we need
5676 * is to find the element with both the right score and object. */
5677 x = x->forward[0];
bf028098 5678 if (x && score == x->score && equalStringObjects(x->obj,obj)) {
84105336 5679 zslDeleteNode(zsl, x, update);
9d60e6e4 5680 zslFreeNode(x);
9d60e6e4 5681 return 1;
5682 } else {
5683 return 0; /* not found */
e197b441 5684 }
5685 return 0; /* not found */
fd8ccf44 5686}
5687
1807985b 5688/* Delete all the elements with score between min and max from the skiplist.
5689 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
5690 * Note that this function takes the reference to the hash table view of the
5691 * sorted set, in order to remove the elements from the hash table too. */
f84d3933 5692static unsigned long zslDeleteRangeByScore(zskiplist *zsl, double min, double max, dict *dict) {
1807985b 5693 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
5694 unsigned long removed = 0;
5695 int i;
5696
5697 x = zsl->header;
5698 for (i = zsl->level-1; i >= 0; i--) {
5699 while (x->forward[i] && x->forward[i]->score < min)
5700 x = x->forward[i];
5701 update[i] = x;
5702 }
5703 /* We may have multiple elements with the same score, what we need
5704 * is to find the element with both the right score and object. */
5705 x = x->forward[0];
5706 while (x && x->score <= max) {
84105336
PN
5707 zskiplistNode *next = x->forward[0];
5708 zslDeleteNode(zsl, x, update);
1807985b 5709 dictDelete(dict,x->obj);
5710 zslFreeNode(x);
1807985b 5711 removed++;
5712 x = next;
5713 }
5714 return removed; /* not found */
5715}
1807985b 5716
9212eafd 5717/* Delete all the elements with rank between start and end from the skiplist.
2424490f 5718 * Start and end are inclusive. Note that start and end need to be 1-based */
9212eafd
PN
5719static unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
5720 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
5721 unsigned long traversed = 0, removed = 0;
5722 int i;
5723
9212eafd
PN
5724 x = zsl->header;
5725 for (i = zsl->level-1; i >= 0; i--) {
5726 while (x->forward[i] && (traversed + (i > 0 ? x->span[i-1] : 1)) < start) {
5727 traversed += i > 0 ? x->span[i-1] : 1;
5728 x = x->forward[i];
1807985b 5729 }
9212eafd
PN
5730 update[i] = x;
5731 }
5732
5733 traversed++;
5734 x = x->forward[0];
5735 while (x && traversed <= end) {
84105336
PN
5736 zskiplistNode *next = x->forward[0];
5737 zslDeleteNode(zsl, x, update);
1807985b 5738 dictDelete(dict,x->obj);
5739 zslFreeNode(x);
1807985b 5740 removed++;
9212eafd 5741 traversed++;
1807985b 5742 x = next;
5743 }
9212eafd 5744 return removed;
1807985b 5745}
5746
50c55df5 5747/* Find the first node having a score equal or greater than the specified one.
5748 * Returns NULL if there is no match. */
5749static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
5750 zskiplistNode *x;
5751 int i;
5752
5753 x = zsl->header;
5754 for (i = zsl->level-1; i >= 0; i--) {
5755 while (x->forward[i] && x->forward[i]->score < score)
5756 x = x->forward[i];
5757 }
5758 /* We may have multiple elements with the same score, what we need
5759 * is to find the element with both the right score and object. */
5760 return x->forward[0];
5761}
5762
27b0ccca
PN
5763/* Find the rank for an element by both score and key.
5764 * Returns 0 when the element cannot be found, rank otherwise.
5765 * Note that the rank is 1-based due to the span of zsl->header to the
5766 * first element. */
5767static unsigned long zslGetRank(zskiplist *zsl, double score, robj *o) {
5768 zskiplistNode *x;
5769 unsigned long rank = 0;
5770 int i;
5771
5772 x = zsl->header;
5773 for (i = zsl->level-1; i >= 0; i--) {
5774 while (x->forward[i] &&
5775 (x->forward[i]->score < score ||
5776 (x->forward[i]->score == score &&
5777 compareStringObjects(x->forward[i]->obj,o) <= 0))) {
a50ea45c 5778 rank += i > 0 ? x->span[i-1] : 1;
27b0ccca
PN
5779 x = x->forward[i];
5780 }
5781
5782 /* x might be equal to zsl->header, so test if obj is non-NULL */
bf028098 5783 if (x->obj && equalStringObjects(x->obj,o)) {
27b0ccca
PN
5784 return rank;
5785 }
5786 }
5787 return 0;
5788}
5789
e74825c2
PN
5790/* Finds an element by its rank. The rank argument needs to be 1-based. */
5791zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) {
5792 zskiplistNode *x;
5793 unsigned long traversed = 0;
5794 int i;
5795
5796 x = zsl->header;
5797 for (i = zsl->level-1; i >= 0; i--) {
dd88747b 5798 while (x->forward[i] && (traversed + (i>0 ? x->span[i-1] : 1)) <= rank)
5799 {
a50ea45c 5800 traversed += i > 0 ? x->span[i-1] : 1;
e74825c2
PN
5801 x = x->forward[i];
5802 }
e74825c2
PN
5803 if (traversed == rank) {
5804 return x;
5805 }
5806 }
5807 return NULL;
5808}
5809
fd8ccf44 5810/* The actual Z-commands implementations */
5811
7db723ad 5812/* This generic command implements both ZADD and ZINCRBY.
e2665397 5813 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
7db723ad 5814 * the increment if the operation is a ZINCRBY (doincrement == 1). */
e2665397 5815static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
fd8ccf44 5816 robj *zsetobj;
5817 zset *zs;
5818 double *score;
5819
5fc9229c 5820 if (isnan(scoreval)) {
5821 addReplySds(c,sdsnew("-ERR provide score is Not A Number (nan)\r\n"));
5822 return;
5823 }
5824
e2665397 5825 zsetobj = lookupKeyWrite(c->db,key);
fd8ccf44 5826 if (zsetobj == NULL) {
5827 zsetobj = createZsetObject();
09241813 5828 dbAdd(c->db,key,zsetobj);
fd8ccf44 5829 } else {
5830 if (zsetobj->type != REDIS_ZSET) {
5831 addReply(c,shared.wrongtypeerr);
5832 return;
5833 }
5834 }
fd8ccf44 5835 zs = zsetobj->ptr;
e2665397 5836
7db723ad 5837 /* Ok now since we implement both ZADD and ZINCRBY here the code
e2665397 5838 * needs to handle the two different conditions. It's all about setting
5839 * '*score', that is, the new score to set, to the right value. */
5840 score = zmalloc(sizeof(double));
5841 if (doincrement) {
5842 dictEntry *de;
5843
5844 /* Read the old score. If the element was not present starts from 0 */
5845 de = dictFind(zs->dict,ele);
5846 if (de) {
5847 double *oldscore = dictGetEntryVal(de);
5848 *score = *oldscore + scoreval;
5849 } else {
5850 *score = scoreval;
5851 }
5fc9229c 5852 if (isnan(*score)) {
5853 addReplySds(c,
5854 sdsnew("-ERR resulting score is Not A Number (nan)\r\n"));
5855 zfree(score);
5856 /* Note that we don't need to check if the zset may be empty and
5857 * should be removed here, as we can only obtain Nan as score if
5858 * there was already an element in the sorted set. */
5859 return;
5860 }
e2665397 5861 } else {
5862 *score = scoreval;
5863 }
5864
5865 /* What follows is a simple remove and re-insert operation that is common
7db723ad 5866 * to both ZADD and ZINCRBY... */
e2665397 5867 if (dictAdd(zs->dict,ele,score) == DICT_OK) {
fd8ccf44 5868 /* case 1: New element */
e2665397 5869 incrRefCount(ele); /* added to hash */
5870 zslInsert(zs->zsl,*score,ele);
5871 incrRefCount(ele); /* added to skiplist */
fd8ccf44 5872 server.dirty++;
e2665397 5873 if (doincrement)
e2665397 5874 addReplyDouble(c,*score);
91d71bfc 5875 else
5876 addReply(c,shared.cone);
fd8ccf44 5877 } else {
5878 dictEntry *de;
5879 double *oldscore;
e0a62c7f 5880
fd8ccf44 5881 /* case 2: Score update operation */
e2665397 5882 de = dictFind(zs->dict,ele);
dfc5e96c 5883 redisAssert(de != NULL);
fd8ccf44 5884 oldscore = dictGetEntryVal(de);
5885 if (*score != *oldscore) {
5886 int deleted;
5887
e2665397 5888 /* Remove and insert the element in the skip list with new score */
5889 deleted = zslDelete(zs->zsl,*oldscore,ele);
dfc5e96c 5890 redisAssert(deleted != 0);
e2665397 5891 zslInsert(zs->zsl,*score,ele);
5892 incrRefCount(ele);
5893 /* Update the score in the hash table */
5894 dictReplace(zs->dict,ele,score);
fd8ccf44 5895 server.dirty++;
2161a965 5896 } else {
5897 zfree(score);
fd8ccf44 5898 }
e2665397 5899 if (doincrement)
5900 addReplyDouble(c,*score);
5901 else
5902 addReply(c,shared.czero);
fd8ccf44 5903 }
5904}
5905
e2665397 5906static void zaddCommand(redisClient *c) {
5907 double scoreval;
5908
bd79a6bd 5909 if (getDoubleFromObjectOrReply(c, c->argv[2], &scoreval, NULL) != REDIS_OK) return;
e2665397 5910 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
5911}
5912
7db723ad 5913static void zincrbyCommand(redisClient *c) {
e2665397 5914 double scoreval;
5915
bd79a6bd 5916 if (getDoubleFromObjectOrReply(c, c->argv[2], &scoreval, NULL) != REDIS_OK) return;
e2665397 5917 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
5918}
5919
1b7106e7 5920static void zremCommand(redisClient *c) {
5921 robj *zsetobj;
5922 zset *zs;
dd88747b 5923 dictEntry *de;
5924 double *oldscore;
5925 int deleted;
1b7106e7 5926
dd88747b 5927 if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
5928 checkType(c,zsetobj,REDIS_ZSET)) return;
1b7106e7 5929
dd88747b 5930 zs = zsetobj->ptr;
5931 de = dictFind(zs->dict,c->argv[2]);
5932 if (de == NULL) {
5933 addReply(c,shared.czero);
5934 return;
1b7106e7 5935 }
dd88747b 5936 /* Delete from the skiplist */
5937 oldscore = dictGetEntryVal(de);
5938 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
5939 redisAssert(deleted != 0);
5940
5941 /* Delete from the hash table */
5942 dictDelete(zs->dict,c->argv[2]);
5943 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
09241813 5944 if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5945 server.dirty++;
5946 addReply(c,shared.cone);
1b7106e7 5947}
5948
1807985b 5949static void zremrangebyscoreCommand(redisClient *c) {
bbe025e0
AM
5950 double min;
5951 double max;
dd88747b 5952 long deleted;
1807985b 5953 robj *zsetobj;
5954 zset *zs;
5955
bd79a6bd
PN
5956 if ((getDoubleFromObjectOrReply(c, c->argv[2], &min, NULL) != REDIS_OK) ||
5957 (getDoubleFromObjectOrReply(c, c->argv[3], &max, NULL) != REDIS_OK)) return;
bbe025e0 5958
dd88747b 5959 if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
5960 checkType(c,zsetobj,REDIS_ZSET)) return;
1807985b 5961
dd88747b 5962 zs = zsetobj->ptr;
5963 deleted = zslDeleteRangeByScore(zs->zsl,min,max,zs->dict);
5964 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
09241813 5965 if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 5966 server.dirty += deleted;
482b672d 5967 addReplyLongLong(c,deleted);
1807985b 5968}
5969
9212eafd 5970static void zremrangebyrankCommand(redisClient *c) {
bbe025e0
AM
5971 long start;
5972 long end;
dd88747b 5973 int llen;
5974 long deleted;
9212eafd
PN
5975 robj *zsetobj;
5976 zset *zs;
5977
bd79a6bd
PN
5978 if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
5979 (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
bbe025e0 5980
dd88747b 5981 if ((zsetobj = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
5982 checkType(c,zsetobj,REDIS_ZSET)) return;
5983 zs = zsetobj->ptr;
5984 llen = zs->zsl->length;
9212eafd 5985
dd88747b 5986 /* convert negative indexes */
5987 if (start < 0) start = llen+start;
5988 if (end < 0) end = llen+end;
5989 if (start < 0) start = 0;
5990 if (end < 0) end = 0;
9212eafd 5991
dd88747b 5992 /* indexes sanity checks */
5993 if (start > end || start >= llen) {
5994 addReply(c,shared.czero);
5995 return;
9212eafd 5996 }
dd88747b 5997 if (end >= llen) end = llen-1;
5998
5999 /* increment start and end because zsl*Rank functions
6000 * use 1-based rank */
6001 deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict);
6002 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
09241813 6003 if (dictSize(zs->dict) == 0) dbDelete(c->db,c->argv[1]);
dd88747b 6004 server.dirty += deleted;
482b672d 6005 addReplyLongLong(c, deleted);
9212eafd
PN
6006}
6007
8f92e768
PN
6008typedef struct {
6009 dict *dict;
6010 double weight;
6011} zsetopsrc;
6012
6013static int qsortCompareZsetopsrcByCardinality(const void *s1, const void *s2) {
6014 zsetopsrc *d1 = (void*) s1, *d2 = (void*) s2;
6015 unsigned long size1, size2;
6016 size1 = d1->dict ? dictSize(d1->dict) : 0;
6017 size2 = d2->dict ? dictSize(d2->dict) : 0;
6018 return size1 - size2;
6019}
6020
/* Aggregation modes supported by ZUNIONSTORE/ZINTERSTORE. */
#define REDIS_AGGR_SUM 1
#define REDIS_AGGR_MIN 2
#define REDIS_AGGR_MAX 3
/* Score stored in a dict entry: entries with a NULL value get a score
 * of 1.0. Note that the argument is evaluated more than once. */
#define zunionInterDictValue(_e) (dictGetEntryVal(_e) == NULL ? 1.0 : *(double*)dictGetEntryVal(_e))

/* Fold 'val' into '*target' according to the 'aggregate' mode (one of the
 * REDIS_AGGR_* constants). An unknown mode triggers a panic. */
inline static void zunionInterAggregate(double *target, double val, int aggregate) {
    switch (aggregate) {
    case REDIS_AGGR_SUM:
        *target += val;
        break;
    case REDIS_AGGR_MIN:
        if (val < *target) *target = val;
        break;
    case REDIS_AGGR_MAX:
        if (val > *target) *target = val;
        break;
    default:
        /* safety net */
        redisPanic("Unknown ZUNION/INTER aggregate type");
    }
}
6038
2830ca53 6039static void zunionInterGenericCommand(redisClient *c, robj *dstkey, int op) {
bc000c1d 6040 int i, j, setnum;
d2764cd6 6041 int aggregate = REDIS_AGGR_SUM;
8f92e768 6042 zsetopsrc *src;
2830ca53
PN
6043 robj *dstobj;
6044 zset *dstzset;
b287c9bb
PN
6045 dictIterator *di;
6046 dictEntry *de;
6047
bc000c1d
JC
6048 /* expect setnum input keys to be given */
6049 setnum = atoi(c->argv[2]->ptr);
6050 if (setnum < 1) {
5d373da9 6051 addReplySds(c,sdsnew("-ERR at least 1 input key is needed for ZUNIONSTORE/ZINTERSTORE\r\n"));
2830ca53 6052 return;
b287c9bb 6053 }
2830ca53
PN
6054
6055 /* test if the expected number of keys would overflow */
bc000c1d 6056 if (3+setnum > c->argc) {
b287c9bb
PN
6057 addReply(c,shared.syntaxerr);
6058 return;
6059 }
6060
2830ca53 6061 /* read keys to be used for input */
bc000c1d
JC
6062 src = zmalloc(sizeof(zsetopsrc) * setnum);
6063 for (i = 0, j = 3; i < setnum; i++, j++) {
6064 robj *obj = lookupKeyWrite(c->db,c->argv[j]);
6065 if (!obj) {
8f92e768 6066 src[i].dict = NULL;
b287c9bb 6067 } else {
bc000c1d
JC
6068 if (obj->type == REDIS_ZSET) {
6069 src[i].dict = ((zset*)obj->ptr)->dict;
6070 } else if (obj->type == REDIS_SET) {
6071 src[i].dict = (obj->ptr);
6072 } else {
8f92e768 6073 zfree(src);
b287c9bb
PN
6074 addReply(c,shared.wrongtypeerr);
6075 return;
6076 }
b287c9bb 6077 }
2830ca53
PN
6078
6079 /* default all weights to 1 */
8f92e768 6080 src[i].weight = 1.0;
b287c9bb
PN
6081 }
6082
2830ca53
PN
6083 /* parse optional extra arguments */
6084 if (j < c->argc) {
d2764cd6 6085 int remaining = c->argc - j;
b287c9bb 6086
2830ca53 6087 while (remaining) {
bc000c1d 6088 if (remaining >= (setnum + 1) && !strcasecmp(c->argv[j]->ptr,"weights")) {
2830ca53 6089 j++; remaining--;
bc000c1d 6090 for (i = 0; i < setnum; i++, j++, remaining--) {
bd79a6bd 6091 if (getDoubleFromObjectOrReply(c, c->argv[j], &src[i].weight, NULL) != REDIS_OK)
bbe025e0 6092 return;
2830ca53 6093 }
d2764cd6
PN
6094 } else if (remaining >= 2 && !strcasecmp(c->argv[j]->ptr,"aggregate")) {
6095 j++; remaining--;
6096 if (!strcasecmp(c->argv[j]->ptr,"sum")) {
6097 aggregate = REDIS_AGGR_SUM;
6098 } else if (!strcasecmp(c->argv[j]->ptr,"min")) {
6099 aggregate = REDIS_AGGR_MIN;
6100 } else if (!strcasecmp(c->argv[j]->ptr,"max")) {
6101 aggregate = REDIS_AGGR_MAX;
6102 } else {
6103 zfree(src);
6104 addReply(c,shared.syntaxerr);
6105 return;
6106 }
6107 j++; remaining--;
2830ca53 6108 } else {
8f92e768 6109 zfree(src);
2830ca53
PN
6110 addReply(c,shared.syntaxerr);
6111 return;
6112 }
6113 }
6114 }
b287c9bb 6115
d2764cd6
PN
6116 /* sort sets from the smallest to largest, this will improve our
6117 * algorithm's performance */
bc000c1d 6118 qsort(src,setnum,sizeof(zsetopsrc),qsortCompareZsetopsrcByCardinality);
d2764cd6 6119
2830ca53
PN
6120 dstobj = createZsetObject();
6121 dstzset = dstobj->ptr;
6122
6123 if (op == REDIS_OP_INTER) {
8f92e768
PN
6124 /* skip going over all entries if the smallest zset is NULL or empty */
6125 if (src[0].dict && dictSize(src[0].dict) > 0) {
6126 /* precondition: as src[0].dict is non-empty and the zsets are ordered
6127 * from small to large, all src[i > 0].dict are non-empty too */
6128 di = dictGetIterator(src[0].dict);
2830ca53 6129 while((de = dictNext(di)) != NULL) {
d2764cd6 6130 double *score = zmalloc(sizeof(double)), value;
bc000c1d 6131 *score = src[0].weight * zunionInterDictValue(de);
2830ca53 6132
bc000c1d 6133 for (j = 1; j < setnum; j++) {
d2764cd6 6134 dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de));
2830ca53 6135 if (other) {
bc000c1d 6136 value = src[j].weight * zunionInterDictValue(other);
d2764cd6 6137 zunionInterAggregate(score, value, aggregate);
2830ca53
PN
6138 } else {
6139 break;
6140 }
6141 }
b287c9bb 6142
2830ca53 6143 /* skip entry when not present in every source dict */
bc000c1d 6144 if (j != setnum) {
2830ca53
PN
6145 zfree(score);
6146 } else {
6147 robj *o = dictGetEntryKey(de);
6148 dictAdd(dstzset->dict,o,score);
6149 incrRefCount(o); /* added to dictionary */
6150 zslInsert(dstzset->zsl,*score,o);
6151 incrRefCount(o); /* added to skiplist */
b287c9bb
PN
6152 }
6153 }
2830ca53
PN
6154 dictReleaseIterator(di);
6155 }
6156 } else if (op == REDIS_OP_UNION) {
bc000c1d 6157 for (i = 0; i < setnum; i++) {
8f92e768 6158 if (!src[i].dict) continue;
2830ca53 6159
8f92e768 6160 di = dictGetIterator(src[i].dict);
2830ca53
PN
6161 while((de = dictNext(di)) != NULL) {
6162 /* skip key when already processed */
6163 if (dictFind(dstzset->dict,dictGetEntryKey(de)) != NULL) continue;
6164
d2764cd6 6165 double *score = zmalloc(sizeof(double)), value;
bc000c1d 6166 *score = src[i].weight * zunionInterDictValue(de);
2830ca53 6167
d2764cd6
PN
6168 /* because the zsets are sorted by size, its only possible
6169 * for sets at larger indices to hold this entry */
bc000c1d 6170 for (j = (i+1); j < setnum; j++) {
d2764cd6 6171 dictEntry *other = dictFind(src[j].dict,dictGetEntryKey(de));
2830ca53 6172 if (other) {
bc000c1d 6173 value = src[j].weight * zunionInterDictValue(other);
d2764cd6 6174 zunionInterAggregate(score, value, aggregate);
2830ca53
PN
6175 }
6176 }
b287c9bb 6177
2830ca53
PN
6178 robj *o = dictGetEntryKey(de);
6179 dictAdd(dstzset->dict,o,score);
6180 incrRefCount(o); /* added to dictionary */
6181 zslInsert(dstzset->zsl,*score,o);
6182 incrRefCount(o); /* added to skiplist */
6183 }
6184 dictReleaseIterator(di);
b287c9bb 6185 }
2830ca53
PN
6186 } else {
6187 /* unknown operator */
6188 redisAssert(op == REDIS_OP_INTER || op == REDIS_OP_UNION);
b287c9bb
PN
6189 }
6190
09241813 6191 dbDelete(c->db,dstkey);
3ea27d37 6192 if (dstzset->zsl->length) {
09241813 6193 dbAdd(c->db,dstkey,dstobj);
482b672d 6194 addReplyLongLong(c, dstzset->zsl->length);
3ea27d37 6195 server.dirty++;
6196 } else {
8bca8773 6197 decrRefCount(dstobj);
3ea27d37 6198 addReply(c, shared.czero);
6199 }
8f92e768 6200 zfree(src);
b287c9bb
PN
6201}
6202
5d373da9 6203static void zunionstoreCommand(redisClient *c) {
2830ca53 6204 zunionInterGenericCommand(c,c->argv[1], REDIS_OP_UNION);
b287c9bb
PN
6205}
6206
5d373da9 6207static void zinterstoreCommand(redisClient *c) {
2830ca53 6208 zunionInterGenericCommand(c,c->argv[1], REDIS_OP_INTER);
b287c9bb
PN
6209}
6210
e3870fab 6211static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 6212 robj *o;
bbe025e0
AM
6213 long start;
6214 long end;
752da584 6215 int withscores = 0;
dd88747b 6216 int llen;
6217 int rangelen, j;
6218 zset *zsetobj;
6219 zskiplist *zsl;
6220 zskiplistNode *ln;
6221 robj *ele;
752da584 6222
bd79a6bd
PN
6223 if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) ||
6224 (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return;
bbe025e0 6225
752da584 6226 if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
6227 withscores = 1;
6228 } else if (c->argc >= 5) {
6229 addReply(c,shared.syntaxerr);
6230 return;
6231 }
cc812361 6232
4e27f268 6233 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
6234 || checkType(c,o,REDIS_ZSET)) return;
dd88747b 6235 zsetobj = o->ptr;
6236 zsl = zsetobj->zsl;
6237 llen = zsl->length;
cc812361 6238
dd88747b 6239 /* convert negative indexes */
6240 if (start < 0) start = llen+start;
6241 if (end < 0) end = llen+end;
6242 if (start < 0) start = 0;
6243 if (end < 0) end = 0;
cc812361 6244
dd88747b 6245 /* indexes sanity checks */
6246 if (start > end || start >= llen) {
6247 /* Out of range start or start > end result in empty list */
6248 addReply(c,shared.emptymultibulk);
6249 return;
6250 }
6251 if (end >= llen) end = llen-1;
6252 rangelen = (end-start)+1;
cc812361 6253
dd88747b 6254 /* check if starting point is trivial, before searching
6255 * the element in log(N) time */
6256 if (reverse) {
6257 ln = start == 0 ? zsl->tail : zslGetElementByRank(zsl, llen-start);
6258 } else {
6259 ln = start == 0 ?
6260 zsl->header->forward[0] : zslGetElementByRank(zsl, start+1);
6261 }
cc812361 6262
dd88747b 6263 /* Return the result in form of a multi-bulk reply */
6264 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
6265 withscores ? (rangelen*2) : rangelen));
6266 for (j = 0; j < rangelen; j++) {
6267 ele = ln->obj;
6268 addReplyBulk(c,ele);
6269 if (withscores)
6270 addReplyDouble(c,ln->score);
6271 ln = reverse ? ln->backward : ln->forward[0];
cc812361 6272 }
6273}
6274
e3870fab 6275static void zrangeCommand(redisClient *c) {
6276 zrangeGenericCommand(c,0);
6277}
6278
6279static void zrevrangeCommand(redisClient *c) {
6280 zrangeGenericCommand(c,1);
6281}
6282
f44dd428 6283/* This command implements both ZRANGEBYSCORE and ZCOUNT.
6284 * If justcount is non-zero, just the count is returned. */
6285static void genericZrangebyscoreCommand(redisClient *c, int justcount) {
50c55df5 6286 robj *o;
f44dd428 6287 double min, max;
6288 int minex = 0, maxex = 0; /* are min or max exclusive? */
80181f78 6289 int offset = 0, limit = -1;
0500ef27
SH
6290 int withscores = 0;
6291 int badsyntax = 0;
6292
f44dd428 6293 /* Parse the min-max interval. If one of the values is prefixed
6294 * by the "(" character, it's considered "open". For instance
6295 * ZRANGEBYSCORE zset (1.5 (2.5 will match min < x < max
6296 * ZRANGEBYSCORE zset 1.5 2.5 will instead match min <= x <= max */
6297 if (((char*)c->argv[2]->ptr)[0] == '(') {
6298 min = strtod((char*)c->argv[2]->ptr+1,NULL);
6299 minex = 1;
6300 } else {
6301 min = strtod(c->argv[2]->ptr,NULL);
6302 }
6303 if (((char*)c->argv[3]->ptr)[0] == '(') {
6304 max = strtod((char*)c->argv[3]->ptr+1,NULL);
6305 maxex = 1;
6306 } else {
6307 max = strtod(c->argv[3]->ptr,NULL);
6308 }
6309
6310 /* Parse "WITHSCORES": note that if the command was called with
6311 * the name ZCOUNT then we are sure that c->argc == 4, so we'll never
6312 * enter the following paths to parse WITHSCORES and LIMIT. */
0500ef27 6313 if (c->argc == 5 || c->argc == 8) {
3a3978b1 6314 if (strcasecmp(c->argv[c->argc-1]->ptr,"withscores") == 0)
6315 withscores = 1;
6316 else
6317 badsyntax = 1;
0500ef27 6318 }
3a3978b1 6319 if (c->argc != (4 + withscores) && c->argc != (7 + withscores))
0500ef27 6320 badsyntax = 1;
0500ef27 6321 if (badsyntax) {
454d4e43 6322 addReplySds(c,
6323 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
80181f78 6324 return;
0500ef27
SH
6325 }
6326
f44dd428 6327 /* Parse "LIMIT" */
0500ef27 6328 if (c->argc == (7 + withscores) && strcasecmp(c->argv[4]->ptr,"limit")) {
80181f78 6329 addReply(c,shared.syntaxerr);
6330 return;
0500ef27 6331 } else if (c->argc == (7 + withscores)) {
80181f78 6332 offset = atoi(c->argv[5]->ptr);
6333 limit = atoi(c->argv[6]->ptr);
0b13687c 6334 if (offset < 0) offset = 0;
80181f78 6335 }
50c55df5 6336
f44dd428 6337 /* Ok, lookup the key and get the range */
50c55df5 6338 o = lookupKeyRead(c->db,c->argv[1]);
6339 if (o == NULL) {
4e27f268 6340 addReply(c,justcount ? shared.czero : shared.emptymultibulk);
50c55df5 6341 } else {
6342 if (o->type != REDIS_ZSET) {
6343 addReply(c,shared.wrongtypeerr);
6344 } else {
6345 zset *zsetobj = o->ptr;
6346 zskiplist *zsl = zsetobj->zsl;
6347 zskiplistNode *ln;
f44dd428 6348 robj *ele, *lenobj = NULL;
6349 unsigned long rangelen = 0;
50c55df5 6350
f44dd428 6351 /* Get the first node with the score >= min, or with
6352 * score > min if 'minex' is true. */
50c55df5 6353 ln = zslFirstWithScore(zsl,min);
f44dd428 6354 while (minex && ln && ln->score == min) ln = ln->forward[0];
6355
50c55df5 6356 if (ln == NULL) {
6357 /* No element matching the speciifed interval */
f44dd428 6358 addReply(c,justcount ? shared.czero : shared.emptymultibulk);
50c55df5 6359 return;
6360 }
6361
6362 /* We don't know in advance how many matching elements there
6363 * are in the list, so we push this object that will represent
6364 * the multi-bulk length in the output buffer, and will "fix"
6365 * it later */
f44dd428 6366 if (!justcount) {
6367 lenobj = createObject(REDIS_STRING,NULL);
6368 addReply(c,lenobj);
6369 decrRefCount(lenobj);
6370 }
50c55df5 6371
f44dd428 6372 while(ln && (maxex ? (ln->score < max) : (ln->score <= max))) {
80181f78 6373 if (offset) {
6374 offset--;
6375 ln = ln->forward[0];
6376 continue;
6377 }
6378 if (limit == 0) break;
f44dd428 6379 if (!justcount) {
6380 ele = ln->obj;
dd88747b 6381 addReplyBulk(c,ele);
f44dd428 6382 if (withscores)
6383 addReplyDouble(c,ln->score);
6384 }
50c55df5 6385 ln = ln->forward[0];
6386 rangelen++;
80181f78 6387 if (limit > 0) limit--;
50c55df5 6388 }
f44dd428 6389 if (justcount) {
482b672d 6390 addReplyLongLong(c,(long)rangelen);
f44dd428 6391 } else {
6392 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",
6393 withscores ? (rangelen*2) : rangelen);
6394 }
50c55df5 6395 }
6396 }
6397}
6398
f44dd428 6399static void zrangebyscoreCommand(redisClient *c) {
6400 genericZrangebyscoreCommand(c,0);
6401}
6402
6403static void zcountCommand(redisClient *c) {
6404 genericZrangebyscoreCommand(c,1);
6405}
6406
3c41331e 6407static void zcardCommand(redisClient *c) {
e197b441 6408 robj *o;
6409 zset *zs;
dd88747b 6410
6411 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
6412 checkType(c,o,REDIS_ZSET)) return;
6413
6414 zs = o->ptr;
6415 addReplyUlong(c,zs->zsl->length);
e197b441 6416}
6417
6e333bbe 6418static void zscoreCommand(redisClient *c) {
6419 robj *o;
6420 zset *zs;
dd88747b 6421 dictEntry *de;
6422
6423 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
6424 checkType(c,o,REDIS_ZSET)) return;
6425
6426 zs = o->ptr;
6427 de = dictFind(zs->dict,c->argv[2]);
6428 if (!de) {
96d8b4ee 6429 addReply(c,shared.nullbulk);
6e333bbe 6430 } else {
dd88747b 6431 double *score = dictGetEntryVal(de);
6e333bbe 6432
dd88747b 6433 addReplyDouble(c,*score);
6e333bbe 6434 }
6435}
6436
798d9e55 6437static void zrankGenericCommand(redisClient *c, int reverse) {
69d95c3e 6438 robj *o;
dd88747b 6439 zset *zs;
6440 zskiplist *zsl;
6441 dictEntry *de;
6442 unsigned long rank;
6443 double *score;
6444
6445 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
6446 checkType(c,o,REDIS_ZSET)) return;
6447
6448 zs = o->ptr;
6449 zsl = zs->zsl;
6450 de = dictFind(zs->dict,c->argv[2]);
6451 if (!de) {
69d95c3e
PN
6452 addReply(c,shared.nullbulk);
6453 return;
6454 }
69d95c3e 6455
dd88747b 6456 score = dictGetEntryVal(de);
6457 rank = zslGetRank(zsl, *score, c->argv[2]);
6458 if (rank) {
6459 if (reverse) {
482b672d 6460 addReplyLongLong(c, zsl->length - rank);
27b0ccca 6461 } else {
482b672d 6462 addReplyLongLong(c, rank-1);
69d95c3e 6463 }
dd88747b 6464 } else {
6465 addReply(c,shared.nullbulk);
978c2c94 6466 }
6467}
6468
798d9e55
PN
6469static void zrankCommand(redisClient *c) {
6470 zrankGenericCommand(c, 0);
6471}
6472
6473static void zrevrankCommand(redisClient *c) {
6474 zrankGenericCommand(c, 1);
6475}
6476
/* ========================= Hashes utility functions ======================= */

/* Bit flags selecting which part of a hash entry is wanted: the field
 * name, the value, or (OR-ed together) both. */
#define REDIS_HASH_KEY 1
#define REDIS_HASH_VALUE 2
978c2c94 6480
7fb16bac
PN
6481/* Check the length of a number of objects to see if we need to convert a
6482 * zipmap to a real hash. Note that we only check string encoded objects
6483 * as their string length can be queried in constant time. */
6484static void hashTryConversion(robj *subject, robj **argv, int start, int end) {
6485 int i;
6486 if (subject->encoding != REDIS_ENCODING_ZIPMAP) return;
978c2c94 6487
7fb16bac
PN
6488 for (i = start; i <= end; i++) {
6489 if (argv[i]->encoding == REDIS_ENCODING_RAW &&
6490 sdslen(argv[i]->ptr) > server.hash_max_zipmap_value)
6491 {
6492 convertToRealHash(subject);
978c2c94 6493 return;
6494 }
6495 }
7fb16bac 6496}
bae2c7ec 6497
97224de7
PN
6498/* Encode given objects in-place when the hash uses a dict. */
6499static void hashTryObjectEncoding(robj *subject, robj **o1, robj **o2) {
6500 if (subject->encoding == REDIS_ENCODING_HT) {
3f973463
PN
6501 if (o1) *o1 = tryObjectEncoding(*o1);
6502 if (o2) *o2 = tryObjectEncoding(*o2);
97224de7
PN
6503 }
6504}
6505
7fb16bac 6506/* Get the value from a hash identified by key. Returns either a string
a3f3af86
PN
6507 * object or NULL if the value cannot be found. The refcount of the object
6508 * is always increased by 1 when the value was found. */
7fb16bac
PN
6509static robj *hashGet(robj *o, robj *key) {
6510 robj *value = NULL;
978c2c94 6511 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
7fb16bac
PN
6512 unsigned char *v;
6513 unsigned int vlen;
6514 key = getDecodedObject(key);
6515 if (zipmapGet(o->ptr,key->ptr,sdslen(key->ptr),&v,&vlen)) {
6516 value = createStringObject((char*)v,vlen);
6517 }
6518 decrRefCount(key);
6519 } else {
6520 dictEntry *de = dictFind(o->ptr,key);
6521 if (de != NULL) {
6522 value = dictGetEntryVal(de);
a3f3af86 6523 incrRefCount(value);
7fb16bac
PN
6524 }
6525 }
6526 return value;
6527}
978c2c94 6528
7fb16bac
PN
6529/* Test if the key exists in the given hash. Returns 1 if the key
6530 * exists and 0 when it doesn't. */
6531static int hashExists(robj *o, robj *key) {
6532 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
6533 key = getDecodedObject(key);
6534 if (zipmapExists(o->ptr,key->ptr,sdslen(key->ptr))) {
6535 decrRefCount(key);
6536 return 1;
6537 }
6538 decrRefCount(key);
6539 } else {
6540 if (dictFind(o->ptr,key) != NULL) {
6541 return 1;
6542 }
6543 }
6544 return 0;
6545}
bae2c7ec 6546
7fb16bac
PN
6547/* Add an element, discard the old if the key already exists.
6548 * Return 0 on insert and 1 on update. */
feb8d7e6 6549static int hashSet(robj *o, robj *key, robj *value) {
7fb16bac
PN
6550 int update = 0;
6551 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
6552 key = getDecodedObject(key);
6553 value = getDecodedObject(value);
6554 o->ptr = zipmapSet(o->ptr,
6555 key->ptr,sdslen(key->ptr),
6556 value->ptr,sdslen(value->ptr), &update);
6557 decrRefCount(key);
6558 decrRefCount(value);
6559
6560 /* Check if the zipmap needs to be upgraded to a real hash table */
6561 if (zipmapLen(o->ptr) > server.hash_max_zipmap_entries)
bae2c7ec 6562 convertToRealHash(o);
978c2c94 6563 } else {
7fb16bac
PN
6564 if (dictReplace(o->ptr,key,value)) {
6565 /* Insert */
6566 incrRefCount(key);
978c2c94 6567 } else {
7fb16bac 6568 /* Update */
978c2c94 6569 update = 1;
6570 }
7fb16bac 6571 incrRefCount(value);
978c2c94 6572 }
7fb16bac 6573 return update;
978c2c94 6574}
6575
7fb16bac
PN
6576/* Delete an element from a hash.
6577 * Return 1 on deleted and 0 on not found. */
6578static int hashDelete(robj *o, robj *key) {
6579 int deleted = 0;
6580 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
6581 key = getDecodedObject(key);
6582 o->ptr = zipmapDel(o->ptr,key->ptr,sdslen(key->ptr), &deleted);
6583 decrRefCount(key);
6584 } else {
6585 deleted = dictDelete((dict*)o->ptr,key) == DICT_OK;
6586 /* Always check if the dictionary needs a resize after a delete. */
6587 if (deleted && htNeedsResize(o->ptr)) dictResize(o->ptr);
d33278d1 6588 }
7fb16bac
PN
6589 return deleted;
6590}
d33278d1 6591
7fb16bac 6592/* Return the number of elements in a hash. */
c811bb38 6593static unsigned long hashLength(robj *o) {
7fb16bac
PN
6594 return (o->encoding == REDIS_ENCODING_ZIPMAP) ?
6595 zipmapLen((unsigned char*)o->ptr) : dictSize((dict*)o->ptr);
6596}
6597
6598/* Structure to hold hash iteration abstration. Note that iteration over
6599 * hashes involves both fields and values. Because it is possible that
6600 * not both are required, store pointers in the iterator to avoid
6601 * unnecessary memory allocation for fields/values. */
6602typedef struct {
6603 int encoding;
6604 unsigned char *zi;
6605 unsigned char *zk, *zv;
6606 unsigned int zklen, zvlen;
6607
6608 dictIterator *di;
6609 dictEntry *de;
6610} hashIterator;
6611
c44d3b56
PN
6612static hashIterator *hashInitIterator(robj *subject) {
6613 hashIterator *hi = zmalloc(sizeof(hashIterator));
7fb16bac
PN
6614 hi->encoding = subject->encoding;
6615 if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
6616 hi->zi = zipmapRewind(subject->ptr);
6617 } else if (hi->encoding == REDIS_ENCODING_HT) {
6618 hi->di = dictGetIterator(subject->ptr);
d33278d1 6619 } else {
7fb16bac 6620 redisAssert(NULL);
d33278d1 6621 }
c44d3b56 6622 return hi;
7fb16bac 6623}
d33278d1 6624
7fb16bac
PN
6625static void hashReleaseIterator(hashIterator *hi) {
6626 if (hi->encoding == REDIS_ENCODING_HT) {
6627 dictReleaseIterator(hi->di);
d33278d1 6628 }
c44d3b56 6629 zfree(hi);
7fb16bac 6630}
d33278d1 6631
7fb16bac
PN
6632/* Move to the next entry in the hash. Return REDIS_OK when the next entry
6633 * could be found and REDIS_ERR when the iterator reaches the end. */
c811bb38 6634static int hashNext(hashIterator *hi) {
7fb16bac
PN
6635 if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
6636 if ((hi->zi = zipmapNext(hi->zi, &hi->zk, &hi->zklen,
6637 &hi->zv, &hi->zvlen)) == NULL) return REDIS_ERR;
6638 } else {
6639 if ((hi->de = dictNext(hi->di)) == NULL) return REDIS_ERR;
6640 }
6641 return REDIS_OK;
6642}
d33278d1 6643
0c390abc 6644/* Get key or value object at current iteration position.
a3f3af86 6645 * This increases the refcount of the field object by 1. */
c811bb38 6646static robj *hashCurrent(hashIterator *hi, int what) {
7fb16bac
PN
6647 robj *o;
6648 if (hi->encoding == REDIS_ENCODING_ZIPMAP) {
6649 if (what & REDIS_HASH_KEY) {
6650 o = createStringObject((char*)hi->zk,hi->zklen);
6651 } else {
6652 o = createStringObject((char*)hi->zv,hi->zvlen);
d33278d1 6653 }
d33278d1 6654 } else {
7fb16bac
PN
6655 if (what & REDIS_HASH_KEY) {
6656 o = dictGetEntryKey(hi->de);
6657 } else {
6658 o = dictGetEntryVal(hi->de);
d33278d1 6659 }
a3f3af86 6660 incrRefCount(o);
d33278d1 6661 }
7fb16bac 6662 return o;
d33278d1
PN
6663}
6664
7fb16bac
PN
6665static robj *hashLookupWriteOrCreate(redisClient *c, robj *key) {
6666 robj *o = lookupKeyWrite(c->db,key);
01426b05
PN
6667 if (o == NULL) {
6668 o = createHashObject();
09241813 6669 dbAdd(c->db,key,o);
01426b05
PN
6670 } else {
6671 if (o->type != REDIS_HASH) {
6672 addReply(c,shared.wrongtypeerr);
7fb16bac 6673 return NULL;
01426b05
PN
6674 }
6675 }
7fb16bac
PN
6676 return o;
6677}
01426b05 6678
7fb16bac
PN
6679/* ============================= Hash commands ============================== */
6680static void hsetCommand(redisClient *c) {
6e9e463f 6681 int update;
7fb16bac 6682 robj *o;
bbe025e0 6683
7fb16bac
PN
6684 if ((o = hashLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
6685 hashTryConversion(o,c->argv,2,3);
97224de7 6686 hashTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
feb8d7e6 6687 update = hashSet(o,c->argv[2],c->argv[3]);
6e9e463f 6688 addReply(c, update ? shared.czero : shared.cone);
7fb16bac
PN
6689 server.dirty++;
6690}
01426b05 6691
1f1c7695
PN
6692static void hsetnxCommand(redisClient *c) {
6693 robj *o;
6694 if ((o = hashLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
6695 hashTryConversion(o,c->argv,2,3);
6696
6697 if (hashExists(o, c->argv[2])) {
6698 addReply(c, shared.czero);
01426b05 6699 } else {
97224de7 6700 hashTryObjectEncoding(o,&c->argv[2], &c->argv[3]);
feb8d7e6 6701 hashSet(o,c->argv[2],c->argv[3]);
1f1c7695
PN
6702 addReply(c, shared.cone);
6703 server.dirty++;
6704 }
6705}
01426b05 6706
7fb16bac
PN
6707static void hmsetCommand(redisClient *c) {
6708 int i;
6709 robj *o;
01426b05 6710
7fb16bac
PN
6711 if ((c->argc % 2) == 1) {
6712 addReplySds(c,sdsnew("-ERR wrong number of arguments for HMSET\r\n"));
6713 return;
6714 }
01426b05 6715
7fb16bac
PN
6716 if ((o = hashLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
6717 hashTryConversion(o,c->argv,2,c->argc-1);
6718 for (i = 2; i < c->argc; i += 2) {
97224de7 6719 hashTryObjectEncoding(o,&c->argv[i], &c->argv[i+1]);
feb8d7e6 6720 hashSet(o,c->argv[i],c->argv[i+1]);
7fb16bac
PN
6721 }
6722 addReply(c, shared.ok);
edc2f63a 6723 server.dirty++;
7fb16bac
PN
6724}
6725
6726static void hincrbyCommand(redisClient *c) {
6727 long long value, incr;
6728 robj *o, *current, *new;
6729
bd79a6bd 6730 if (getLongLongFromObjectOrReply(c,c->argv[3],&incr,NULL) != REDIS_OK) return;
7fb16bac
PN
6731 if ((o = hashLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
6732 if ((current = hashGet(o,c->argv[2])) != NULL) {
946342c1
PN
6733 if (getLongLongFromObjectOrReply(c,current,&value,
6734 "hash value is not an integer") != REDIS_OK) {
6735 decrRefCount(current);
6736 return;
6737 }
a3f3af86 6738 decrRefCount(current);
7fb16bac
PN
6739 } else {
6740 value = 0;
01426b05
PN
6741 }
6742
7fb16bac 6743 value += incr;
3f973463
PN
6744 new = createStringObjectFromLongLong(value);
6745 hashTryObjectEncoding(o,&c->argv[2],NULL);
feb8d7e6 6746 hashSet(o,c->argv[2],new);
7fb16bac
PN
6747 decrRefCount(new);
6748 addReplyLongLong(c,value);
01426b05 6749 server.dirty++;
01426b05
PN
6750}
6751
978c2c94 6752static void hgetCommand(redisClient *c) {
7fb16bac 6753 robj *o, *value;
dd88747b 6754 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
6755 checkType(c,o,REDIS_HASH)) return;
6756
7fb16bac
PN
6757 if ((value = hashGet(o,c->argv[2])) != NULL) {
6758 addReplyBulk(c,value);
a3f3af86 6759 decrRefCount(value);
dd88747b 6760 } else {
7fb16bac 6761 addReply(c,shared.nullbulk);
69d95c3e 6762 }
69d95c3e
PN
6763}
6764
09aeb579
PN
6765static void hmgetCommand(redisClient *c) {
6766 int i;
7fb16bac
PN
6767 robj *o, *value;
6768 o = lookupKeyRead(c->db,c->argv[1]);
6769 if (o != NULL && o->type != REDIS_HASH) {
6770 addReply(c,shared.wrongtypeerr);
09aeb579
PN
6771 }
6772
7fb16bac
PN
6773 /* Note the check for o != NULL happens inside the loop. This is
6774 * done because objects that cannot be found are considered to be
6775 * an empty hash. The reply should then be a series of NULLs. */
09aeb579 6776 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-2));
7fb16bac
PN
6777 for (i = 2; i < c->argc; i++) {
6778 if (o != NULL && (value = hashGet(o,c->argv[i])) != NULL) {
6779 addReplyBulk(c,value);
a3f3af86 6780 decrRefCount(value);
7fb16bac
PN
6781 } else {
6782 addReply(c,shared.nullbulk);
09aeb579
PN
6783 }
6784 }
6785}
6786
07efaf74 6787static void hdelCommand(redisClient *c) {
dd88747b 6788 robj *o;
dd88747b 6789 if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
6790 checkType(c,o,REDIS_HASH)) return;
07efaf74 6791
7fb16bac 6792 if (hashDelete(o,c->argv[2])) {
09241813 6793 if (hashLength(o) == 0) dbDelete(c->db,c->argv[1]);
7fb16bac
PN
6794 addReply(c,shared.cone);
6795 server.dirty++;
dd88747b 6796 } else {
7fb16bac 6797 addReply(c,shared.czero);
07efaf74 6798 }
6799}
6800
92b27fe9 6801static void hlenCommand(redisClient *c) {
6802 robj *o;
dd88747b 6803 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
92b27fe9 6804 checkType(c,o,REDIS_HASH)) return;
6805
7fb16bac 6806 addReplyUlong(c,hashLength(o));
92b27fe9 6807}
6808
78409a0f 6809static void genericHgetallCommand(redisClient *c, int flags) {
7fb16bac 6810 robj *o, *lenobj, *obj;
78409a0f 6811 unsigned long count = 0;
c44d3b56 6812 hashIterator *hi;
78409a0f 6813
4e27f268 6814 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk)) == NULL
78409a0f 6815 || checkType(c,o,REDIS_HASH)) return;
6816
6817 lenobj = createObject(REDIS_STRING,NULL);
6818 addReply(c,lenobj);
6819 decrRefCount(lenobj);
6820
c44d3b56
PN
6821 hi = hashInitIterator(o);
6822 while (hashNext(hi) != REDIS_ERR) {
7fb16bac 6823 if (flags & REDIS_HASH_KEY) {
c44d3b56 6824 obj = hashCurrent(hi,REDIS_HASH_KEY);
7fb16bac 6825 addReplyBulk(c,obj);
a3f3af86 6826 decrRefCount(obj);
7fb16bac 6827 count++;
78409a0f 6828 }
7fb16bac 6829 if (flags & REDIS_HASH_VALUE) {
c44d3b56 6830 obj = hashCurrent(hi,REDIS_HASH_VALUE);
7fb16bac 6831 addReplyBulk(c,obj);
a3f3af86 6832 decrRefCount(obj);
7fb16bac 6833 count++;
78409a0f 6834 }
78409a0f 6835 }
c44d3b56 6836 hashReleaseIterator(hi);
7fb16bac 6837
78409a0f 6838 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",count);
6839}
6840
6841static void hkeysCommand(redisClient *c) {
7fb16bac 6842 genericHgetallCommand(c,REDIS_HASH_KEY);
78409a0f 6843}
6844
6845static void hvalsCommand(redisClient *c) {
7fb16bac 6846 genericHgetallCommand(c,REDIS_HASH_VALUE);
78409a0f 6847}
6848
6849static void hgetallCommand(redisClient *c) {
7fb16bac 6850 genericHgetallCommand(c,REDIS_HASH_KEY|REDIS_HASH_VALUE);
78409a0f 6851}
6852
a86f14b1 6853static void hexistsCommand(redisClient *c) {
6854 robj *o;
a86f14b1 6855 if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
6856 checkType(c,o,REDIS_HASH)) return;
6857
7fb16bac 6858 addReply(c, hashExists(o,c->argv[2]) ? shared.cone : shared.czero);
a86f14b1 6859}
6860
ada386b2 6861static void convertToRealHash(robj *o) {
6862 unsigned char *key, *val, *p, *zm = o->ptr;
6863 unsigned int klen, vlen;
6864 dict *dict = dictCreate(&hashDictType,NULL);
6865
6866 assert(o->type == REDIS_HASH && o->encoding != REDIS_ENCODING_HT);
6867 p = zipmapRewind(zm);
6868 while((p = zipmapNext(p,&key,&klen,&val,&vlen)) != NULL) {
6869 robj *keyobj, *valobj;
6870
6871 keyobj = createStringObject((char*)key,klen);
6872 valobj = createStringObject((char*)val,vlen);
05df7621 6873 keyobj = tryObjectEncoding(keyobj);
6874 valobj = tryObjectEncoding(valobj);
ada386b2 6875 dictAdd(dict,keyobj,valobj);
6876 }
6877 o->encoding = REDIS_ENCODING_HT;
6878 o->ptr = dict;
6879 zfree(zm);
6880}
6881
6b47e12e 6882/* ========================= Non type-specific commands ==================== */
6883
ed9b544e 6884static void flushdbCommand(redisClient *c) {
ca37e9cd 6885 server.dirty += dictSize(c->db->dict);
9b30e1a2 6886 touchWatchedKeysOnFlush(c->db->id);
3305306f 6887 dictEmpty(c->db->dict);
6888 dictEmpty(c->db->expires);
ed9b544e 6889 addReply(c,shared.ok);
ed9b544e 6890}
6891
6892static void flushallCommand(redisClient *c) {
9b30e1a2 6893 touchWatchedKeysOnFlush(-1);
ca37e9cd 6894 server.dirty += emptyDb();
ed9b544e 6895 addReply(c,shared.ok);
500ece7c 6896 if (server.bgsavechildpid != -1) {
6897 kill(server.bgsavechildpid,SIGKILL);
6898 rdbRemoveTempFile(server.bgsavechildpid);
6899 }
f78fd11b 6900 rdbSave(server.dbfilename);
ca37e9cd 6901 server.dirty++;
ed9b544e 6902}
6903
56906eef 6904static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 6905 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 6906 so->type = type;
6907 so->pattern = pattern;
6908 return so;
6909}
6910
6911/* Return the value associated to the key with a name obtained
55017f9d
PN
6912 * substituting the first occurence of '*' in 'pattern' with 'subst'.
6913 * The returned object will always have its refcount increased by 1
6914 * when it is non-NULL. */
56906eef 6915static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
6d7d1370 6916 char *p, *f;
ed9b544e 6917 sds spat, ssub;
6d7d1370
PN
6918 robj keyobj, fieldobj, *o;
6919 int prefixlen, sublen, postfixlen, fieldlen;
ed9b544e 6920 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
6921 struct {
f1017b3f 6922 long len;
6923 long free;
ed9b544e 6924 char buf[REDIS_SORTKEY_MAX+1];
6d7d1370 6925 } keyname, fieldname;
ed9b544e 6926
28173a49 6927 /* If the pattern is "#" return the substitution object itself in order
6928 * to implement the "SORT ... GET #" feature. */
6929 spat = pattern->ptr;
6930 if (spat[0] == '#' && spat[1] == '\0') {
55017f9d 6931 incrRefCount(subst);
28173a49 6932 return subst;
6933 }
6934
6935 /* The substitution object may be specially encoded. If so we create
9d65a1bb 6936 * a decoded object on the fly. Otherwise getDecodedObject will just
6937 * increment the ref count, that we'll decrement later. */
6938 subst = getDecodedObject(subst);
942a3961 6939
ed9b544e 6940 ssub = subst->ptr;
6941 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
6942 p = strchr(spat,'*');
ed5a857a 6943 if (!p) {
6944 decrRefCount(subst);
6945 return NULL;
6946 }
ed9b544e 6947
6d7d1370
PN
6948 /* Find out if we're dealing with a hash dereference. */
6949 if ((f = strstr(p+1, "->")) != NULL) {
6950 fieldlen = sdslen(spat)-(f-spat);
6951 /* this also copies \0 character */
6952 memcpy(fieldname.buf,f+2,fieldlen-1);
6953 fieldname.len = fieldlen-2;
6954 } else {
6955 fieldlen = 0;
6956 }
6957
ed9b544e 6958 prefixlen = p-spat;
6959 sublen = sdslen(ssub);
6d7d1370 6960 postfixlen = sdslen(spat)-(prefixlen+1)-fieldlen;
ed9b544e 6961 memcpy(keyname.buf,spat,prefixlen);
6962 memcpy(keyname.buf+prefixlen,ssub,sublen);
6963 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
6964 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
6965 keyname.len = prefixlen+sublen+postfixlen;
942a3961 6966 decrRefCount(subst);
6967
6d7d1370
PN
6968 /* Lookup substituted key */
6969 initStaticStringObject(keyobj,((char*)&keyname)+(sizeof(long)*2));
6970 o = lookupKeyRead(db,&keyobj);
55017f9d
PN
6971 if (o == NULL) return NULL;
6972
6973 if (fieldlen > 0) {
6974 if (o->type != REDIS_HASH || fieldname.len < 1) return NULL;
6d7d1370 6975
705dad38
PN
6976 /* Retrieve value from hash by the field name. This operation
6977 * already increases the refcount of the returned object. */
6d7d1370
PN
6978 initStaticStringObject(fieldobj,((char*)&fieldname)+(sizeof(long)*2));
6979 o = hashGet(o, &fieldobj);
705dad38 6980 } else {
55017f9d 6981 if (o->type != REDIS_STRING) return NULL;
b6f07345 6982
705dad38
PN
6983 /* Every object that this function returns needs to have its refcount
6984 * increased. sortCommand decreases it again. */
6985 incrRefCount(o);
6d7d1370
PN
6986 }
6987
6988 return o;
ed9b544e 6989}
6990
6991/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
6992 * the additional parameter is not standard but a BSD-specific we have to
6993 * pass sorting parameters via the global 'server' structure */
6994static int sortCompare(const void *s1, const void *s2) {
6995 const redisSortObject *so1 = s1, *so2 = s2;
6996 int cmp;
6997
6998 if (!server.sort_alpha) {
6999 /* Numeric sorting. Here it's trivial as we precomputed scores */
7000 if (so1->u.score > so2->u.score) {
7001 cmp = 1;
7002 } else if (so1->u.score < so2->u.score) {
7003 cmp = -1;
7004 } else {
7005 cmp = 0;
7006 }
7007 } else {
7008 /* Alphanumeric sorting */
7009 if (server.sort_bypattern) {
7010 if (!so1->u.cmpobj || !so2->u.cmpobj) {
7011 /* At least one compare object is NULL */
7012 if (so1->u.cmpobj == so2->u.cmpobj)
7013 cmp = 0;
7014 else if (so1->u.cmpobj == NULL)
7015 cmp = -1;
7016 else
7017 cmp = 1;
7018 } else {
7019 /* We have both the objects, use strcoll */
7020 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
7021 }
7022 } else {
08ee9b57 7023 /* Compare elements directly. */
7024 cmp = compareStringObjects(so1->obj,so2->obj);
ed9b544e 7025 }
7026 }
7027 return server.sort_desc ? -cmp : cmp;
7028}
7029
7030/* The SORT command is the most complex command in Redis. Warning: this code
7031 * is optimized for speed and a bit less for readability */
7032static void sortCommand(redisClient *c) {
ed9b544e 7033 list *operations;
7034 int outputlen = 0;
7035 int desc = 0, alpha = 0;
7036 int limit_start = 0, limit_count = -1, start, end;
7037 int j, dontsort = 0, vectorlen;
7038 int getop = 0; /* GET operation counter */
443c6409 7039 robj *sortval, *sortby = NULL, *storekey = NULL;
ed9b544e 7040 redisSortObject *vector; /* Resulting vector to sort */
7041
7042 /* Lookup the key to sort. It must be of the right types */
3305306f 7043 sortval = lookupKeyRead(c->db,c->argv[1]);
7044 if (sortval == NULL) {
4e27f268 7045 addReply(c,shared.emptymultibulk);
ed9b544e 7046 return;
7047 }
a5eb649b 7048 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
7049 sortval->type != REDIS_ZSET)
7050 {
c937aa89 7051 addReply(c,shared.wrongtypeerr);
ed9b544e 7052 return;
7053 }
7054
7055 /* Create a list of operations to perform for every sorted element.
7056 * Operations can be GET/DEL/INCR/DECR */
7057 operations = listCreate();
092dac2a 7058 listSetFreeMethod(operations,zfree);
ed9b544e 7059 j = 2;
7060
7061 /* Now we need to protect sortval incrementing its count, in the future
7062 * SORT may have options able to overwrite/delete keys during the sorting
7063 * and the sorted key itself may get destroied */
7064 incrRefCount(sortval);
7065
7066 /* The SORT command has an SQL-alike syntax, parse it */
7067 while(j < c->argc) {
7068 int leftargs = c->argc-j-1;
7069 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
7070 desc = 0;
7071 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
7072 desc = 1;
7073 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
7074 alpha = 1;
7075 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
7076 limit_start = atoi(c->argv[j+1]->ptr);
7077 limit_count = atoi(c->argv[j+2]->ptr);
7078 j+=2;
443c6409 7079 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
7080 storekey = c->argv[j+1];
7081 j++;
ed9b544e 7082 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
7083 sortby = c->argv[j+1];
7084 /* If the BY pattern does not contain '*', i.e. it is constant,
7085 * we don't need to sort nor to lookup the weight keys. */
7086 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
7087 j++;
7088 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
7089 listAddNodeTail(operations,createSortOperation(
7090 REDIS_SORT_GET,c->argv[j+1]));
7091 getop++;
7092 j++;
ed9b544e 7093 } else {
7094 decrRefCount(sortval);
7095 listRelease(operations);
c937aa89 7096 addReply(c,shared.syntaxerr);
ed9b544e 7097 return;
7098 }
7099 j++;
7100 }
7101
7102 /* Load the sorting vector with all the objects to sort */
a5eb649b 7103 switch(sortval->type) {
7104 case REDIS_LIST: vectorlen = listLength((list*)sortval->ptr); break;
7105 case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break;
7106 case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
f83c6cb5 7107 default: vectorlen = 0; redisPanic("Bad SORT type"); /* Avoid GCC warning */
a5eb649b 7108 }
ed9b544e 7109 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 7110 j = 0;
a5eb649b 7111
ed9b544e 7112 if (sortval->type == REDIS_LIST) {
7113 list *list = sortval->ptr;
6208b3a7 7114 listNode *ln;
c7df85a4 7115 listIter li;
6208b3a7 7116
c7df85a4 7117 listRewind(list,&li);
7118 while((ln = listNext(&li))) {
ed9b544e 7119 robj *ele = ln->value;
7120 vector[j].obj = ele;
7121 vector[j].u.score = 0;
7122 vector[j].u.cmpobj = NULL;
ed9b544e 7123 j++;
7124 }
7125 } else {
a5eb649b 7126 dict *set;
ed9b544e 7127 dictIterator *di;
7128 dictEntry *setele;
7129
a5eb649b 7130 if (sortval->type == REDIS_SET) {
7131 set = sortval->ptr;
7132 } else {
7133 zset *zs = sortval->ptr;
7134 set = zs->dict;
7135 }
7136
ed9b544e 7137 di = dictGetIterator(set);
ed9b544e 7138 while((setele = dictNext(di)) != NULL) {
7139 vector[j].obj = dictGetEntryKey(setele);
7140 vector[j].u.score = 0;
7141 vector[j].u.cmpobj = NULL;
7142 j++;
7143 }
7144 dictReleaseIterator(di);
7145 }
dfc5e96c 7146 redisAssert(j == vectorlen);
ed9b544e 7147
7148 /* Now it's time to load the right scores in the sorting vector */
7149 if (dontsort == 0) {
7150 for (j = 0; j < vectorlen; j++) {
6d7d1370 7151 robj *byval;
ed9b544e 7152 if (sortby) {
6d7d1370 7153 /* lookup value to sort by */
3305306f 7154 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
705dad38 7155 if (!byval) continue;
ed9b544e 7156 } else {
6d7d1370
PN
7157 /* use object itself to sort by */
7158 byval = vector[j].obj;
7159 }
7160
7161 if (alpha) {
08ee9b57 7162 if (sortby) vector[j].u.cmpobj = getDecodedObject(byval);
6d7d1370
PN
7163 } else {
7164 if (byval->encoding == REDIS_ENCODING_RAW) {
7165 vector[j].u.score = strtod(byval->ptr,NULL);
16fa22f1 7166 } else if (byval->encoding == REDIS_ENCODING_INT) {
6d7d1370
PN
7167 /* Don't need to decode the object if it's
7168 * integer-encoded (the only encoding supported) so
7169 * far. We can just cast it */
16fa22f1
PN
7170 vector[j].u.score = (long)byval->ptr;
7171 } else {
7172 redisAssert(1 != 1);
942a3961 7173 }
ed9b544e 7174 }
6d7d1370 7175
705dad38
PN
7176 /* when the object was retrieved using lookupKeyByPattern,
7177 * its refcount needs to be decreased. */
7178 if (sortby) {
7179 decrRefCount(byval);
ed9b544e 7180 }
7181 }
7182 }
7183
7184 /* We are ready to sort the vector... perform a bit of sanity check
7185 * on the LIMIT option too. We'll use a partial version of quicksort. */
7186 start = (limit_start < 0) ? 0 : limit_start;
7187 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
7188 if (start >= vectorlen) {
7189 start = vectorlen-1;
7190 end = vectorlen-2;
7191 }
7192 if (end >= vectorlen) end = vectorlen-1;
7193
7194 if (dontsort == 0) {
7195 server.sort_desc = desc;
7196 server.sort_alpha = alpha;
7197 server.sort_bypattern = sortby ? 1 : 0;
5f5b9840 7198 if (sortby && (start != 0 || end != vectorlen-1))
7199 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
7200 else
7201 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 7202 }
7203
7204 /* Send command output to the output buffer, performing the specified
7205 * GET/DEL/INCR/DECR operations if any. */
7206 outputlen = getop ? getop*(end-start+1) : end-start+1;
443c6409 7207 if (storekey == NULL) {
7208 /* STORE option not specified, sent the sorting result to client */
7209 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
7210 for (j = start; j <= end; j++) {
7211 listNode *ln;
c7df85a4 7212 listIter li;
7213
dd88747b 7214 if (!getop) addReplyBulk(c,vector[j].obj);
c7df85a4 7215 listRewind(operations,&li);
7216 while((ln = listNext(&li))) {
443c6409 7217 redisSortOperation *sop = ln->value;
7218 robj *val = lookupKeyByPattern(c->db,sop->pattern,
7219 vector[j].obj);
7220
7221 if (sop->type == REDIS_SORT_GET) {
55017f9d 7222 if (!val) {
443c6409 7223 addReply(c,shared.nullbulk);
7224 } else {
dd88747b 7225 addReplyBulk(c,val);
55017f9d 7226 decrRefCount(val);
443c6409 7227 }
7228 } else {
dfc5e96c 7229 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
443c6409 7230 }
7231 }
ed9b544e 7232 }
443c6409 7233 } else {
7234 robj *listObject = createListObject();
7235 list *listPtr = (list*) listObject->ptr;
7236
7237 /* STORE option specified, set the sorting result as a List object */
7238 for (j = start; j <= end; j++) {
7239 listNode *ln;
c7df85a4 7240 listIter li;
7241
443c6409 7242 if (!getop) {
7243 listAddNodeTail(listPtr,vector[j].obj);
7244 incrRefCount(vector[j].obj);
7245 }
c7df85a4 7246 listRewind(operations,&li);
7247 while((ln = listNext(&li))) {
443c6409 7248 redisSortOperation *sop = ln->value;
7249 robj *val = lookupKeyByPattern(c->db,sop->pattern,
7250 vector[j].obj);
7251
7252 if (sop->type == REDIS_SORT_GET) {
55017f9d 7253 if (!val) {
443c6409 7254 listAddNodeTail(listPtr,createStringObject("",0));
7255 } else {
55017f9d
PN
7256 /* We should do a incrRefCount on val because it is
7257 * added to the list, but also a decrRefCount because
7258 * it is returned by lookupKeyByPattern. This results
7259 * in doing nothing at all. */
443c6409 7260 listAddNodeTail(listPtr,val);
443c6409 7261 }
ed9b544e 7262 } else {
dfc5e96c 7263 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
ed9b544e 7264 }
ed9b544e 7265 }
ed9b544e 7266 }
09241813 7267 dbReplace(c->db,storekey,listObject);
443c6409 7268 /* Note: we add 1 because the DB is dirty anyway since even if the
7269 * SORT result is empty a new key is set and maybe the old content
7270 * replaced. */
7271 server.dirty += 1+outputlen;
7272 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
ed9b544e 7273 }
7274
7275 /* Cleanup */
7276 decrRefCount(sortval);
7277 listRelease(operations);
7278 for (j = 0; j < vectorlen; j++) {
16fa22f1 7279 if (alpha && vector[j].u.cmpobj)
ed9b544e 7280 decrRefCount(vector[j].u.cmpobj);
7281 }
7282 zfree(vector);
7283}
7284
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 2G, 100M, 4K, and so forth.
 *
 * 's' is the output buffer (the caller in this file passes 64 bytes, far
 * more than the longest string produced here) and 'n' the byte count.
 * Binary multiples are used (1K = 1024B) and scaled values are printed
 * with two decimals. Values of one tebibyte and above get T and P
 * suffixes; anything larger is printed as a plain byte count, so 's' is
 * always written for every 'n'. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else if (n < (1024LL*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    } else if (n < (1024LL*1024*1024*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024*1024*1024);
        sprintf(s,"%.2fP",d);
    } else {
        /* Too large for the suffixes above: report the raw byte count. */
        sprintf(s,"%lluB",n);
    }
}
7305
1c85b79f 7306/* Create the string returned by the INFO command. This is decoupled
7307 * by the INFO command itself as we need to report the same information
7308 * on memory corruption problems. */
7309static sds genRedisInfoString(void) {
ed9b544e 7310 sds info;
7311 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 7312 int j;
ec6c7a1d 7313 char hmem[64];
55a8298f 7314
b72f6a4b 7315 bytesToHuman(hmem,zmalloc_used_memory());
ed9b544e 7316 info = sdscatprintf(sdsempty(),
7317 "redis_version:%s\r\n"
5436146c
PN
7318 "redis_git_sha1:%s\r\n"
7319 "redis_git_dirty:%d\r\n"
f1017b3f 7320 "arch_bits:%s\r\n"
7a932b74 7321 "multiplexing_api:%s\r\n"
0d7170a4 7322 "process_id:%ld\r\n"
682ac724 7323 "uptime_in_seconds:%ld\r\n"
7324 "uptime_in_days:%ld\r\n"
ed9b544e 7325 "connected_clients:%d\r\n"
7326 "connected_slaves:%d\r\n"
f86a74e9 7327 "blocked_clients:%d\r\n"
5fba9f71 7328 "used_memory:%zu\r\n"
ec6c7a1d 7329 "used_memory_human:%s\r\n"
ed9b544e 7330 "changes_since_last_save:%lld\r\n"
be2bb6b0 7331 "bgsave_in_progress:%d\r\n"
682ac724 7332 "last_save_time:%ld\r\n"
b3fad521 7333 "bgrewriteaof_in_progress:%d\r\n"
ed9b544e 7334 "total_connections_received:%lld\r\n"
7335 "total_commands_processed:%lld\r\n"
2a6a2ed1 7336 "expired_keys:%lld\r\n"
3be2c9d7 7337 "hash_max_zipmap_entries:%zu\r\n"
7338 "hash_max_zipmap_value:%zu\r\n"
ffc6b7f8 7339 "pubsub_channels:%ld\r\n"
7340 "pubsub_patterns:%u\r\n"
7d98e08c 7341 "vm_enabled:%d\r\n"
a0f643ea 7342 "role:%s\r\n"
ed9b544e 7343 ,REDIS_VERSION,
5436146c 7344 REDIS_GIT_SHA1,
274e45e3 7345 strtol(REDIS_GIT_DIRTY,NULL,10) > 0,
f1017b3f 7346 (sizeof(long) == 8) ? "64" : "32",
7a932b74 7347 aeGetApiName(),
0d7170a4 7348 (long) getpid(),
a0f643ea 7349 uptime,
7350 uptime/(3600*24),
ed9b544e 7351 listLength(server.clients)-listLength(server.slaves),
7352 listLength(server.slaves),
d5d55fc3 7353 server.blpop_blocked_clients,
b72f6a4b 7354 zmalloc_used_memory(),
ec6c7a1d 7355 hmem,
ed9b544e 7356 server.dirty,
9d65a1bb 7357 server.bgsavechildpid != -1,
ed9b544e 7358 server.lastsave,
b3fad521 7359 server.bgrewritechildpid != -1,
ed9b544e 7360 server.stat_numconnections,
7361 server.stat_numcommands,
2a6a2ed1 7362 server.stat_expiredkeys,
55a8298f 7363 server.hash_max_zipmap_entries,
7364 server.hash_max_zipmap_value,
ffc6b7f8 7365 dictSize(server.pubsub_channels),
7366 listLength(server.pubsub_patterns),
7d98e08c 7367 server.vm_enabled != 0,
a0f643ea 7368 server.masterhost == NULL ? "master" : "slave"
ed9b544e 7369 );
a0f643ea 7370 if (server.masterhost) {
7371 info = sdscatprintf(info,
7372 "master_host:%s\r\n"
7373 "master_port:%d\r\n"
7374 "master_link_status:%s\r\n"
7375 "master_last_io_seconds_ago:%d\r\n"
7376 ,server.masterhost,
7377 server.masterport,
7378 (server.replstate == REDIS_REPL_CONNECTED) ?
7379 "up" : "down",
f72b934d 7380 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 7381 );
7382 }
7d98e08c 7383 if (server.vm_enabled) {
1064ef87 7384 lockThreadedIO();
7d98e08c 7385 info = sdscatprintf(info,
7386 "vm_conf_max_memory:%llu\r\n"
7387 "vm_conf_page_size:%llu\r\n"
7388 "vm_conf_pages:%llu\r\n"
7389 "vm_stats_used_pages:%llu\r\n"
7390 "vm_stats_swapped_objects:%llu\r\n"
7391 "vm_stats_swappin_count:%llu\r\n"
7392 "vm_stats_swappout_count:%llu\r\n"
b9bc0eef 7393 "vm_stats_io_newjobs_len:%lu\r\n"
7394 "vm_stats_io_processing_len:%lu\r\n"
7395 "vm_stats_io_processed_len:%lu\r\n"
25fd2cb2 7396 "vm_stats_io_active_threads:%lu\r\n"
d5d55fc3 7397 "vm_stats_blocked_clients:%lu\r\n"
7d98e08c 7398 ,(unsigned long long) server.vm_max_memory,
7399 (unsigned long long) server.vm_page_size,
7400 (unsigned long long) server.vm_pages,
7401 (unsigned long long) server.vm_stats_used_pages,
7402 (unsigned long long) server.vm_stats_swapped_objects,
7403 (unsigned long long) server.vm_stats_swapins,
b9bc0eef 7404 (unsigned long long) server.vm_stats_swapouts,
7405 (unsigned long) listLength(server.io_newjobs),
7406 (unsigned long) listLength(server.io_processing),
7407 (unsigned long) listLength(server.io_processed),
d5d55fc3 7408 (unsigned long) server.io_active_threads,
7409 (unsigned long) server.vm_blocked_clients
7d98e08c 7410 );
1064ef87 7411 unlockThreadedIO();
7d98e08c 7412 }
c3cb078d 7413 for (j = 0; j < server.dbnum; j++) {
7414 long long keys, vkeys;
7415
7416 keys = dictSize(server.db[j].dict);
7417 vkeys = dictSize(server.db[j].expires);
7418 if (keys || vkeys) {
9d65a1bb 7419 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
c3cb078d 7420 j, keys, vkeys);
7421 }
7422 }
1c85b79f 7423 return info;
7424}
7425
7426static void infoCommand(redisClient *c) {
7427 sds info = genRedisInfoString();
83c6a618 7428 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
7429 (unsigned long)sdslen(info)));
ed9b544e 7430 addReplySds(c,info);
70003d28 7431 addReply(c,shared.crlf);
ed9b544e 7432}
7433
3305306f 7434static void monitorCommand(redisClient *c) {
7435 /* ignore MONITOR if aleady slave or in monitor mode */
7436 if (c->flags & REDIS_SLAVE) return;
7437
7438 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
7439 c->slaveseldb = 0;
6b47e12e 7440 listAddNodeTail(server.monitors,c);
3305306f 7441 addReply(c,shared.ok);
7442}
7443
7444/* ================================= Expire ================================= */
7445static int removeExpire(redisDb *db, robj *key) {
09241813 7446 if (dictDelete(db->expires,key->ptr) == DICT_OK) {
3305306f 7447 return 1;
7448 } else {
7449 return 0;
7450 }
7451}
7452
7453static int setExpire(redisDb *db, robj *key, time_t when) {
09241813 7454 sds copy = sdsdup(key->ptr);
7455 if (dictAdd(db->expires,copy,(void*)when) == DICT_ERR) {
7456 sdsfree(copy);
3305306f 7457 return 0;
7458 } else {
3305306f 7459 return 1;
7460 }
7461}
7462
bb32ede5 7463/* Return the expire time of the specified key, or -1 if no expire
7464 * is associated with this key (i.e. the key is non volatile) */
7465static time_t getExpire(redisDb *db, robj *key) {
7466 dictEntry *de;
7467
7468 /* No expire? return ASAP */
7469 if (dictSize(db->expires) == 0 ||
09241813 7470 (de = dictFind(db->expires,key->ptr)) == NULL) return -1;
bb32ede5 7471
7472 return (time_t) dictGetEntryVal(de);
7473}
7474
3305306f 7475static int expireIfNeeded(redisDb *db, robj *key) {
7476 time_t when;
7477 dictEntry *de;
7478
7479 /* No expire? return ASAP */
7480 if (dictSize(db->expires) == 0 ||
09241813 7481 (de = dictFind(db->expires,key->ptr)) == NULL) return 0;
3305306f 7482
7483 /* Lookup the expire */
7484 when = (time_t) dictGetEntryVal(de);
7485 if (time(NULL) <= when) return 0;
7486
7487 /* Delete the key */
09241813 7488 dbDelete(db,key);
2a6a2ed1 7489 server.stat_expiredkeys++;
09241813 7490 return 1;
3305306f 7491}
7492
7493static int deleteIfVolatile(redisDb *db, robj *key) {
7494 dictEntry *de;
7495
7496 /* No expire? return ASAP */
7497 if (dictSize(db->expires) == 0 ||
09241813 7498 (de = dictFind(db->expires,key->ptr)) == NULL) return 0;
3305306f 7499
7500 /* Delete the key */
0c66a471 7501 server.dirty++;
2a6a2ed1 7502 server.stat_expiredkeys++;
09241813 7503 dictDelete(db->expires,key->ptr);
7504 return dictDelete(db->dict,key->ptr) == DICT_OK;
3305306f 7505}
7506
bbe025e0 7507static void expireGenericCommand(redisClient *c, robj *key, robj *param, long offset) {
3305306f 7508 dictEntry *de;
bbe025e0
AM
7509 time_t seconds;
7510
bd79a6bd 7511 if (getLongFromObjectOrReply(c, param, &seconds, NULL) != REDIS_OK) return;
bbe025e0
AM
7512
7513 seconds -= offset;
3305306f 7514
09241813 7515 de = dictFind(c->db->dict,key->ptr);
3305306f 7516 if (de == NULL) {
7517 addReply(c,shared.czero);
7518 return;
7519 }
d4dd6556 7520 if (seconds <= 0) {
09241813 7521 if (dbDelete(c->db,key)) server.dirty++;
43e5ccdf 7522 addReply(c, shared.cone);
3305306f 7523 return;
7524 } else {
7525 time_t when = time(NULL)+seconds;
802e8373 7526 if (setExpire(c->db,key,when)) {
3305306f 7527 addReply(c,shared.cone);
77423026 7528 server.dirty++;
7529 } else {
3305306f 7530 addReply(c,shared.czero);
77423026 7531 }
3305306f 7532 return;
7533 }
7534}
7535
802e8373 7536static void expireCommand(redisClient *c) {
bbe025e0 7537 expireGenericCommand(c,c->argv[1],c->argv[2],0);
802e8373 7538}
7539
7540static void expireatCommand(redisClient *c) {
bbe025e0 7541 expireGenericCommand(c,c->argv[1],c->argv[2],time(NULL));
802e8373 7542}
7543
fd88489a 7544static void ttlCommand(redisClient *c) {
7545 time_t expire;
7546 int ttl = -1;
7547
7548 expire = getExpire(c->db,c->argv[1]);
7549 if (expire != -1) {
7550 ttl = (int) (expire-time(NULL));
7551 if (ttl < 0) ttl = -1;
7552 }
7553 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
7554}
7555
6e469882 7556/* ================================ MULTI/EXEC ============================== */
7557
7558/* Client state initialization for MULTI/EXEC */
7559static void initClientMultiState(redisClient *c) {
7560 c->mstate.commands = NULL;
7561 c->mstate.count = 0;
7562}
7563
7564/* Release all the resources associated with MULTI/EXEC state */
7565static void freeClientMultiState(redisClient *c) {
7566 int j;
7567
7568 for (j = 0; j < c->mstate.count; j++) {
7569 int i;
7570 multiCmd *mc = c->mstate.commands+j;
7571
7572 for (i = 0; i < mc->argc; i++)
7573 decrRefCount(mc->argv[i]);
7574 zfree(mc->argv);
7575 }
7576 zfree(c->mstate.commands);
7577}
7578
7579/* Add a new command into the MULTI commands queue */
7580static void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
7581 multiCmd *mc;
7582 int j;
7583
7584 c->mstate.commands = zrealloc(c->mstate.commands,
7585 sizeof(multiCmd)*(c->mstate.count+1));
7586 mc = c->mstate.commands+c->mstate.count;
7587 mc->cmd = cmd;
7588 mc->argc = c->argc;
7589 mc->argv = zmalloc(sizeof(robj*)*c->argc);
7590 memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
7591 for (j = 0; j < c->argc; j++)
7592 incrRefCount(mc->argv[j]);
7593 c->mstate.count++;
7594}
7595
7596static void multiCommand(redisClient *c) {
6531c94d 7597 if (c->flags & REDIS_MULTI) {
7598 addReplySds(c,sdsnew("-ERR MULTI calls can not be nested\r\n"));
7599 return;
7600 }
6e469882 7601 c->flags |= REDIS_MULTI;
36c548f0 7602 addReply(c,shared.ok);
6e469882 7603}
7604
18b6cb76
DJ
7605static void discardCommand(redisClient *c) {
7606 if (!(c->flags & REDIS_MULTI)) {
7607 addReplySds(c,sdsnew("-ERR DISCARD without MULTI\r\n"));
7608 return;
7609 }
7610
7611 freeClientMultiState(c);
7612 initClientMultiState(c);
7613 c->flags &= (~REDIS_MULTI);
7614 addReply(c,shared.ok);
7615}
7616
66c8853f 7617/* Send a MULTI command to all the slaves and AOF file. Check the execCommand
7618 * implememntation for more information. */
7619static void execCommandReplicateMulti(redisClient *c) {
7620 struct redisCommand *cmd;
7621 robj *multistring = createStringObject("MULTI",5);
7622
7623 cmd = lookupCommand("multi");
7624 if (server.appendonly)
7625 feedAppendOnlyFile(cmd,c->db->id,&multistring,1);
7626 if (listLength(server.slaves))
7627 replicationFeedSlaves(server.slaves,c->db->id,&multistring,1);
7628 decrRefCount(multistring);
7629}
7630
6e469882 7631static void execCommand(redisClient *c) {
7632 int j;
7633 robj **orig_argv;
7634 int orig_argc;
7635
7636 if (!(c->flags & REDIS_MULTI)) {
7637 addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
7638 return;
7639 }
7640
37ab76c9 7641 /* Check if we need to abort the EXEC if some WATCHed key was touched.
7642 * A failed EXEC will return a multi bulk nil object. */
7643 if (c->flags & REDIS_DIRTY_CAS) {
7644 freeClientMultiState(c);
7645 initClientMultiState(c);
7646 c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
7647 unwatchAllKeys(c);
7648 addReply(c,shared.nullmultibulk);
7649 return;
7650 }
7651
66c8853f 7652 /* Replicate a MULTI request now that we are sure the block is executed.
7653 * This way we'll deliver the MULTI/..../EXEC block as a whole and
7654 * both the AOF and the replication link will have the same consistency
7655 * and atomicity guarantees. */
7656 execCommandReplicateMulti(c);
7657
7658 /* Exec all the queued commands */
1ad4d316 7659 unwatchAllKeys(c); /* Unwatch ASAP otherwise we'll waste CPU cycles */
6e469882 7660 orig_argv = c->argv;
7661 orig_argc = c->argc;
7662 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
7663 for (j = 0; j < c->mstate.count; j++) {
7664 c->argc = c->mstate.commands[j].argc;
7665 c->argv = c->mstate.commands[j].argv;
7666 call(c,c->mstate.commands[j].cmd);
7667 }
7668 c->argv = orig_argv;
7669 c->argc = orig_argc;
7670 freeClientMultiState(c);
7671 initClientMultiState(c);
1ad4d316 7672 c->flags &= ~(REDIS_MULTI|REDIS_DIRTY_CAS);
66c8853f 7673 /* Make sure the EXEC command is always replicated / AOF, since we
7674 * always send the MULTI command (we can't know beforehand if the
7675 * next operations will contain at least a modification to the DB). */
7676 server.dirty++;
6e469882 7677}
7678
4409877e 7679/* =========================== Blocking Operations ========================= */
7680
7681/* Currently Redis blocking operations support is limited to list POP ops,
7682 * so the current implementation is not fully generic, but it is also not
7683 * completely specific so it will not require a rewrite to support new
7684 * kind of blocking operations in the future.
7685 *
7686 * Still it's important to note that list blocking operations can be already
7687 * used as a notification mechanism in order to implement other blocking
7688 * operations at application level, so there must be a very strong evidence
7689 * of usefulness and generality before new blocking operations are implemented.
7690 *
7691 * This is how the current blocking POP works, we use BLPOP as example:
7692 * - If the user calls BLPOP and the key exists and contains a non empty list
7693 * then LPOP is called instead. So BLPOP is semantically the same as LPOP
7694 * if there is no need to block.
7695 * - If instead BLPOP is called and the key does not exist or the list is
7696 * empty we need to block. In order to do so we remove the notification for
7697 * new data to read in the client socket (so that we'll not serve new
7698 * requests if the blocking request is not served). Also we put the client
37ab76c9 7699 * in a dictionary (db->blocking_keys) mapping keys to a list of clients
4409877e 7700 * blocking for these keys.
7701 * - If a PUSH operation against a key with blocked clients waiting is
7702 * performed, we serve the first in the list: basically instead to push
7703 * the new element inside the list we return it to the (first / oldest)
7704 * blocking client, unblock the client, and remove it from the list.
7705 *
7706 * The above comment and the source code should be enough in order to understand
7707 * the implementation and modify / fix it later.
7708 */
7709
7710/* Set a client in blocking mode for the specified key, with the specified
7711 * timeout */
b177fd30 7712static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
4409877e 7713 dictEntry *de;
7714 list *l;
b177fd30 7715 int j;
4409877e 7716
37ab76c9 7717 c->blocking_keys = zmalloc(sizeof(robj*)*numkeys);
7718 c->blocking_keys_num = numkeys;
4409877e 7719 c->blockingto = timeout;
b177fd30 7720 for (j = 0; j < numkeys; j++) {
7721 /* Add the key in the client structure, to map clients -> keys */
37ab76c9 7722 c->blocking_keys[j] = keys[j];
b177fd30 7723 incrRefCount(keys[j]);
4409877e 7724
b177fd30 7725 /* And in the other "side", to map keys -> clients */
37ab76c9 7726 de = dictFind(c->db->blocking_keys,keys[j]);
b177fd30 7727 if (de == NULL) {
7728 int retval;
7729
7730 /* For every key we take a list of clients blocked for it */
7731 l = listCreate();
37ab76c9 7732 retval = dictAdd(c->db->blocking_keys,keys[j],l);
b177fd30 7733 incrRefCount(keys[j]);
7734 assert(retval == DICT_OK);
7735 } else {
7736 l = dictGetEntryVal(de);
7737 }
7738 listAddNodeTail(l,c);
4409877e 7739 }
b177fd30 7740 /* Mark the client as a blocked client */
4409877e 7741 c->flags |= REDIS_BLOCKED;
d5d55fc3 7742 server.blpop_blocked_clients++;
4409877e 7743}
7744
7745/* Unblock a client that's waiting in a blocking operation such as BLPOP */
b0d8747d 7746static void unblockClientWaitingData(redisClient *c) {
4409877e 7747 dictEntry *de;
7748 list *l;
b177fd30 7749 int j;
4409877e 7750
37ab76c9 7751 assert(c->blocking_keys != NULL);
b177fd30 7752 /* The client may wait for multiple keys, so unblock it for every key. */
37ab76c9 7753 for (j = 0; j < c->blocking_keys_num; j++) {
b177fd30 7754 /* Remove this client from the list of clients waiting for this key. */
37ab76c9 7755 de = dictFind(c->db->blocking_keys,c->blocking_keys[j]);
b177fd30 7756 assert(de != NULL);
7757 l = dictGetEntryVal(de);
7758 listDelNode(l,listSearchKey(l,c));
7759 /* If the list is empty we need to remove it to avoid wasting memory */
7760 if (listLength(l) == 0)
37ab76c9 7761 dictDelete(c->db->blocking_keys,c->blocking_keys[j]);
7762 decrRefCount(c->blocking_keys[j]);
b177fd30 7763 }
7764 /* Cleanup the client structure */
37ab76c9 7765 zfree(c->blocking_keys);
7766 c->blocking_keys = NULL;
4409877e 7767 c->flags &= (~REDIS_BLOCKED);
d5d55fc3 7768 server.blpop_blocked_clients--;
5921aa36 7769 /* We want to process data if there is some command waiting
b0d8747d 7770 * in the input buffer. Note that this is safe even if
7771 * unblockClientWaitingData() gets called from freeClient() because
7772 * freeClient() will be smart enough to call this function
7773 * *after* c->querybuf was set to NULL. */
4409877e 7774 if (c->querybuf && sdslen(c->querybuf) > 0) processInputBuffer(c);
7775}
7776
7777/* This should be called from any function PUSHing into lists.
7778 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
7779 * 'ele' is the element pushed.
7780 *
7781 * If the function returns 0 there was no client waiting for a list push
7782 * against this key.
7783 *
7784 * If the function returns 1 there was a client waiting for a list push
7785 * against this key, the element was passed to this client thus it's not
7786 * needed to actually add it to the list and the caller should return asap. */
7787static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
7788 struct dictEntry *de;
7789 redisClient *receiver;
7790 list *l;
7791 listNode *ln;
7792
37ab76c9 7793 de = dictFind(c->db->blocking_keys,key);
4409877e 7794 if (de == NULL) return 0;
7795 l = dictGetEntryVal(de);
7796 ln = listFirst(l);
7797 assert(ln != NULL);
7798 receiver = ln->value;
4409877e 7799
b177fd30 7800 addReplySds(receiver,sdsnew("*2\r\n"));
dd88747b 7801 addReplyBulk(receiver,key);
7802 addReplyBulk(receiver,ele);
b0d8747d 7803 unblockClientWaitingData(receiver);
4409877e 7804 return 1;
7805}
7806
7807/* Blocking RPOP/LPOP */
7808static void blockingPopGenericCommand(redisClient *c, int where) {
7809 robj *o;
7810 time_t timeout;
b177fd30 7811 int j;
4409877e 7812
b177fd30 7813 for (j = 1; j < c->argc-1; j++) {
7814 o = lookupKeyWrite(c->db,c->argv[j]);
7815 if (o != NULL) {
7816 if (o->type != REDIS_LIST) {
7817 addReply(c,shared.wrongtypeerr);
4409877e 7818 return;
b177fd30 7819 } else {
7820 list *list = o->ptr;
7821 if (listLength(list) != 0) {
7822 /* If the list contains elements fall back to the usual
7823 * non-blocking POP operation */
7824 robj *argv[2], **orig_argv;
7825 int orig_argc;
e0a62c7f 7826
b177fd30 7827 /* We need to alter the command arguments before to call
7828 * popGenericCommand() as the command takes a single key. */
7829 orig_argv = c->argv;
7830 orig_argc = c->argc;
7831 argv[1] = c->argv[j];
7832 c->argv = argv;
7833 c->argc = 2;
7834
7835 /* Also the return value is different, we need to output
7836 * the multi bulk reply header and the key name. The
7837 * "real" command will add the last element (the value)
7838 * for us. If this souds like an hack to you it's just
7839 * because it is... */
7840 addReplySds(c,sdsnew("*2\r\n"));
dd88747b 7841 addReplyBulk(c,argv[1]);
b177fd30 7842 popGenericCommand(c,where);
7843
7844 /* Fix the client structure with the original stuff */
7845 c->argv = orig_argv;
7846 c->argc = orig_argc;
7847 return;
7848 }
4409877e 7849 }
7850 }
7851 }
7852 /* If the list is empty or the key does not exists we must block */
b177fd30 7853 timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
4409877e 7854 if (timeout > 0) timeout += time(NULL);
b177fd30 7855 blockForKeys(c,c->argv+1,c->argc-2,timeout);
4409877e 7856}
7857
7858static void blpopCommand(redisClient *c) {
7859 blockingPopGenericCommand(c,REDIS_HEAD);
7860}
7861
7862static void brpopCommand(redisClient *c) {
7863 blockingPopGenericCommand(c,REDIS_TAIL);
7864}
7865
ed9b544e 7866/* =============================== Replication ============================= */
7867
a4d1ba9a 7868static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 7869 ssize_t nwritten, ret = size;
7870 time_t start = time(NULL);
7871
7872 timeout++;
7873 while(size) {
7874 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
7875 nwritten = write(fd,ptr,size);
7876 if (nwritten == -1) return -1;
7877 ptr += nwritten;
7878 size -= nwritten;
7879 }
7880 if ((time(NULL)-start) > timeout) {
7881 errno = ETIMEDOUT;
7882 return -1;
7883 }
7884 }
7885 return ret;
7886}
7887
a4d1ba9a 7888static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 7889 ssize_t nread, totread = 0;
7890 time_t start = time(NULL);
7891
7892 timeout++;
7893 while(size) {
7894 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
7895 nread = read(fd,ptr,size);
7896 if (nread == -1) return -1;
7897 ptr += nread;
7898 size -= nread;
7899 totread += nread;
7900 }
7901 if ((time(NULL)-start) > timeout) {
7902 errno = ETIMEDOUT;
7903 return -1;
7904 }
7905 }
7906 return totread;
7907}
7908
/* Read a line from 'fd' one byte at a time using syncRead(), storing it as
 * a nul terminated string into 'ptr', a buffer of 'size' bytes.
 *
 * Reading stops at the first '\n', which is not stored; a '\r' immediately
 * preceding it is overwritten with a nul byte. Reading also stops when
 * 'size'-1 bytes have been stored, so the buffer is never overrun (the
 * remaining-space counter is decremented for every stored byte).
 *
 * Returns -1 if syncRead() fails, otherwise the number of bytes stored
 * before the terminator. Note that a stripped trailing '\r' is still
 * included in the returned count. A 'size' of zero or less stores nothing
 * and returns 0. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    *ptr = '\0';
    size--; /* Reserve room for the nul terminator. */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        size--;
    }
    return nread;
}
7929
7930static void syncCommand(redisClient *c) {
40d224a9 7931 /* ignore SYNC if aleady slave or in monitor mode */
7932 if (c->flags & REDIS_SLAVE) return;
7933
7934 /* SYNC can't be issued when the server has pending data to send to
7935 * the client about already issued commands. We need a fresh reply
7936 * buffer registering the differences between the BGSAVE and the current
7937 * dataset, so that we can copy to other slaves if needed. */
7938 if (listLength(c->reply) != 0) {
7939 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
7940 return;
7941 }
7942
7943 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
7944 /* Here we need to check if there is a background saving operation
7945 * in progress, or if it is required to start one */
9d65a1bb 7946 if (server.bgsavechildpid != -1) {
40d224a9 7947 /* Ok a background save is in progress. Let's check if it is a good
7948 * one for replication, i.e. if there is another slave that is
7949 * registering differences since the server forked to save */
7950 redisClient *slave;
7951 listNode *ln;
c7df85a4 7952 listIter li;
40d224a9 7953
c7df85a4 7954 listRewind(server.slaves,&li);
7955 while((ln = listNext(&li))) {
40d224a9 7956 slave = ln->value;
7957 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 7958 }
7959 if (ln) {
7960 /* Perfect, the server is already registering differences for
7961 * another slave. Set the right state, and copy the buffer. */
7962 listRelease(c->reply);
7963 c->reply = listDup(slave->reply);
40d224a9 7964 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
7965 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
7966 } else {
7967 /* No way, we need to wait for the next BGSAVE in order to
7968 * register differences */
7969 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
7970 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
7971 }
7972 } else {
7973 /* Ok we don't have a BGSAVE in progress, let's start one */
7974 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
7975 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
7976 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
7977 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
7978 return;
7979 }
7980 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
7981 }
6208b3a7 7982 c->repldbfd = -1;
40d224a9 7983 c->flags |= REDIS_SLAVE;
7984 c->slaveseldb = 0;
6b47e12e 7985 listAddNodeTail(server.slaves,c);
40d224a9 7986 return;
7987}
7988
6208b3a7 7989static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
7990 redisClient *slave = privdata;
7991 REDIS_NOTUSED(el);
7992 REDIS_NOTUSED(mask);
7993 char buf[REDIS_IOBUF_LEN];
7994 ssize_t nwritten, buflen;
7995
7996 if (slave->repldboff == 0) {
7997 /* Write the bulk write count before to transfer the DB. In theory here
7998 * we don't know how much room there is in the output buffer of the
7999 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
8000 * operations) will never be smaller than the few bytes we need. */
8001 sds bulkcount;
8002
8003 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
8004 slave->repldbsize);
8005 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
8006 {
8007 sdsfree(bulkcount);
8008 freeClient(slave);
8009 return;
8010 }
8011 sdsfree(bulkcount);
8012 }
8013 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
8014 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
8015 if (buflen <= 0) {
8016 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
8017 (buflen == 0) ? "premature EOF" : strerror(errno));
8018 freeClient(slave);
8019 return;
8020 }
8021 if ((nwritten = write(fd,buf,buflen)) == -1) {
f870935d 8022 redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
6208b3a7 8023 strerror(errno));
8024 freeClient(slave);
8025 return;
8026 }
8027 slave->repldboff += nwritten;
8028 if (slave->repldboff == slave->repldbsize) {
8029 close(slave->repldbfd);
8030 slave->repldbfd = -1;
8031 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
8032 slave->replstate = REDIS_REPL_ONLINE;
8033 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
266373b2 8034 sendReplyToClient, slave) == AE_ERR) {
6208b3a7 8035 freeClient(slave);
8036 return;
8037 }
8038 addReplySds(slave,sdsempty());
8039 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
8040 }
8041}
ed9b544e 8042
a3b21203 8043/* This function is called at the end of every backgrond saving.
8044 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
8045 * otherwise REDIS_ERR is passed to the function.
8046 *
8047 * The goal of this function is to handle slaves waiting for a successful
8048 * background saving in order to perform non-blocking synchronization. */
8049static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 8050 listNode *ln;
8051 int startbgsave = 0;
c7df85a4 8052 listIter li;
ed9b544e 8053
c7df85a4 8054 listRewind(server.slaves,&li);
8055 while((ln = listNext(&li))) {
6208b3a7 8056 redisClient *slave = ln->value;
ed9b544e 8057
6208b3a7 8058 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
8059 startbgsave = 1;
8060 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
8061 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 8062 struct redis_stat buf;
e0a62c7f 8063
6208b3a7 8064 if (bgsaveerr != REDIS_OK) {
8065 freeClient(slave);
8066 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
8067 continue;
8068 }
8069 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 8070 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 8071 freeClient(slave);
8072 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
8073 continue;
8074 }
8075 slave->repldboff = 0;
8076 slave->repldbsize = buf.st_size;
8077 slave->replstate = REDIS_REPL_SEND_BULK;
8078 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
266373b2 8079 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
6208b3a7 8080 freeClient(slave);
8081 continue;
8082 }
8083 }
ed9b544e 8084 }
6208b3a7 8085 if (startbgsave) {
8086 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
c7df85a4 8087 listIter li;
8088
8089 listRewind(server.slaves,&li);
6208b3a7 8090 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
c7df85a4 8091 while((ln = listNext(&li))) {
6208b3a7 8092 redisClient *slave = ln->value;
ed9b544e 8093
6208b3a7 8094 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
8095 freeClient(slave);
8096 }
8097 }
8098 }
ed9b544e 8099}
8100
8101static int syncWithMaster(void) {
d0ccebcf 8102 char buf[1024], tmpfile[256], authcmd[1024];
18e61fa2 8103 long dumpsize;
ed9b544e 8104 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
8c5abee8 8105 int dfd, maxtries = 5;
ed9b544e 8106
8107 if (fd == -1) {
8108 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
8109 strerror(errno));
8110 return REDIS_ERR;
8111 }
d0ccebcf 8112
8113 /* AUTH with the master if required. */
8114 if(server.masterauth) {
8115 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
8116 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
8117 close(fd);
8118 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
8119 strerror(errno));
8120 return REDIS_ERR;
8121 }
8122 /* Read the AUTH result. */
8123 if (syncReadLine(fd,buf,1024,3600) == -1) {
8124 close(fd);
8125 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
8126 strerror(errno));
8127 return REDIS_ERR;
8128 }
8129 if (buf[0] != '+') {
8130 close(fd);
8131 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
8132 return REDIS_ERR;
8133 }
8134 }
8135
ed9b544e 8136 /* Issue the SYNC command */
8137 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
8138 close(fd);
8139 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
8140 strerror(errno));
8141 return REDIS_ERR;
8142 }
8143 /* Read the bulk write count */
8c4d91fc 8144 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 8145 close(fd);
8146 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
8147 strerror(errno));
8148 return REDIS_ERR;
8149 }
4aa701c1 8150 if (buf[0] != '$') {
8151 close(fd);
8152 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
8153 return REDIS_ERR;
8154 }
18e61fa2 8155 dumpsize = strtol(buf+1,NULL,10);
8156 redisLog(REDIS_NOTICE,"Receiving %ld bytes data dump from MASTER",dumpsize);
ed9b544e 8157 /* Read the bulk write data on a temp file */
8c5abee8 8158 while(maxtries--) {
8159 snprintf(tmpfile,256,
8160 "temp-%d.%ld.rdb",(int)time(NULL),(long int)getpid());
8161 dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644);
8162 if (dfd != -1) break;
5de9ad7c 8163 sleep(1);
8c5abee8 8164 }
ed9b544e 8165 if (dfd == -1) {
8166 close(fd);
8167 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
8168 return REDIS_ERR;
8169 }
8170 while(dumpsize) {
8171 int nread, nwritten;
8172
8173 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
8174 if (nread == -1) {
8175 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
8176 strerror(errno));
8177 close(fd);
8178 close(dfd);
8179 return REDIS_ERR;
8180 }
8181 nwritten = write(dfd,buf,nread);
8182 if (nwritten == -1) {
8183 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
8184 close(fd);
8185 close(dfd);
8186 return REDIS_ERR;
8187 }
8188 dumpsize -= nread;
8189 }
8190 close(dfd);
8191 if (rename(tmpfile,server.dbfilename) == -1) {
8192 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
8193 unlink(tmpfile);
8194 close(fd);
8195 return REDIS_ERR;
8196 }
8197 emptyDb();
f78fd11b 8198 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 8199 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
8200 close(fd);
8201 return REDIS_ERR;
8202 }
8203 server.master = createClient(fd);
8204 server.master->flags |= REDIS_MASTER;
179b3952 8205 server.master->authenticated = 1;
ed9b544e 8206 server.replstate = REDIS_REPL_CONNECTED;
8207 return REDIS_OK;
8208}
8209
321b0e13 8210static void slaveofCommand(redisClient *c) {
8211 if (!strcasecmp(c->argv[1]->ptr,"no") &&
8212 !strcasecmp(c->argv[2]->ptr,"one")) {
8213 if (server.masterhost) {
8214 sdsfree(server.masterhost);
8215 server.masterhost = NULL;
8216 if (server.master) freeClient(server.master);
8217 server.replstate = REDIS_REPL_NONE;
8218 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
8219 }
8220 } else {
8221 sdsfree(server.masterhost);
8222 server.masterhost = sdsdup(c->argv[1]->ptr);
8223 server.masterport = atoi(c->argv[2]->ptr);
8224 if (server.master) freeClient(server.master);
8225 server.replstate = REDIS_REPL_CONNECT;
8226 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
8227 server.masterhost, server.masterport);
8228 }
8229 addReply(c,shared.ok);
8230}
8231
3fd78bcd 8232/* ============================ Maxmemory directive ======================== */
8233
a5819310 8234/* Try to free one object form the pre-allocated objects free list.
8235 * This is useful under low mem conditions as by default we take 1 million
8236 * free objects allocated. On success REDIS_OK is returned, otherwise
8237 * REDIS_ERR. */
8238static int tryFreeOneObjectFromFreelist(void) {
f870935d 8239 robj *o;
8240
a5819310 8241 if (server.vm_enabled) pthread_mutex_lock(&server.obj_freelist_mutex);
8242 if (listLength(server.objfreelist)) {
8243 listNode *head = listFirst(server.objfreelist);
8244 o = listNodeValue(head);
8245 listDelNode(server.objfreelist,head);
8246 if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
8247 zfree(o);
8248 return REDIS_OK;
8249 } else {
8250 if (server.vm_enabled) pthread_mutex_unlock(&server.obj_freelist_mutex);
8251 return REDIS_ERR;
8252 }
f870935d 8253}
8254
3fd78bcd 8255/* This function gets called when 'maxmemory' is set on the config file to limit
8256 * the max memory used by the server, and we are out of memory.
8257 * This function will try to, in order:
8258 *
8259 * - Free objects from the free list
8260 * - Try to remove keys with an EXPIRE set
8261 *
8262 * It is not possible to free enough memory to reach used-memory < maxmemory
8263 * the server will start refusing commands that will enlarge even more the
8264 * memory usage.
8265 */
8266static void freeMemoryIfNeeded(void) {
8267 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
a5819310 8268 int j, k, freed = 0;
8269
8270 if (tryFreeOneObjectFromFreelist() == REDIS_OK) continue;
8271 for (j = 0; j < server.dbnum; j++) {
8272 int minttl = -1;
8273 robj *minkey = NULL;
8274 struct dictEntry *de;
8275
8276 if (dictSize(server.db[j].expires)) {
8277 freed = 1;
8278 /* From a sample of three keys drop the one nearest to
8279 * the natural expire */
8280 for (k = 0; k < 3; k++) {
8281 time_t t;
8282
8283 de = dictGetRandomKey(server.db[j].expires);
8284 t = (time_t) dictGetEntryVal(de);
8285 if (minttl == -1 || t < minttl) {
8286 minkey = dictGetEntryKey(de);
8287 minttl = t;
3fd78bcd 8288 }
3fd78bcd 8289 }
09241813 8290 dbDelete(server.db+j,minkey);
3fd78bcd 8291 }
3fd78bcd 8292 }
a5819310 8293 if (!freed) return; /* nothing to free... */
3fd78bcd 8294 }
8295}
8296
f80dff62 8297/* ============================== Append Only file ========================== */
8298
560db612 8299/* Called when the user switches from "appendonly yes" to "appendonly no"
8300 * at runtime using the CONFIG command. */
8301static void stopAppendOnly(void) {
8302 flushAppendOnlyFile();
8303 aof_fsync(server.appendfd);
8304 close(server.appendfd);
8305
8306 server.appendfd = -1;
8307 server.appendseldb = -1;
8308 server.appendonly = 0;
8309 /* rewrite operation in progress? kill it, wait child exit */
8310 if (server.bgsavechildpid != -1) {
8311 int statloc;
8312
8313 if (kill(server.bgsavechildpid,SIGKILL) != -1)
8314 wait3(&statloc,0,NULL);
8315 /* reset the buffer accumulating changes while the child saves */
8316 sdsfree(server.bgrewritebuf);
8317 server.bgrewritebuf = sdsempty();
8318 server.bgsavechildpid = -1;
8319 }
8320}
8321
8322/* Called when the user switches from "appendonly no" to "appendonly yes"
8323 * at runtime using the CONFIG command. */
8324static int startAppendOnly(void) {
8325 server.appendonly = 1;
8326 server.lastfsync = time(NULL);
8327 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
8328 if (server.appendfd == -1) {
8329 redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, but I can't open the AOF file: %s",strerror(errno));
8330 return REDIS_ERR;
8331 }
8332 if (rewriteAppendOnlyFileBackground() == REDIS_ERR) {
8333 server.appendonly = 0;
8334 close(server.appendfd);
8335 redisLog(REDIS_WARNING,"Used tried to switch on AOF via CONFIG, I can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.",strerror(errno));
8336 return REDIS_ERR;
8337 }
8338 return REDIS_OK;
8339}
8340
28ed1f33 8341/* Write the append only file buffer on disk.
8342 *
8343 * Since we are required to write the AOF before replying to the client,
8344 * and the only way the client socket can get a write is entering when the
8345 * the event loop, we accumulate all the AOF writes in a memory
8346 * buffer and write it on disk using this function just before entering
8347 * the event loop again. */
8348static void flushAppendOnlyFile(void) {
8349 time_t now;
8350 ssize_t nwritten;
8351
8352 if (sdslen(server.aofbuf) == 0) return;
8353
8354 /* We want to perform a single write. This should be guaranteed atomic
8355 * at least if the filesystem we are writing is a real physical one.
8356 * While this will save us against the server being killed I don't think
8357 * there is much to do about the whole server stopping for power problems
8358 * or alike */
8359 nwritten = write(server.appendfd,server.aofbuf,sdslen(server.aofbuf));
8360 if (nwritten != (signed)sdslen(server.aofbuf)) {
8361 /* Ooops, we are in troubles. The best thing to do for now is
8362 * aborting instead of giving the illusion that everything is
8363 * working as expected. */
8364 if (nwritten == -1) {
8365 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
8366 } else {
8367 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
8368 }
8369 exit(1);
8370 }
8371 sdsfree(server.aofbuf);
8372 server.aofbuf = sdsempty();
8373
38db9171 8374 /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have
8375 * childs performing heavy I/O on disk. */
8376 if (server.no_appendfsync_on_rewrite &&
8377 (server.bgrewritechildpid != -1 || server.bgsavechildpid != -1))
8378 return;
28ed1f33 8379 /* Fsync if needed */
8380 now = time(NULL);
8381 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
8382 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
8383 now-server.lastfsync > 1))
8384 {
8385 /* aof_fsync is defined as fdatasync() for Linux in order to avoid
8386 * flushing metadata. */
8387 aof_fsync(server.appendfd); /* Let's try to get this data on the disk */
8388 server.lastfsync = now;
8389 }
8390}
8391
9376e434
PN
8392static sds catAppendOnlyGenericCommand(sds buf, int argc, robj **argv) {
8393 int j;
8394 buf = sdscatprintf(buf,"*%d\r\n",argc);
8395 for (j = 0; j < argc; j++) {
8396 robj *o = getDecodedObject(argv[j]);
8397 buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
8398 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
8399 buf = sdscatlen(buf,"\r\n",2);
8400 decrRefCount(o);
8401 }
8402 return buf;
8403}
8404
8405static sds catAppendOnlyExpireAtCommand(sds buf, robj *key, robj *seconds) {
8406 int argc = 3;
8407 long when;
8408 robj *argv[3];
8409
8410 /* Make sure we can use strtol */
8411 seconds = getDecodedObject(seconds);
8412 when = time(NULL)+strtol(seconds->ptr,NULL,10);
8413 decrRefCount(seconds);
8414
8415 argv[0] = createStringObject("EXPIREAT",8);
8416 argv[1] = key;
8417 argv[2] = createObject(REDIS_STRING,
8418 sdscatprintf(sdsempty(),"%ld",when));
8419 buf = catAppendOnlyGenericCommand(buf, argc, argv);
8420 decrRefCount(argv[0]);
8421 decrRefCount(argv[2]);
8422 return buf;
8423}
8424
f80dff62 8425static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
8426 sds buf = sdsempty();
f80dff62 8427 robj *tmpargv[3];
8428
8429 /* The DB this command was targetting is not the same as the last command
8430 * we appendend. To issue a SELECT command is needed. */
8431 if (dictid != server.appendseldb) {
8432 char seldb[64];
8433
8434 snprintf(seldb,sizeof(seldb),"%d",dictid);
682ac724 8435 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
83c6a618 8436 (unsigned long)strlen(seldb),seldb);
f80dff62 8437 server.appendseldb = dictid;
8438 }
8439
f80dff62 8440 if (cmd->proc == expireCommand) {
9376e434
PN
8441 /* Translate EXPIRE into EXPIREAT */
8442 buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
8443 } else if (cmd->proc == setexCommand) {
8444 /* Translate SETEX to SET and EXPIREAT */
8445 tmpargv[0] = createStringObject("SET",3);
f80dff62 8446 tmpargv[1] = argv[1];
9376e434
PN
8447 tmpargv[2] = argv[3];
8448 buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
8449 decrRefCount(tmpargv[0]);
8450 buf = catAppendOnlyExpireAtCommand(buf,argv[1],argv[2]);
8451 } else {
8452 buf = catAppendOnlyGenericCommand(buf,argc,argv);
f80dff62 8453 }
8454
28ed1f33 8455 /* Append to the AOF buffer. This will be flushed on disk just before
8456 * of re-entering the event loop, so before the client will get a
8457 * positive reply about the operation performed. */
8458 server.aofbuf = sdscatlen(server.aofbuf,buf,sdslen(buf));
8459
85a83172 8460 /* If a background append only file rewriting is in progress we want to
8461 * accumulate the differences between the child DB and the current one
8462 * in a buffer, so that when the child process will do its work we
8463 * can append the differences to the new append only file. */
8464 if (server.bgrewritechildpid != -1)
8465 server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
8466
8467 sdsfree(buf);
f80dff62 8468}
8469
8470/* In Redis commands are always executed in the context of a client, so in
8471 * order to load the append only file we need to create a fake client. */
8472static struct redisClient *createFakeClient(void) {
8473 struct redisClient *c = zmalloc(sizeof(*c));
8474
8475 selectDb(c,0);
8476 c->fd = -1;
8477 c->querybuf = sdsempty();
8478 c->argc = 0;
8479 c->argv = NULL;
8480 c->flags = 0;
9387d17d 8481 /* We set the fake client as a slave waiting for the synchronization
8482 * so that Redis will not try to send replies to this client. */
8483 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
f80dff62 8484 c->reply = listCreate();
8485 listSetFreeMethod(c->reply,decrRefCount);
8486 listSetDupMethod(c->reply,dupClientReplyValue);
4132ad8d 8487 initClientMultiState(c);
f80dff62 8488 return c;
8489}
8490
8491static void freeFakeClient(struct redisClient *c) {
8492 sdsfree(c->querybuf);
8493 listRelease(c->reply);
4132ad8d 8494 freeClientMultiState(c);
f80dff62 8495 zfree(c);
8496}
8497
8498/* Replay the append log file. On error REDIS_OK is returned. On non fatal
8499 * error (the append only file is zero-length) REDIS_ERR is returned. On
8500 * fatal error an error message is logged and the program exists. */
8501int loadAppendOnlyFile(char *filename) {
8502 struct redisClient *fakeClient;
8503 FILE *fp = fopen(filename,"r");
8504 struct redis_stat sb;
4132ad8d 8505 int appendonly = server.appendonly;
f80dff62 8506
8507 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
8508 return REDIS_ERR;
8509
8510 if (fp == NULL) {
8511 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
8512 exit(1);
8513 }
8514
4132ad8d
PN
8515 /* Temporarily disable AOF, to prevent EXEC from feeding a MULTI
8516 * to the same file we're about to read. */
8517 server.appendonly = 0;
8518
f80dff62 8519 fakeClient = createFakeClient();
8520 while(1) {
8521 int argc, j;
8522 unsigned long len;
8523 robj **argv;
8524 char buf[128];
8525 sds argsds;
8526 struct redisCommand *cmd;
a89b7013 8527 int force_swapout;
f80dff62 8528
8529 if (fgets(buf,sizeof(buf),fp) == NULL) {
8530 if (feof(fp))
8531 break;
8532 else
8533 goto readerr;
8534 }
8535 if (buf[0] != '*') goto fmterr;
8536 argc = atoi(buf+1);
8537 argv = zmalloc(sizeof(robj*)*argc);
8538 for (j = 0; j < argc; j++) {
8539 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
8540 if (buf[0] != '$') goto fmterr;
8541 len = strtol(buf+1,NULL,10);
8542 argsds = sdsnewlen(NULL,len);
0f151ef1 8543 if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
f80dff62 8544 argv[j] = createObject(REDIS_STRING,argsds);
8545 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
8546 }
8547
8548 /* Command lookup */
8549 cmd = lookupCommand(argv[0]->ptr);
8550 if (!cmd) {
8551 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
8552 exit(1);
8553 }
bdcb92f2 8554 /* Try object encoding */
f80dff62 8555 if (cmd->flags & REDIS_CMD_BULK)
05df7621 8556 argv[argc-1] = tryObjectEncoding(argv[argc-1]);
f80dff62 8557 /* Run the command in the context of a fake client */
8558 fakeClient->argc = argc;
8559 fakeClient->argv = argv;
8560 cmd->proc(fakeClient);
8561 /* Discard the reply objects list from the fake client */
8562 while(listLength(fakeClient->reply))
8563 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
8564 /* Clean up, ready for the next command */
8565 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
8566 zfree(argv);
b492cf00 8567 /* Handle swapping while loading big datasets when VM is on */
a89b7013 8568 force_swapout = 0;
8569 if ((zmalloc_used_memory() - server.vm_max_memory) > 1024*1024*32)
8570 force_swapout = 1;
8571
8572 if (server.vm_enabled && force_swapout) {
b492cf00 8573 while (zmalloc_used_memory() > server.vm_max_memory) {
a69a0c9c 8574 if (vmSwapOneObjectBlocking() == REDIS_ERR) break;
b492cf00 8575 }
8576 }
f80dff62 8577 }
4132ad8d
PN
8578
8579 /* This point can only be reached when EOF is reached without errors.
8580 * If the client is in the middle of a MULTI/EXEC, log error and quit. */
8581 if (fakeClient->flags & REDIS_MULTI) goto readerr;
8582
f80dff62 8583 fclose(fp);
8584 freeFakeClient(fakeClient);
4132ad8d 8585 server.appendonly = appendonly;
f80dff62 8586 return REDIS_OK;
8587
8588readerr:
8589 if (feof(fp)) {
8590 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
8591 } else {
8592 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
8593 }
8594 exit(1);
8595fmterr:
8596 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
8597 exit(1);
8598}
8599
9d65a1bb 8600/* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
9c8e3cee 8601static int fwriteBulkObject(FILE *fp, robj *obj) {
9d65a1bb 8602 char buf[128];
b9bc0eef 8603 int decrrc = 0;
8604
f2d9f50f 8605 /* Avoid the incr/decr ref count business if possible to help
8606 * copy-on-write (we are often in a child process when this function
8607 * is called).
8608 * Also makes sure that key objects don't get incrRefCount-ed when VM
8609 * is enabled */
8610 if (obj->encoding != REDIS_ENCODING_RAW) {
b9bc0eef 8611 obj = getDecodedObject(obj);
8612 decrrc = 1;
8613 }
9d65a1bb 8614 snprintf(buf,sizeof(buf),"$%ld\r\n",(long)sdslen(obj->ptr));
8615 if (fwrite(buf,strlen(buf),1,fp) == 0) goto err;
e96e4fbf 8616 if (sdslen(obj->ptr) && fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0)
8617 goto err;
9d65a1bb 8618 if (fwrite("\r\n",2,1,fp) == 0) goto err;
b9bc0eef 8619 if (decrrc) decrRefCount(obj);
9d65a1bb 8620 return 1;
8621err:
b9bc0eef 8622 if (decrrc) decrRefCount(obj);
9d65a1bb 8623 return 0;
8624}
8625
/* Write the binary-safe string 's' of 'len' bytes into 'fp' using the bulk
 * format $<count>\r\n<payload>\r\n
 *
 * The payload write is skipped when 'len' is zero.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkString(FILE *fp, char *s, unsigned long len) {
    char buf[128];

    /* 'len' is unsigned long: the matching conversion is %lu. */
    snprintf(buf,sizeof(buf),"$%lu\r\n",len);
    if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
    if (len && fwrite(s,len,1,fp) == 0) return 0;
    if (fwrite("\r\n",2,1,fp) == 0) return 0;
    return 1;
}
8637
/* Write the double 'd', formatted with "%.17g", into 'fp' using the bulk
 * format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, which is
     * not part of the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
        return 0;
    return 1;
}
8648
/* Write the long 'l', formatted as a decimal string, into 'fp' using the
 * bulk format $<count>\r\n<payload>\r\n
 *
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is formatted together with its trailing CRLF, which is
     * not part of the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
        return 0;
    return 1;
}
8659
8660/* Write a sequence of commands able to fully rebuild the dataset into
8661 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
8662static int rewriteAppendOnlyFile(char *filename) {
8663 dictIterator *di = NULL;
8664 dictEntry *de;
8665 FILE *fp;
8666 char tmpfile[256];
8667 int j;
8668 time_t now = time(NULL);
8669
8670 /* Note that we have to use a different temp name here compared to the
8671 * one used by rewriteAppendOnlyFileBackground() function. */
8672 snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
8673 fp = fopen(tmpfile,"w");
8674 if (!fp) {
8675 redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
8676 return REDIS_ERR;
8677 }
8678 for (j = 0; j < server.dbnum; j++) {
8679 char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
8680 redisDb *db = server.db+j;
8681 dict *d = db->dict;
8682 if (dictSize(d) == 0) continue;
8683 di = dictGetIterator(d);
8684 if (!di) {
8685 fclose(fp);
8686 return REDIS_ERR;
8687 }
8688
8689 /* SELECT the new DB */
8690 if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
85a83172 8691 if (fwriteBulkLong(fp,j) == 0) goto werr;
9d65a1bb 8692
8693 /* Iterate this DB writing every entry */
8694 while((de = dictNext(di)) != NULL) {
09241813 8695 sds keystr = dictGetEntryKey(de);
8696 robj key, *o;
e7546c63 8697 time_t expiretime;
8698 int swapped;
8699
09241813 8700 keystr = dictGetEntryKey(de);
560db612 8701 o = dictGetEntryVal(de);
09241813 8702 initStaticStringObject(key,keystr);
b9bc0eef 8703 /* If the value for this key is swapped, load a preview in memory.
8704 * We use a "swapped" flag to remember if we need to free the
8705 * value object instead to just increment the ref count anyway
8706 * in order to avoid copy-on-write of pages if we are forked() */
560db612 8707 if (!server.vm_enabled || o->storage == REDIS_VM_MEMORY ||
8708 o->storage == REDIS_VM_SWAPPING) {
e7546c63 8709 swapped = 0;
8710 } else {
560db612 8711 o = vmPreviewObject(o);
e7546c63 8712 swapped = 1;
8713 }
09241813 8714 expiretime = getExpire(db,&key);
9d65a1bb 8715
8716 /* Save the key and associated value */
9d65a1bb 8717 if (o->type == REDIS_STRING) {
8718 /* Emit a SET command */
8719 char cmd[]="*3\r\n$3\r\nSET\r\n";
8720 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
8721 /* Key and value */
09241813 8722 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9c8e3cee 8723 if (fwriteBulkObject(fp,o) == 0) goto werr;
9d65a1bb 8724 } else if (o->type == REDIS_LIST) {
8725 /* Emit the RPUSHes needed to rebuild the list */
8726 list *list = o->ptr;
8727 listNode *ln;
c7df85a4 8728 listIter li;
9d65a1bb 8729
c7df85a4 8730 listRewind(list,&li);
8731 while((ln = listNext(&li))) {
9d65a1bb 8732 char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
8733 robj *eleobj = listNodeValue(ln);
8734
8735 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
09241813 8736 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9c8e3cee 8737 if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
9d65a1bb 8738 }
8739 } else if (o->type == REDIS_SET) {
8740 /* Emit the SADDs needed to rebuild the set */
8741 dict *set = o->ptr;
8742 dictIterator *di = dictGetIterator(set);
8743 dictEntry *de;
8744
8745 while((de = dictNext(di)) != NULL) {
8746 char cmd[]="*3\r\n$4\r\nSADD\r\n";
8747 robj *eleobj = dictGetEntryKey(de);
8748
8749 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
09241813 8750 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9c8e3cee 8751 if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
9d65a1bb 8752 }
8753 dictReleaseIterator(di);
8754 } else if (o->type == REDIS_ZSET) {
8755 /* Emit the ZADDs needed to rebuild the sorted set */
8756 zset *zs = o->ptr;
8757 dictIterator *di = dictGetIterator(zs->dict);
8758 dictEntry *de;
8759
8760 while((de = dictNext(di)) != NULL) {
8761 char cmd[]="*4\r\n$4\r\nZADD\r\n";
8762 robj *eleobj = dictGetEntryKey(de);
8763 double *score = dictGetEntryVal(de);
8764
8765 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
09241813 8766 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9d65a1bb 8767 if (fwriteBulkDouble(fp,*score) == 0) goto werr;
9c8e3cee 8768 if (fwriteBulkObject(fp,eleobj) == 0) goto werr;
9d65a1bb 8769 }
8770 dictReleaseIterator(di);
9c8e3cee 8771 } else if (o->type == REDIS_HASH) {
8772 char cmd[]="*4\r\n$4\r\nHSET\r\n";
8773
8774 /* Emit the HSETs needed to rebuild the hash */
8775 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
8776 unsigned char *p = zipmapRewind(o->ptr);
8777 unsigned char *field, *val;
8778 unsigned int flen, vlen;
8779
8780 while((p = zipmapNext(p,&field,&flen,&val,&vlen)) != NULL) {
8781 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
09241813 8782 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9c8e3cee 8783 if (fwriteBulkString(fp,(char*)field,flen) == -1)
8784 return -1;
8785 if (fwriteBulkString(fp,(char*)val,vlen) == -1)
8786 return -1;
8787 }
8788 } else {
8789 dictIterator *di = dictGetIterator(o->ptr);
8790 dictEntry *de;
8791
8792 while((de = dictNext(di)) != NULL) {
8793 robj *field = dictGetEntryKey(de);
8794 robj *val = dictGetEntryVal(de);
8795
8796 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
09241813 8797 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9c8e3cee 8798 if (fwriteBulkObject(fp,field) == -1) return -1;
8799 if (fwriteBulkObject(fp,val) == -1) return -1;
8800 }
8801 dictReleaseIterator(di);
8802 }
9d65a1bb 8803 } else {
f83c6cb5 8804 redisPanic("Unknown object type");
9d65a1bb 8805 }
8806 /* Save the expire time */
8807 if (expiretime != -1) {
e96e4fbf 8808 char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
9d65a1bb 8809 /* If this key is already expired skip it */
8810 if (expiretime < now) continue;
8811 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
09241813 8812 if (fwriteBulkObject(fp,&key) == 0) goto werr;
9d65a1bb 8813 if (fwriteBulkLong(fp,expiretime) == 0) goto werr;
8814 }
b9bc0eef 8815 if (swapped) decrRefCount(o);
9d65a1bb 8816 }
8817 dictReleaseIterator(di);
8818 }
8819
8820 /* Make sure data will not remain on the OS's output buffers */
8821 fflush(fp);
b0bd87f6 8822 aof_fsync(fileno(fp));
9d65a1bb 8823 fclose(fp);
e0a62c7f 8824
9d65a1bb 8825 /* Use RENAME to make sure the DB file is changed atomically only
8826 * if the generate DB file is ok. */
8827 if (rename(tmpfile,filename) == -1) {
8828 redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
8829 unlink(tmpfile);
8830 return REDIS_ERR;
8831 }
8832 redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
8833 return REDIS_OK;
8834
8835werr:
8836 fclose(fp);
8837 unlink(tmpfile);
e96e4fbf 8838 redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
9d65a1bb 8839 if (di) dictReleaseIterator(di);
8840 return REDIS_ERR;
8841}
8842
8843/* This is how rewriting of the append only file in background works:
8844 *
8845 * 1) The user calls BGREWRITEAOF
8846 * 2) Redis calls this function, that forks():
8847 * 2a) the child rewrite the append only file in a temp file.
8848 * 2b) the parent accumulates differences in server.bgrewritebuf.
8849 * 3) When the child finished '2a' exists.
8850 * 4) The parent will trap the exit code, if it's OK, will append the
8851 * data accumulated into server.bgrewritebuf into the temp file, and
8852 * finally will rename(2) the temp file in the actual file name.
8853 * The the new file is reopened as the new append only file. Profit!
8854 */
8855static int rewriteAppendOnlyFileBackground(void) {
8856 pid_t childpid;
8857
8858 if (server.bgrewritechildpid != -1) return REDIS_ERR;
054e426d 8859 if (server.vm_enabled) waitEmptyIOJobsQueue();
9d65a1bb 8860 if ((childpid = fork()) == 0) {
8861 /* Child */
8862 char tmpfile[256];
9d65a1bb 8863
054e426d 8864 if (server.vm_enabled) vmReopenSwapFile();
8865 close(server.fd);
9d65a1bb 8866 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
8867 if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
478c2c6f 8868 _exit(0);
9d65a1bb 8869 } else {
478c2c6f 8870 _exit(1);
9d65a1bb 8871 }
8872 } else {
8873 /* Parent */
8874 if (childpid == -1) {
8875 redisLog(REDIS_WARNING,
8876 "Can't rewrite append only file in background: fork: %s",
8877 strerror(errno));
8878 return REDIS_ERR;
8879 }
8880 redisLog(REDIS_NOTICE,
8881 "Background append only file rewriting started by pid %d",childpid);
8882 server.bgrewritechildpid = childpid;
884d4b39 8883 updateDictResizePolicy();
85a83172 8884 /* We set appendseldb to -1 in order to force the next call to the
8885 * feedAppendOnlyFile() to issue a SELECT command, so the differences
8886 * accumulated by the parent into server.bgrewritebuf will start
8887 * with a SELECT statement and it will be safe to merge. */
8888 server.appendseldb = -1;
9d65a1bb 8889 return REDIS_OK;
8890 }
8891 return REDIS_OK; /* unreached */
8892}
8893
8894static void bgrewriteaofCommand(redisClient *c) {
8895 if (server.bgrewritechildpid != -1) {
8896 addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
8897 return;
8898 }
8899 if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
49b99ab4 8900 char *status = "+Background append only file rewriting started\r\n";
8901 addReplySds(c,sdsnew(status));
9d65a1bb 8902 } else {
8903 addReply(c,shared.err);
8904 }
8905}
8906
/* Remove the temp file used by the background rewriting child having
 * the specified pid. The file name is built in the same way
 * rewriteAppendOnlyFileBackground() builds it. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
8913
/* Virtual Memory is composed mainly of two subsystems:
 * - Blocking Virtual Memory
 * - Threaded Virtual Memory I/O
 * The two parts are not fully decoupled, but functions are split among two
 * different sections of the source code (delimited by comments) in order to
 * make more clear what functionality is about the blocking VM and what about
 * the threaded (not blocking) VM.
 *
 * Redis VM design:
 *
 * Redis VM is a blocking VM (one that blocks reading swapped values from
 * disk into memory when a value swapped out is needed in memory) that is made
 * unblocking by trying to examine the command argument vector in order to
 * load in background values that will likely be needed in order to exec
 * the command. The command is executed only once all the relevant keys
 * are loaded into memory.
 *
 * This basically is almost as simple as a blocking VM, but almost as parallel
 * as a fully non-blocking VM.
 */
8934
560db612 8935/* =================== Virtual Memory - Blocking Side ====================== */
2e5eb04e 8936
560db612 8937/* Create a VM pointer object. This kind of objects are used in place of
8938 * values in the key -> value hash table, for swapped out objects. */
8939static vmpointer *createVmPointer(int vtype) {
8940 vmpointer *vp = zmalloc(sizeof(vmpointer));
2e5eb04e 8941
560db612 8942 vp->type = REDIS_VMPOINTER;
8943 vp->storage = REDIS_VM_SWAPPED;
8944 vp->vtype = vtype;
8945 return vp;
2e5eb04e 8946}
8947
75680a3c 8948static void vmInit(void) {
8949 off_t totsize;
996cb5f7 8950 int pipefds[2];
bcaa7a4f 8951 size_t stacksize;
8b5bb414 8952 struct flock fl;
75680a3c 8953
4ad37480 8954 if (server.vm_max_threads != 0)
8955 zmalloc_enable_thread_safeness(); /* we need thread safe zmalloc() */
8956
054e426d 8957 redisLog(REDIS_NOTICE,"Using '%s' as swap file",server.vm_swap_file);
8b5bb414 8958 /* Try to open the old swap file, otherwise create it */
6fa987e3 8959 if ((server.vm_fp = fopen(server.vm_swap_file,"r+b")) == NULL) {
8960 server.vm_fp = fopen(server.vm_swap_file,"w+b");
8961 }
75680a3c 8962 if (server.vm_fp == NULL) {
6fa987e3 8963 redisLog(REDIS_WARNING,
8b5bb414 8964 "Can't open the swap file: %s. Exiting.",
6fa987e3 8965 strerror(errno));
75680a3c 8966 exit(1);
8967 }
8968 server.vm_fd = fileno(server.vm_fp);
8b5bb414 8969 /* Lock the swap file for writing, this is useful in order to avoid
8970 * another instance to use the same swap file for a config error. */
8971 fl.l_type = F_WRLCK;
8972 fl.l_whence = SEEK_SET;
8973 fl.l_start = fl.l_len = 0;
8974 if (fcntl(server.vm_fd,F_SETLK,&fl) == -1) {
8975 redisLog(REDIS_WARNING,
8976 "Can't lock the swap file at '%s': %s. Make sure it is not used by another Redis instance.", server.vm_swap_file, strerror(errno));
8977 exit(1);
8978 }
8979 /* Initialize */
75680a3c 8980 server.vm_next_page = 0;
8981 server.vm_near_pages = 0;
7d98e08c 8982 server.vm_stats_used_pages = 0;
8983 server.vm_stats_swapped_objects = 0;
8984 server.vm_stats_swapouts = 0;
8985 server.vm_stats_swapins = 0;
75680a3c 8986 totsize = server.vm_pages*server.vm_page_size;
8987 redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",totsize);
8988 if (ftruncate(server.vm_fd,totsize) == -1) {
8989 redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
8990 strerror(errno));
8991 exit(1);
8992 } else {
8993 redisLog(REDIS_NOTICE,"Swap file allocated with success");
8994 }
7d30035d 8995 server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
f870935d 8996 redisLog(REDIS_VERBOSE,"Allocated %lld bytes page table for %lld pages",
4ef8de8a 8997 (long long) (server.vm_pages+7)/8, server.vm_pages);
7d30035d 8998 memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
92f8e882 8999
996cb5f7 9000 /* Initialize threaded I/O (used by Virtual Memory) */
9001 server.io_newjobs = listCreate();
9002 server.io_processing = listCreate();
9003 server.io_processed = listCreate();
d5d55fc3 9004 server.io_ready_clients = listCreate();
92f8e882 9005 pthread_mutex_init(&server.io_mutex,NULL);
a5819310 9006 pthread_mutex_init(&server.obj_freelist_mutex,NULL);
9007 pthread_mutex_init(&server.io_swapfile_mutex,NULL);
92f8e882 9008 server.io_active_threads = 0;
996cb5f7 9009 if (pipe(pipefds) == -1) {
9010 redisLog(REDIS_WARNING,"Unable to intialized VM: pipe(2): %s. Exiting."
9011 ,strerror(errno));
9012 exit(1);
9013 }
9014 server.io_ready_pipe_read = pipefds[0];
9015 server.io_ready_pipe_write = pipefds[1];
9016 redisAssert(anetNonBlock(NULL,server.io_ready_pipe_read) != ANET_ERR);
bcaa7a4f 9017 /* LZF requires a lot of stack */
9018 pthread_attr_init(&server.io_threads_attr);
9019 pthread_attr_getstacksize(&server.io_threads_attr, &stacksize);
9020 while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
9021 pthread_attr_setstacksize(&server.io_threads_attr, stacksize);
b9bc0eef 9022 /* Listen for events in the threaded I/O pipe */
9023 if (aeCreateFileEvent(server.el, server.io_ready_pipe_read, AE_READABLE,
9024 vmThreadedIOCompletedJob, NULL) == AE_ERR)
9025 oom("creating file event");
75680a3c 9026}
9027
06224fec 9028/* Mark the page as used */
9029static void vmMarkPageUsed(off_t page) {
9030 off_t byte = page/8;
9031 int bit = page&7;
970e10bb 9032 redisAssert(vmFreePage(page) == 1);
06224fec 9033 server.vm_bitmap[byte] |= 1<<bit;
9034}
9035
9036/* Mark N contiguous pages as used, with 'page' being the first. */
9037static void vmMarkPagesUsed(off_t page, off_t count) {
9038 off_t j;
9039
9040 for (j = 0; j < count; j++)
7d30035d 9041 vmMarkPageUsed(page+j);
7d98e08c 9042 server.vm_stats_used_pages += count;
7c775e09 9043 redisLog(REDIS_DEBUG,"Mark USED pages: %lld pages at %lld\n",
9044 (long long)count, (long long)page);
06224fec 9045}
9046
9047/* Mark the page as free */
9048static void vmMarkPageFree(off_t page) {
9049 off_t byte = page/8;
9050 int bit = page&7;
970e10bb 9051 redisAssert(vmFreePage(page) == 0);
06224fec 9052 server.vm_bitmap[byte] &= ~(1<<bit);
9053}
9054
9055/* Mark N contiguous pages as free, with 'page' being the first. */
9056static void vmMarkPagesFree(off_t page, off_t count) {
9057 off_t j;
9058
9059 for (j = 0; j < count; j++)
7d30035d 9060 vmMarkPageFree(page+j);
7d98e08c 9061 server.vm_stats_used_pages -= count;
7c775e09 9062 redisLog(REDIS_DEBUG,"Mark FREE pages: %lld pages at %lld\n",
9063 (long long)count, (long long)page);
06224fec 9064}
9065
9066/* Test if the page is free */
9067static int vmFreePage(off_t page) {
9068 off_t byte = page/8;
9069 int bit = page&7;
7d30035d 9070 return (server.vm_bitmap[byte] & (1<<bit)) == 0;
06224fec 9071}
9072
9073/* Find N contiguous free pages storing the first page of the cluster in *first.
e0a62c7f 9074 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
3a66edc7 9075 * REDIS_ERR is returned.
06224fec 9076 *
9077 * This function uses a simple algorithm: we try to allocate
9078 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
9079 * again from the start of the swap file searching for free spaces.
9080 *
9081 * If it looks pretty clear that there are no free pages near our offset
9082 * we try to find less populated places doing a forward jump of
9083 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
9084 * without hurry, and then we jump again and so forth...
e0a62c7f 9085 *
06224fec 9086 * This function can be improved using a free list to avoid to guess
9087 * too much, since we could collect data about freed pages.
9088 *
9089 * note: I implemented this function just after watching an episode of
9090 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
9091 */
c7df85a4 9092static int vmFindContiguousPages(off_t *first, off_t n) {
06224fec 9093 off_t base, offset = 0, since_jump = 0, numfree = 0;
9094
9095 if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
9096 server.vm_near_pages = 0;
9097 server.vm_next_page = 0;
9098 }
9099 server.vm_near_pages++; /* Yet another try for pages near to the old ones */
9100 base = server.vm_next_page;
9101
9102 while(offset < server.vm_pages) {
9103 off_t this = base+offset;
9104
9105 /* If we overflow, restart from page zero */
9106 if (this >= server.vm_pages) {
9107 this -= server.vm_pages;
9108 if (this == 0) {
9109 /* Just overflowed, what we found on tail is no longer
9110 * interesting, as it's no longer contiguous. */
9111 numfree = 0;
9112 }
9113 }
9114 if (vmFreePage(this)) {
9115 /* This is a free page */
9116 numfree++;
9117 /* Already got N free pages? Return to the caller, with success */
9118 if (numfree == n) {
7d30035d 9119 *first = this-(n-1);
9120 server.vm_next_page = this+1;
7c775e09 9121 redisLog(REDIS_DEBUG, "FOUND CONTIGUOUS PAGES: %lld pages at %lld\n", (long long) n, (long long) *first);
3a66edc7 9122 return REDIS_OK;
06224fec 9123 }
9124 } else {
9125 /* The current one is not a free page */
9126 numfree = 0;
9127 }
9128
9129 /* Fast-forward if the current page is not free and we already
9130 * searched enough near this place. */
9131 since_jump++;
9132 if (!numfree && since_jump >= REDIS_VM_MAX_RANDOM_JUMP/4) {
9133 offset += random() % REDIS_VM_MAX_RANDOM_JUMP;
9134 since_jump = 0;
9135 /* Note that even if we rewind after the jump, we are don't need
9136 * to make sure numfree is set to zero as we only jump *if* it
9137 * is set to zero. */
9138 } else {
9139 /* Otherwise just check the next page */
9140 offset++;
9141 }
9142 }
3a66edc7 9143 return REDIS_ERR;
9144}
9145
a5819310 9146/* Write the specified object at the specified page of the swap file */
9147static int vmWriteObjectOnSwap(robj *o, off_t page) {
9148 if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
9149 if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
9150 if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
9151 redisLog(REDIS_WARNING,
9ebed7cf 9152 "Critical VM problem in vmWriteObjectOnSwap(): can't seek: %s",
a5819310 9153 strerror(errno));
9154 return REDIS_ERR;
9155 }
9156 rdbSaveObject(server.vm_fp,o);
ba76a8f9 9157 fflush(server.vm_fp);
a5819310 9158 if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
9159 return REDIS_OK;
9160}
9161
a4798f73 9162/* Transfers the 'val' object to disk. Store all the information
9163 * a 'vmpointer' object containing all the information needed to load the
9164 * object back later is returned.
9165 *
3a66edc7 9166 * If we can't find enough contiguous empty pages to swap the object on disk
a4798f73 9167 * NULL is returned. */
560db612 9168static vmpointer *vmSwapObjectBlocking(robj *val) {
b9bc0eef 9169 off_t pages = rdbSavedObjectPages(val,NULL);
3a66edc7 9170 off_t page;
560db612 9171 vmpointer *vp;
3a66edc7 9172
560db612 9173 assert(val->storage == REDIS_VM_MEMORY);
9174 assert(val->refcount == 1);
9175 if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return NULL;
9176 if (vmWriteObjectOnSwap(val,page) == REDIS_ERR) return NULL;
9177
9178 vp = createVmPointer(val->type);
9179 vp->page = page;
9180 vp->usedpages = pages;
3a66edc7 9181 decrRefCount(val); /* Deallocate the object from memory. */
9182 vmMarkPagesUsed(page,pages);
560db612 9183 redisLog(REDIS_DEBUG,"VM: object %p swapped out at %lld (%lld pages)",
9184 (void*) val,
7d30035d 9185 (unsigned long long) page, (unsigned long long) pages);
7d98e08c 9186 server.vm_stats_swapped_objects++;
9187 server.vm_stats_swapouts++;
560db612 9188 return vp;
3a66edc7 9189}
9190
a5819310 9191static robj *vmReadObjectFromSwap(off_t page, int type) {
9192 robj *o;
3a66edc7 9193
a5819310 9194 if (server.vm_enabled) pthread_mutex_lock(&server.io_swapfile_mutex);
9195 if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
3a66edc7 9196 redisLog(REDIS_WARNING,
d5d55fc3 9197 "Unrecoverable VM problem in vmReadObjectFromSwap(): can't seek: %s",
3a66edc7 9198 strerror(errno));
478c2c6f 9199 _exit(1);
3a66edc7 9200 }
a5819310 9201 o = rdbLoadObject(type,server.vm_fp);
9202 if (o == NULL) {
d5d55fc3 9203 redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmReadObjectFromSwap(): can't load object from swap file: %s", strerror(errno));
478c2c6f 9204 _exit(1);
3a66edc7 9205 }
a5819310 9206 if (server.vm_enabled) pthread_mutex_unlock(&server.io_swapfile_mutex);
9207 return o;
9208}
9209
560db612 9210/* Load the specified object from swap to memory.
a5819310 9211 * The newly allocated object is returned.
9212 *
9213 * If preview is true the unserialized object is returned to the caller but
560db612 9214 * the pages are not marked as freed, nor the vp object is freed. */
9215static robj *vmGenericLoadObject(vmpointer *vp, int preview) {
a5819310 9216 robj *val;
9217
560db612 9218 redisAssert(vp->type == REDIS_VMPOINTER &&
9219 (vp->storage == REDIS_VM_SWAPPED || vp->storage == REDIS_VM_LOADING));
9220 val = vmReadObjectFromSwap(vp->page,vp->vtype);
7e69548d 9221 if (!preview) {
560db612 9222 redisLog(REDIS_DEBUG, "VM: object %p loaded from disk", (void*)vp);
9223 vmMarkPagesFree(vp->page,vp->usedpages);
9224 zfree(vp);
7d98e08c 9225 server.vm_stats_swapped_objects--;
38aba9a1 9226 } else {
560db612 9227 redisLog(REDIS_DEBUG, "VM: object %p previewed from disk", (void*)vp);
7e69548d 9228 }
7d98e08c 9229 server.vm_stats_swapins++;
3a66edc7 9230 return val;
06224fec 9231}
9232
560db612 9233/* Plain object loading, from swap to memory.
9234 *
9235 * 'o' is actually a redisVmPointer structure that will be freed by the call.
9236 * The return value is the loaded object. */
9237static robj *vmLoadObject(robj *o) {
996cb5f7 9238 /* If we are loading the object in background, stop it, we
9239 * need to load this object synchronously ASAP. */
560db612 9240 if (o->storage == REDIS_VM_LOADING)
9241 vmCancelThreadedIOJob(o);
9242 return vmGenericLoadObject((vmpointer*)o,0);
7e69548d 9243}
9244
9245/* Just load the value on disk, without to modify the key.
9246 * This is useful when we want to perform some operation on the value
9247 * without to really bring it from swap to memory, like while saving the
9248 * dataset or rewriting the append only log. */
560db612 9249static robj *vmPreviewObject(robj *o) {
9250 return vmGenericLoadObject((vmpointer*)o,1);
7e69548d 9251}
9252
4ef8de8a 9253/* How a good candidate is this object for swapping?
9254 * The better candidate it is, the greater the returned value.
9255 *
9256 * Currently we try to perform a fast estimation of the object size in
9257 * memory, and combine it with aging informations.
9258 *
9259 * Basically swappability = idle-time * log(estimated size)
9260 *
9261 * Bigger objects are preferred over smaller objects, but not
9262 * proportionally, this is why we use the logarithm. This algorithm is
9263 * just a first try and will probably be tuned later. */
9264static double computeObjectSwappability(robj *o) {
560db612 9265 /* actual age can be >= minage, but not < minage. As we use wrapping
9266 * 21 bit clocks with minutes resolution for the LRU. */
9267 time_t minage = abs(server.lruclock - o->lru);
4ef8de8a 9268 long asize = 0;
9269 list *l;
9270 dict *d;
9271 struct dictEntry *de;
9272 int z;
9273
560db612 9274 if (minage <= 0) return 0;
4ef8de8a 9275 switch(o->type) {
9276 case REDIS_STRING:
9277 if (o->encoding != REDIS_ENCODING_RAW) {
9278 asize = sizeof(*o);
9279 } else {
9280 asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
9281 }
9282 break;
9283 case REDIS_LIST:
9284 l = o->ptr;
9285 listNode *ln = listFirst(l);
9286
9287 asize = sizeof(list);
9288 if (ln) {
9289 robj *ele = ln->value;
9290 long elesize;
9291
9292 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
560db612 9293 (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
4ef8de8a 9294 asize += (sizeof(listNode)+elesize)*listLength(l);
9295 }
9296 break;
9297 case REDIS_SET:
9298 case REDIS_ZSET:
9299 z = (o->type == REDIS_ZSET);
9300 d = z ? ((zset*)o->ptr)->dict : o->ptr;
9301
9302 asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
9303 if (z) asize += sizeof(zset)-sizeof(dict);
9304 if (dictSize(d)) {
9305 long elesize;
9306 robj *ele;
9307
9308 de = dictGetRandomKey(d);
9309 ele = dictGetEntryKey(de);
9310 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
560db612 9311 (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
4ef8de8a 9312 asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
9313 if (z) asize += sizeof(zskiplistNode)*dictSize(d);
9314 }
9315 break;
a97b9060 9316 case REDIS_HASH:
9317 if (o->encoding == REDIS_ENCODING_ZIPMAP) {
9318 unsigned char *p = zipmapRewind((unsigned char*)o->ptr);
9319 unsigned int len = zipmapLen((unsigned char*)o->ptr);
9320 unsigned int klen, vlen;
9321 unsigned char *key, *val;
9322
9323 if ((p = zipmapNext(p,&key,&klen,&val,&vlen)) == NULL) {
9324 klen = 0;
9325 vlen = 0;
9326 }
9327 asize = len*(klen+vlen+3);
9328 } else if (o->encoding == REDIS_ENCODING_HT) {
9329 d = o->ptr;
9330 asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
9331 if (dictSize(d)) {
9332 long elesize;
9333 robj *ele;
9334
9335 de = dictGetRandomKey(d);
9336 ele = dictGetEntryKey(de);
9337 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
560db612 9338 (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
a97b9060 9339 ele = dictGetEntryVal(de);
9340 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
560db612 9341 (sizeof(*o)+sdslen(ele->ptr)) : sizeof(*o);
a97b9060 9342 asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
9343 }
9344 }
9345 break;
4ef8de8a 9346 }
560db612 9347 return (double)minage*log(1+asize);
4ef8de8a 9348}
9349
9350/* Try to swap an object that's a good candidate for swapping.
9351 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
a69a0c9c 9352 * to swap any object at all.
9353 *
9354 * If 'usethreaded' is true, Redis will try to swap the object in background
9355 * using I/O threads. */
9356static int vmSwapOneObject(int usethreads) {
4ef8de8a 9357 int j, i;
9358 struct dictEntry *best = NULL;
9359 double best_swappability = 0;
b9bc0eef 9360 redisDb *best_db = NULL;
44262c58 9361 robj *val;
9362 sds key;
4ef8de8a 9363
9364 for (j = 0; j < server.dbnum; j++) {
9365 redisDb *db = server.db+j;
b72f6a4b 9366 /* Why maxtries is set to 100?
9367 * Because this way (usually) we'll find 1 object even if just 1% - 2%
9368 * are swappable objects */
b0d8747d 9369 int maxtries = 100;
4ef8de8a 9370
9371 if (dictSize(db->dict) == 0) continue;
9372 for (i = 0; i < 5; i++) {
9373 dictEntry *de;
9374 double swappability;
9375
e3cadb8a 9376 if (maxtries) maxtries--;
4ef8de8a 9377 de = dictGetRandomKey(db->dict);
4ef8de8a 9378 val = dictGetEntryVal(de);
1064ef87 9379 /* Only swap objects that are currently in memory.
9380 *
560db612 9381 * Also don't swap shared objects: not a good idea in general and
9382 * we need to ensure that the main thread does not touch the
1064ef87 9383 * object while the I/O thread is using it, but we can't
9384 * control other keys without adding additional mutex. */
560db612 9385 if (val->storage != REDIS_VM_MEMORY || val->refcount != 1) {
e3cadb8a 9386 if (maxtries) i--; /* don't count this try */
9387 continue;
9388 }
4ef8de8a 9389 swappability = computeObjectSwappability(val);
9390 if (!best || swappability > best_swappability) {
9391 best = de;
9392 best_swappability = swappability;
b9bc0eef 9393 best_db = db;
4ef8de8a 9394 }
9395 }
9396 }
7c775e09 9397 if (best == NULL) return REDIS_ERR;
4ef8de8a 9398 key = dictGetEntryKey(best);
9399 val = dictGetEntryVal(best);
9400
e3cadb8a 9401 redisLog(REDIS_DEBUG,"Key with best swappability: %s, %f",
44262c58 9402 key, best_swappability);
4ef8de8a 9403
4ef8de8a 9404 /* Swap it */
a69a0c9c 9405 if (usethreads) {
4c8f2370 9406 robj *keyobj = createStringObject(key,sdslen(key));
9407 vmSwapObjectThreaded(keyobj,val,best_db);
9408 decrRefCount(keyobj);
4ef8de8a 9409 return REDIS_OK;
9410 } else {
560db612 9411 vmpointer *vp;
9412
9413 if ((vp = vmSwapObjectBlocking(val)) != NULL) {
9414 dictGetEntryVal(best) = vp;
a69a0c9c 9415 return REDIS_OK;
9416 } else {
9417 return REDIS_ERR;
9418 }
4ef8de8a 9419 }
9420}
9421
/* Swap one object out synchronously: vmSwapOneObject() without threads.
 * Returns what vmSwapOneObject() returns. */
static int vmSwapOneObjectBlocking() {
    int usethreads = 0;

    return vmSwapOneObject(usethreads);
}
9425
/* Swap one object out using the I/O threads: vmSwapOneObject() with
 * threads. Returns what vmSwapOneObject() returns. */
static int vmSwapOneObjectThreaded() {
    int usethreads = 1;

    return vmSwapOneObject(usethreads);
}
9429
7e69548d 9430/* Return true if it's safe to swap out objects in a given moment.
9431 * Basically we don't want to swap objects out while there is a BGSAVE
9432 * or a BGAEOREWRITE running in backgroud. */
9433static int vmCanSwapOut(void) {
9434 return (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1);
9435}
9436
996cb5f7 9437/* =================== Virtual Memory - Threaded I/O ======================= */
9438
b9bc0eef 9439static void freeIOJob(iojob *j) {
d5d55fc3 9440 if ((j->type == REDIS_IOJOB_PREPARE_SWAP ||
9441 j->type == REDIS_IOJOB_DO_SWAP ||
9442 j->type == REDIS_IOJOB_LOAD) && j->val != NULL)
560db612 9443 {
e4ed181d 9444 /* we fix the storage type, otherwise decrRefCount() will try to
9445 * kill the I/O thread Job (that does no longer exists). */
9446 if (j->val->storage == REDIS_VM_SWAPPING)
560db612 9447 j->val->storage = REDIS_VM_MEMORY;
b9bc0eef 9448 decrRefCount(j->val);
560db612 9449 }
9450 decrRefCount(j->key);
b9bc0eef 9451 zfree(j);
9452}
9453
996cb5f7 9454/* Every time a thread finished a Job, it writes a byte into the write side
9455 * of an unix pipe in order to "awake" the main thread, and this function
9456 * is called. */
9457static void vmThreadedIOCompletedJob(aeEventLoop *el, int fd, void *privdata,
9458 int mask)
9459{
9460 char buf[1];
b0d8747d 9461 int retval, processed = 0, toprocess = -1, trytoswap = 1;
996cb5f7 9462 REDIS_NOTUSED(el);
9463 REDIS_NOTUSED(mask);
9464 REDIS_NOTUSED(privdata);
9465
9466 /* For every byte we read in the read side of the pipe, there is one
9467 * I/O job completed to process. */
9468 while((retval = read(fd,buf,1)) == 1) {
b9bc0eef 9469 iojob *j;
9470 listNode *ln;
b9bc0eef 9471 struct dictEntry *de;
9472
996cb5f7 9473 redisLog(REDIS_DEBUG,"Processing I/O completed job");
b9bc0eef 9474
9475 /* Get the processed element (the oldest one) */
9476 lockThreadedIO();
1064ef87 9477 assert(listLength(server.io_processed) != 0);
f6c0bba8 9478 if (toprocess == -1) {
9479 toprocess = (listLength(server.io_processed)*REDIS_MAX_COMPLETED_JOBS_PROCESSED)/100;
9480 if (toprocess <= 0) toprocess = 1;
9481 }
b9bc0eef 9482 ln = listFirst(server.io_processed);
9483 j = ln->value;
9484 listDelNode(server.io_processed,ln);
9485 unlockThreadedIO();
9486 /* If this job is marked as canceled, just ignore it */
9487 if (j->canceled) {
9488 freeIOJob(j);
9489 continue;
9490 }
9491 /* Post process it in the main thread, as there are things we
9492 * can do just here to avoid race conditions and/or invasive locks */
560db612 9493 redisLog(REDIS_DEBUG,"COMPLETED Job type: %d, ID %p, key: %s", j->type, (void*)j->id, (unsigned char*)j->key->ptr);
44262c58 9494 de = dictFind(j->db->dict,j->key->ptr);
e4ed181d 9495 redisAssert(de != NULL);
b9bc0eef 9496 if (j->type == REDIS_IOJOB_LOAD) {
d5d55fc3 9497 redisDb *db;
560db612 9498 vmpointer *vp = dictGetEntryVal(de);
d5d55fc3 9499
b9bc0eef 9500 /* Key loaded, bring it at home */
560db612 9501 vmMarkPagesFree(vp->page,vp->usedpages);
b9bc0eef 9502 redisLog(REDIS_DEBUG, "VM: object %s loaded from disk (threaded)",
560db612 9503 (unsigned char*) j->key->ptr);
b9bc0eef 9504 server.vm_stats_swapped_objects--;
9505 server.vm_stats_swapins++;
d5d55fc3 9506 dictGetEntryVal(de) = j->val;
9507 incrRefCount(j->val);
9508 db = j->db;
d5d55fc3 9509 /* Handle clients waiting for this key to be loaded. */
560db612 9510 handleClientsBlockedOnSwappedKey(db,j->key);
9511 freeIOJob(j);
9512 zfree(vp);
b9bc0eef 9513 } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
9514 /* Now we know the amount of pages required to swap this object.
9515 * Let's find some space for it, and queue this task again
9516 * rebranded as REDIS_IOJOB_DO_SWAP. */
054e426d 9517 if (!vmCanSwapOut() ||
9518 vmFindContiguousPages(&j->page,j->pages) == REDIS_ERR)
9519 {
9520 /* Ooops... no space or we can't swap as there is
9521 * a fork()ed Redis trying to save stuff on disk. */
560db612 9522 j->val->storage = REDIS_VM_MEMORY; /* undo operation */
b9bc0eef 9523 freeIOJob(j);
9524 } else {
c7df85a4 9525 /* Note that we need to mark this pages as used now,
9526 * if the job will be canceled, we'll mark them as freed
9527 * again. */
9528 vmMarkPagesUsed(j->page,j->pages);
b9bc0eef 9529 j->type = REDIS_IOJOB_DO_SWAP;
9530 lockThreadedIO();
9531 queueIOJob(j);
9532 unlockThreadedIO();
9533 }
9534 } else if (j->type == REDIS_IOJOB_DO_SWAP) {
560db612 9535 vmpointer *vp;
b9bc0eef 9536
9537 /* Key swapped. We can finally free some memory. */
560db612 9538 if (j->val->storage != REDIS_VM_SWAPPING) {
9539 vmpointer *vp = (vmpointer*) j->id;
9540 printf("storage: %d\n",vp->storage);
9541 printf("key->name: %s\n",(char*)j->key->ptr);
6c96ba7d 9542 printf("val: %p\n",(void*)j->val);
9543 printf("val->type: %d\n",j->val->type);
9544 printf("val->ptr: %s\n",(char*)j->val->ptr);
9545 }
560db612 9546 redisAssert(j->val->storage == REDIS_VM_SWAPPING);
9547 vp = createVmPointer(j->val->type);
9548 vp->page = j->page;
9549 vp->usedpages = j->pages;
9550 dictGetEntryVal(de) = vp;
e4ed181d 9551 /* Fix the storage otherwise decrRefCount will attempt to
9552 * remove the associated I/O job */
9553 j->val->storage = REDIS_VM_MEMORY;
560db612 9554 decrRefCount(j->val);
b9bc0eef 9555 redisLog(REDIS_DEBUG,
9556 "VM: object %s swapped out at %lld (%lld pages) (threaded)",
560db612 9557 (unsigned char*) j->key->ptr,
b9bc0eef 9558 (unsigned long long) j->page, (unsigned long long) j->pages);
9559 server.vm_stats_swapped_objects++;
9560 server.vm_stats_swapouts++;
9561 freeIOJob(j);
f11b8647 9562 /* Put a few more swap requests in queue if we are still
9563 * out of memory */
b0d8747d 9564 if (trytoswap && vmCanSwapOut() &&
9565 zmalloc_used_memory() > server.vm_max_memory)
9566 {
f11b8647 9567 int more = 1;
9568 while(more) {
9569 lockThreadedIO();
9570 more = listLength(server.io_newjobs) <
9571 (unsigned) server.vm_max_threads;
9572 unlockThreadedIO();
9573 /* Don't waste CPU time if swappable objects are rare. */
b0d8747d 9574 if (vmSwapOneObjectThreaded() == REDIS_ERR) {
9575 trytoswap = 0;
9576 break;
9577 }
f11b8647 9578 }
9579 }
b9bc0eef 9580 }
c953f24b 9581 processed++;
f6c0bba8 9582 if (processed == toprocess) return;
996cb5f7 9583 }
9584 if (retval < 0 && errno != EAGAIN) {
9585 redisLog(REDIS_WARNING,
9586 "WARNING: read(2) error in vmThreadedIOCompletedJob() %s",
9587 strerror(errno));
9588 }
9589}
9590
9591static void lockThreadedIO(void) {
9592 pthread_mutex_lock(&server.io_mutex);
9593}
9594
9595static void unlockThreadedIO(void) {
9596 pthread_mutex_unlock(&server.io_mutex);
9597}
9598
9599/* Remove the specified object from the threaded I/O queue if still not
9600 * processed, otherwise make sure to flag it as canceled. */
9601static void vmCancelThreadedIOJob(robj *o) {
9602 list *lists[3] = {
6c96ba7d 9603 server.io_newjobs, /* 0 */
9604 server.io_processing, /* 1 */
9605 server.io_processed /* 2 */
996cb5f7 9606 };
9607 int i;
9608
9609 assert(o->storage == REDIS_VM_LOADING || o->storage == REDIS_VM_SWAPPING);
2e111efe 9610again:
996cb5f7 9611 lockThreadedIO();
560db612 9612 /* Search for a matching object in one of the queues */
996cb5f7 9613 for (i = 0; i < 3; i++) {
9614 listNode *ln;
c7df85a4 9615 listIter li;
996cb5f7 9616
c7df85a4 9617 listRewind(lists[i],&li);
9618 while ((ln = listNext(&li)) != NULL) {
996cb5f7 9619 iojob *job = ln->value;
9620
6c96ba7d 9621 if (job->canceled) continue; /* Skip this, already canceled. */
560db612 9622 if (job->id == o) {
dbc289ae 9623 redisLog(REDIS_DEBUG,"*** CANCELED %p (key %s) (type %d) (LIST ID %d)\n",
9624 (void*)job, (char*)job->key->ptr, job->type, i);
427a2153 9625 /* Mark the pages as free since the swap didn't happened
9626 * or happened but is now discarded. */
970e10bb 9627 if (i != 1 && job->type == REDIS_IOJOB_DO_SWAP)
427a2153 9628 vmMarkPagesFree(job->page,job->pages);
9629 /* Cancel the job. It depends on the list the job is
9630 * living in. */
996cb5f7 9631 switch(i) {
9632 case 0: /* io_newjobs */
6c96ba7d 9633 /* If the job was yet not processed the best thing to do
996cb5f7 9634 * is to remove it from the queue at all */
6c96ba7d 9635 freeIOJob(job);
996cb5f7 9636 listDelNode(lists[i],ln);
9637 break;
9638 case 1: /* io_processing */
d5d55fc3 9639 /* Oh Shi- the thread is messing with the Job:
9640 *
9641 * Probably it's accessing the object if this is a
9642 * PREPARE_SWAP or DO_SWAP job.
9643 * If it's a LOAD job it may be reading from disk and
9644 * if we don't wait for the job to terminate before to
9645 * cancel it, maybe in a few microseconds data can be
9646 * corrupted in this pages. So the short story is:
9647 *
9648 * Better to wait for the job to move into the
9649 * next queue (processed)... */
9650
9651 /* We try again and again until the job is completed. */
9652 unlockThreadedIO();
9653 /* But let's wait some time for the I/O thread
9654 * to finish with this job. After all this condition
9655 * should be very rare. */
9656 usleep(1);
9657 goto again;
996cb5f7 9658 case 2: /* io_processed */
2e111efe 9659 /* The job was already processed, that's easy...
9660 * just mark it as canceled so that we'll ignore it
9661 * when processing completed jobs. */
996cb5f7 9662 job->canceled = 1;
9663 break;
9664 }
c7df85a4 9665 /* Finally we have to adjust the storage type of the object
9666 * in order to "UNDO" the operaiton. */
996cb5f7 9667 if (o->storage == REDIS_VM_LOADING)
9668 o->storage = REDIS_VM_SWAPPED;
9669 else if (o->storage == REDIS_VM_SWAPPING)
9670 o->storage = REDIS_VM_MEMORY;
9671 unlockThreadedIO();
e4ed181d 9672 redisLog(REDIS_DEBUG,"*** DONE");
996cb5f7 9673 return;
9674 }
9675 }
9676 }
9677 unlockThreadedIO();
560db612 9678 printf("Not found: %p\n", (void*)o);
9679 redisAssert(1 != 1); /* We should never reach this */
996cb5f7 9680}
9681
b9bc0eef 9682static void *IOThreadEntryPoint(void *arg) {
9683 iojob *j;
9684 listNode *ln;
9685 REDIS_NOTUSED(arg);
9686
9687 pthread_detach(pthread_self());
9688 while(1) {
9689 /* Get a new job to process */
9690 lockThreadedIO();
9691 if (listLength(server.io_newjobs) == 0) {
9692 /* No new jobs in queue, exit. */
9ebed7cf 9693 redisLog(REDIS_DEBUG,"Thread %ld exiting, nothing to do",
9694 (long) pthread_self());
b9bc0eef 9695 server.io_active_threads--;
9696 unlockThreadedIO();
9697 return NULL;
9698 }
9699 ln = listFirst(server.io_newjobs);
9700 j = ln->value;
9701 listDelNode(server.io_newjobs,ln);
9702 /* Add the job in the processing queue */
9703 j->thread = pthread_self();
9704 listAddNodeTail(server.io_processing,j);
9705 ln = listLast(server.io_processing); /* We use ln later to remove it */
9706 unlockThreadedIO();
9ebed7cf 9707 redisLog(REDIS_DEBUG,"Thread %ld got a new job (type %d): %p about key '%s'",
9708 (long) pthread_self(), j->type, (void*)j, (char*)j->key->ptr);
b9bc0eef 9709
9710 /* Process the Job */
9711 if (j->type == REDIS_IOJOB_LOAD) {
560db612 9712 vmpointer *vp = (vmpointer*)j->id;
9713 j->val = vmReadObjectFromSwap(j->page,vp->vtype);
b9bc0eef 9714 } else if (j->type == REDIS_IOJOB_PREPARE_SWAP) {
9715 FILE *fp = fopen("/dev/null","w+");
9716 j->pages = rdbSavedObjectPages(j->val,fp);
9717 fclose(fp);
9718 } else if (j->type == REDIS_IOJOB_DO_SWAP) {
a5819310 9719 if (vmWriteObjectOnSwap(j->val,j->page) == REDIS_ERR)
9720 j->canceled = 1;
b9bc0eef 9721 }
9722
9723 /* Done: insert the job into the processed queue */
9ebed7cf 9724 redisLog(REDIS_DEBUG,"Thread %ld completed the job: %p (key %s)",
9725 (long) pthread_self(), (void*)j, (char*)j->key->ptr);
b9bc0eef 9726 lockThreadedIO();
9727 listDelNode(server.io_processing,ln);
9728 listAddNodeTail(server.io_processed,j);
9729 unlockThreadedIO();
e0a62c7f 9730
b9bc0eef 9731 /* Signal the main thread there is new stuff to process */
9732 assert(write(server.io_ready_pipe_write,"x",1) == 1);
9733 }
9734 return NULL; /* never reached */
9735}
9736
9737static void spawnIOThread(void) {
9738 pthread_t thread;
478c2c6f 9739 sigset_t mask, omask;
a97b9060 9740 int err;
b9bc0eef 9741
478c2c6f 9742 sigemptyset(&mask);
9743 sigaddset(&mask,SIGCHLD);
9744 sigaddset(&mask,SIGHUP);
9745 sigaddset(&mask,SIGPIPE);
9746 pthread_sigmask(SIG_SETMASK, &mask, &omask);
a97b9060 9747 while ((err = pthread_create(&thread,&server.io_threads_attr,IOThreadEntryPoint,NULL)) != 0) {
9748 redisLog(REDIS_WARNING,"Unable to spawn an I/O thread: %s",
9749 strerror(err));
9750 usleep(1000000);
9751 }
478c2c6f 9752 pthread_sigmask(SIG_SETMASK, &omask, NULL);
b9bc0eef 9753 server.io_active_threads++;
9754}
9755
4ee9488d 9756/* We need to wait for the last thread to exit before we are able to
9757 * fork() in order to BGSAVE or BGREWRITEAOF. */
054e426d 9758static void waitEmptyIOJobsQueue(void) {
4ee9488d 9759 while(1) {
76b7233a 9760 int io_processed_len;
9761
4ee9488d 9762 lockThreadedIO();
054e426d 9763 if (listLength(server.io_newjobs) == 0 &&
9764 listLength(server.io_processing) == 0 &&
9765 server.io_active_threads == 0)
9766 {
4ee9488d 9767 unlockThreadedIO();
9768 return;
9769 }
76b7233a 9770 /* While waiting for empty jobs queue condition we post-process some
9771 * finshed job, as I/O threads may be hanging trying to write against
9772 * the io_ready_pipe_write FD but there are so much pending jobs that
9773 * it's blocking. */
9774 io_processed_len = listLength(server.io_processed);
4ee9488d 9775 unlockThreadedIO();
76b7233a 9776 if (io_processed_len) {
9777 vmThreadedIOCompletedJob(NULL,server.io_ready_pipe_read,NULL,0);
9778 usleep(1000); /* 1 millisecond */
9779 } else {
9780 usleep(10000); /* 10 milliseconds */
9781 }
4ee9488d 9782 }
9783}
9784
054e426d 9785static void vmReopenSwapFile(void) {
478c2c6f 9786 /* Note: we don't close the old one as we are in the child process
9787 * and don't want to mess at all with the original file object. */
054e426d 9788 server.vm_fp = fopen(server.vm_swap_file,"r+b");
9789 if (server.vm_fp == NULL) {
9790 redisLog(REDIS_WARNING,"Can't re-open the VM swap file: %s. Exiting.",
9791 server.vm_swap_file);
478c2c6f 9792 _exit(1);
054e426d 9793 }
9794 server.vm_fd = fileno(server.vm_fp);
9795}
9796
b9bc0eef 9797/* This function must be called while with threaded IO locked */
9798static void queueIOJob(iojob *j) {
6c96ba7d 9799 redisLog(REDIS_DEBUG,"Queued IO Job %p type %d about key '%s'\n",
9800 (void*)j, j->type, (char*)j->key->ptr);
b9bc0eef 9801 listAddNodeTail(server.io_newjobs,j);
9802 if (server.io_active_threads < server.vm_max_threads)
9803 spawnIOThread();
9804}
9805
9806static int vmSwapObjectThreaded(robj *key, robj *val, redisDb *db) {
9807 iojob *j;
e0a62c7f 9808
b9bc0eef 9809 j = zmalloc(sizeof(*j));
9810 j->type = REDIS_IOJOB_PREPARE_SWAP;
9811 j->db = db;
78ebe4c8 9812 j->key = key;
7dd8e7cf 9813 incrRefCount(key);
560db612 9814 j->id = j->val = val;
b9bc0eef 9815 incrRefCount(val);
9816 j->canceled = 0;
9817 j->thread = (pthread_t) -1;
560db612 9818 val->storage = REDIS_VM_SWAPPING;
b9bc0eef 9819
9820 lockThreadedIO();
9821 queueIOJob(j);
9822 unlockThreadedIO();
9823 return REDIS_OK;
9824}
9825
b0d8747d 9826/* ============ Virtual Memory - Blocking clients on missing keys =========== */
9827
d5d55fc3 9828/* This function makes the clinet 'c' waiting for the key 'key' to be loaded.
9829 * If there is not already a job loading the key, it is craeted.
9830 * The key is added to the io_keys list in the client structure, and also
9831 * in the hash table mapping swapped keys to waiting clients, that is,
9832 * server.io_waited_keys. */
9833static int waitForSwappedKey(redisClient *c, robj *key) {
9834 struct dictEntry *de;
9835 robj *o;
9836 list *l;
9837
9838 /* If the key does not exist or is already in RAM we don't need to
9839 * block the client at all. */
09241813 9840 de = dictFind(c->db->dict,key->ptr);
d5d55fc3 9841 if (de == NULL) return 0;
560db612 9842 o = dictGetEntryVal(de);
d5d55fc3 9843 if (o->storage == REDIS_VM_MEMORY) {
9844 return 0;
9845 } else if (o->storage == REDIS_VM_SWAPPING) {
9846 /* We were swapping the key, undo it! */
9847 vmCancelThreadedIOJob(o);
9848 return 0;
9849 }
e0a62c7f 9850
d5d55fc3 9851 /* OK: the key is either swapped, or being loaded just now. */
9852
9853 /* Add the key to the list of keys this client is waiting for.
9854 * This maps clients to keys they are waiting for. */
9855 listAddNodeTail(c->io_keys,key);
9856 incrRefCount(key);
9857
9858 /* Add the client to the swapped keys => clients waiting map. */
9859 de = dictFind(c->db->io_keys,key);
9860 if (de == NULL) {
9861 int retval;
9862
9863 /* For every key we take a list of clients blocked for it */
9864 l = listCreate();
9865 retval = dictAdd(c->db->io_keys,key,l);
9866 incrRefCount(key);
9867 assert(retval == DICT_OK);
9868 } else {
9869 l = dictGetEntryVal(de);
9870 }
9871 listAddNodeTail(l,c);
9872
9873 /* Are we already loading the key from disk? If not create a job */
9874 if (o->storage == REDIS_VM_SWAPPED) {
9875 iojob *j;
560db612 9876 vmpointer *vp = (vmpointer*)o;
d5d55fc3 9877
9878 o->storage = REDIS_VM_LOADING;
9879 j = zmalloc(sizeof(*j));
9880 j->type = REDIS_IOJOB_LOAD;
9881 j->db = c->db;
560db612 9882 j->id = (robj*)vp;
9883 j->key = key;
9884 incrRefCount(key);
9885 j->page = vp->page;
d5d55fc3 9886 j->val = NULL;
9887 j->canceled = 0;
9888 j->thread = (pthread_t) -1;
9889 lockThreadedIO();
9890 queueIOJob(j);
9891 unlockThreadedIO();
9892 }
9893 return 1;
9894}
9895
6f078746
PN
9896/* Preload keys for any command with first, last and step values for
9897 * the command keys prototype, as defined in the command table. */
9898static void waitForMultipleSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
9899 int j, last;
9900 if (cmd->vm_firstkey == 0) return;
9901 last = cmd->vm_lastkey;
9902 if (last < 0) last = argc+last;
9903 for (j = cmd->vm_firstkey; j <= last; j += cmd->vm_keystep) {
9904 redisAssert(j < argc);
9905 waitForSwappedKey(c,argv[j]);
9906 }
9907}
9908
5d373da9 9909/* Preload keys needed for the ZUNIONSTORE and ZINTERSTORE commands.
739ba0d2
PN
9910 * Note that the number of keys to preload is user-defined, so we need to
9911 * apply a sanity check against argc. */
ca1788b5 9912static void zunionInterBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
76583ea4 9913 int i, num;
ca1788b5 9914 REDIS_NOTUSED(cmd);
ca1788b5
PN
9915
9916 num = atoi(argv[2]->ptr);
739ba0d2 9917 if (num > (argc-3)) return;
76583ea4 9918 for (i = 0; i < num; i++) {
ca1788b5 9919 waitForSwappedKey(c,argv[3+i]);
76583ea4
PN
9920 }
9921}
9922
3805e04f
PN
9923/* Preload keys needed to execute the entire MULTI/EXEC block.
9924 *
9925 * This function is called by blockClientOnSwappedKeys when EXEC is issued,
9926 * and will block the client when any command requires a swapped out value. */
9927static void execBlockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd, int argc, robj **argv) {
9928 int i, margc;
9929 struct redisCommand *mcmd;
9930 robj **margv;
9931 REDIS_NOTUSED(cmd);
9932 REDIS_NOTUSED(argc);
9933 REDIS_NOTUSED(argv);
9934
9935 if (!(c->flags & REDIS_MULTI)) return;
9936 for (i = 0; i < c->mstate.count; i++) {
9937 mcmd = c->mstate.commands[i].cmd;
9938 margc = c->mstate.commands[i].argc;
9939 margv = c->mstate.commands[i].argv;
9940
9941 if (mcmd->vm_preload_proc != NULL) {
9942 mcmd->vm_preload_proc(c,mcmd,margc,margv);
9943 } else {
9944 waitForMultipleSwappedKeys(c,mcmd,margc,margv);
9945 }
76583ea4
PN
9946 }
9947}
9948
b0d8747d 9949/* Is this client attempting to run a command against swapped keys?
d5d55fc3 9950 * If so, block it ASAP, load the keys in background, then resume it.
b0d8747d 9951 *
d5d55fc3 9952 * The important idea about this function is that it can fail! If keys will
9953 * still be swapped when the client is resumed, this key lookups will
9954 * just block loading keys from disk. In practical terms this should only
9955 * happen with SORT BY command or if there is a bug in this function.
9956 *
9957 * Return 1 if the client is marked as blocked, 0 if the client can
9958 * continue as the keys it is going to access appear to be in memory. */
0a6f3f0f 9959static int blockClientOnSwappedKeys(redisClient *c, struct redisCommand *cmd) {
76583ea4 9960 if (cmd->vm_preload_proc != NULL) {
ca1788b5 9961 cmd->vm_preload_proc(c,cmd,c->argc,c->argv);
76583ea4 9962 } else {
6f078746 9963 waitForMultipleSwappedKeys(c,cmd,c->argc,c->argv);
76583ea4
PN
9964 }
9965
d5d55fc3 9966 /* If the client was blocked for at least one key, mark it as blocked. */
9967 if (listLength(c->io_keys)) {
9968 c->flags |= REDIS_IO_WAIT;
9969 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
9970 server.vm_blocked_clients++;
9971 return 1;
9972 } else {
9973 return 0;
9974 }
9975}
9976
9977/* Remove the 'key' from the list of blocked keys for a given client.
9978 *
9979 * The function returns 1 when there are no longer blocking keys after
9980 * the current one was removed (and the client can be unblocked). */
9981static int dontWaitForSwappedKey(redisClient *c, robj *key) {
9982 list *l;
9983 listNode *ln;
9984 listIter li;
9985 struct dictEntry *de;
9986
9987 /* Remove the key from the list of keys this client is waiting for. */
9988 listRewind(c->io_keys,&li);
9989 while ((ln = listNext(&li)) != NULL) {
bf028098 9990 if (equalStringObjects(ln->value,key)) {
d5d55fc3 9991 listDelNode(c->io_keys,ln);
9992 break;
9993 }
9994 }
9995 assert(ln != NULL);
9996
9997 /* Remove the client form the key => waiting clients map. */
9998 de = dictFind(c->db->io_keys,key);
9999 assert(de != NULL);
10000 l = dictGetEntryVal(de);
10001 ln = listSearchKey(l,c);
10002 assert(ln != NULL);
10003 listDelNode(l,ln);
10004 if (listLength(l) == 0)
10005 dictDelete(c->db->io_keys,key);
10006
10007 return listLength(c->io_keys) == 0;
10008}
10009
560db612 10010/* Every time we now a key was loaded back in memory, we handle clients
10011 * waiting for this key if any. */
d5d55fc3 10012static void handleClientsBlockedOnSwappedKey(redisDb *db, robj *key) {
10013 struct dictEntry *de;
10014 list *l;
10015 listNode *ln;
10016 int len;
10017
10018 de = dictFind(db->io_keys,key);
10019 if (!de) return;
10020
10021 l = dictGetEntryVal(de);
10022 len = listLength(l);
10023 /* Note: we can't use something like while(listLength(l)) as the list
10024 * can be freed by the calling function when we remove the last element. */
10025 while (len--) {
10026 ln = listFirst(l);
10027 redisClient *c = ln->value;
10028
10029 if (dontWaitForSwappedKey(c,key)) {
10030 /* Put the client in the list of clients ready to go as we
10031 * loaded all the keys about it. */
10032 listAddNodeTail(server.io_ready_clients,c);
10033 }
10034 }
b0d8747d 10035}
b0d8747d 10036
500ece7c 10037/* =========================== Remote Configuration ========================= */
10038
10039static void configSetCommand(redisClient *c) {
10040 robj *o = getDecodedObject(c->argv[3]);
2e5eb04e 10041 long long ll;
10042
500ece7c 10043 if (!strcasecmp(c->argv[2]->ptr,"dbfilename")) {
10044 zfree(server.dbfilename);
10045 server.dbfilename = zstrdup(o->ptr);
10046 } else if (!strcasecmp(c->argv[2]->ptr,"requirepass")) {
10047 zfree(server.requirepass);
10048 server.requirepass = zstrdup(o->ptr);
10049 } else if (!strcasecmp(c->argv[2]->ptr,"masterauth")) {
10050 zfree(server.masterauth);
10051 server.masterauth = zstrdup(o->ptr);
10052 } else if (!strcasecmp(c->argv[2]->ptr,"maxmemory")) {
2e5eb04e 10053 if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
10054 ll < 0) goto badfmt;
10055 server.maxmemory = ll;
10056 } else if (!strcasecmp(c->argv[2]->ptr,"timeout")) {
10057 if (getLongLongFromObject(o,&ll) == REDIS_ERR ||
10058 ll < 0 || ll > LONG_MAX) goto badfmt;
10059 server.maxidletime = ll;
1b677732 10060 } else if (!strcasecmp(c->argv[2]->ptr,"appendfsync")) {
10061 if (!strcasecmp(o->ptr,"no")) {
10062 server.appendfsync = APPENDFSYNC_NO;
10063 } else if (!strcasecmp(o->ptr,"everysec")) {
10064 server.appendfsync = APPENDFSYNC_EVERYSEC;
10065 } else if (!strcasecmp(o->ptr,"always")) {
10066 server.appendfsync = APPENDFSYNC_ALWAYS;
10067 } else {
10068 goto badfmt;
10069 }
38db9171 10070 } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) {
10071 int yn = yesnotoi(o->ptr);
10072
10073 if (yn == -1) goto badfmt;
10074 server.no_appendfsync_on_rewrite = yn;
2e5eb04e 10075 } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) {
10076 int old = server.appendonly;
10077 int new = yesnotoi(o->ptr);
10078
10079 if (new == -1) goto badfmt;
10080 if (old != new) {
10081 if (new == 0) {
10082 stopAppendOnly();
10083 } else {
10084 if (startAppendOnly() == REDIS_ERR) {
10085 addReplySds(c,sdscatprintf(sdsempty(),
10086 "-ERR Unable to turn on AOF. Check server logs.\r\n"));
10087 decrRefCount(o);
10088 return;
10089 }
10090 }
10091 }
a34e0a25 10092 } else if (!strcasecmp(c->argv[2]->ptr,"save")) {
10093 int vlen, j;
10094 sds *v = sdssplitlen(o->ptr,sdslen(o->ptr)," ",1,&vlen);
10095
10096 /* Perform sanity check before setting the new config:
10097 * - Even number of args
10098 * - Seconds >= 1, changes >= 0 */
10099 if (vlen & 1) {
10100 sdsfreesplitres(v,vlen);
10101 goto badfmt;
10102 }
10103 for (j = 0; j < vlen; j++) {
10104 char *eptr;
10105 long val;
10106
10107 val = strtoll(v[j], &eptr, 10);
10108 if (eptr[0] != '\0' ||
10109 ((j & 1) == 0 && val < 1) ||
10110 ((j & 1) == 1 && val < 0)) {
10111 sdsfreesplitres(v,vlen);
10112 goto badfmt;
10113 }
10114 }
10115 /* Finally set the new config */
10116 resetServerSaveParams();
10117 for (j = 0; j < vlen; j += 2) {
10118 time_t seconds;
10119 int changes;
10120
10121 seconds = strtoll(v[j],NULL,10);
10122 changes = strtoll(v[j+1],NULL,10);
10123 appendServerSaveParams(seconds, changes);
10124 }
10125 sdsfreesplitres(v,vlen);
500ece7c 10126 } else {
10127 addReplySds(c,sdscatprintf(sdsempty(),
10128 "-ERR not supported CONFIG parameter %s\r\n",
10129 (char*)c->argv[2]->ptr));
10130 decrRefCount(o);
10131 return;
10132 }
10133 decrRefCount(o);
10134 addReply(c,shared.ok);
a34e0a25 10135 return;
10136
10137badfmt: /* Bad format errors */
10138 addReplySds(c,sdscatprintf(sdsempty(),
10139 "-ERR invalid argument '%s' for CONFIG SET '%s'\r\n",
10140 (char*)o->ptr,
10141 (char*)c->argv[2]->ptr));
10142 decrRefCount(o);
500ece7c 10143}
10144
10145static void configGetCommand(redisClient *c) {
10146 robj *o = getDecodedObject(c->argv[2]);
10147 robj *lenobj = createObject(REDIS_STRING,NULL);
10148 char *pattern = o->ptr;
10149 int matches = 0;
10150
10151 addReply(c,lenobj);
10152 decrRefCount(lenobj);
10153
10154 if (stringmatch(pattern,"dbfilename",0)) {
10155 addReplyBulkCString(c,"dbfilename");
10156 addReplyBulkCString(c,server.dbfilename);
10157 matches++;
10158 }
10159 if (stringmatch(pattern,"requirepass",0)) {
10160 addReplyBulkCString(c,"requirepass");
10161 addReplyBulkCString(c,server.requirepass);
10162 matches++;
10163 }
10164 if (stringmatch(pattern,"masterauth",0)) {
10165 addReplyBulkCString(c,"masterauth");
10166 addReplyBulkCString(c,server.masterauth);
10167 matches++;
10168 }
10169 if (stringmatch(pattern,"maxmemory",0)) {
10170 char buf[128];
10171
2e5eb04e 10172 ll2string(buf,128,server.maxmemory);
500ece7c 10173 addReplyBulkCString(c,"maxmemory");
10174 addReplyBulkCString(c,buf);
10175 matches++;
10176 }
2e5eb04e 10177 if (stringmatch(pattern,"timeout",0)) {
10178 char buf[128];
10179
10180 ll2string(buf,128,server.maxidletime);
10181 addReplyBulkCString(c,"timeout");
10182 addReplyBulkCString(c,buf);
10183 matches++;
10184 }
10185 if (stringmatch(pattern,"appendonly",0)) {
10186 addReplyBulkCString(c,"appendonly");
10187 addReplyBulkCString(c,server.appendonly ? "yes" : "no");
10188 matches++;
10189 }
38db9171 10190 if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) {
10191 addReplyBulkCString(c,"no-appendfsync-on-rewrite");
10192 addReplyBulkCString(c,server.no_appendfsync_on_rewrite ? "yes" : "no");
10193 matches++;
10194 }
1b677732 10195 if (stringmatch(pattern,"appendfsync",0)) {
10196 char *policy;
10197
10198 switch(server.appendfsync) {
10199 case APPENDFSYNC_NO: policy = "no"; break;
10200 case APPENDFSYNC_EVERYSEC: policy = "everysec"; break;
10201 case APPENDFSYNC_ALWAYS: policy = "always"; break;
10202 default: policy = "unknown"; break; /* too harmless to panic */
10203 }
10204 addReplyBulkCString(c,"appendfsync");
10205 addReplyBulkCString(c,policy);
10206 matches++;
10207 }
a34e0a25 10208 if (stringmatch(pattern,"save",0)) {
10209 sds buf = sdsempty();
10210 int j;
10211
10212 for (j = 0; j < server.saveparamslen; j++) {
10213 buf = sdscatprintf(buf,"%ld %d",
10214 server.saveparams[j].seconds,
10215 server.saveparams[j].changes);
10216 if (j != server.saveparamslen-1)
10217 buf = sdscatlen(buf," ",1);
10218 }
10219 addReplyBulkCString(c,"save");
10220 addReplyBulkCString(c,buf);
10221 sdsfree(buf);
10222 matches++;
10223 }
500ece7c 10224 decrRefCount(o);
10225 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",matches*2);
10226}
10227
10228static void configCommand(redisClient *c) {
10229 if (!strcasecmp(c->argv[1]->ptr,"set")) {
10230 if (c->argc != 4) goto badarity;
10231 configSetCommand(c);
10232 } else if (!strcasecmp(c->argv[1]->ptr,"get")) {
10233 if (c->argc != 3) goto badarity;
10234 configGetCommand(c);
10235 } else if (!strcasecmp(c->argv[1]->ptr,"resetstat")) {
10236 if (c->argc != 2) goto badarity;
10237 server.stat_numcommands = 0;
10238 server.stat_numconnections = 0;
10239 server.stat_expiredkeys = 0;
10240 server.stat_starttime = time(NULL);
10241 addReply(c,shared.ok);
10242 } else {
10243 addReplySds(c,sdscatprintf(sdsempty(),
10244 "-ERR CONFIG subcommand must be one of GET, SET, RESETSTAT\r\n"));
10245 }
10246 return;
10247
10248badarity:
10249 addReplySds(c,sdscatprintf(sdsempty(),
10250 "-ERR Wrong number of arguments for CONFIG %s\r\n",
10251 (char*) c->argv[1]->ptr));
10252}
10253
befec3cd 10254/* =========================== Pubsub implementation ======================== */
10255
ffc6b7f8 10256static void freePubsubPattern(void *p) {
10257 pubsubPattern *pat = p;
10258
10259 decrRefCount(pat->pattern);
10260 zfree(pat);
10261}
10262
10263static int listMatchPubsubPattern(void *a, void *b) {
10264 pubsubPattern *pa = a, *pb = b;
10265
10266 return (pa->client == pb->client) &&
bf028098 10267 (equalStringObjects(pa->pattern,pb->pattern));
ffc6b7f8 10268}
10269
10270/* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
10271 * 0 if the client was already subscribed to that channel. */
10272static int pubsubSubscribeChannel(redisClient *c, robj *channel) {
befec3cd 10273 struct dictEntry *de;
10274 list *clients = NULL;
10275 int retval = 0;
10276
ffc6b7f8 10277 /* Add the channel to the client -> channels hash table */
10278 if (dictAdd(c->pubsub_channels,channel,NULL) == DICT_OK) {
befec3cd 10279 retval = 1;
ffc6b7f8 10280 incrRefCount(channel);
10281 /* Add the client to the channel -> list of clients hash table */
10282 de = dictFind(server.pubsub_channels,channel);
befec3cd 10283 if (de == NULL) {
10284 clients = listCreate();
ffc6b7f8 10285 dictAdd(server.pubsub_channels,channel,clients);
10286 incrRefCount(channel);
befec3cd 10287 } else {
10288 clients = dictGetEntryVal(de);
10289 }
10290 listAddNodeTail(clients,c);
10291 }
10292 /* Notify the client */
10293 addReply(c,shared.mbulk3);
10294 addReply(c,shared.subscribebulk);
ffc6b7f8 10295 addReplyBulk(c,channel);
482b672d 10296 addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
befec3cd 10297 return retval;
10298}
10299
ffc6b7f8 10300/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10301 * 0 if the client was not subscribed to the specified channel. */
10302static int pubsubUnsubscribeChannel(redisClient *c, robj *channel, int notify) {
befec3cd 10303 struct dictEntry *de;
10304 list *clients;
10305 listNode *ln;
10306 int retval = 0;
10307
ffc6b7f8 10308 /* Remove the channel from the client -> channels hash table */
10309 incrRefCount(channel); /* channel may be just a pointer to the same object
201037f5 10310 we have in the hash tables. Protect it... */
ffc6b7f8 10311 if (dictDelete(c->pubsub_channels,channel) == DICT_OK) {
befec3cd 10312 retval = 1;
ffc6b7f8 10313 /* Remove the client from the channel -> clients list hash table */
10314 de = dictFind(server.pubsub_channels,channel);
befec3cd 10315 assert(de != NULL);
10316 clients = dictGetEntryVal(de);
10317 ln = listSearchKey(clients,c);
10318 assert(ln != NULL);
10319 listDelNode(clients,ln);
ff767a75 10320 if (listLength(clients) == 0) {
10321 /* Free the list and associated hash entry at all if this was
10322 * the latest client, so that it will be possible to abuse
ffc6b7f8 10323 * Redis PUBSUB creating millions of channels. */
10324 dictDelete(server.pubsub_channels,channel);
ff767a75 10325 }
befec3cd 10326 }
10327 /* Notify the client */
10328 if (notify) {
10329 addReply(c,shared.mbulk3);
10330 addReply(c,shared.unsubscribebulk);
ffc6b7f8 10331 addReplyBulk(c,channel);
482b672d 10332 addReplyLongLong(c,dictSize(c->pubsub_channels)+
ffc6b7f8 10333 listLength(c->pubsub_patterns));
10334
10335 }
10336 decrRefCount(channel); /* it is finally safe to release it */
10337 return retval;
10338}
10339
10340/* Subscribe a client to a pattern. Returns 1 if the operation succeeded, or 0 if the clinet was already subscribed to that pattern. */
10341static int pubsubSubscribePattern(redisClient *c, robj *pattern) {
10342 int retval = 0;
10343
10344 if (listSearchKey(c->pubsub_patterns,pattern) == NULL) {
10345 retval = 1;
10346 pubsubPattern *pat;
10347 listAddNodeTail(c->pubsub_patterns,pattern);
10348 incrRefCount(pattern);
10349 pat = zmalloc(sizeof(*pat));
10350 pat->pattern = getDecodedObject(pattern);
10351 pat->client = c;
10352 listAddNodeTail(server.pubsub_patterns,pat);
10353 }
10354 /* Notify the client */
10355 addReply(c,shared.mbulk3);
10356 addReply(c,shared.psubscribebulk);
10357 addReplyBulk(c,pattern);
482b672d 10358 addReplyLongLong(c,dictSize(c->pubsub_channels)+listLength(c->pubsub_patterns));
ffc6b7f8 10359 return retval;
10360}
10361
10362/* Unsubscribe a client from a channel. Returns 1 if the operation succeeded, or
10363 * 0 if the client was not subscribed to the specified channel. */
10364static int pubsubUnsubscribePattern(redisClient *c, robj *pattern, int notify) {
10365 listNode *ln;
10366 pubsubPattern pat;
10367 int retval = 0;
10368
10369 incrRefCount(pattern); /* Protect the object. May be the same we remove */
10370 if ((ln = listSearchKey(c->pubsub_patterns,pattern)) != NULL) {
10371 retval = 1;
10372 listDelNode(c->pubsub_patterns,ln);
10373 pat.client = c;
10374 pat.pattern = pattern;
10375 ln = listSearchKey(server.pubsub_patterns,&pat);
10376 listDelNode(server.pubsub_patterns,ln);
10377 }
10378 /* Notify the client */
10379 if (notify) {
10380 addReply(c,shared.mbulk3);
10381 addReply(c,shared.punsubscribebulk);
10382 addReplyBulk(c,pattern);
482b672d 10383 addReplyLongLong(c,dictSize(c->pubsub_channels)+
ffc6b7f8 10384 listLength(c->pubsub_patterns));
befec3cd 10385 }
ffc6b7f8 10386 decrRefCount(pattern);
befec3cd 10387 return retval;
10388}
10389
ffc6b7f8 10390/* Unsubscribe from all the channels. Return the number of channels the
10391 * client was subscribed from. */
10392static int pubsubUnsubscribeAllChannels(redisClient *c, int notify) {
10393 dictIterator *di = dictGetIterator(c->pubsub_channels);
befec3cd 10394 dictEntry *de;
10395 int count = 0;
10396
10397 while((de = dictNext(di)) != NULL) {
ffc6b7f8 10398 robj *channel = dictGetEntryKey(de);
befec3cd 10399
ffc6b7f8 10400 count += pubsubUnsubscribeChannel(c,channel,notify);
befec3cd 10401 }
10402 dictReleaseIterator(di);
10403 return count;
10404}
10405
ffc6b7f8 10406/* Unsubscribe from all the patterns. Return the number of patterns the
10407 * client was subscribed from. */
10408static int pubsubUnsubscribeAllPatterns(redisClient *c, int notify) {
10409 listNode *ln;
10410 listIter li;
10411 int count = 0;
10412
10413 listRewind(c->pubsub_patterns,&li);
10414 while ((ln = listNext(&li)) != NULL) {
10415 robj *pattern = ln->value;
10416
10417 count += pubsubUnsubscribePattern(c,pattern,notify);
10418 }
10419 return count;
10420}
10421
befec3cd 10422/* Publish a message */
ffc6b7f8 10423static int pubsubPublishMessage(robj *channel, robj *message) {
befec3cd 10424 int receivers = 0;
10425 struct dictEntry *de;
ffc6b7f8 10426 listNode *ln;
10427 listIter li;
befec3cd 10428
ffc6b7f8 10429 /* Send to clients listening for that channel */
10430 de = dictFind(server.pubsub_channels,channel);
befec3cd 10431 if (de) {
10432 list *list = dictGetEntryVal(de);
10433 listNode *ln;
10434 listIter li;
10435
10436 listRewind(list,&li);
10437 while ((ln = listNext(&li)) != NULL) {
10438 redisClient *c = ln->value;
10439
10440 addReply(c,shared.mbulk3);
10441 addReply(c,shared.messagebulk);
ffc6b7f8 10442 addReplyBulk(c,channel);
befec3cd 10443 addReplyBulk(c,message);
10444 receivers++;
10445 }
10446 }
ffc6b7f8 10447 /* Send to clients listening to matching channels */
10448 if (listLength(server.pubsub_patterns)) {
10449 listRewind(server.pubsub_patterns,&li);
10450 channel = getDecodedObject(channel);
10451 while ((ln = listNext(&li)) != NULL) {
10452 pubsubPattern *pat = ln->value;
10453
10454 if (stringmatchlen((char*)pat->pattern->ptr,
10455 sdslen(pat->pattern->ptr),
10456 (char*)channel->ptr,
10457 sdslen(channel->ptr),0)) {
c8d0ea0e 10458 addReply(pat->client,shared.mbulk4);
10459 addReply(pat->client,shared.pmessagebulk);
10460 addReplyBulk(pat->client,pat->pattern);
ffc6b7f8 10461 addReplyBulk(pat->client,channel);
10462 addReplyBulk(pat->client,message);
10463 receivers++;
10464 }
10465 }
10466 decrRefCount(channel);
10467 }
befec3cd 10468 return receivers;
10469}
10470
10471static void subscribeCommand(redisClient *c) {
10472 int j;
10473
10474 for (j = 1; j < c->argc; j++)
ffc6b7f8 10475 pubsubSubscribeChannel(c,c->argv[j]);
befec3cd 10476}
10477
10478static void unsubscribeCommand(redisClient *c) {
10479 if (c->argc == 1) {
ffc6b7f8 10480 pubsubUnsubscribeAllChannels(c,1);
10481 return;
10482 } else {
10483 int j;
10484
10485 for (j = 1; j < c->argc; j++)
10486 pubsubUnsubscribeChannel(c,c->argv[j],1);
10487 }
10488}
10489
10490static void psubscribeCommand(redisClient *c) {
10491 int j;
10492
10493 for (j = 1; j < c->argc; j++)
10494 pubsubSubscribePattern(c,c->argv[j]);
10495}
10496
10497static void punsubscribeCommand(redisClient *c) {
10498 if (c->argc == 1) {
10499 pubsubUnsubscribeAllPatterns(c,1);
befec3cd 10500 return;
10501 } else {
10502 int j;
10503
10504 for (j = 1; j < c->argc; j++)
ffc6b7f8 10505 pubsubUnsubscribePattern(c,c->argv[j],1);
befec3cd 10506 }
10507}
10508
10509static void publishCommand(redisClient *c) {
10510 int receivers = pubsubPublishMessage(c->argv[1],c->argv[2]);
482b672d 10511 addReplyLongLong(c,receivers);
befec3cd 10512}
10513
37ab76c9 10514/* ===================== WATCH (CAS alike for MULTI/EXEC) ===================
10515 *
10516 * The implementation uses a per-DB hash table mapping keys to list of clients
10517 * WATCHing those keys, so that given a key that is going to be modified
10518 * we can mark all the associated clients as dirty.
10519 *
10520 * Also every client contains a list of WATCHed keys so that's possible to
10521 * un-watch such keys when the client is freed or when UNWATCH is called. */
10522
10523/* In the client->watched_keys list we need to use watchedKey structures
10524 * as in order to identify a key in Redis we need both the key name and the
10525 * DB */
10526typedef struct watchedKey {
10527 robj *key;
10528 redisDb *db;
10529} watchedKey;
10530
10531/* Watch for the specified key */
10532static void watchForKey(redisClient *c, robj *key) {
10533 list *clients = NULL;
10534 listIter li;
10535 listNode *ln;
10536 watchedKey *wk;
10537
10538 /* Check if we are already watching for this key */
10539 listRewind(c->watched_keys,&li);
10540 while((ln = listNext(&li))) {
10541 wk = listNodeValue(ln);
10542 if (wk->db == c->db && equalStringObjects(key,wk->key))
10543 return; /* Key already watched */
10544 }
10545 /* This key is not already watched in this DB. Let's add it */
10546 clients = dictFetchValue(c->db->watched_keys,key);
10547 if (!clients) {
10548 clients = listCreate();
10549 dictAdd(c->db->watched_keys,key,clients);
10550 incrRefCount(key);
10551 }
10552 listAddNodeTail(clients,c);
10553 /* Add the new key to the lits of keys watched by this client */
10554 wk = zmalloc(sizeof(*wk));
10555 wk->key = key;
10556 wk->db = c->db;
10557 incrRefCount(key);
10558 listAddNodeTail(c->watched_keys,wk);
10559}
10560
10561/* Unwatch all the keys watched by this client. To clean the EXEC dirty
10562 * flag is up to the caller. */
10563static void unwatchAllKeys(redisClient *c) {
10564 listIter li;
10565 listNode *ln;
10566
10567 if (listLength(c->watched_keys) == 0) return;
10568 listRewind(c->watched_keys,&li);
10569 while((ln = listNext(&li))) {
10570 list *clients;
10571 watchedKey *wk;
10572
10573 /* Lookup the watched key -> clients list and remove the client
10574 * from the list */
10575 wk = listNodeValue(ln);
10576 clients = dictFetchValue(wk->db->watched_keys, wk->key);
10577 assert(clients != NULL);
10578 listDelNode(clients,listSearchKey(clients,c));
10579 /* Kill the entry at all if this was the only client */
10580 if (listLength(clients) == 0)
10581 dictDelete(wk->db->watched_keys, wk->key);
10582 /* Remove this watched key from the client->watched list */
10583 listDelNode(c->watched_keys,ln);
10584 decrRefCount(wk->key);
10585 zfree(wk);
10586 }
10587}
10588
ca3f830b 10589/* "Touch" a key, so that if this key is being WATCHed by some client the
37ab76c9 10590 * next EXEC will fail. */
10591static void touchWatchedKey(redisDb *db, robj *key) {
10592 list *clients;
10593 listIter li;
10594 listNode *ln;
10595
10596 if (dictSize(db->watched_keys) == 0) return;
10597 clients = dictFetchValue(db->watched_keys, key);
10598 if (!clients) return;
10599
10600 /* Mark all the clients watching this key as REDIS_DIRTY_CAS */
10601 /* Check if we are already watching for this key */
10602 listRewind(clients,&li);
10603 while((ln = listNext(&li))) {
10604 redisClient *c = listNodeValue(ln);
10605
10606 c->flags |= REDIS_DIRTY_CAS;
10607 }
10608}
10609
9b30e1a2 10610/* On FLUSHDB or FLUSHALL all the watched keys that are present before the
10611 * flush but will be deleted as effect of the flushing operation should
10612 * be touched. "dbid" is the DB that's getting the flush. -1 if it is
10613 * a FLUSHALL operation (all the DBs flushed). */
10614static void touchWatchedKeysOnFlush(int dbid) {
10615 listIter li1, li2;
10616 listNode *ln;
10617
10618 /* For every client, check all the waited keys */
10619 listRewind(server.clients,&li1);
10620 while((ln = listNext(&li1))) {
10621 redisClient *c = listNodeValue(ln);
10622 listRewind(c->watched_keys,&li2);
10623 while((ln = listNext(&li2))) {
10624 watchedKey *wk = listNodeValue(ln);
10625
10626 /* For every watched key matching the specified DB, if the
10627 * key exists, mark the client as dirty, as the key will be
10628 * removed. */
10629 if (dbid == -1 || wk->db->id == dbid) {
09241813 10630 if (dictFind(wk->db->dict, wk->key->ptr) != NULL)
9b30e1a2 10631 c->flags |= REDIS_DIRTY_CAS;
10632 }
10633 }
10634 }
10635}
10636
37ab76c9 10637static void watchCommand(redisClient *c) {
10638 int j;
10639
6531c94d 10640 if (c->flags & REDIS_MULTI) {
10641 addReplySds(c,sdsnew("-ERR WATCH inside MULTI is not allowed\r\n"));
10642 return;
10643 }
37ab76c9 10644 for (j = 1; j < c->argc; j++)
10645 watchForKey(c,c->argv[j]);
10646 addReply(c,shared.ok);
10647}
10648
10649static void unwatchCommand(redisClient *c) {
10650 unwatchAllKeys(c);
10651 c->flags &= (~REDIS_DIRTY_CAS);
10652 addReply(c,shared.ok);
10653}
10654
7f957c92 10655/* ================================= Debugging ============================== */
10656
ba798261 10657/* Compute the sha1 of string at 's' with 'len' bytes long.
10658 * The SHA1 is then xored againt the string pointed by digest.
10659 * Since xor is commutative, this operation is used in order to
10660 * "add" digests relative to unordered elements.
10661 *
10662 * So digest(a,b,c,d) will be the same of digest(b,a,c,d) */
10663static void xorDigest(unsigned char *digest, void *ptr, size_t len) {
10664 SHA1_CTX ctx;
10665 unsigned char hash[20], *s = ptr;
10666 int j;
10667
10668 SHA1Init(&ctx);
10669 SHA1Update(&ctx,s,len);
10670 SHA1Final(hash,&ctx);
10671
10672 for (j = 0; j < 20; j++)
10673 digest[j] ^= hash[j];
10674}
10675
10676static void xorObjectDigest(unsigned char *digest, robj *o) {
10677 o = getDecodedObject(o);
10678 xorDigest(digest,o->ptr,sdslen(o->ptr));
10679 decrRefCount(o);
10680}
10681
10682/* This function instead of just computing the SHA1 and xoring it
10683 * against diget, also perform the digest of "digest" itself and
10684 * replace the old value with the new one.
10685 *
10686 * So the final digest will be:
10687 *
10688 * digest = SHA1(digest xor SHA1(data))
10689 *
10690 * This function is used every time we want to preserve the order so
10691 * that digest(a,b,c,d) will be different than digest(b,c,d,a)
10692 *
10693 * Also note that mixdigest("foo") followed by mixdigest("bar")
10694 * will lead to a different digest compared to "fo", "obar".
10695 */
10696static void mixDigest(unsigned char *digest, void *ptr, size_t len) {
10697 SHA1_CTX ctx;
10698 char *s = ptr;
10699
10700 xorDigest(digest,s,len);
10701 SHA1Init(&ctx);
10702 SHA1Update(&ctx,digest,20);
10703 SHA1Final(digest,&ctx);
10704}
10705
10706static void mixObjectDigest(unsigned char *digest, robj *o) {
10707 o = getDecodedObject(o);
10708 mixDigest(digest,o->ptr,sdslen(o->ptr));
10709 decrRefCount(o);
10710}
10711
10712/* Compute the dataset digest. Since keys, sets elements, hashes elements
10713 * are not ordered, we use a trick: every aggregate digest is the xor
10714 * of the digests of their elements. This way the order will not change
10715 * the result. For list instead we use a feedback entering the output digest
10716 * as input in order to ensure that a different ordered list will result in
10717 * a different digest. */
10718static void computeDatasetDigest(unsigned char *final) {
10719 unsigned char digest[20];
10720 char buf[128];
10721 dictIterator *di = NULL;
10722 dictEntry *de;
10723 int j;
10724 uint32_t aux;
10725
10726 memset(final,0,20); /* Start with a clean result */
10727
10728 for (j = 0; j < server.dbnum; j++) {
10729 redisDb *db = server.db+j;
10730
10731 if (dictSize(db->dict) == 0) continue;
10732 di = dictGetIterator(db->dict);
10733
10734 /* hash the DB id, so the same dataset moved in a different
10735 * DB will lead to a different digest */
10736 aux = htonl(j);
10737 mixDigest(final,&aux,sizeof(aux));
10738
10739 /* Iterate this DB writing every entry */
10740 while((de = dictNext(di)) != NULL) {
09241813 10741 sds key;
10742 robj *keyobj, *o;
ba798261 10743 time_t expiretime;
10744
10745 memset(digest,0,20); /* This key-val digest */
10746 key = dictGetEntryKey(de);
09241813 10747 keyobj = createStringObject(key,sdslen(key));
10748
10749 mixDigest(digest,key,sdslen(key));
10750
10751 /* Make sure the key is loaded if VM is active */
10752 o = lookupKeyRead(db,keyobj);
cbae1d34 10753
ba798261 10754 aux = htonl(o->type);
10755 mixDigest(digest,&aux,sizeof(aux));
09241813 10756 expiretime = getExpire(db,keyobj);
ba798261 10757
10758 /* Save the key and associated value */
10759 if (o->type == REDIS_STRING) {
10760 mixObjectDigest(digest,o);
10761 } else if (o->type == REDIS_LIST) {
10762 list *list = o->ptr;
10763 listNode *ln;
10764 listIter li;
10765
10766 listRewind(list,&li);
10767 while((ln = listNext(&li))) {
10768 robj *eleobj = listNodeValue(ln);
10769
10770 mixObjectDigest(digest,eleobj);
10771 }
10772 } else if (o->type == REDIS_SET) {
10773 dict *set = o->ptr;
10774 dictIterator *di = dictGetIterator(set);
10775 dictEntry *de;
10776
10777 while((de = dictNext(di)) != NULL) {
10778 robj *eleobj = dictGetEntryKey(de);
10779
10780 xorObjectDigest(digest,eleobj);
10781 }
10782 dictReleaseIterator(di);
10783 } else if (o->type == REDIS_ZSET) {
10784 zset *zs = o->ptr;
10785 dictIterator *di = dictGetIterator(zs->dict);
10786 dictEntry *de;
10787
10788 while((de = dictNext(di)) != NULL) {
10789 robj *eleobj = dictGetEntryKey(de);
10790 double *score = dictGetEntryVal(de);
10791 unsigned char eledigest[20];
10792
10793 snprintf(buf,sizeof(buf),"%.17g",*score);
10794 memset(eledigest,0,20);
10795 mixObjectDigest(eledigest,eleobj);
10796 mixDigest(eledigest,buf,strlen(buf));
10797 xorDigest(digest,eledigest,20);
10798 }
10799 dictReleaseIterator(di);
10800 } else if (o->type == REDIS_HASH) {
10801 hashIterator *hi;
10802 robj *obj;
10803
10804 hi = hashInitIterator(o);
10805 while (hashNext(hi) != REDIS_ERR) {
10806 unsigned char eledigest[20];
10807
10808 memset(eledigest,0,20);
10809 obj = hashCurrent(hi,REDIS_HASH_KEY);
10810 mixObjectDigest(eledigest,obj);
10811 decrRefCount(obj);
10812 obj = hashCurrent(hi,REDIS_HASH_VALUE);
10813 mixObjectDigest(eledigest,obj);
10814 decrRefCount(obj);
10815 xorDigest(digest,eledigest,20);
10816 }
10817 hashReleaseIterator(hi);
10818 } else {
10819 redisPanic("Unknown object type");
10820 }
ba798261 10821 /* If the key has an expire, add it to the mix */
10822 if (expiretime != -1) xorDigest(digest,"!!expire!!",10);
10823 /* We can finally xor the key-val digest to the final digest */
10824 xorDigest(final,digest,20);
09241813 10825 decrRefCount(keyobj);
ba798261 10826 }
10827 dictReleaseIterator(di);
10828 }
10829}
10830
7f957c92 10831static void debugCommand(redisClient *c) {
10832 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
10833 *((char*)-1) = 'x';
210e29f7 10834 } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
10835 if (rdbSave(server.dbfilename) != REDIS_OK) {
10836 addReply(c,shared.err);
10837 return;
10838 }
10839 emptyDb();
10840 if (rdbLoad(server.dbfilename) != REDIS_OK) {
10841 addReply(c,shared.err);
10842 return;
10843 }
10844 redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
10845 addReply(c,shared.ok);
71c2b467 10846 } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
10847 emptyDb();
10848 if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
10849 addReply(c,shared.err);
10850 return;
10851 }
10852 redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
10853 addReply(c,shared.ok);
333298da 10854 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
09241813 10855 dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
10856 robj *val;
333298da 10857
10858 if (!de) {
10859 addReply(c,shared.nokeyerr);
10860 return;
10861 }
333298da 10862 val = dictGetEntryVal(de);
560db612 10863 if (!server.vm_enabled || (val->storage == REDIS_VM_MEMORY ||
10864 val->storage == REDIS_VM_SWAPPING)) {
07efaf74 10865 char *strenc;
10866 char buf[128];
10867
10868 if (val->encoding < (sizeof(strencoding)/sizeof(char*))) {
10869 strenc = strencoding[val->encoding];
10870 } else {
10871 snprintf(buf,64,"unknown encoding %d\n", val->encoding);
10872 strenc = buf;
10873 }
ace06542 10874 addReplySds(c,sdscatprintf(sdsempty(),
09241813 10875 "+Value at:%p refcount:%d "
07efaf74 10876 "encoding:%s serializedlength:%lld\r\n",
09241813 10877 (void*)val, val->refcount,
07efaf74 10878 strenc, (long long) rdbSavedObjectLen(val,NULL)));
ace06542 10879 } else {
560db612 10880 vmpointer *vp = (vmpointer*) val;
ace06542 10881 addReplySds(c,sdscatprintf(sdsempty(),
09241813 10882 "+Value swapped at: page %llu "
ace06542 10883 "using %llu pages\r\n",
09241813 10884 (unsigned long long) vp->page,
560db612 10885 (unsigned long long) vp->usedpages));
ace06542 10886 }
78ebe4c8 10887 } else if (!strcasecmp(c->argv[1]->ptr,"swapin") && c->argc == 3) {
10888 lookupKeyRead(c->db,c->argv[2]);
10889 addReply(c,shared.ok);
7d30035d 10890 } else if (!strcasecmp(c->argv[1]->ptr,"swapout") && c->argc == 3) {
09241813 10891 dictEntry *de = dictFind(c->db->dict,c->argv[2]->ptr);
10892 robj *val;
560db612 10893 vmpointer *vp;
7d30035d 10894
10895 if (!server.vm_enabled) {
10896 addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n"));
10897 return;
10898 }
10899 if (!de) {
10900 addReply(c,shared.nokeyerr);
10901 return;
10902 }
7d30035d 10903 val = dictGetEntryVal(de);
4ef8de8a 10904 /* Swap it */
560db612 10905 if (val->storage != REDIS_VM_MEMORY) {
7d30035d 10906 addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
560db612 10907 } else if (val->refcount != 1) {
10908 addReplySds(c,sdsnew("-ERR Object is shared\r\n"));
10909 } else if ((vp = vmSwapObjectBlocking(val)) != NULL) {
10910 dictGetEntryVal(de) = vp;
7d30035d 10911 addReply(c,shared.ok);
10912 } else {
10913 addReply(c,shared.err);
10914 }
59305dc7 10915 } else if (!strcasecmp(c->argv[1]->ptr,"populate") && c->argc == 3) {
10916 long keys, j;
10917 robj *key, *val;
10918 char buf[128];
10919
10920 if (getLongFromObjectOrReply(c, c->argv[2], &keys, NULL) != REDIS_OK)
10921 return;
10922 for (j = 0; j < keys; j++) {
10923 snprintf(buf,sizeof(buf),"key:%lu",j);
10924 key = createStringObject(buf,strlen(buf));
10925 if (lookupKeyRead(c->db,key) != NULL) {
10926 decrRefCount(key);
10927 continue;
10928 }
10929 snprintf(buf,sizeof(buf),"value:%lu",j);
10930 val = createStringObject(buf,strlen(buf));
09241813 10931 dbAdd(c->db,key,val);
10932 decrRefCount(key);
59305dc7 10933 }
10934 addReply(c,shared.ok);
ba798261 10935 } else if (!strcasecmp(c->argv[1]->ptr,"digest") && c->argc == 2) {
10936 unsigned char digest[20];
10937 sds d = sdsnew("+");
10938 int j;
10939
10940 computeDatasetDigest(digest);
10941 for (j = 0; j < 20; j++)
10942 d = sdscatprintf(d, "%02x",digest[j]);
10943
10944 d = sdscatlen(d,"\r\n",2);
10945 addReplySds(c,d);
7f957c92 10946 } else {
333298da 10947 addReplySds(c,sdsnew(
bdcb92f2 10948 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPIN <key>|SWAPOUT <key>|RELOAD]\r\n"));
7f957c92 10949 }
10950}
56906eef 10951
6c96ba7d 10952static void _redisAssert(char *estr, char *file, int line) {
dfc5e96c 10953 redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
fdfb02e7 10954 redisLog(REDIS_WARNING,"==> %s:%d '%s' is not true",file,line,estr);
dfc5e96c 10955#ifdef HAVE_BACKTRACE
10956 redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
10957 *((char*)-1) = 'x';
10958#endif
10959}
10960
c651fd9e 10961static void _redisPanic(char *msg, char *file, int line) {
10962 redisLog(REDIS_WARNING,"!!! Software Failure. Press left mouse button to continue");
17772754 10963 redisLog(REDIS_WARNING,"Guru Meditation: %s #%s:%d",msg,file,line);
c651fd9e 10964#ifdef HAVE_BACKTRACE
10965 redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
10966 *((char*)-1) = 'x';
10967#endif
10968}
10969
bcfc686d 10970/* =================================== Main! ================================ */
56906eef 10971
bcfc686d 10972#ifdef __linux__
/* Return the integer found in the first line of
 * /proc/sys/vm/overcommit_memory (as parsed by atoi()), or -1 if the file
 * can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    char *got;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;

    /* Read first, close unconditionally, then look at the outcome. */
    got = fgets(line,64,procfile);
    fclose(procfile);
    if (got == NULL) return -1;

    return atoi(line);
}
10986
10987void linuxOvercommitMemoryWarning(void) {
10988 if (linuxOvercommitMemoryValue() == 0) {
7ccd2d0a 10989 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
bcfc686d 10990 }
10991}
10992#endif /* __linux__ */
10993
10994static void daemonize(void) {
10995 int fd;
10996 FILE *fp;
10997
10998 if (fork() != 0) exit(0); /* parent exits */
10999 setsid(); /* create a new session */
11000
11001 /* Every output goes to /dev/null. If Redis is daemonized but
11002 * the 'logfile' is set to 'stdout' in the configuration file
11003 * it will not log at all. */
11004 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
11005 dup2(fd, STDIN_FILENO);
11006 dup2(fd, STDOUT_FILENO);
11007 dup2(fd, STDERR_FILENO);
11008 if (fd > STDERR_FILENO) close(fd);
11009 }
11010 /* Try to write the pid file */
11011 fp = fopen(server.pidfile,"w");
11012 if (fp) {
11013 fprintf(fp,"%d\n",getpid());
11014 fclose(fp);
56906eef 11015 }
56906eef 11016}
11017
42ab0172 11018static void version() {
8a3b0d2d 11019 printf("Redis server version %s (%s:%d)\n", REDIS_VERSION,
11020 REDIS_GIT_SHA1, atoi(REDIS_GIT_DIRTY) > 0);
42ab0172
AO
11021 exit(0);
11022}
11023
723fb69b
AO
/* Print the command line synopsis on stderr and exit with status 1. */
static void usage() {
    fputs("Usage: ./redis-server [/path/to/redis.conf]\n",stderr);
    fputs(" ./redis-server - (read config from stdin)\n",stderr);
    exit(1);
}
11029
bcfc686d 11030int main(int argc, char **argv) {
9651a787 11031 time_t start;
11032
bcfc686d 11033 initServerConfig();
1a132bbc 11034 sortCommandTable();
bcfc686d 11035 if (argc == 2) {
44efe66e 11036 if (strcmp(argv[1], "-v") == 0 ||
11037 strcmp(argv[1], "--version") == 0) version();
11038 if (strcmp(argv[1], "--help") == 0) usage();
bcfc686d 11039 resetServerSaveParams();
11040 loadServerConfig(argv[1]);
723fb69b
AO
11041 } else if ((argc > 2)) {
11042 usage();
bcfc686d 11043 } else {
11044 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
11045 }
bcfc686d 11046 if (server.daemonize) daemonize();
71c54b21 11047 initServer();
bcfc686d 11048 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
11049#ifdef __linux__
11050 linuxOvercommitMemoryWarning();
11051#endif
9651a787 11052 start = time(NULL);
bcfc686d 11053 if (server.appendonly) {
11054 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
9651a787 11055 redisLog(REDIS_NOTICE,"DB loaded from append only file: %ld seconds",time(NULL)-start);
bcfc686d 11056 } else {
11057 if (rdbLoad(server.dbfilename) == REDIS_OK)
9651a787 11058 redisLog(REDIS_NOTICE,"DB loaded from disk: %ld seconds",time(NULL)-start);
bcfc686d 11059 }
bcfc686d 11060 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
d5d55fc3 11061 aeSetBeforeSleepProc(server.el,beforeSleep);
bcfc686d 11062 aeMain(server.el);
11063 aeDeleteEventLoop(server.el);
11064 return 0;
11065}
11066
11067/* ============================= Backtrace support ========================= */
11068
11069#ifdef HAVE_BACKTRACE
11070static char *findFuncName(void *pointer, unsigned long *offset);
11071
56906eef 11072static void *getMcontextEip(ucontext_t *uc) {
11073#if defined(__FreeBSD__)
11074 return (void*) uc->uc_mcontext.mc_eip;
11075#elif defined(__dietlibc__)
11076 return (void*) uc->uc_mcontext.eip;
06db1f50 11077#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
da0a1620 11078 #if __x86_64__
11079 return (void*) uc->uc_mcontext->__ss.__rip;
11080 #else
56906eef 11081 return (void*) uc->uc_mcontext->__ss.__eip;
da0a1620 11082 #endif
06db1f50 11083#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
cb7e07cc 11084 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
06db1f50 11085 return (void*) uc->uc_mcontext->__ss.__rip;
cbc59b38 11086 #else
11087 return (void*) uc->uc_mcontext->__ss.__eip;
e0a62c7f 11088 #endif
54bac49d 11089#elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
c04c9ac9 11090 return (void*) uc->uc_mcontext.gregs[REG_EIP]; /* Linux 32/64 bit */
b91cf5ef 11091#elif defined(__ia64__) /* Linux IA64 */
11092 return (void*) uc->uc_mcontext.sc_ip;
11093#else
11094 return NULL;
56906eef 11095#endif
11096}
11097
11098static void segvHandler(int sig, siginfo_t *info, void *secret) {
11099 void *trace[100];
11100 char **messages = NULL;
11101 int i, trace_size = 0;
11102 unsigned long offset=0;
56906eef 11103 ucontext_t *uc = (ucontext_t*) secret;
1c85b79f 11104 sds infostring;
56906eef 11105 REDIS_NOTUSED(info);
11106
11107 redisLog(REDIS_WARNING,
11108 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
1c85b79f 11109 infostring = genRedisInfoString();
11110 redisLog(REDIS_WARNING, "%s",infostring);
11111 /* It's not safe to sdsfree() the returned string under memory
11112 * corruption conditions. Let it leak as we are going to abort */
e0a62c7f 11113
56906eef 11114 trace_size = backtrace(trace, 100);
de96dbfe 11115 /* overwrite sigaction with caller's address */
b91cf5ef 11116 if (getMcontextEip(uc) != NULL) {
11117 trace[1] = getMcontextEip(uc);
11118 }
56906eef 11119 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 11120
d76412d1 11121 for (i=1; i<trace_size; ++i) {
56906eef 11122 char *fn = findFuncName(trace[i], &offset), *p;
11123
11124 p = strchr(messages[i],'+');
11125 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
11126 redisLog(REDIS_WARNING,"%s", messages[i]);
11127 } else {
11128 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
11129 }
11130 }
b177fd30 11131 /* free(messages); Don't call free() with possibly corrupted memory. */
478c2c6f 11132 _exit(0);
fe3bbfbe 11133}
56906eef 11134
fab43727 11135static void sigtermHandler(int sig) {
11136 REDIS_NOTUSED(sig);
b58ba105 11137
fab43727 11138 redisLog(REDIS_WARNING,"SIGTERM received, scheduling shutting down...");
11139 server.shutdown_asap = 1;
b58ba105
AM
11140}
11141
56906eef 11142static void setupSigSegvAction(void) {
11143 struct sigaction act;
11144
11145 sigemptyset (&act.sa_mask);
11146 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
11147 * is used. Otherwise, sa_handler is used */
11148 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
11149 act.sa_sigaction = segvHandler;
11150 sigaction (SIGSEGV, &act, NULL);
11151 sigaction (SIGBUS, &act, NULL);
12fea928 11152 sigaction (SIGFPE, &act, NULL);
11153 sigaction (SIGILL, &act, NULL);
11154 sigaction (SIGBUS, &act, NULL);
b58ba105
AM
11155
11156 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND;
fab43727 11157 act.sa_handler = sigtermHandler;
b58ba105 11158 sigaction (SIGTERM, &act, NULL);
e65fdc78 11159 return;
56906eef 11160}
e65fdc78 11161
bcfc686d 11162#include "staticsymbols.h"
11163/* This function try to convert a pointer into a function name. It's used in
11164 * oreder to provide a backtrace under segmentation fault that's able to
11165 * display functions declared as static (otherwise the backtrace is useless). */
11166static char *findFuncName(void *pointer, unsigned long *offset){
11167 int i, ret = -1;
11168 unsigned long off, minoff = 0;
ed9b544e 11169
bcfc686d 11170 /* Try to match against the Symbol with the smallest offset */
11171 for (i=0; symsTable[i].pointer; i++) {
11172 unsigned long lp = (unsigned long) pointer;
0bc03378 11173
bcfc686d 11174 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
11175 off=lp-symsTable[i].pointer;
11176 if (ret < 0 || off < minoff) {
11177 minoff=off;
11178 ret=i;
11179 }
11180 }
0bc03378 11181 }
bcfc686d 11182 if (ret == -1) return NULL;
11183 *offset = minoff;
11184 return symsTable[ret].name;
0bc03378 11185}
bcfc686d 11186#else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined: no handler is installed. */
static void setupSigSegvAction(void) {
    /* Intentionally empty. */
}
bcfc686d 11189#endif /* HAVE_BACKTRACE */
0bc03378 11190
ed9b544e 11191
ed9b544e 11192
bcfc686d 11193/* The End */
11194
11195
ed9b544e 11196