]> git.saurik.com Git - redis.git/blame - redis.c
non-blocking VM data structures, just a start
[redis.git] / redis.c
CommitLineData
ed9b544e 1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
5dc70bff 30#define REDIS_VERSION "1.3.2"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
c9468bcf 40#define __USE_POSIX199309
ed9b544e 41#include <signal.h>
fbf9bcdb 42
43#ifdef HAVE_BACKTRACE
c9468bcf 44#include <execinfo.h>
45#include <ucontext.h>
fbf9bcdb 46#endif /* HAVE_BACKTRACE */
47
ed9b544e 48#include <sys/wait.h>
49#include <errno.h>
50#include <assert.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <inttypes.h>
54#include <arpa/inet.h>
55#include <sys/stat.h>
56#include <fcntl.h>
57#include <sys/time.h>
58#include <sys/resource.h>
2895e862 59#include <sys/uio.h>
f78fd11b 60#include <limits.h>
a7866db6 61#include <math.h>
92f8e882 62#include <pthread.h>
0bc1b2f6 63
64#if defined(__sun)
5043dff3 65#include "solarisfixes.h"
66#endif
ed9b544e 67
c9468bcf 68#include "redis.h"
ed9b544e 69#include "ae.h" /* Event driven programming library */
70#include "sds.h" /* Dynamic safe strings */
71#include "anet.h" /* Networking the easy way */
72#include "dict.h" /* Hash tables */
73#include "adlist.h" /* Linked lists */
74#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 75#include "lzf.h" /* LZF compression library */
76#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ed9b544e 77
78/* Error codes */
79#define REDIS_OK 0
80#define REDIS_ERR -1
81
82/* Static server configuration */
83#define REDIS_SERVERPORT 6379 /* TCP port */
84#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
6208b3a7 85#define REDIS_IOBUF_LEN 1024
ed9b544e 86#define REDIS_LOADBUF_LEN 1024
93ea3759 87#define REDIS_STATIC_ARGS 4
ed9b544e 88#define REDIS_DEFAULT_DBNUM 16
89#define REDIS_CONFIGLINE_MAX 1024
90#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
91#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94754ccc 92#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
6f376729 93#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
2895e862 94#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
95
96/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
97#define REDIS_WRITEV_THRESHOLD 3
98/* Max number of iovecs used for each writev call */
99#define REDIS_WRITEV_IOVEC_COUNT 256
ed9b544e 100
101/* Hash table parameters */
102#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
ed9b544e 103
104/* Command flags */
3fd78bcd 105#define REDIS_CMD_BULK 1 /* Bulk write command */
106#define REDIS_CMD_INLINE 2 /* Inline command */
107/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
108 this flag will return an error when the 'maxmemory' option is set in the
109 config file and the server is using more than maxmemory bytes of memory.
110 In short these commands are denied on low memory conditions. */
111#define REDIS_CMD_DENYOOM 4
ed9b544e 112
113/* Object types */
114#define REDIS_STRING 0
115#define REDIS_LIST 1
116#define REDIS_SET 2
1812e024 117#define REDIS_ZSET 3
118#define REDIS_HASH 4
f78fd11b 119
942a3961 120/* Objects encoding */
121#define REDIS_ENCODING_RAW 0 /* Raw representation */
122#define REDIS_ENCODING_INT 1 /* Encoded as integer */
123
f78fd11b 124/* Object types only used for dumping to disk */
bb32ede5 125#define REDIS_EXPIRETIME 253
ed9b544e 126#define REDIS_SELECTDB 254
127#define REDIS_EOF 255
128
f78fd11b 129/* Defines related to the dump file format. To store 32 bits lengths for short
130 * keys requires a lot of space, so we check the most significant 2 bits of
131 * the first byte to interpret the length:
132 *
133 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
134 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
135 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
a4d1ba9a 136 * 11|000000 this means: specially encoded object will follow. The six bits
137 * number specifies the kind of object that follows.
138 * See the REDIS_RDB_ENC_* defines.
f78fd11b 139 *
10c43610 140 * Lengths up to 63 are stored using a single byte, most DB keys, and many
141 * values, will fit inside. */
f78fd11b 142#define REDIS_RDB_6BITLEN 0
143#define REDIS_RDB_14BITLEN 1
144#define REDIS_RDB_32BITLEN 2
17be1a4a 145#define REDIS_RDB_ENCVAL 3
f78fd11b 146#define REDIS_RDB_LENERR UINT_MAX
147
a4d1ba9a 148/* When a length of a string object stored on disk has the first two bits
149 * set, the remaining six bits specify a special encoding for the object
150 * according to the following defines: */
151#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
152#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
153#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
774e3047 154#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
a4d1ba9a 155
75680a3c 156/* Virtual memory object->where field. */
157#define REDIS_VM_MEMORY 0 /* The object is on memory */
158#define REDIS_VM_SWAPPED 1 /* The object is on disk */
159#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
160#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
161
06224fec 162/* Virtual memory static configuration stuff.
163 * Check vmFindContiguousPages() to know more about this magic numbers. */
164#define REDIS_VM_MAX_NEAR_PAGES 65536
165#define REDIS_VM_MAX_RANDOM_JUMP 4096
92f8e882 166#define REDIS_VM_MAX_THREADS 32
06224fec 167
ed9b544e 168/* Client flags: power-of-two bit values, OR-ed together in the 'flags'
 * field of redisClient. */
169#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
170#define REDIS_SLAVE 2 /* This client is a slave server */
171#define REDIS_MASTER 4 /* This client is a master server */
87eca727 172#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
6e469882 173#define REDIS_MULTI 16 /* This client is in a MULTI context */
4409877e 174#define REDIS_BLOCKED 32 /* The client is waiting in a blocking operation */
92f8e882 175#define REDIS_IO_WAIT 64 /* The client is waiting for Virtual Memory I/O */
ed9b544e 176
40d224a9 177/* Slave replication state - slave side */
ed9b544e 178#define REDIS_REPL_NONE 0 /* No active replication */
179#define REDIS_REPL_CONNECT 1 /* Must connect to master */
180#define REDIS_REPL_CONNECTED 2 /* Connected to master */
181
40d224a9 182/* Slave replication state - from the point of view of master
183 * Note that in SEND_BULK and ONLINE state the slave receives new updates
184 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
185 * to start the next background saving in order to send updates to it. */
186#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
187#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
188#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
189#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
190
ed9b544e 191/* List related stuff */
192#define REDIS_HEAD 0
193#define REDIS_TAIL 1
194
195/* Sort operations */
196#define REDIS_SORT_GET 0
443c6409 197#define REDIS_SORT_ASC 1
198#define REDIS_SORT_DESC 2
ed9b544e 199#define REDIS_SORTKEY_MAX 1024
200
201/* Log levels */
202#define REDIS_DEBUG 0
f870935d 203#define REDIS_VERBOSE 1
204#define REDIS_NOTICE 2
205#define REDIS_WARNING 3
ed9b544e 206
207/* Anti-warning macro... */
208#define REDIS_NOTUSED(V) ((void) V)
209
6b47e12e 210#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
211#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
ed9b544e 212
48f0308a 213/* Append only defines */
214#define APPENDFSYNC_NO 0
215#define APPENDFSYNC_ALWAYS 1
216#define APPENDFSYNC_EVERYSEC 2
217
dfc5e96c 218/* We can print the stacktrace, so our assert is defined this way:
 * when the expression '_e' evaluates to false, _redisAssert() is called with
 * the stringified expression and then the process exits with status 1.
 * When '_e' is true the macro evaluates to (void)0. */
219#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e),exit(1)))
220static void _redisAssert(char *estr);
221
ed9b544e 222/*================================= Data types ============================== */
223
224/* A redis object, that is a type able to hold a string / list / set */
75680a3c 225
226/* The VM object structure: where a swapped object lives on disk, plus its
 * last access time. It is embedded as the last member of struct redisObject.
 *
 * NOTE(review): the trailing 'vm' declarator below also defines a file-scope
 * object named 'vm' of this type -- confirm whether that is intended. */
227struct redisObjectVM {
3a66edc7 228 off_t page; /* the page at which the object is stored on disk */
229 off_t usedpages; /* number of pages used on disk */
230 time_t atime; /* Last access time */
75680a3c 231} vm;
232
233/* The actual Redis Object: a typed, reference counted wrapper around the
 * value pointed to by 'ptr'. */
ed9b544e 234typedef struct redisObject {
ed9b544e 235 void *ptr; /* The wrapped value */
942a3961 236 unsigned char type; /* One of the object types, e.g. REDIS_STRING */
237 unsigned char encoding; /* One of REDIS_ENCODING_* */
d894161b 238 unsigned char storage; /* If this object is a key, where is the value?
239 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
240 unsigned char vtype; /* If this object is a key, and value is swapped out,
241 * this is the type of the swapped out object. */
ed9b544e 242 int refcount; /* Reference count (see incrRefCount / decrRefCount) */
75680a3c 243 /* VM fields, these are only allocated if VM is active, otherwise the
244 * object allocation function will just allocate
245 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
246 * Redis without VM active will not have any overhead.
 * For this to work 'vm' has to remain the last member. */
247 struct redisObjectVM vm;
ed9b544e 248} robj;
249
/* Macro used to initialize a Redis string object allocated on the stack.
 *
 * _var: an lvalue of type robj (the object itself, not a pointer to it).
 * _ptr: the value stored into the 'ptr' field of the object.
 *
 * The object gets refcount 1, type REDIS_STRING and encoding
 * REDIS_ENCODING_RAW. The 'storage' field is set (to REDIS_VM_MEMORY) only
 * when server.vm_enabled is true; the 'vm' member is never touched.
 *
 * The expansion is a single do { ... } while(0) statement WITHOUT a trailing
 * semicolon, so "initStaticStringObject(o,p);" can be used safely as the body
 * of an unbraced if/else. Both arguments are parenthesized in the expansion.
 *
 * Note that this macro is kept near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85, introduced exactly in this way. */
#define initStaticStringObject(_var,_ptr) do { \
    (_var).refcount = 1; \
    (_var).type = REDIS_STRING; \
    (_var).encoding = REDIS_ENCODING_RAW; \
    (_var).ptr = (_ptr); \
    if (server.vm_enabled) (_var).storage = REDIS_VM_MEMORY; \
} while(0)
261
/* One Redis database: the keyspace plus the per-key bookkeeping
 * dictionaries that go with it. */
3305306f 262typedef struct redisDb {
4409877e 263 dict *dict; /* The keyspace for this DB */
264 dict *expires; /* Timeout of keys with a timeout set */
265 dict *blockingkeys; /* Keys with clients waiting for data (BLPOP) */
3305306f 266 int id; /* presumably the numeric index of this DB -- TODO confirm */
267} redisDb;
268
6e469882 269/* Client MULTI/EXEC state */
/* A single command held for a MULTI context: its argument vector and the
 * command table entry it refers to. */
270typedef struct multiCmd {
271 robj **argv; /* Command arguments */
272 int argc; /* Number of elements in argv */
273 struct redisCommand *cmd; /* The command itself */
274} multiCmd;
275
/* The set of commands accumulated by a client in a MULTI context
 * (stored in the 'mstate' field of redisClient). */
276typedef struct multiState {
277 multiCmd *commands; /* Array of MULTI commands */
278 int count; /* Total number of MULTI commands */
279} multiState;
280
ed9b544e 281/* With multiplexing we need to keep per-client state.
282 * Clients are kept in a linked list. */
283typedef struct redisClient {
284 int fd;
3305306f 285 redisDb *db;
ed9b544e 286 int dictid;
287 sds querybuf;
e8a74421 288 robj **argv, **mbargv;
289 int argc, mbargc;
40d224a9 290 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 291 int multibulk; /* multi bulk command format active */
ed9b544e 292 list *reply;
293 int sentlen;
294 time_t lastinteraction; /* time of the last interaction, used for timeout */
40d224a9 295 int flags; /* OR of the client flags: REDIS_CLOSE | REDIS_SLAVE |
 * REDIS_MASTER | REDIS_MONITOR | REDIS_MULTI | REDIS_BLOCKED |
 * REDIS_IO_WAIT */
6e469882 296 /* REDIS_MULTI */
40d224a9 297 int slaveseldb; /* slave selected db, if this client is a slave */
298 int authenticated; /* when requirepass is non-NULL */
299 int replstate; /* replication state if this is a slave */
300 int repldbfd; /* replication DB file descriptor */
6e469882 301 long repldboff; /* replication DB file offset */
40d224a9 302 off_t repldbsize; /* replication DB file size */
6e469882 303 multiState mstate; /* MULTI/EXEC state */
b177fd30 304 robj **blockingkeys; /* The keys we are waiting on to terminate a blocking
4409877e 305 * operation such as BLPOP. Otherwise NULL. */
b177fd30 306 int blockingkeysnum; /* Number of blocking keys */
4409877e 307 time_t blockingto; /* Blocking operation timeout. If UNIX current time
308 * is >= blockingto then the operation timed out. */
92f8e882 309 list *io_keys; /* Keys this client is waiting to be loaded from the
310 * swap file in order to continue. */
ed9b544e 311} redisClient;
312
/* A (seconds, changes) pair; an array of these is referenced by the
 * 'saveparams' field of struct redisServer.
 * NOTE(review): presumably one automatic-save rule ("save after 'seconds'
 * if at least 'changes' modifications happened") -- confirm against the
 * code that reads server.saveparams. */
313struct saveparam {
314 time_t seconds;
315 int changes;
316};
317
318/* Global server state structure. A single instance named 'server' is
 * defined in the Globals section of this file. */
319struct redisServer {
320 int port;
321 int fd;
3305306f 322 redisDb *db;
4409877e 323 dict *sharingpool; /* Pool used for object sharing */
10c43610 324 unsigned int sharingpoolsize;
ed9b544e 325 long long dirty; /* changes to DB from the last save */
326 list *clients;
87eca727 327 list *slaves, *monitors;
ed9b544e 328 char neterr[ANET_ERR_LEN];
329 aeEventLoop *el;
330 int cronloops; /* number of times the cron function run */
331 list *objfreelist; /* A list of freed objects to avoid malloc() */
332 time_t lastsave; /* Unix time of last successful save */
5fba9f71 333 size_t usedmemory; /* Used memory in megabytes */
ed9b544e 334 /* Fields used only for stats */
335 time_t stat_starttime; /* server start time */
336 long long stat_numcommands; /* number of processed commands */
337 long long stat_numconnections; /* number of connections received */
338 /* Configuration */
339 int verbosity;
340 int glueoutputbuf;
341 int maxidletime;
342 int dbnum;
343 int daemonize;
44b38ef4 344 int appendonly;
48f0308a 345 int appendfsync; /* One of the APPENDFSYNC_* values */
346 time_t lastfsync;
44b38ef4 347 int appendfd;
348 int appendseldb;
ed329fcf 349 char *pidfile;
9f3c422c 350 pid_t bgsavechildpid;
9d65a1bb 351 pid_t bgrewritechildpid;
352 sds bgrewritebuf; /* buffer taken by parent during append only rewrite */
ed9b544e 353 struct saveparam *saveparams;
354 int saveparamslen;
355 char *logfile; /* Log file path; when NULL redisLog() writes to stdout */
356 char *bindaddr;
357 char *dbfilename;
44b38ef4 358 char *appendfilename;
abcb223e 359 char *requirepass;
10c43610 360 int shareobjects;
121f70cf 361 int rdbcompression;
ed9b544e 362 /* Replication related */
363 int isslave;
d0ccebcf 364 char *masterauth;
ed9b544e 365 char *masterhost;
366 int masterport;
40d224a9 367 redisClient *master; /* client that is master for this slave */
ed9b544e 368 int replstate;
285add55 369 unsigned int maxclients;
4ef8de8a 370 unsigned long long maxmemory;
f86a74e9 371 unsigned int blockedclients;
ed9b544e 372 /* Sort parameters - qsort_r() is only available under BSD so we
373 * have to take this state global, in order to pass it to sortCompare() */
374 int sort_desc;
375 int sort_alpha;
376 int sort_bypattern;
75680a3c 377 /* Virtual memory configuration */
378 int vm_enabled;
379 off_t vm_page_size;
380 off_t vm_pages;
4ef8de8a 381 unsigned long long vm_max_memory;
75680a3c 382 /* Virtual memory state */
383 FILE *vm_fp;
384 int vm_fd;
385 off_t vm_next_page; /* Next probably empty page */
386 off_t vm_near_pages; /* Number of pages allocated sequentially */
06224fec 387 unsigned char *vm_bitmap; /* Bitmap of free/used pages */
3a66edc7 388 time_t unixtime; /* Unix time sampled every second. */
92f8e882 389 /* Virtual memory I/O threads stuff */
390 pthread_t io_threads[REDIS_VM_MAX_THREADS];
391 /* An I/O thread processes an element taken from the io_jobs queue and
392 * puts the result of the operation in the io_done list. */
393 list *io_jobs; /* List of VM I/O jobs */
394 list *io_done; /* List of VM processed jobs */
395 list *io_clients; /* All the clients waiting for SWAP I/O operations */
396 pthread_mutex_t io_mutex; /* lock to access io_jobs and io_done */
397 int io_active_threads; /* Number of running I/O threads */
398 int vm_max_threads; /* Max number of I/O threads running at the same time */
7d98e08c 399 /* Virtual memory stats */
400 unsigned long long vm_stats_used_pages;
401 unsigned long long vm_stats_swapped_objects;
402 unsigned long long vm_stats_swapouts;
403 unsigned long long vm_stats_swapins;
ed9b544e 404};
405
/* Signature shared by all the command implementations: they receive the
 * client that issued the command and return nothing. */
406typedef void redisCommandProc(redisClient *c);
/* One entry of the command table (see cmdTable). */
407struct redisCommand {
408 char *name; /* Command name, lowercase in cmdTable */
409 redisCommandProc *proc; /* Function implementing the command */
410 int arity; /* Argument count; some entries use a negative value.
 * NOTE(review): the sign convention is not visible
 * here -- confirm in processCommand(). */
411 int flags; /* OR of REDIS_CMD_* flags */
412};
413
/* A (name, pointer) pair.
 * NOTE(review): presumably maps a function name to its address for
 * symbolizing stack traces -- confirm against the code that uses it. */
de96dbfe 414struct redisFunctionSym {
415 char *name;
56906eef 416 unsigned long pointer;
de96dbfe 417};
418
/* An object paired with the value used to order it: either a numeric
 * score or another object to compare against.
 * NOTE(review): presumably one element of the vector sorted by SORT, with
 * the active union member selected by the server.sort_* settings --
 * confirm against sortCommand() / sortCompare(). */
ed9b544e 419typedef struct _redisSortObject {
420 robj *obj;
421 union {
422 double score;
423 robj *cmpobj;
424 } u;
425} redisSortObject;
426
/* An operation type together with the pattern it applies to.
 * NOTE(review): 'type' presumably holds one of the "Sort operations"
 * constants such as REDIS_SORT_GET -- confirm against sortCommand(). */
427typedef struct _redisSortOperation {
428 int type;
429 robj *pattern;
430} redisSortOperation;
431
6b47e12e 432/* ZSETs use a specialized version of Skiplists */
433
/* A skiplist node: an object with its score, an array of forward pointers
 * and a single backward pointer.
 * NOTE(review): 'forward' presumably holds one pointer per level of the
 * node -- confirm against the zsl* functions. */
434typedef struct zskiplistNode {
435 struct zskiplistNode **forward;
e3870fab 436 struct zskiplistNode *backward;
6b47e12e 437 double score;
438 robj *obj;
439} zskiplistNode;
440
/* The skiplist itself: header and tail nodes, a length and a level.
 * NOTE(review): 'length' is presumably the number of elements and 'level'
 * the highest level currently in use (at most ZSKIPLIST_MAXLEVEL) --
 * confirm against zslCreate() / zslInsert(). */
441typedef struct zskiplist {
e3870fab 442 struct zskiplistNode *header, *tail;
d13f767c 443 unsigned long length;
6b47e12e 444 int level;
445} zskiplist;
446
/* A sorted set (REDIS_ZSET value): a hash table paired with a skiplist.
 * NOTE(review): presumably the dict maps members to scores while the
 * skiplist keeps the members ordered by score -- confirm against the
 * z*Command implementations. */
1812e024 447typedef struct zset {
448 dict *dict;
6b47e12e 449 zskiplist *zsl;
1812e024 450} zset;
451
6b47e12e 452/* Our shared "common" objects: a set of robj pointers collected in the
 * single file-scope instance 'shared'.
 * NOTE(review): going by the member names these are presumably preallocated
 * protocol replies (status/error strings, "select N" commands) -- confirm
 * against the code that initializes 'shared'. */
453
ed9b544e 454struct sharedObjectsStruct {
c937aa89 455 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
6e469882 456 *colon, *nullbulk, *nullmultibulk, *queued,
c937aa89 457 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
458 *outofrangeerr, *plus,
ed9b544e 459 *select0, *select1, *select2, *select3, *select4,
460 *select5, *select6, *select7, *select8, *select9;
461} shared;
462
a7866db6 463/* Global vars that are actually used as constants. The following double
464 * values are used for double on-disk serialization, and are initialized
465 * at runtime to avoid strange compiler optimizations. */
466
467static double R_Zero, R_PosInf, R_NegInf, R_Nan;
468
92f8e882 469/* VM threaded I/O request message.
 * NOTE(review): presumably these are the elements queued in
 * server.io_jobs and handed back through server.io_done -- confirm
 * against the I/O thread code. */
470#define REDIS_IOREQ_LOAD 0
471#define REDIS_IOREQ_SWAP 1
472typedef struct ioreq {
473 int type; /* Request type, REDIS_IOREQ_* */
474 int dbid; /* Redis database ID */
475 robj *key; /* This I/O request is about swapping this key */
476 robj *val; /* the value to swap for REDIS_IOREQ_SWAP, otherwise this
477 * field is populated by the I/O thread for REDIS_IOREQ_LOAD. */
478 off_t page; /* Swap page where to read/write the object */
479} ioreq;
480
ed9b544e 481/*================================ Prototypes =============================== */
482
483static void freeStringObject(robj *o);
484static void freeListObject(robj *o);
485static void freeSetObject(robj *o);
486static void decrRefCount(void *o);
487static robj *createObject(int type, void *ptr);
488static void freeClient(redisClient *c);
f78fd11b 489static int rdbLoad(char *filename);
ed9b544e 490static void addReply(redisClient *c, robj *obj);
491static void addReplySds(redisClient *c, sds s);
492static void incrRefCount(robj *o);
f78fd11b 493static int rdbSaveBackground(char *filename);
ed9b544e 494static robj *createStringObject(char *ptr, size_t len);
4ef8de8a 495static robj *dupStringObject(robj *o);
87eca727 496static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
44b38ef4 497static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 498static int syncWithMaster(void);
10c43610 499static robj *tryObjectSharing(robj *o);
942a3961 500static int tryObjectEncoding(robj *o);
9d65a1bb 501static robj *getDecodedObject(robj *o);
3305306f 502static int removeExpire(redisDb *db, robj *key);
503static int expireIfNeeded(redisDb *db, robj *key);
504static int deleteIfVolatile(redisDb *db, robj *key);
1b03836c 505static int deleteIfSwapped(redisDb *db, robj *key);
94754ccc 506static int deleteKey(redisDb *db, robj *key);
bb32ede5 507static time_t getExpire(redisDb *db, robj *key);
508static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 509static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 510static void freeMemoryIfNeeded(void);
de96dbfe 511static int processCommand(redisClient *c);
56906eef 512static void setupSigSegvAction(void);
a3b21203 513static void rdbRemoveTempFile(pid_t childpid);
9d65a1bb 514static void aofRemoveTempFile(pid_t childpid);
0ea663ea 515static size_t stringObjectLen(robj *o);
638e42ac 516static void processInputBuffer(redisClient *c);
6b47e12e 517static zskiplist *zslCreate(void);
fd8ccf44 518static void zslFree(zskiplist *zsl);
2b59cfdf 519static void zslInsert(zskiplist *zsl, double score, robj *obj);
2895e862 520static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
6e469882 521static void initClientMultiState(redisClient *c);
522static void freeClientMultiState(redisClient *c);
523static void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
4409877e 524static void unblockClient(redisClient *c);
525static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
75680a3c 526static void vmInit(void);
a35ddf12 527static void vmMarkPagesFree(off_t page, off_t count);
55cf8433 528static robj *vmLoadObject(robj *key);
7e69548d 529static robj *vmPreviewObject(robj *key);
4ef8de8a 530static int vmSwapOneObject(void);
7e69548d 531static int vmCanSwapOut(void);
f870935d 532static void freeOneObjectFromFreelist(void);
ed9b544e 533
abcb223e 534static void authCommand(redisClient *c);
ed9b544e 535static void pingCommand(redisClient *c);
536static void echoCommand(redisClient *c);
537static void setCommand(redisClient *c);
538static void setnxCommand(redisClient *c);
539static void getCommand(redisClient *c);
540static void delCommand(redisClient *c);
541static void existsCommand(redisClient *c);
542static void incrCommand(redisClient *c);
543static void decrCommand(redisClient *c);
544static void incrbyCommand(redisClient *c);
545static void decrbyCommand(redisClient *c);
546static void selectCommand(redisClient *c);
547static void randomkeyCommand(redisClient *c);
548static void keysCommand(redisClient *c);
549static void dbsizeCommand(redisClient *c);
550static void lastsaveCommand(redisClient *c);
551static void saveCommand(redisClient *c);
552static void bgsaveCommand(redisClient *c);
9d65a1bb 553static void bgrewriteaofCommand(redisClient *c);
ed9b544e 554static void shutdownCommand(redisClient *c);
555static void moveCommand(redisClient *c);
556static void renameCommand(redisClient *c);
557static void renamenxCommand(redisClient *c);
558static void lpushCommand(redisClient *c);
559static void rpushCommand(redisClient *c);
560static void lpopCommand(redisClient *c);
561static void rpopCommand(redisClient *c);
562static void llenCommand(redisClient *c);
563static void lindexCommand(redisClient *c);
564static void lrangeCommand(redisClient *c);
565static void ltrimCommand(redisClient *c);
566static void typeCommand(redisClient *c);
567static void lsetCommand(redisClient *c);
568static void saddCommand(redisClient *c);
569static void sremCommand(redisClient *c);
a4460ef4 570static void smoveCommand(redisClient *c);
ed9b544e 571static void sismemberCommand(redisClient *c);
572static void scardCommand(redisClient *c);
12fea928 573static void spopCommand(redisClient *c);
2abb95a9 574static void srandmemberCommand(redisClient *c);
ed9b544e 575static void sinterCommand(redisClient *c);
576static void sinterstoreCommand(redisClient *c);
40d224a9 577static void sunionCommand(redisClient *c);
578static void sunionstoreCommand(redisClient *c);
f4f56e1d 579static void sdiffCommand(redisClient *c);
580static void sdiffstoreCommand(redisClient *c);
ed9b544e 581static void syncCommand(redisClient *c);
582static void flushdbCommand(redisClient *c);
583static void flushallCommand(redisClient *c);
584static void sortCommand(redisClient *c);
585static void lremCommand(redisClient *c);
0f5f7e9a 586static void rpoplpushcommand(redisClient *c);
ed9b544e 587static void infoCommand(redisClient *c);
70003d28 588static void mgetCommand(redisClient *c);
87eca727 589static void monitorCommand(redisClient *c);
3305306f 590static void expireCommand(redisClient *c);
802e8373 591static void expireatCommand(redisClient *c);
f6b141c5 592static void getsetCommand(redisClient *c);
fd88489a 593static void ttlCommand(redisClient *c);
321b0e13 594static void slaveofCommand(redisClient *c);
7f957c92 595static void debugCommand(redisClient *c);
f6b141c5 596static void msetCommand(redisClient *c);
597static void msetnxCommand(redisClient *c);
fd8ccf44 598static void zaddCommand(redisClient *c);
7db723ad 599static void zincrbyCommand(redisClient *c);
cc812361 600static void zrangeCommand(redisClient *c);
50c55df5 601static void zrangebyscoreCommand(redisClient *c);
e3870fab 602static void zrevrangeCommand(redisClient *c);
3c41331e 603static void zcardCommand(redisClient *c);
1b7106e7 604static void zremCommand(redisClient *c);
6e333bbe 605static void zscoreCommand(redisClient *c);
1807985b 606static void zremrangebyscoreCommand(redisClient *c);
6e469882 607static void multiCommand(redisClient *c);
608static void execCommand(redisClient *c);
4409877e 609static void blpopCommand(redisClient *c);
610static void brpopCommand(redisClient *c);
f6b141c5 611
ed9b544e 612/*================================= Globals ================================= */
613
614/* Global vars */
615static struct redisServer server; /* server global state */
/* The command table. Every entry is a struct redisCommand initializer:
 *
 *   {name, implementation, arity, flags}
 *
 * 'flags' is an OR of REDIS_CMD_INLINE / REDIS_CMD_BULK, optionally with
 * REDIS_CMD_DENYOOM for commands that are refused when the server is over
 * its 'maxmemory' limit. The table ends with the {NULL,NULL,0,0} sentinel.
 *
 * NOTE(review): some arities are negative; the meaning of the sign is not
 * visible here -- confirm in processCommand().
 * NOTE(review): "smembers" is served by sinterCommand, and "zscore" carries
 * REDIS_CMD_DENYOOM although its name suggests a read-only command --
 * confirm both are intended. */
616static struct redisCommand cmdTable[] = {
617 {"get",getCommand,2,REDIS_CMD_INLINE},
3fd78bcd 618 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
619 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
5109cdff 620 {"del",delCommand,-2,REDIS_CMD_INLINE},
ed9b544e 621 {"exists",existsCommand,2,REDIS_CMD_INLINE},
3fd78bcd 622 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
623 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
70003d28 624 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
3fd78bcd 625 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
626 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 627 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
628 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
b177fd30 629 {"brpop",brpopCommand,-3,REDIS_CMD_INLINE},
630 {"blpop",blpopCommand,-3,REDIS_CMD_INLINE},
ed9b544e 631 {"llen",llenCommand,2,REDIS_CMD_INLINE},
632 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
3fd78bcd 633 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 634 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
635 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
636 {"lrem",lremCommand,4,REDIS_CMD_BULK},
0b13687c 637 {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 638 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 639 {"srem",sremCommand,3,REDIS_CMD_BULK},
a4460ef4 640 {"smove",smoveCommand,4,REDIS_CMD_BULK},
ed9b544e 641 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
642 {"scard",scardCommand,2,REDIS_CMD_INLINE},
12fea928 643 {"spop",spopCommand,2,REDIS_CMD_INLINE},
2abb95a9 644 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
3fd78bcd 645 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
646 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
647 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
648 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
649 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
650 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 651 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
fd8ccf44 652 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
7db723ad 653 {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
1b7106e7 654 {"zrem",zremCommand,3,REDIS_CMD_BULK},
1807985b 655 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
752da584 656 {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE},
80181f78 657 {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE},
752da584 658 {"zrevrange",zrevrangeCommand,-4,REDIS_CMD_INLINE},
3c41331e 659 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
6e333bbe 660 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 661 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
662 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
f6b141c5 663 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
664 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
665 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 666 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
667 {"select",selectCommand,2,REDIS_CMD_INLINE},
668 {"move",moveCommand,3,REDIS_CMD_INLINE},
669 {"rename",renameCommand,3,REDIS_CMD_INLINE},
670 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
321b0e13 671 {"expire",expireCommand,3,REDIS_CMD_INLINE},
802e8373 672 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
ed9b544e 673 {"keys",keysCommand,2,REDIS_CMD_INLINE},
674 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
abcb223e 675 {"auth",authCommand,2,REDIS_CMD_INLINE},
ed9b544e 676 {"ping",pingCommand,1,REDIS_CMD_INLINE},
677 {"echo",echoCommand,2,REDIS_CMD_BULK},
678 {"save",saveCommand,1,REDIS_CMD_INLINE},
679 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
9d65a1bb 680 {"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE},
ed9b544e 681 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
682 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
683 {"type",typeCommand,2,REDIS_CMD_INLINE},
6e469882 684 {"multi",multiCommand,1,REDIS_CMD_INLINE},
685 {"exec",execCommand,1,REDIS_CMD_INLINE},
ed9b544e 686 {"sync",syncCommand,1,REDIS_CMD_INLINE},
687 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
688 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
3fd78bcd 689 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 690 {"info",infoCommand,1,REDIS_CMD_INLINE},
87eca727 691 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
fd88489a 692 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
321b0e13 693 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
7f957c92 694 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
ed9b544e 695 {NULL,NULL,0,0}
696};
bcfc686d 697
ed9b544e 698/*============================ Utility functions ============================ */
699
700/* Glob-style pattern matching. */
/* Glob-style pattern matching over length-delimited buffers.
 *
 * Supported syntax in 'pattern':
 *   *        matches any run of characters, including an empty one
 *   ?        matches exactly one character
 *   [abc]    matches one character out of the listed set
 *   [^abc]   matches one character NOT in the listed set
 *   [a-z]    matches one character in the inclusive range (either order)
 *   \x       matches the character x literally (also valid inside [...])
 *
 * A class that is not closed by ']' is accepted and ends at the end of the
 * pattern. A backslash that is the last byte of the pattern stands for
 * itself.
 *
 * pattern, patternLen: the pattern and its length in bytes.
 * string, stringLen:   the subject and its length in bytes.
 * nocase:              when non-zero, characters are compared via tolower().
 *
 * Both lengths are expected to be non-negative. Neither buffer needs to be
 * NUL terminated: no byte at or beyond the given lengths is read.
 *
 * Returns 1 if the whole string matches the whole pattern, 0 otherwise. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while(patternLen) {
        switch(pattern[0]) {
        case '*':
            /* Collapse a run of '*' into the last one, looking only at
             * bytes that belong to the pattern. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try to match the rest of the pattern against every suffix
             * of the string. */
            while(stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int negate, match;

            /* A class always consumes one character of the string. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            negate = patternLen && pattern[0] == '^';
            if (negate) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back one byte so that the
                     * advance after the switch lands exactly on the end
                     * of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((int)pattern[0]) == tolower((int)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (negate)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            /* A literal needs a character to compare against. */
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((int)pattern[0]) != tolower((int)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is over: only trailing '*' may remain. */
            while(patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
822
56906eef 823static void redisLog(int level, const char *fmt, ...) {
ed9b544e 824 va_list ap;
825 FILE *fp;
826
827 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
828 if (!fp) return;
829
830 va_start(ap, fmt);
831 if (level >= server.verbosity) {
832 char *c = ".-*";
1904ecc1 833 char buf[64];
834 time_t now;
835
836 now = time(NULL);
6c9385e0 837 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
1904ecc1 838 fprintf(fp,"%s %c ",buf,c[level]);
ed9b544e 839 vfprintf(fp, fmt, ap);
840 fprintf(fp,"\n");
841 fflush(fp);
842 }
843 va_end(ap);
844
845 if (server.logfile) fclose(fp);
846}
847
848/*====================== Hash table type implementation ==================== */
849
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and redis objects as values (objects can hold SDS strings,
 * lists, sets). */
853
/* Dict value destructor: releases 'val' with zfree().
 * 'privdata' is not used. */
static void dictVanillaFree(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    zfree(val);
}
859
4409877e 860static void dictListDestructor(void *privdata, void *val)
861{
862 DICT_NOTUSED(privdata);
863 listRelease((list*)val);
864}
865
ed9b544e 866static int sdsDictKeyCompare(void *privdata, const void *key1,
867 const void *key2)
868{
869 int l1,l2;
870 DICT_NOTUSED(privdata);
871
872 l1 = sdslen((sds)key1);
873 l2 = sdslen((sds)key2);
874 if (l1 != l2) return 0;
875 return memcmp(key1, key2, l1) == 0;
876}
877
/* Dict destructor for redis objects: drops one reference from 'val'.
 * A NULL value is ignored, since values of swapped out keys are set to
 * NULL. 'privdata' is not used. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    if (val != NULL) decrRefCount(val);
}
885
942a3961 886static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 887 const void *key2)
888{
889 const robj *o1 = key1, *o2 = key2;
890 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
891}
892
942a3961 893static unsigned int dictObjHash(const void *key) {
ed9b544e 894 const robj *o = key;
895 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
896}
897
942a3961 898static int dictEncObjKeyCompare(void *privdata, const void *key1,
899 const void *key2)
900{
9d65a1bb 901 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
902 int cmp;
942a3961 903
9d65a1bb 904 o1 = getDecodedObject(o1);
905 o2 = getDecodedObject(o2);
906 cmp = sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
907 decrRefCount(o1);
908 decrRefCount(o2);
909 return cmp;
942a3961 910}
911
912static unsigned int dictEncObjHash(const void *key) {
9d65a1bb 913 robj *o = (robj*) key;
942a3961 914
9d65a1bb 915 o = getDecodedObject(o);
916 unsigned int hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
917 decrRefCount(o);
918 return hash;
942a3961 919}
920
ed9b544e 921static dictType setDictType = {
942a3961 922 dictEncObjHash, /* hash function */
ed9b544e 923 NULL, /* key dup */
924 NULL, /* val dup */
942a3961 925 dictEncObjKeyCompare, /* key compare */
ed9b544e 926 dictRedisObjectDestructor, /* key destructor */
927 NULL /* val destructor */
928};
929
1812e024 930static dictType zsetDictType = {
931 dictEncObjHash, /* hash function */
932 NULL, /* key dup */
933 NULL, /* val dup */
934 dictEncObjKeyCompare, /* key compare */
935 dictRedisObjectDestructor, /* key destructor */
da0a1620 936 dictVanillaFree /* val destructor of malloc(sizeof(double)) */
1812e024 937};
938
ed9b544e 939static dictType hashDictType = {
942a3961 940 dictObjHash, /* hash function */
ed9b544e 941 NULL, /* key dup */
942 NULL, /* val dup */
942a3961 943 dictObjKeyCompare, /* key compare */
ed9b544e 944 dictRedisObjectDestructor, /* key destructor */
945 dictRedisObjectDestructor /* val destructor */
946};
947
4409877e 948/* Keylist hash table type has unencoded redis objects as keys and
949 * lists as values. It's used for blocking operations (BLPOP) */
950static dictType keylistDictType = {
951 dictObjHash, /* hash function */
952 NULL, /* key dup */
953 NULL, /* val dup */
954 dictObjKeyCompare, /* key compare */
955 dictRedisObjectDestructor, /* key destructor */
956 dictListDestructor /* val destructor */
957};
958
ed9b544e 959/* ========================= Random utility functions ======================= */
960
961/* Redis generally does not try to recover from out of memory conditions
962 * when allocating objects or strings, it is not clear if it will be possible
963 * to report this condition to the client since the networking layer itself
964 * is based on heap allocation for send buffers, so we simply abort.
965 * At least the code will be simpler to read... */
966static void oom(const char *msg) {
71c54b21 967 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
ed9b544e 968 sleep(1);
969 abort();
970}
971
972/* ====================== Redis server networking stuff ===================== */
56906eef 973static void closeTimedoutClients(void) {
ed9b544e 974 redisClient *c;
ed9b544e 975 listNode *ln;
976 time_t now = time(NULL);
977
6208b3a7 978 listRewind(server.clients);
979 while ((ln = listYield(server.clients)) != NULL) {
ed9b544e 980 c = listNodeValue(ln);
f86a74e9 981 if (server.maxidletime &&
982 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 983 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
f86a74e9 984 (now - c->lastinteraction > server.maxidletime))
985 {
f870935d 986 redisLog(REDIS_VERBOSE,"Closing idle client");
ed9b544e 987 freeClient(c);
f86a74e9 988 } else if (c->flags & REDIS_BLOCKED) {
58d976b8 989 if (c->blockingto != 0 && c->blockingto < now) {
b177fd30 990 addReply(c,shared.nullmultibulk);
f86a74e9 991 unblockClient(c);
992 }
ed9b544e 993 }
994 }
ed9b544e 995}
996
12fea928 997static int htNeedsResize(dict *dict) {
998 long long size, used;
999
1000 size = dictSlots(dict);
1001 used = dictSize(dict);
1002 return (size && used && size > DICT_HT_INITIAL_SIZE &&
1003 (used*100/size < REDIS_HT_MINFILL));
1004}
1005
0bc03378 1006/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
1007 * we resize the hash table to save memory */
56906eef 1008static void tryResizeHashTables(void) {
0bc03378 1009 int j;
1010
1011 for (j = 0; j < server.dbnum; j++) {
12fea928 1012 if (htNeedsResize(server.db[j].dict)) {
f870935d 1013 redisLog(REDIS_VERBOSE,"The hash table %d is too sparse, resize it...",j);
0bc03378 1014 dictResize(server.db[j].dict);
f870935d 1015 redisLog(REDIS_VERBOSE,"Hash table %d resized.",j);
0bc03378 1016 }
12fea928 1017 if (htNeedsResize(server.db[j].expires))
1018 dictResize(server.db[j].expires);
0bc03378 1019 }
1020}
1021
9d65a1bb 1022/* A background saving child (BGSAVE) terminated its work. Handle this. */
1023void backgroundSaveDoneHandler(int statloc) {
1024 int exitcode = WEXITSTATUS(statloc);
1025 int bysignal = WIFSIGNALED(statloc);
1026
1027 if (!bysignal && exitcode == 0) {
1028 redisLog(REDIS_NOTICE,
1029 "Background saving terminated with success");
1030 server.dirty = 0;
1031 server.lastsave = time(NULL);
1032 } else if (!bysignal && exitcode != 0) {
1033 redisLog(REDIS_WARNING, "Background saving error");
1034 } else {
1035 redisLog(REDIS_WARNING,
1036 "Background saving terminated by signal");
1037 rdbRemoveTempFile(server.bgsavechildpid);
1038 }
1039 server.bgsavechildpid = -1;
1040 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1041 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1042 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
1043}
1044
1045/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1046 * Handle this. */
1047void backgroundRewriteDoneHandler(int statloc) {
1048 int exitcode = WEXITSTATUS(statloc);
1049 int bysignal = WIFSIGNALED(statloc);
1050
1051 if (!bysignal && exitcode == 0) {
1052 int fd;
1053 char tmpfile[256];
1054
1055 redisLog(REDIS_NOTICE,
1056 "Background append only file rewriting terminated with success");
1057 /* Now it's time to flush the differences accumulated by the parent */
1058 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
1059 fd = open(tmpfile,O_WRONLY|O_APPEND);
1060 if (fd == -1) {
1061 redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
1062 goto cleanup;
1063 }
1064 /* Flush our data... */
1065 if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
1066 (signed) sdslen(server.bgrewritebuf)) {
1067 redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
1068 close(fd);
1069 goto cleanup;
1070 }
b32627cd 1071 redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
9d65a1bb 1072 /* Now our work is to rename the temp file into the stable file. And
1073 * switch the file descriptor used by the server for append only. */
1074 if (rename(tmpfile,server.appendfilename) == -1) {
1075 redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
1076 close(fd);
1077 goto cleanup;
1078 }
1079 /* Mission completed... almost */
1080 redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
1081 if (server.appendfd != -1) {
1082 /* If append only is actually enabled... */
1083 close(server.appendfd);
1084 server.appendfd = fd;
1085 fsync(fd);
85a83172 1086 server.appendseldb = -1; /* Make sure it will issue SELECT */
9d65a1bb 1087 redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
1088 } else {
1089 /* If append only is disabled we just generate a dump in this
1090 * format. Why not? */
1091 close(fd);
1092 }
1093 } else if (!bysignal && exitcode != 0) {
1094 redisLog(REDIS_WARNING, "Background append only file rewriting error");
1095 } else {
1096 redisLog(REDIS_WARNING,
1097 "Background append only file rewriting terminated by signal");
1098 }
1099cleanup:
1100 sdsfree(server.bgrewritebuf);
1101 server.bgrewritebuf = sdsempty();
1102 aofRemoveTempFile(server.bgrewritechildpid);
1103 server.bgrewritechildpid = -1;
1104}
1105
56906eef 1106static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 1107 int j, loops = server.cronloops++;
ed9b544e 1108 REDIS_NOTUSED(eventLoop);
1109 REDIS_NOTUSED(id);
1110 REDIS_NOTUSED(clientData);
1111
3a66edc7 1112 /* We take a cached value of the unix time in the global state because
1113 * with virtual memory and aging there is to store the current time
1114 * in objects at every object access, and accuracy is not needed.
1115 * To access a global var is faster than calling time(NULL) */
1116 server.unixtime = time(NULL);
1117
ed9b544e 1118 /* Update the global state with the amount of used memory */
1119 server.usedmemory = zmalloc_used_memory();
1120
0bc03378 1121 /* Show some info about non-empty databases */
ed9b544e 1122 for (j = 0; j < server.dbnum; j++) {
dec423d9 1123 long long size, used, vkeys;
94754ccc 1124
3305306f 1125 size = dictSlots(server.db[j].dict);
1126 used = dictSize(server.db[j].dict);
94754ccc 1127 vkeys = dictSize(server.db[j].expires);
c3cb078d 1128 if (!(loops % 5) && (used || vkeys)) {
f870935d 1129 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 1130 /* dictPrintStats(server.dict); */
ed9b544e 1131 }
ed9b544e 1132 }
1133
0bc03378 1134 /* We don't want to resize the hash tables while a bacground saving
1135 * is in progress: the saving child is created using fork() that is
1136 * implemented with a copy-on-write semantic in most modern systems, so
1137 * if we resize the HT while there is the saving child at work actually
1138 * a lot of memory movements in the parent will cause a lot of pages
1139 * copied. */
9d65a1bb 1140 if (server.bgsavechildpid == -1) tryResizeHashTables();
0bc03378 1141
ed9b544e 1142 /* Show information about connected clients */
1143 if (!(loops % 5)) {
f870935d 1144 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
ed9b544e 1145 listLength(server.clients)-listLength(server.slaves),
1146 listLength(server.slaves),
10c43610 1147 server.usedmemory,
3305306f 1148 dictSize(server.sharingpool));
ed9b544e 1149 }
1150
1151 /* Close connections of timedout clients */
f86a74e9 1152 if ((server.maxidletime && !(loops % 10)) || server.blockedclients)
ed9b544e 1153 closeTimedoutClients();
1154
9d65a1bb 1155 /* Check if a background saving or AOF rewrite in progress terminated */
1156 if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
ed9b544e 1157 int statloc;
9d65a1bb 1158 pid_t pid;
1159
1160 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
1161 if (pid == server.bgsavechildpid) {
1162 backgroundSaveDoneHandler(statloc);
ed9b544e 1163 } else {
9d65a1bb 1164 backgroundRewriteDoneHandler(statloc);
ed9b544e 1165 }
ed9b544e 1166 }
1167 } else {
1168 /* If there is not a background saving in progress check if
1169 * we have to save now */
1170 time_t now = time(NULL);
1171 for (j = 0; j < server.saveparamslen; j++) {
1172 struct saveparam *sp = server.saveparams+j;
1173
1174 if (server.dirty >= sp->changes &&
1175 now-server.lastsave > sp->seconds) {
1176 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
1177 sp->changes, sp->seconds);
f78fd11b 1178 rdbSaveBackground(server.dbfilename);
ed9b544e 1179 break;
1180 }
1181 }
1182 }
94754ccc 1183
f2324293 1184 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1185 * will use few CPU cycles if there are few expiring keys, otherwise
1186 * it will get more aggressive to avoid that too much memory is used by
1187 * keys that can be removed from the keyspace. */
94754ccc 1188 for (j = 0; j < server.dbnum; j++) {
f2324293 1189 int expired;
94754ccc 1190 redisDb *db = server.db+j;
94754ccc 1191
f2324293 1192 /* Continue to expire if at the end of the cycle more than 25%
1193 * of the keys were expired. */
1194 do {
4ef8de8a 1195 long num = dictSize(db->expires);
94754ccc 1196 time_t now = time(NULL);
1197
f2324293 1198 expired = 0;
94754ccc 1199 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
1200 num = REDIS_EXPIRELOOKUPS_PER_CRON;
1201 while (num--) {
1202 dictEntry *de;
1203 time_t t;
1204
1205 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
1206 t = (time_t) dictGetEntryVal(de);
1207 if (now > t) {
1208 deleteKey(db,dictGetEntryKey(de));
f2324293 1209 expired++;
94754ccc 1210 }
1211 }
f2324293 1212 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
94754ccc 1213 }
1214
4ef8de8a 1215 /* Swap a few keys on disk if we are over the memory limit and VM
f870935d 1216 * is enbled. Try to free objects from the free list first. */
7e69548d 1217 if (vmCanSwapOut()) {
1218 while (server.vm_enabled && zmalloc_used_memory() >
f870935d 1219 server.vm_max_memory)
1220 {
1221 if (listLength(server.objfreelist)) {
1222 freeOneObjectFromFreelist();
1223 } else if (vmSwapOneObject() == REDIS_ERR) {
1224 if ((loops % 30) == 0 && zmalloc_used_memory() >
7e69548d 1225 (server.vm_max_memory+server.vm_max_memory/10)) {
1226 redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1227 }
1228 break;
1229 }
4ef8de8a 1230 }
1231 }
1232
ed9b544e 1233 /* Check if we should connect to a MASTER */
1234 if (server.replstate == REDIS_REPL_CONNECT) {
1235 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
1236 if (syncWithMaster() == REDIS_OK) {
1237 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
1238 }
1239 }
1240 return 1000;
1241}
1242
1243static void createSharedObjects(void) {
1244 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1245 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1246 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 1247 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1248 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1249 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1250 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1251 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1252 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 1253 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
6e469882 1254 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
ed9b544e 1255 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1256 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 1257 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1258 "-ERR no such key\r\n"));
ed9b544e 1259 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1260 "-ERR syntax error\r\n"));
c937aa89 1261 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1262 "-ERR source and destination objects are the same\r\n"));
1263 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1264 "-ERR index out of range\r\n"));
ed9b544e 1265 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1266 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1267 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1268 shared.select0 = createStringObject("select 0\r\n",10);
1269 shared.select1 = createStringObject("select 1\r\n",10);
1270 shared.select2 = createStringObject("select 2\r\n",10);
1271 shared.select3 = createStringObject("select 3\r\n",10);
1272 shared.select4 = createStringObject("select 4\r\n",10);
1273 shared.select5 = createStringObject("select 5\r\n",10);
1274 shared.select6 = createStringObject("select 6\r\n",10);
1275 shared.select7 = createStringObject("select 7\r\n",10);
1276 shared.select8 = createStringObject("select 8\r\n",10);
1277 shared.select9 = createStringObject("select 9\r\n",10);
1278}
1279
1280static void appendServerSaveParams(time_t seconds, int changes) {
1281 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1282 server.saveparams[server.saveparamslen].seconds = seconds;
1283 server.saveparams[server.saveparamslen].changes = changes;
1284 server.saveparamslen++;
1285}
1286
bcfc686d 1287static void resetServerSaveParams() {
ed9b544e 1288 zfree(server.saveparams);
1289 server.saveparams = NULL;
1290 server.saveparamslen = 0;
1291}
1292
1293static void initServerConfig() {
1294 server.dbnum = REDIS_DEFAULT_DBNUM;
1295 server.port = REDIS_SERVERPORT;
f870935d 1296 server.verbosity = REDIS_VERBOSE;
ed9b544e 1297 server.maxidletime = REDIS_MAXIDLETIME;
1298 server.saveparams = NULL;
1299 server.logfile = NULL; /* NULL = log on standard output */
1300 server.bindaddr = NULL;
1301 server.glueoutputbuf = 1;
1302 server.daemonize = 0;
44b38ef4 1303 server.appendonly = 0;
4e141d5a 1304 server.appendfsync = APPENDFSYNC_ALWAYS;
48f0308a 1305 server.lastfsync = time(NULL);
44b38ef4 1306 server.appendfd = -1;
1307 server.appendseldb = -1; /* Make sure the first time will not match */
ed329fcf 1308 server.pidfile = "/var/run/redis.pid";
ed9b544e 1309 server.dbfilename = "dump.rdb";
9d65a1bb 1310 server.appendfilename = "appendonly.aof";
abcb223e 1311 server.requirepass = NULL;
10c43610 1312 server.shareobjects = 0;
b0553789 1313 server.rdbcompression = 1;
21aecf4b 1314 server.sharingpoolsize = 1024;
285add55 1315 server.maxclients = 0;
f86a74e9 1316 server.blockedclients = 0;
3fd78bcd 1317 server.maxmemory = 0;
75680a3c 1318 server.vm_enabled = 0;
1319 server.vm_page_size = 256; /* 256 bytes per page */
1320 server.vm_pages = 1024*1024*100; /* 104 millions of pages */
1321 server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
92f8e882 1322 server.vm_max_threads = 4;
75680a3c 1323
bcfc686d 1324 resetServerSaveParams();
ed9b544e 1325
1326 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1327 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1328 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1329 /* Replication related */
1330 server.isslave = 0;
d0ccebcf 1331 server.masterauth = NULL;
ed9b544e 1332 server.masterhost = NULL;
1333 server.masterport = 6379;
1334 server.master = NULL;
1335 server.replstate = REDIS_REPL_NONE;
a7866db6 1336
1337 /* Double constants initialization */
1338 R_Zero = 0.0;
1339 R_PosInf = 1.0/R_Zero;
1340 R_NegInf = -1.0/R_Zero;
1341 R_Nan = R_Zero/R_Zero;
ed9b544e 1342}
1343
1344static void initServer() {
1345 int j;
1346
1347 signal(SIGHUP, SIG_IGN);
1348 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1349 setupSigSegvAction();
ed9b544e 1350
1351 server.clients = listCreate();
1352 server.slaves = listCreate();
87eca727 1353 server.monitors = listCreate();
ed9b544e 1354 server.objfreelist = listCreate();
1355 createSharedObjects();
1356 server.el = aeCreateEventLoop();
3305306f 1357 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
10c43610 1358 server.sharingpool = dictCreate(&setDictType,NULL);
ed9b544e 1359 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1360 if (server.fd == -1) {
1361 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1362 exit(1);
1363 }
3305306f 1364 for (j = 0; j < server.dbnum; j++) {
1365 server.db[j].dict = dictCreate(&hashDictType,NULL);
1366 server.db[j].expires = dictCreate(&setDictType,NULL);
4409877e 1367 server.db[j].blockingkeys = dictCreate(&keylistDictType,NULL);
3305306f 1368 server.db[j].id = j;
1369 }
ed9b544e 1370 server.cronloops = 0;
9f3c422c 1371 server.bgsavechildpid = -1;
9d65a1bb 1372 server.bgrewritechildpid = -1;
1373 server.bgrewritebuf = sdsempty();
ed9b544e 1374 server.lastsave = time(NULL);
1375 server.dirty = 0;
1376 server.usedmemory = 0;
1377 server.stat_numcommands = 0;
1378 server.stat_numconnections = 0;
1379 server.stat_starttime = time(NULL);
3a66edc7 1380 server.unixtime = time(NULL);
d8f8b666 1381 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
44b38ef4 1382
1383 if (server.appendonly) {
71eba477 1384 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
44b38ef4 1385 if (server.appendfd == -1) {
1386 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1387 strerror(errno));
1388 exit(1);
1389 }
1390 }
75680a3c 1391
1392 if (server.vm_enabled) vmInit();
ed9b544e 1393}
1394
1395/* Empty the whole database */
ca37e9cd 1396static long long emptyDb() {
ed9b544e 1397 int j;
ca37e9cd 1398 long long removed = 0;
ed9b544e 1399
3305306f 1400 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1401 removed += dictSize(server.db[j].dict);
3305306f 1402 dictEmpty(server.db[j].dict);
1403 dictEmpty(server.db[j].expires);
1404 }
ca37e9cd 1405 return removed;
ed9b544e 1406}
1407
/* Convert a "yes"/"no" string (compared case insensitively) to 1/0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1413
ed9b544e 1414/* I agree, this is a very rudimental way to load a configuration...
1415 will improve later if the config gets more complex */
1416static void loadServerConfig(char *filename) {
c9a111ac 1417 FILE *fp;
ed9b544e 1418 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1419 int linenum = 0;
1420 sds line = NULL;
c9a111ac 1421
1422 if (filename[0] == '-' && filename[1] == '\0')
1423 fp = stdin;
1424 else {
1425 if ((fp = fopen(filename,"r")) == NULL) {
1426 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1427 exit(1);
1428 }
ed9b544e 1429 }
c9a111ac 1430
ed9b544e 1431 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1432 sds *argv;
1433 int argc, j;
1434
1435 linenum++;
1436 line = sdsnew(buf);
1437 line = sdstrim(line," \t\r\n");
1438
1439 /* Skip comments and blank lines*/
1440 if (line[0] == '#' || line[0] == '\0') {
1441 sdsfree(line);
1442 continue;
1443 }
1444
1445 /* Split into arguments */
1446 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1447 sdstolower(argv[0]);
1448
1449 /* Execute config directives */
bb0b03a3 1450 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1451 server.maxidletime = atoi(argv[1]);
0150db36 1452 if (server.maxidletime < 0) {
ed9b544e 1453 err = "Invalid timeout value"; goto loaderr;
1454 }
bb0b03a3 1455 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1456 server.port = atoi(argv[1]);
1457 if (server.port < 1 || server.port > 65535) {
1458 err = "Invalid port"; goto loaderr;
1459 }
bb0b03a3 1460 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1461 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1462 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1463 int seconds = atoi(argv[1]);
1464 int changes = atoi(argv[2]);
1465 if (seconds < 1 || changes < 0) {
1466 err = "Invalid save parameters"; goto loaderr;
1467 }
1468 appendServerSaveParams(seconds,changes);
bb0b03a3 1469 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1470 if (chdir(argv[1]) == -1) {
1471 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1472 argv[1], strerror(errno));
1473 exit(1);
1474 }
bb0b03a3 1475 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1476 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
f870935d 1477 else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
bb0b03a3 1478 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1479 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1480 else {
1481 err = "Invalid log level. Must be one of debug, notice, warning";
1482 goto loaderr;
1483 }
bb0b03a3 1484 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1485 FILE *logfp;
ed9b544e 1486
1487 server.logfile = zstrdup(argv[1]);
bb0b03a3 1488 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1489 zfree(server.logfile);
1490 server.logfile = NULL;
1491 }
1492 if (server.logfile) {
1493 /* Test if we are able to open the file. The server will not
1494 * be able to abort just for this problem later... */
c9a111ac 1495 logfp = fopen(server.logfile,"a");
1496 if (logfp == NULL) {
ed9b544e 1497 err = sdscatprintf(sdsempty(),
1498 "Can't open the log file: %s", strerror(errno));
1499 goto loaderr;
1500 }
c9a111ac 1501 fclose(logfp);
ed9b544e 1502 }
bb0b03a3 1503 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1504 server.dbnum = atoi(argv[1]);
1505 if (server.dbnum < 1) {
1506 err = "Invalid number of databases"; goto loaderr;
1507 }
285add55 1508 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1509 server.maxclients = atoi(argv[1]);
3fd78bcd 1510 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
d4465900 1511 server.maxmemory = strtoll(argv[1], NULL, 10);
bb0b03a3 1512 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1513 server.masterhost = sdsnew(argv[1]);
1514 server.masterport = atoi(argv[2]);
1515 server.replstate = REDIS_REPL_CONNECT;
d0ccebcf 1516 } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
1517 server.masterauth = zstrdup(argv[1]);
bb0b03a3 1518 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1519 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1520 err = "argument must be 'yes' or 'no'"; goto loaderr;
1521 }
bb0b03a3 1522 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
85dd2f3a 1523 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
10c43610 1524 err = "argument must be 'yes' or 'no'"; goto loaderr;
1525 }
121f70cf 1526 } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
1527 if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
1528 err = "argument must be 'yes' or 'no'"; goto loaderr;
1529 }
e52c65b9 1530 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1531 server.sharingpoolsize = atoi(argv[1]);
1532 if (server.sharingpoolsize < 1) {
1533 err = "invalid object sharing pool size"; goto loaderr;
1534 }
bb0b03a3 1535 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1536 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1537 err = "argument must be 'yes' or 'no'"; goto loaderr;
1538 }
44b38ef4 1539 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1540 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1541 err = "argument must be 'yes' or 'no'"; goto loaderr;
1542 }
48f0308a 1543 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1766c6da 1544 if (!strcasecmp(argv[1],"no")) {
48f0308a 1545 server.appendfsync = APPENDFSYNC_NO;
1766c6da 1546 } else if (!strcasecmp(argv[1],"always")) {
48f0308a 1547 server.appendfsync = APPENDFSYNC_ALWAYS;
1766c6da 1548 } else if (!strcasecmp(argv[1],"everysec")) {
48f0308a 1549 server.appendfsync = APPENDFSYNC_EVERYSEC;
1550 } else {
1551 err = "argument must be 'no', 'always' or 'everysec'";
1552 goto loaderr;
1553 }
bb0b03a3 1554 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
abcb223e 1555 server.requirepass = zstrdup(argv[1]);
bb0b03a3 1556 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
ed329fcf 1557 server.pidfile = zstrdup(argv[1]);
bb0b03a3 1558 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
b8b553c8 1559 server.dbfilename = zstrdup(argv[1]);
75680a3c 1560 } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
1561 if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
1562 err = "argument must be 'yes' or 'no'"; goto loaderr;
1563 }
4ef8de8a 1564 } else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
1565 server.vm_max_memory = strtoll(argv[1], NULL, 10);
1566 } else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
1567 server.vm_page_size = strtoll(argv[1], NULL, 10);
1568 } else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
1569 server.vm_pages = strtoll(argv[1], NULL, 10);
92f8e882 1570 } else if (!strcasecmp(argv[0],"vm-max-threads") && argc == 2) {
1571 server.vm_max_threads = strtoll(argv[1], NULL, 10);
ed9b544e 1572 } else {
1573 err = "Bad directive or wrong number of arguments"; goto loaderr;
1574 }
1575 for (j = 0; j < argc; j++)
1576 sdsfree(argv[j]);
1577 zfree(argv);
1578 sdsfree(line);
1579 }
c9a111ac 1580 if (fp != stdin) fclose(fp);
ed9b544e 1581 return;
1582
1583loaderr:
1584 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1585 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1586 fprintf(stderr, ">>> '%s'\n", line);
1587 fprintf(stderr, "%s\n", err);
1588 exit(1);
1589}
1590
1591static void freeClientArgv(redisClient *c) {
1592 int j;
1593
1594 for (j = 0; j < c->argc; j++)
1595 decrRefCount(c->argv[j]);
e8a74421 1596 for (j = 0; j < c->mbargc; j++)
1597 decrRefCount(c->mbargv[j]);
ed9b544e 1598 c->argc = 0;
e8a74421 1599 c->mbargc = 0;
ed9b544e 1600}
1601
1602static void freeClient(redisClient *c) {
1603 listNode *ln;
1604
4409877e 1605 /* Note that if the client we are freeing is blocked into a blocking
1606 * call, we have to set querybuf to NULL *before* to call unblockClient()
1607 * to avoid processInputBuffer() will get called. Also it is important
1608 * to remove the file events after this, because this call adds
1609 * the READABLE event. */
1610 sdsfree(c->querybuf);
1611 c->querybuf = NULL;
1612 if (c->flags & REDIS_BLOCKED)
1613 unblockClient(c);
1614
ed9b544e 1615 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1616 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
ed9b544e 1617 listRelease(c->reply);
1618 freeClientArgv(c);
1619 close(c->fd);
92f8e882 1620 /* Remove from the list of clients */
ed9b544e 1621 ln = listSearchKey(server.clients,c);
dfc5e96c 1622 redisAssert(ln != NULL);
ed9b544e 1623 listDelNode(server.clients,ln);
92f8e882 1624 /* Remove from the list of clients waiting for VM operations */
1625 if (server.vm_enabled && listLength(c->io_keys)) {
1626 ln = listSearchKey(server.io_clients,c);
1627 if (ln) listDelNode(server.io_clients,ln);
1628 listRelease(c->io_keys);
1629 }
1630 /* Other cleanup */
ed9b544e 1631 if (c->flags & REDIS_SLAVE) {
6208b3a7 1632 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1633 close(c->repldbfd);
87eca727 1634 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1635 ln = listSearchKey(l,c);
dfc5e96c 1636 redisAssert(ln != NULL);
87eca727 1637 listDelNode(l,ln);
ed9b544e 1638 }
1639 if (c->flags & REDIS_MASTER) {
1640 server.master = NULL;
1641 server.replstate = REDIS_REPL_CONNECT;
1642 }
93ea3759 1643 zfree(c->argv);
e8a74421 1644 zfree(c->mbargv);
6e469882 1645 freeClientMultiState(c);
ed9b544e 1646 zfree(c);
1647}
1648
cc30e368 1649#define GLUEREPLY_UP_TO (1024)
ed9b544e 1650static void glueReplyBuffersIfNeeded(redisClient *c) {
c28b42ac 1651 int copylen = 0;
1652 char buf[GLUEREPLY_UP_TO];
6208b3a7 1653 listNode *ln;
ed9b544e 1654 robj *o;
1655
6208b3a7 1656 listRewind(c->reply);
1657 while((ln = listYield(c->reply))) {
c28b42ac 1658 int objlen;
1659
ed9b544e 1660 o = ln->value;
c28b42ac 1661 objlen = sdslen(o->ptr);
1662 if (copylen + objlen <= GLUEREPLY_UP_TO) {
1663 memcpy(buf+copylen,o->ptr,objlen);
1664 copylen += objlen;
ed9b544e 1665 listDelNode(c->reply,ln);
c28b42ac 1666 } else {
1667 if (copylen == 0) return;
1668 break;
ed9b544e 1669 }
ed9b544e 1670 }
c28b42ac 1671 /* Now the output buffer is empty, add the new single element */
1672 o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
1673 listAddNodeHead(c->reply,o);
ed9b544e 1674}
1675
1676static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1677 redisClient *c = privdata;
1678 int nwritten = 0, totwritten = 0, objlen;
1679 robj *o;
1680 REDIS_NOTUSED(el);
1681 REDIS_NOTUSED(mask);
1682
2895e862 1683 /* Use writev() if we have enough buffers to send */
7ea870c0 1684 if (!server.glueoutputbuf &&
1685 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
1686 !(c->flags & REDIS_MASTER))
2895e862 1687 {
1688 sendReplyToClientWritev(el, fd, privdata, mask);
1689 return;
1690 }
2895e862 1691
ed9b544e 1692 while(listLength(c->reply)) {
c28b42ac 1693 if (server.glueoutputbuf && listLength(c->reply) > 1)
1694 glueReplyBuffersIfNeeded(c);
1695
ed9b544e 1696 o = listNodeValue(listFirst(c->reply));
1697 objlen = sdslen(o->ptr);
1698
1699 if (objlen == 0) {
1700 listDelNode(c->reply,listFirst(c->reply));
1701 continue;
1702 }
1703
1704 if (c->flags & REDIS_MASTER) {
6f376729 1705 /* Don't reply to a master */
ed9b544e 1706 nwritten = objlen - c->sentlen;
1707 } else {
a4d1ba9a 1708 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 1709 if (nwritten <= 0) break;
1710 }
1711 c->sentlen += nwritten;
1712 totwritten += nwritten;
1713 /* If we fully sent the object on head go to the next one */
1714 if (c->sentlen == objlen) {
1715 listDelNode(c->reply,listFirst(c->reply));
1716 c->sentlen = 0;
1717 }
6f376729 1718 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
12f9d551 1719 * bytes, in a single threaded server it's a good idea to serve
6f376729 1720 * other clients as well, even if a very large request comes from
1721 * super fast link that is always able to accept data (in real world
12f9d551 1722 * scenario think about 'KEYS *' against the loopback interfae) */
6f376729 1723 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 1724 }
1725 if (nwritten == -1) {
1726 if (errno == EAGAIN) {
1727 nwritten = 0;
1728 } else {
f870935d 1729 redisLog(REDIS_VERBOSE,
ed9b544e 1730 "Error writing to client: %s", strerror(errno));
1731 freeClient(c);
1732 return;
1733 }
1734 }
1735 if (totwritten > 0) c->lastinteraction = time(NULL);
1736 if (listLength(c->reply) == 0) {
1737 c->sentlen = 0;
1738 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1739 }
1740}
1741
2895e862 1742static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
1743{
1744 redisClient *c = privdata;
1745 int nwritten = 0, totwritten = 0, objlen, willwrite;
1746 robj *o;
1747 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
1748 int offset, ion = 0;
1749 REDIS_NOTUSED(el);
1750 REDIS_NOTUSED(mask);
1751
1752 listNode *node;
1753 while (listLength(c->reply)) {
1754 offset = c->sentlen;
1755 ion = 0;
1756 willwrite = 0;
1757
1758 /* fill-in the iov[] array */
1759 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
1760 o = listNodeValue(node);
1761 objlen = sdslen(o->ptr);
1762
1763 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
1764 break;
1765
1766 if(ion == REDIS_WRITEV_IOVEC_COUNT)
1767 break; /* no more iovecs */
1768
1769 iov[ion].iov_base = ((char*)o->ptr) + offset;
1770 iov[ion].iov_len = objlen - offset;
1771 willwrite += objlen - offset;
1772 offset = 0; /* just for the first item */
1773 ion++;
1774 }
1775
1776 if(willwrite == 0)
1777 break;
1778
1779 /* write all collected blocks at once */
1780 if((nwritten = writev(fd, iov, ion)) < 0) {
1781 if (errno != EAGAIN) {
f870935d 1782 redisLog(REDIS_VERBOSE,
2895e862 1783 "Error writing to client: %s", strerror(errno));
1784 freeClient(c);
1785 return;
1786 }
1787 break;
1788 }
1789
1790 totwritten += nwritten;
1791 offset = c->sentlen;
1792
1793 /* remove written robjs from c->reply */
1794 while (nwritten && listLength(c->reply)) {
1795 o = listNodeValue(listFirst(c->reply));
1796 objlen = sdslen(o->ptr);
1797
1798 if(nwritten >= objlen - offset) {
1799 listDelNode(c->reply, listFirst(c->reply));
1800 nwritten -= objlen - offset;
1801 c->sentlen = 0;
1802 } else {
1803 /* partial write */
1804 c->sentlen += nwritten;
1805 break;
1806 }
1807 offset = 0;
1808 }
1809 }
1810
1811 if (totwritten > 0)
1812 c->lastinteraction = time(NULL);
1813
1814 if (listLength(c->reply) == 0) {
1815 c->sentlen = 0;
1816 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1817 }
1818}
1819
ed9b544e 1820static struct redisCommand *lookupCommand(char *name) {
1821 int j = 0;
1822 while(cmdTable[j].name != NULL) {
bb0b03a3 1823 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
ed9b544e 1824 j++;
1825 }
1826 return NULL;
1827}
1828
1829/* resetClient prepare the client to process the next command */
1830static void resetClient(redisClient *c) {
1831 freeClientArgv(c);
1832 c->bulklen = -1;
e8a74421 1833 c->multibulk = 0;
ed9b544e 1834}
1835
6e469882 1836/* Call() is the core of Redis execution of a command */
1837static void call(redisClient *c, struct redisCommand *cmd) {
1838 long long dirty;
1839
1840 dirty = server.dirty;
1841 cmd->proc(c);
1842 if (server.appendonly && server.dirty-dirty)
1843 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
1844 if (server.dirty-dirty && listLength(server.slaves))
1845 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
1846 if (listLength(server.monitors))
1847 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
1848 server.stat_numcommands++;
1849}
1850
ed9b544e 1851/* If this function gets called we already read a whole
1852 * command, argments are in the client argv/argc fields.
1853 * processCommand() execute the command or prepare the
1854 * server for a bulk read from the client.
1855 *
1856 * If 1 is returned the client is still alive and valid and
1857 * and other operations can be performed by the caller. Otherwise
1858 * if 0 is returned the client was destroied (i.e. after QUIT). */
1859static int processCommand(redisClient *c) {
1860 struct redisCommand *cmd;
ed9b544e 1861
3fd78bcd 1862 /* Free some memory if needed (maxmemory setting) */
1863 if (server.maxmemory) freeMemoryIfNeeded();
1864
e8a74421 1865 /* Handle the multi bulk command type. This is an alternative protocol
1866 * supported by Redis in order to receive commands that are composed of
1867 * multiple binary-safe "bulk" arguments. The latency of processing is
1868 * a bit higher but this allows things like multi-sets, so if this
1869 * protocol is used only for MSET and similar commands this is a big win. */
1870 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1871 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1872 if (c->multibulk <= 0) {
1873 resetClient(c);
1874 return 1;
1875 } else {
1876 decrRefCount(c->argv[c->argc-1]);
1877 c->argc--;
1878 return 1;
1879 }
1880 } else if (c->multibulk) {
1881 if (c->bulklen == -1) {
1882 if (((char*)c->argv[0]->ptr)[0] != '$') {
1883 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1884 resetClient(c);
1885 return 1;
1886 } else {
1887 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1888 decrRefCount(c->argv[0]);
1889 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1890 c->argc--;
1891 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1892 resetClient(c);
1893 return 1;
1894 }
1895 c->argc--;
1896 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1897 return 1;
1898 }
1899 } else {
1900 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1901 c->mbargv[c->mbargc] = c->argv[0];
1902 c->mbargc++;
1903 c->argc--;
1904 c->multibulk--;
1905 if (c->multibulk == 0) {
1906 robj **auxargv;
1907 int auxargc;
1908
1909 /* Here we need to swap the multi-bulk argc/argv with the
1910 * normal argc/argv of the client structure. */
1911 auxargv = c->argv;
1912 c->argv = c->mbargv;
1913 c->mbargv = auxargv;
1914
1915 auxargc = c->argc;
1916 c->argc = c->mbargc;
1917 c->mbargc = auxargc;
1918
1919 /* We need to set bulklen to something different than -1
1920 * in order for the code below to process the command without
1921 * to try to read the last argument of a bulk command as
1922 * a special argument. */
1923 c->bulklen = 0;
1924 /* continue below and process the command */
1925 } else {
1926 c->bulklen = -1;
1927 return 1;
1928 }
1929 }
1930 }
1931 /* -- end of multi bulk commands processing -- */
1932
ed9b544e 1933 /* The QUIT command is handled as a special case. Normal command
1934 * procs are unable to close the client connection safely */
bb0b03a3 1935 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 1936 freeClient(c);
1937 return 0;
1938 }
1939 cmd = lookupCommand(c->argv[0]->ptr);
1940 if (!cmd) {
2c14807b 1941 addReplySds(c,
1942 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
1943 (char*)c->argv[0]->ptr));
ed9b544e 1944 resetClient(c);
1945 return 1;
1946 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1947 (c->argc < -cmd->arity)) {
454d4e43 1948 addReplySds(c,
1949 sdscatprintf(sdsempty(),
1950 "-ERR wrong number of arguments for '%s' command\r\n",
1951 cmd->name));
ed9b544e 1952 resetClient(c);
1953 return 1;
3fd78bcd 1954 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1955 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1956 resetClient(c);
1957 return 1;
ed9b544e 1958 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1959 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1960
1961 decrRefCount(c->argv[c->argc-1]);
1962 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1963 c->argc--;
1964 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1965 resetClient(c);
1966 return 1;
1967 }
1968 c->argc--;
1969 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1970 /* It is possible that the bulk read is already in the
8d0490e7 1971 * buffer. Check this condition and handle it accordingly.
1972 * This is just a fast path, alternative to call processInputBuffer().
1973 * It's a good idea since the code is small and this condition
1974 * happens most of the times. */
ed9b544e 1975 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1976 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1977 c->argc++;
1978 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1979 } else {
1980 return 1;
1981 }
1982 }
10c43610 1983 /* Let's try to share objects on the command arguments vector */
1984 if (server.shareobjects) {
1985 int j;
1986 for(j = 1; j < c->argc; j++)
1987 c->argv[j] = tryObjectSharing(c->argv[j]);
1988 }
942a3961 1989 /* Let's try to encode the bulk object to save space. */
1990 if (cmd->flags & REDIS_CMD_BULK)
1991 tryObjectEncoding(c->argv[c->argc-1]);
1992
e63943a4 1993 /* Check if the user is authenticated */
1994 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1995 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1996 resetClient(c);
1997 return 1;
1998 }
1999
ed9b544e 2000 /* Exec the command */
6e469882 2001 if (c->flags & REDIS_MULTI && cmd->proc != execCommand) {
2002 queueMultiCommand(c,cmd);
2003 addReply(c,shared.queued);
2004 } else {
2005 call(c,cmd);
2006 }
ed9b544e 2007
2008 /* Prepare the client for the next command */
2009 if (c->flags & REDIS_CLOSE) {
2010 freeClient(c);
2011 return 0;
2012 }
2013 resetClient(c);
2014 return 1;
2015}
2016
87eca727 2017static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6208b3a7 2018 listNode *ln;
ed9b544e 2019 int outc = 0, j;
93ea3759 2020 robj **outv;
2021 /* (args*2)+1 is enough room for args, spaces, newlines */
2022 robj *static_outv[REDIS_STATIC_ARGS*2+1];
2023
2024 if (argc <= REDIS_STATIC_ARGS) {
2025 outv = static_outv;
2026 } else {
2027 outv = zmalloc(sizeof(robj*)*(argc*2+1));
93ea3759 2028 }
ed9b544e 2029
2030 for (j = 0; j < argc; j++) {
2031 if (j != 0) outv[outc++] = shared.space;
2032 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
2033 robj *lenobj;
2034
2035 lenobj = createObject(REDIS_STRING,
682ac724 2036 sdscatprintf(sdsempty(),"%lu\r\n",
83c6a618 2037 (unsigned long) stringObjectLen(argv[j])));
ed9b544e 2038 lenobj->refcount = 0;
2039 outv[outc++] = lenobj;
2040 }
2041 outv[outc++] = argv[j];
2042 }
2043 outv[outc++] = shared.crlf;
2044
40d224a9 2045 /* Increment all the refcounts at start and decrement at end in order to
2046 * be sure to free objects if there is no slave in a replication state
2047 * able to be feed with commands */
2048 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
6208b3a7 2049 listRewind(slaves);
2050 while((ln = listYield(slaves))) {
ed9b544e 2051 redisClient *slave = ln->value;
40d224a9 2052
2053 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 2054 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 2055
2056 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 2057 if (slave->slaveseldb != dictid) {
2058 robj *selectcmd;
2059
2060 switch(dictid) {
2061 case 0: selectcmd = shared.select0; break;
2062 case 1: selectcmd = shared.select1; break;
2063 case 2: selectcmd = shared.select2; break;
2064 case 3: selectcmd = shared.select3; break;
2065 case 4: selectcmd = shared.select4; break;
2066 case 5: selectcmd = shared.select5; break;
2067 case 6: selectcmd = shared.select6; break;
2068 case 7: selectcmd = shared.select7; break;
2069 case 8: selectcmd = shared.select8; break;
2070 case 9: selectcmd = shared.select9; break;
2071 default:
2072 selectcmd = createObject(REDIS_STRING,
2073 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
2074 selectcmd->refcount = 0;
2075 break;
2076 }
2077 addReply(slave,selectcmd);
2078 slave->slaveseldb = dictid;
2079 }
2080 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 2081 }
40d224a9 2082 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 2083 if (outv != static_outv) zfree(outv);
ed9b544e 2084}
2085
638e42ac 2086static void processInputBuffer(redisClient *c) {
ed9b544e 2087again:
4409877e 2088 /* Before to process the input buffer, make sure the client is not
2089 * waitig for a blocking operation such as BLPOP. Note that the first
2090 * iteration the client is never blocked, otherwise the processInputBuffer
2091 * would not be called at all, but after the execution of the first commands
2092 * in the input buffer the client may be blocked, and the "goto again"
2093 * will try to reiterate. The following line will make it return asap. */
92f8e882 2094 if (c->flags & REDIS_BLOCKED || c->flags & REDIS_IO_WAIT) return;
ed9b544e 2095 if (c->bulklen == -1) {
2096 /* Read the first line of the query */
2097 char *p = strchr(c->querybuf,'\n');
2098 size_t querylen;
644fafa3 2099
ed9b544e 2100 if (p) {
2101 sds query, *argv;
2102 int argc, j;
2103
2104 query = c->querybuf;
2105 c->querybuf = sdsempty();
2106 querylen = 1+(p-(query));
2107 if (sdslen(query) > querylen) {
2108 /* leave data after the first line of the query in the buffer */
2109 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
2110 }
2111 *p = '\0'; /* remove "\n" */
2112 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
2113 sdsupdatelen(query);
2114
2115 /* Now we can split the query in arguments */
ed9b544e 2116 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 2117 sdsfree(query);
2118
2119 if (c->argv) zfree(c->argv);
2120 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 2121
2122 for (j = 0; j < argc; j++) {
ed9b544e 2123 if (sdslen(argv[j])) {
2124 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
2125 c->argc++;
2126 } else {
2127 sdsfree(argv[j]);
2128 }
2129 }
2130 zfree(argv);
7c49733c 2131 if (c->argc) {
2132 /* Execute the command. If the client is still valid
2133 * after processCommand() return and there is something
2134 * on the query buffer try to process the next command. */
2135 if (processCommand(c) && sdslen(c->querybuf)) goto again;
2136 } else {
2137 /* Nothing to process, argc == 0. Just process the query
2138 * buffer if it's not empty or return to the caller */
2139 if (sdslen(c->querybuf)) goto again;
2140 }
ed9b544e 2141 return;
644fafa3 2142 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
f870935d 2143 redisLog(REDIS_VERBOSE, "Client protocol error");
ed9b544e 2144 freeClient(c);
2145 return;
2146 }
2147 } else {
2148 /* Bulk read handling. Note that if we are at this point
2149 the client already sent a command terminated with a newline,
2150 we are reading the bulk data that is actually the last
2151 argument of the command. */
2152 int qbl = sdslen(c->querybuf);
2153
2154 if (c->bulklen <= qbl) {
2155 /* Copy everything but the final CRLF as final argument */
2156 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
2157 c->argc++;
2158 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 2159 /* Process the command. If the client is still valid after
2160 * the processing and there is more data in the buffer
2161 * try to parse it. */
2162 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 2163 return;
2164 }
2165 }
2166}
2167
638e42ac 2168static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
2169 redisClient *c = (redisClient*) privdata;
2170 char buf[REDIS_IOBUF_LEN];
2171 int nread;
2172 REDIS_NOTUSED(el);
2173 REDIS_NOTUSED(mask);
2174
2175 nread = read(fd, buf, REDIS_IOBUF_LEN);
2176 if (nread == -1) {
2177 if (errno == EAGAIN) {
2178 nread = 0;
2179 } else {
f870935d 2180 redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
638e42ac 2181 freeClient(c);
2182 return;
2183 }
2184 } else if (nread == 0) {
f870935d 2185 redisLog(REDIS_VERBOSE, "Client closed connection");
638e42ac 2186 freeClient(c);
2187 return;
2188 }
2189 if (nread) {
2190 c->querybuf = sdscatlen(c->querybuf, buf, nread);
2191 c->lastinteraction = time(NULL);
2192 } else {
2193 return;
2194 }
2195 processInputBuffer(c);
2196}
2197
ed9b544e 2198static int selectDb(redisClient *c, int id) {
2199 if (id < 0 || id >= server.dbnum)
2200 return REDIS_ERR;
3305306f 2201 c->db = &server.db[id];
ed9b544e 2202 return REDIS_OK;
2203}
2204
40d224a9 2205static void *dupClientReplyValue(void *o) {
2206 incrRefCount((robj*)o);
2207 return 0;
2208}
2209
ed9b544e 2210static redisClient *createClient(int fd) {
2211 redisClient *c = zmalloc(sizeof(*c));
2212
2213 anetNonBlock(NULL,fd);
2214 anetTcpNoDelay(NULL,fd);
2215 if (!c) return NULL;
2216 selectDb(c,0);
2217 c->fd = fd;
2218 c->querybuf = sdsempty();
2219 c->argc = 0;
93ea3759 2220 c->argv = NULL;
ed9b544e 2221 c->bulklen = -1;
e8a74421 2222 c->multibulk = 0;
2223 c->mbargc = 0;
2224 c->mbargv = NULL;
ed9b544e 2225 c->sentlen = 0;
2226 c->flags = 0;
2227 c->lastinteraction = time(NULL);
abcb223e 2228 c->authenticated = 0;
40d224a9 2229 c->replstate = REDIS_REPL_NONE;
6b47e12e 2230 c->reply = listCreate();
ed9b544e 2231 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 2232 listSetDupMethod(c->reply,dupClientReplyValue);
92f8e882 2233 c->blockingkeys = NULL;
2234 c->blockingkeysnum = 0;
2235 c->io_keys = listCreate();
2236 listSetFreeMethod(c->io_keys,decrRefCount);
ed9b544e 2237 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
266373b2 2238 readQueryFromClient, c) == AE_ERR) {
ed9b544e 2239 freeClient(c);
2240 return NULL;
2241 }
6b47e12e 2242 listAddNodeTail(server.clients,c);
6e469882 2243 initClientMultiState(c);
ed9b544e 2244 return c;
2245}
2246
2247static void addReply(redisClient *c, robj *obj) {
2248 if (listLength(c->reply) == 0 &&
6208b3a7 2249 (c->replstate == REDIS_REPL_NONE ||
2250 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 2251 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
266373b2 2252 sendReplyToClient, c) == AE_ERR) return;
e3cadb8a 2253
2254 if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) {
2255 obj = dupStringObject(obj);
2256 obj->refcount = 0; /* getDecodedObject() will increment the refcount */
2257 }
9d65a1bb 2258 listAddNodeTail(c->reply,getDecodedObject(obj));
ed9b544e 2259}
2260
2261static void addReplySds(redisClient *c, sds s) {
2262 robj *o = createObject(REDIS_STRING,s);
2263 addReply(c,o);
2264 decrRefCount(o);
2265}
2266
e2665397 2267static void addReplyDouble(redisClient *c, double d) {
2268 char buf[128];
2269
2270 snprintf(buf,sizeof(buf),"%.17g",d);
682ac724 2271 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
83c6a618 2272 (unsigned long) strlen(buf),buf));
e2665397 2273}
2274
942a3961 2275static void addReplyBulkLen(redisClient *c, robj *obj) {
2276 size_t len;
2277
2278 if (obj->encoding == REDIS_ENCODING_RAW) {
2279 len = sdslen(obj->ptr);
2280 } else {
2281 long n = (long)obj->ptr;
2282
e054afda 2283 /* Compute how many bytes will take this integer as a radix 10 string */
942a3961 2284 len = 1;
2285 if (n < 0) {
2286 len++;
2287 n = -n;
2288 }
2289 while((n = n/10) != 0) {
2290 len++;
2291 }
2292 }
83c6a618 2293 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len));
942a3961 2294}
2295
ed9b544e 2296static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
2297 int cport, cfd;
2298 char cip[128];
285add55 2299 redisClient *c;
ed9b544e 2300 REDIS_NOTUSED(el);
2301 REDIS_NOTUSED(mask);
2302 REDIS_NOTUSED(privdata);
2303
2304 cfd = anetAccept(server.neterr, fd, cip, &cport);
2305 if (cfd == AE_ERR) {
f870935d 2306 redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
ed9b544e 2307 return;
2308 }
f870935d 2309 redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
285add55 2310 if ((c = createClient(cfd)) == NULL) {
ed9b544e 2311 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
2312 close(cfd); /* May be already closed, just ingore errors */
2313 return;
2314 }
285add55 2315 /* If maxclient directive is set and this is one client more... close the
2316 * connection. Note that we create the client instead to check before
2317 * for this condition, since now the socket is already set in nonblocking
2318 * mode and we can send an error for free using the Kernel I/O */
2319 if (server.maxclients && listLength(server.clients) > server.maxclients) {
2320 char *err = "-ERR max number of clients reached\r\n";
2321
2322 /* That's a best effort error message, don't check write errors */
fee803ba 2323 if (write(c->fd,err,strlen(err)) == -1) {
2324 /* Nothing to do, Just to avoid the warning... */
2325 }
285add55 2326 freeClient(c);
2327 return;
2328 }
ed9b544e 2329 server.stat_numconnections++;
2330}
2331
2332/* ======================= Redis objects implementation ===================== */
2333
2334static robj *createObject(int type, void *ptr) {
2335 robj *o;
2336
2337 if (listLength(server.objfreelist)) {
2338 listNode *head = listFirst(server.objfreelist);
2339 o = listNodeValue(head);
2340 listDelNode(server.objfreelist,head);
2341 } else {
75680a3c 2342 if (server.vm_enabled) {
2343 o = zmalloc(sizeof(*o));
2344 } else {
2345 o = zmalloc(sizeof(*o)-sizeof(struct redisObjectVM));
2346 }
ed9b544e 2347 }
ed9b544e 2348 o->type = type;
942a3961 2349 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 2350 o->ptr = ptr;
2351 o->refcount = 1;
3a66edc7 2352 if (server.vm_enabled) {
2353 o->vm.atime = server.unixtime;
2354 o->storage = REDIS_VM_MEMORY;
2355 }
ed9b544e 2356 return o;
2357}
2358
2359static robj *createStringObject(char *ptr, size_t len) {
2360 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
2361}
2362
4ef8de8a 2363static robj *dupStringObject(robj *o) {
2364 return createStringObject(o->ptr,sdslen(o->ptr));
2365}
2366
ed9b544e 2367static robj *createListObject(void) {
2368 list *l = listCreate();
2369
ed9b544e 2370 listSetFreeMethod(l,decrRefCount);
2371 return createObject(REDIS_LIST,l);
2372}
2373
2374static robj *createSetObject(void) {
2375 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 2376 return createObject(REDIS_SET,d);
2377}
2378
1812e024 2379static robj *createZsetObject(void) {
6b47e12e 2380 zset *zs = zmalloc(sizeof(*zs));
2381
2382 zs->dict = dictCreate(&zsetDictType,NULL);
2383 zs->zsl = zslCreate();
2384 return createObject(REDIS_ZSET,zs);
1812e024 2385}
2386
ed9b544e 2387static void freeStringObject(robj *o) {
942a3961 2388 if (o->encoding == REDIS_ENCODING_RAW) {
2389 sdsfree(o->ptr);
2390 }
ed9b544e 2391}
2392
2393static void freeListObject(robj *o) {
2394 listRelease((list*) o->ptr);
2395}
2396
2397static void freeSetObject(robj *o) {
2398 dictRelease((dict*) o->ptr);
2399}
2400
fd8ccf44 2401static void freeZsetObject(robj *o) {
2402 zset *zs = o->ptr;
2403
2404 dictRelease(zs->dict);
2405 zslFree(zs->zsl);
2406 zfree(zs);
2407}
2408
ed9b544e 2409static void freeHashObject(robj *o) {
2410 dictRelease((dict*) o->ptr);
2411}
2412
2413static void incrRefCount(robj *o) {
f2b8ab34 2414 redisAssert(!server.vm_enabled || o->storage == REDIS_VM_MEMORY);
ed9b544e 2415 o->refcount++;
2416}
2417
2418static void decrRefCount(void *obj) {
2419 robj *o = obj;
94754ccc 2420
a35ddf12 2421 /* REDIS_VM_SWAPPED */
2422 if (server.vm_enabled && o->storage == REDIS_VM_SWAPPED) {
f2b8ab34 2423 redisAssert(o->refcount == 1);
2424 redisAssert(o->type == REDIS_STRING);
a35ddf12 2425 freeStringObject(o);
2426 vmMarkPagesFree(o->vm.page,o->vm.usedpages);
2427 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2428 !listAddNodeHead(server.objfreelist,o))
2429 zfree(o);
7d98e08c 2430 server.vm_stats_swapped_objects--;
a35ddf12 2431 return;
2432 }
2433 /* REDIS_VM_MEMORY */
ed9b544e 2434 if (--(o->refcount) == 0) {
2435 switch(o->type) {
2436 case REDIS_STRING: freeStringObject(o); break;
2437 case REDIS_LIST: freeListObject(o); break;
2438 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 2439 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 2440 case REDIS_HASH: freeHashObject(o); break;
dfc5e96c 2441 default: redisAssert(0 != 0); break;
ed9b544e 2442 }
2443 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2444 !listAddNodeHead(server.objfreelist,o))
2445 zfree(o);
2446 }
2447}
2448
942a3961 2449static robj *lookupKey(redisDb *db, robj *key) {
2450 dictEntry *de = dictFind(db->dict,key);
3a66edc7 2451 if (de) {
55cf8433 2452 robj *key = dictGetEntryKey(de);
2453 robj *val = dictGetEntryVal(de);
3a66edc7 2454
55cf8433 2455 if (server.vm_enabled) {
2456 if (key->storage == REDIS_VM_MEMORY) {
2457 /* Update the access time of the key for the aging algorithm. */
2458 key->vm.atime = server.unixtime;
2459 } else {
2460 /* Our value was swapped on disk. Bring it at home. */
f2b8ab34 2461 redisAssert(val == NULL);
55cf8433 2462 val = vmLoadObject(key);
2463 dictGetEntryVal(de) = val;
2464 }
2465 }
2466 return val;
3a66edc7 2467 } else {
2468 return NULL;
2469 }
942a3961 2470}
2471
2472static robj *lookupKeyRead(redisDb *db, robj *key) {
2473 expireIfNeeded(db,key);
2474 return lookupKey(db,key);
2475}
2476
2477static robj *lookupKeyWrite(redisDb *db, robj *key) {
2478 deleteIfVolatile(db,key);
2479 return lookupKey(db,key);
2480}
2481
2482static int deleteKey(redisDb *db, robj *key) {
2483 int retval;
2484
2485 /* We need to protect key from destruction: after the first dictDelete()
2486 * it may happen that 'key' is no longer valid if we don't increment
2487 * it's count. This may happen when we get the object reference directly
2488 * from the hash table with dictRandomKey() or dict iterators */
2489 incrRefCount(key);
2490 if (dictSize(db->expires)) dictDelete(db->expires,key);
2491 retval = dictDelete(db->dict,key);
2492 decrRefCount(key);
2493
2494 return retval == DICT_OK;
2495}
2496
10c43610 2497/* Try to share an object against the shared objects pool */
2498static robj *tryObjectSharing(robj *o) {
2499 struct dictEntry *de;
2500 unsigned long c;
2501
3305306f 2502 if (o == NULL || server.shareobjects == 0) return o;
10c43610 2503
dfc5e96c 2504 redisAssert(o->type == REDIS_STRING);
10c43610 2505 de = dictFind(server.sharingpool,o);
2506 if (de) {
2507 robj *shared = dictGetEntryKey(de);
2508
2509 c = ((unsigned long) dictGetEntryVal(de))+1;
2510 dictGetEntryVal(de) = (void*) c;
2511 incrRefCount(shared);
2512 decrRefCount(o);
2513 return shared;
2514 } else {
2515 /* Here we are using a stream algorihtm: Every time an object is
2516 * shared we increment its count, everytime there is a miss we
2517 * recrement the counter of a random object. If this object reaches
2518 * zero we remove the object and put the current object instead. */
3305306f 2519 if (dictSize(server.sharingpool) >=
10c43610 2520 server.sharingpoolsize) {
2521 de = dictGetRandomKey(server.sharingpool);
dfc5e96c 2522 redisAssert(de != NULL);
10c43610 2523 c = ((unsigned long) dictGetEntryVal(de))-1;
2524 dictGetEntryVal(de) = (void*) c;
2525 if (c == 0) {
2526 dictDelete(server.sharingpool,de->key);
2527 }
2528 } else {
2529 c = 0; /* If the pool is empty we want to add this object */
2530 }
2531 if (c == 0) {
2532 int retval;
2533
2534 retval = dictAdd(server.sharingpool,o,(void*)1);
dfc5e96c 2535 redisAssert(retval == DICT_OK);
10c43610 2536 incrRefCount(o);
2537 }
2538 return o;
2539 }
2540}
2541
724a51b1 2542/* Check if the nul-terminated string 's' can be represented by a long
2543 * (that is, is a number that fits into long without any other space or
2544 * character before or after the digits).
2545 *
2546 * If so, the function returns REDIS_OK and *longval is set to the value
2547 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 2548static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 2549 char buf[32], *endptr;
2550 long value;
2551 int slen;
2552
2553 value = strtol(s, &endptr, 10);
2554 if (endptr[0] != '\0') return REDIS_ERR;
2555 slen = snprintf(buf,32,"%ld",value);
2556
2557 /* If the number converted back into a string is not identical
2558 * then it's not possible to encode the string as integer */
f69f2cba 2559 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 2560 if (longval) *longval = value;
2561 return REDIS_OK;
2562}
2563
942a3961 2564/* Try to encode a string object in order to save space */
2565static int tryObjectEncoding(robj *o) {
2566 long value;
942a3961 2567 sds s = o->ptr;
3305306f 2568
942a3961 2569 if (o->encoding != REDIS_ENCODING_RAW)
2570 return REDIS_ERR; /* Already encoded */
3305306f 2571
942a3961 2572 /* It's not save to encode shared objects: shared objects can be shared
2573 * everywhere in the "object space" of Redis. Encoded objects can only
2574 * appear as "values" (and not, for instance, as keys) */
2575 if (o->refcount > 1) return REDIS_ERR;
3305306f 2576
942a3961 2577 /* Currently we try to encode only strings */
dfc5e96c 2578 redisAssert(o->type == REDIS_STRING);
94754ccc 2579
724a51b1 2580 /* Check if we can represent this string as a long integer */
2581 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
942a3961 2582
2583 /* Ok, this object can be encoded */
2584 o->encoding = REDIS_ENCODING_INT;
2585 sdsfree(o->ptr);
2586 o->ptr = (void*) value;
2587 return REDIS_OK;
2588}
2589
9d65a1bb 2590/* Get a decoded version of an encoded object (returned as a new object).
2591 * If the object is already raw-encoded just increment the ref count. */
2592static robj *getDecodedObject(robj *o) {
942a3961 2593 robj *dec;
2594
9d65a1bb 2595 if (o->encoding == REDIS_ENCODING_RAW) {
2596 incrRefCount(o);
2597 return o;
2598 }
942a3961 2599 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2600 char buf[32];
2601
2602 snprintf(buf,32,"%ld",(long)o->ptr);
2603 dec = createStringObject(buf,strlen(buf));
2604 return dec;
2605 } else {
dfc5e96c 2606 redisAssert(1 != 1);
942a3961 2607 }
3305306f 2608}
2609
d7f43c08 2610/* Compare two string objects via strcmp() or alike.
2611 * Note that the objects may be integer-encoded. In such a case we
2612 * use snprintf() to get a string representation of the numbers on the stack
1fd9bc8a 2613 * and compare the strings, it's much faster than calling getDecodedObject().
2614 *
2615 * Important note: if objects are not integer encoded, but binary-safe strings,
2616 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
2617 * binary safe. */
724a51b1 2618static int compareStringObjects(robj *a, robj *b) {
dfc5e96c 2619 redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 2620 char bufa[128], bufb[128], *astr, *bstr;
2621 int bothsds = 1;
724a51b1 2622
e197b441 2623 if (a == b) return 0;
d7f43c08 2624 if (a->encoding != REDIS_ENCODING_RAW) {
2625 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2626 astr = bufa;
2627 bothsds = 0;
724a51b1 2628 } else {
d7f43c08 2629 astr = a->ptr;
724a51b1 2630 }
d7f43c08 2631 if (b->encoding != REDIS_ENCODING_RAW) {
2632 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2633 bstr = bufb;
2634 bothsds = 0;
2635 } else {
2636 bstr = b->ptr;
2637 }
2638 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 2639}
2640
0ea663ea 2641static size_t stringObjectLen(robj *o) {
dfc5e96c 2642 redisAssert(o->type == REDIS_STRING);
0ea663ea 2643 if (o->encoding == REDIS_ENCODING_RAW) {
2644 return sdslen(o->ptr);
2645 } else {
2646 char buf[32];
2647
2648 return snprintf(buf,32,"%ld",(long)o->ptr);
2649 }
2650}
2651
06233c45 2652/*============================ RDB saving/loading =========================== */
ed9b544e 2653
/* Write the single byte 'type' to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
2658
/* Write the time 't' to 'fp' as a 4 byte signed integer, in the byte order
 * of the running machine. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t seconds = (int32_t) t;

    return (fwrite(&seconds,4,1,fp) == 0) ? -1 : 0;
}
2664
e3566d4b 2665/* check rdbLoadLen() comments for more info */
f78fd11b 2666static int rdbSaveLen(FILE *fp, uint32_t len) {
2667 unsigned char buf[2];
2668
2669 if (len < (1<<6)) {
2670 /* Save a 6 bit len */
10c43610 2671 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 2672 if (fwrite(buf,1,1,fp) == 0) return -1;
2673 } else if (len < (1<<14)) {
2674 /* Save a 14 bit len */
10c43610 2675 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 2676 buf[1] = len&0xFF;
17be1a4a 2677 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 2678 } else {
2679 /* Save a 32 bit len */
10c43610 2680 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 2681 if (fwrite(buf,1,1,fp) == 0) return -1;
2682 len = htonl(len);
2683 if (fwrite(&len,4,1,fp) == 0) return -1;
2684 }
2685 return 0;
2686}
2687
e3566d4b 2688/* String objects in the form "2391" "-100" without any space and with a
2689 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2690 * encoded as integers to save space */
56906eef 2691static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
e3566d4b 2692 long long value;
2693 char *endptr, buf[32];
2694
2695 /* Check if it's possible to encode this value as a number */
2696 value = strtoll(s, &endptr, 10);
2697 if (endptr[0] != '\0') return 0;
2698 snprintf(buf,32,"%lld",value);
2699
2700 /* If the number converted back into a string is not identical
2701 * then it's not possible to encode the string as integer */
2702 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2703
2704 /* Finally check if it fits in our ranges */
2705 if (value >= -(1<<7) && value <= (1<<7)-1) {
2706 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2707 enc[1] = value&0xFF;
2708 return 2;
2709 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2710 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2711 enc[1] = value&0xFF;
2712 enc[2] = (value>>8)&0xFF;
2713 return 3;
2714 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2715 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2716 enc[1] = value&0xFF;
2717 enc[2] = (value>>8)&0xFF;
2718 enc[3] = (value>>16)&0xFF;
2719 enc[4] = (value>>24)&0xFF;
2720 return 5;
2721 } else {
2722 return 0;
2723 }
2724}
2725
774e3047 2726static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2727 unsigned int comprlen, outlen;
2728 unsigned char byte;
2729 void *out;
2730
2731 /* We require at least four bytes compression for this to be worth it */
2732 outlen = sdslen(obj->ptr)-4;
2733 if (outlen <= 0) return 0;
3a2694c4 2734 if ((out = zmalloc(outlen+1)) == NULL) return 0;
774e3047 2735 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2736 if (comprlen == 0) {
88e85998 2737 zfree(out);
774e3047 2738 return 0;
2739 }
2740 /* Data compressed! Let's save it on disk */
2741 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2742 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2743 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2744 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2745 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 2746 zfree(out);
774e3047 2747 return comprlen;
2748
2749writeerr:
88e85998 2750 zfree(out);
774e3047 2751 return -1;
2752}
2753
e3566d4b 2754/* Save a string objet as [len][data] on disk. If the object is a string
2755 * representation of an integer value we try to safe it in a special form */
942a3961 2756static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2757 size_t len;
e3566d4b 2758 int enclen;
10c43610 2759
942a3961 2760 len = sdslen(obj->ptr);
2761
774e3047 2762 /* Try integer encoding */
e3566d4b 2763 if (len <= 11) {
2764 unsigned char buf[5];
2765 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2766 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2767 return 0;
2768 }
2769 }
774e3047 2770
2771 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 2772 * aaaaaaaaaaaaaaaaaa so skip it */
121f70cf 2773 if (server.rdbcompression && len > 20) {
774e3047 2774 int retval;
2775
2776 retval = rdbSaveLzfStringObject(fp,obj);
2777 if (retval == -1) return -1;
2778 if (retval > 0) return 0;
2779 /* retval == 0 means data can't be compressed, save the old way */
2780 }
2781
2782 /* Store verbatim */
10c43610 2783 if (rdbSaveLen(fp,len) == -1) return -1;
2784 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2785 return 0;
2786}
2787
942a3961 2788/* Like rdbSaveStringObjectRaw() but handle encoded objects */
2789static int rdbSaveStringObject(FILE *fp, robj *obj) {
2790 int retval;
942a3961 2791
9d65a1bb 2792 obj = getDecodedObject(obj);
2793 retval = rdbSaveStringObjectRaw(fp,obj);
2794 decrRefCount(obj);
2795 return retval;
942a3961 2796}
2797
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Returns 0 on success, -1 on write error.
 */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int outlen = 1; /* The special values take the prefix byte only */

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        /* The text goes after the prefix byte, that holds its length. */
        snprintf((char*)out+1,sizeof(out)-1,"%.17g",val);
        out[0] = strlen((char*)out+1);
        outlen = out[0]+1;
    }
    return (fwrite(out,outlen,1,fp) == 0) ? -1 : 0;
}
2824
06233c45 2825/* Save a Redis object. */
2826static int rdbSaveObject(FILE *fp, robj *o) {
2827 if (o->type == REDIS_STRING) {
2828 /* Save a string value */
2829 if (rdbSaveStringObject(fp,o) == -1) return -1;
2830 } else if (o->type == REDIS_LIST) {
2831 /* Save a list value */
2832 list *list = o->ptr;
2833 listNode *ln;
2834
2835 listRewind(list);
2836 if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
2837 while((ln = listYield(list))) {
2838 robj *eleobj = listNodeValue(ln);
2839
2840 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
2841 }
2842 } else if (o->type == REDIS_SET) {
2843 /* Save a set value */
2844 dict *set = o->ptr;
2845 dictIterator *di = dictGetIterator(set);
2846 dictEntry *de;
2847
2848 if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
2849 while((de = dictNext(di)) != NULL) {
2850 robj *eleobj = dictGetEntryKey(de);
2851
2852 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
2853 }
2854 dictReleaseIterator(di);
2855 } else if (o->type == REDIS_ZSET) {
2856 /* Save a set value */
2857 zset *zs = o->ptr;
2858 dictIterator *di = dictGetIterator(zs->dict);
2859 dictEntry *de;
2860
2861 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) return -1;
2862 while((de = dictNext(di)) != NULL) {
2863 robj *eleobj = dictGetEntryKey(de);
2864 double *score = dictGetEntryVal(de);
2865
2866 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
2867 if (rdbSaveDoubleValue(fp,*score) == -1) return -1;
2868 }
2869 dictReleaseIterator(di);
2870 } else {
2871 redisAssert(0 != 0);
2872 }
2873 return 0;
2874}
2875
2876/* Return the length the object will have on disk if saved with
2877 * the rdbSaveObject() function. Currently we use a trick to get
2878 * this length with very little changes to the code. In the future
2879 * we could switch to a faster solution. */
2880static off_t rdbSavedObjectLen(robj *o) {
2881 static FILE *fp = NULL;
2882
2883 if (fp == NULL) fp = fopen("/dev/null","w");
2884 assert(fp != NULL);
2885
2886 rewind(fp);
2887 assert(rdbSaveObject(fp,o) != 1);
2888 return ftello(fp);
2889}
2890
06224fec 2891/* Return the number of pages required to save this object in the swap file */
2892static off_t rdbSavedObjectPages(robj *o) {
2893 off_t bytes = rdbSavedObjectLen(o);
2894
2895 return (bytes+(server.vm_page_size-1))/server.vm_page_size;
2896}
2897
ed9b544e 2898/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 2899static int rdbSave(char *filename) {
ed9b544e 2900 dictIterator *di = NULL;
2901 dictEntry *de;
ed9b544e 2902 FILE *fp;
2903 char tmpfile[256];
2904 int j;
bb32ede5 2905 time_t now = time(NULL);
ed9b544e 2906
a3b21203 2907 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 2908 fp = fopen(tmpfile,"w");
2909 if (!fp) {
2910 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2911 return REDIS_ERR;
2912 }
f78fd11b 2913 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 2914 for (j = 0; j < server.dbnum; j++) {
bb32ede5 2915 redisDb *db = server.db+j;
2916 dict *d = db->dict;
3305306f 2917 if (dictSize(d) == 0) continue;
ed9b544e 2918 di = dictGetIterator(d);
2919 if (!di) {
2920 fclose(fp);
2921 return REDIS_ERR;
2922 }
2923
2924 /* Write the SELECT DB opcode */
f78fd11b 2925 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2926 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 2927
2928 /* Iterate this DB writing every entry */
2929 while((de = dictNext(di)) != NULL) {
2930 robj *key = dictGetEntryKey(de);
2931 robj *o = dictGetEntryVal(de);
bb32ede5 2932 time_t expiretime = getExpire(db,key);
2933
2934 /* Save the expire time */
2935 if (expiretime != -1) {
2936 /* If this key is already expired skip it */
2937 if (expiretime < now) continue;
2938 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2939 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2940 }
7e69548d 2941 /* Save the key and associated value. This requires special
2942 * handling if the value is swapped out. */
38823f08 2943 if (!server.vm_enabled || key->storage == REDIS_VM_MEMORY) {
7e69548d 2944 /* Save type, key, value */
2945 if (rdbSaveType(fp,o->type) == -1) goto werr;
2946 if (rdbSaveStringObject(fp,key) == -1) goto werr;
2947 if (rdbSaveObject(fp,o) == -1) goto werr;
2948 } else {
2949 robj *po, *newkey;
2950 /* Get a preview of the object in memory */
2951 po = vmPreviewObject(key);
2952 /* Also duplicate the key object, to pass around a standard
2953 * string object. */
2954 newkey = dupStringObject(key);
2955 /* Save type, key, value */
2956 if (rdbSaveType(fp,key->vtype) == -1) goto werr;
2957 if (rdbSaveStringObject(fp,newkey) == -1) goto werr;
2958 if (rdbSaveObject(fp,po) == -1) goto werr;
2959 /* Remove the loaded object from memory */
2960 decrRefCount(po);
2961 decrRefCount(newkey);
2962 }
ed9b544e 2963 }
2964 dictReleaseIterator(di);
2965 }
2966 /* EOF opcode */
f78fd11b 2967 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2968
2969 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 2970 fflush(fp);
2971 fsync(fileno(fp));
2972 fclose(fp);
2973
2974 /* Use RENAME to make sure the DB file is changed atomically only
2975 * if the generate DB file is ok. */
2976 if (rename(tmpfile,filename) == -1) {
325d1eb4 2977 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 2978 unlink(tmpfile);
2979 return REDIS_ERR;
2980 }
2981 redisLog(REDIS_NOTICE,"DB saved on disk");
2982 server.dirty = 0;
2983 server.lastsave = time(NULL);
2984 return REDIS_OK;
2985
2986werr:
2987 fclose(fp);
2988 unlink(tmpfile);
2989 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2990 if (di) dictReleaseIterator(di);
2991 return REDIS_ERR;
2992}
2993
f78fd11b 2994static int rdbSaveBackground(char *filename) {
ed9b544e 2995 pid_t childpid;
2996
9d65a1bb 2997 if (server.bgsavechildpid != -1) return REDIS_ERR;
ed9b544e 2998 if ((childpid = fork()) == 0) {
2999 /* Child */
3000 close(server.fd);
f78fd11b 3001 if (rdbSave(filename) == REDIS_OK) {
ed9b544e 3002 exit(0);
3003 } else {
3004 exit(1);
3005 }
3006 } else {
3007 /* Parent */
5a7c647e 3008 if (childpid == -1) {
3009 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
3010 strerror(errno));
3011 return REDIS_ERR;
3012 }
ed9b544e 3013 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
9f3c422c 3014 server.bgsavechildpid = childpid;
ed9b544e 3015 return REDIS_OK;
3016 }
3017 return REDIS_OK; /* unreached */
3018}
3019
/* Remove the temp file "temp-<childpid>.rdb", the name used by rdbSave()
 * when running in the process with pid 'childpid'. The result of unlink()
 * is not checked. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
3026
/* Read a single byte from 'fp' and return it as a non negative integer.
 * Returns -1 if the byte can't be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
3032
/* Read a time saved by rdbSaveTime(): a 4 byte signed integer in the byte
 * order of the running machine. Returns -1 if the value can't be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stored;

    if (fread(&stored,4,1,fp) == 0) return -1;
    return (time_t) stored;
}
3038
e3566d4b 3039/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3040 * of this file for a description of how this are stored on disk.
3041 *
3042 * isencoded is set to 1 if the readed length is not actually a length but
3043 * an "encoding type", check the above comments for more info */
c78a8ccc 3044static uint32_t rdbLoadLen(FILE *fp, int *isencoded) {
f78fd11b 3045 unsigned char buf[2];
3046 uint32_t len;
c78a8ccc 3047 int type;
f78fd11b 3048
e3566d4b 3049 if (isencoded) *isencoded = 0;
c78a8ccc 3050 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
3051 type = (buf[0]&0xC0)>>6;
3052 if (type == REDIS_RDB_6BITLEN) {
3053 /* Read a 6 bit len */
3054 return buf[0]&0x3F;
3055 } else if (type == REDIS_RDB_ENCVAL) {
3056 /* Read a 6 bit len encoding type */
3057 if (isencoded) *isencoded = 1;
3058 return buf[0]&0x3F;
3059 } else if (type == REDIS_RDB_14BITLEN) {
3060 /* Read a 14 bit len */
3061 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
3062 return ((buf[0]&0x3F)<<8)|buf[1];
3063 } else {
3064 /* Read a 32 bit len */
f78fd11b 3065 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
3066 return ntohl(len);
f78fd11b 3067 }
f78fd11b 3068}
3069
e3566d4b 3070static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
3071 unsigned char enc[4];
3072 long long val;
3073
3074 if (enctype == REDIS_RDB_ENC_INT8) {
3075 if (fread(enc,1,1,fp) == 0) return NULL;
3076 val = (signed char)enc[0];
3077 } else if (enctype == REDIS_RDB_ENC_INT16) {
3078 uint16_t v;
3079 if (fread(enc,2,1,fp) == 0) return NULL;
3080 v = enc[0]|(enc[1]<<8);
3081 val = (int16_t)v;
3082 } else if (enctype == REDIS_RDB_ENC_INT32) {
3083 uint32_t v;
3084 if (fread(enc,4,1,fp) == 0) return NULL;
3085 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
3086 val = (int32_t)v;
3087 } else {
3088 val = 0; /* anti-warning */
dfc5e96c 3089 redisAssert(0!=0);
e3566d4b 3090 }
3091 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
3092}
3093
c78a8ccc 3094static robj *rdbLoadLzfStringObject(FILE*fp) {
88e85998 3095 unsigned int len, clen;
3096 unsigned char *c = NULL;
3097 sds val = NULL;
3098
c78a8ccc 3099 if ((clen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
3100 if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
88e85998 3101 if ((c = zmalloc(clen)) == NULL) goto err;
3102 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
3103 if (fread(c,clen,1,fp) == 0) goto err;
3104 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 3105 zfree(c);
88e85998 3106 return createObject(REDIS_STRING,val);
3107err:
3108 zfree(c);
3109 sdsfree(val);
3110 return NULL;
3111}
3112
c78a8ccc 3113static robj *rdbLoadStringObject(FILE*fp) {
e3566d4b 3114 int isencoded;
3115 uint32_t len;
f78fd11b 3116 sds val;
3117
c78a8ccc 3118 len = rdbLoadLen(fp,&isencoded);
e3566d4b 3119 if (isencoded) {
3120 switch(len) {
3121 case REDIS_RDB_ENC_INT8:
3122 case REDIS_RDB_ENC_INT16:
3123 case REDIS_RDB_ENC_INT32:
3305306f 3124 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
88e85998 3125 case REDIS_RDB_ENC_LZF:
c78a8ccc 3126 return tryObjectSharing(rdbLoadLzfStringObject(fp));
e3566d4b 3127 default:
dfc5e96c 3128 redisAssert(0!=0);
e3566d4b 3129 }
3130 }
3131
f78fd11b 3132 if (len == REDIS_RDB_LENERR) return NULL;
3133 val = sdsnewlen(NULL,len);
3134 if (len && fread(val,len,1,fp) == 0) {
3135 sdsfree(val);
3136 return NULL;
3137 }
10c43610 3138 return tryObjectSharing(createObject(REDIS_STRING,val));
f78fd11b 3139}
3140
a7866db6 3141/* For information about double serialization check rdbSaveDoubleValue() */
3142static int rdbLoadDoubleValue(FILE *fp, double *val) {
3143 char buf[128];
3144 unsigned char len;
3145
3146 if (fread(&len,1,1,fp) == 0) return -1;
3147 switch(len) {
3148 case 255: *val = R_NegInf; return 0;
3149 case 254: *val = R_PosInf; return 0;
3150 case 253: *val = R_Nan; return 0;
3151 default:
3152 if (fread(buf,len,1,fp) == 0) return -1;
231d758e 3153 buf[len] = '\0';
a7866db6 3154 sscanf(buf, "%lg", val);
3155 return 0;
3156 }
3157}
3158
c78a8ccc 3159/* Load a Redis object of the specified type from the specified file.
3160 * On success a newly allocated object is returned, otherwise NULL. */
3161static robj *rdbLoadObject(int type, FILE *fp) {
3162 robj *o;
3163
3164 if (type == REDIS_STRING) {
3165 /* Read string value */
3166 if ((o = rdbLoadStringObject(fp)) == NULL) return NULL;
3167 tryObjectEncoding(o);
3168 } else if (type == REDIS_LIST || type == REDIS_SET) {
3169 /* Read list/set value */
3170 uint32_t listlen;
3171
3172 if ((listlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
3173 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
3174 /* Load every single element of the list/set */
3175 while(listlen--) {
3176 robj *ele;
3177
3178 if ((ele = rdbLoadStringObject(fp)) == NULL) return NULL;
3179 tryObjectEncoding(ele);
3180 if (type == REDIS_LIST) {
3181 listAddNodeTail((list*)o->ptr,ele);
3182 } else {
3183 dictAdd((dict*)o->ptr,ele,NULL);
3184 }
3185 }
3186 } else if (type == REDIS_ZSET) {
3187 /* Read list/set value */
3188 uint32_t zsetlen;
3189 zset *zs;
3190
3191 if ((zsetlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
3192 o = createZsetObject();
3193 zs = o->ptr;
3194 /* Load every single element of the list/set */
3195 while(zsetlen--) {
3196 robj *ele;
3197 double *score = zmalloc(sizeof(double));
3198
3199 if ((ele = rdbLoadStringObject(fp)) == NULL) return NULL;
3200 tryObjectEncoding(ele);
3201 if (rdbLoadDoubleValue(fp,score) == -1) return NULL;
3202 dictAdd(zs->dict,ele,score);
3203 zslInsert(zs->zsl,*score,ele);
3204 incrRefCount(ele); /* added to skiplist */
3205 }
3206 } else {
3207 redisAssert(0 != 0);
3208 }
3209 return o;
3210}
3211
f78fd11b 3212static int rdbLoad(char *filename) {
ed9b544e 3213 FILE *fp;
f78fd11b 3214 robj *keyobj = NULL;
3215 uint32_t dbid;
bb32ede5 3216 int type, retval, rdbver;
3305306f 3217 dict *d = server.db[0].dict;
bb32ede5 3218 redisDb *db = server.db+0;
f78fd11b 3219 char buf[1024];
bb32ede5 3220 time_t expiretime = -1, now = time(NULL);
b492cf00 3221 long long loadedkeys = 0;
bb32ede5 3222
ed9b544e 3223 fp = fopen(filename,"r");
3224 if (!fp) return REDIS_ERR;
3225 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 3226 buf[9] = '\0';
3227 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 3228 fclose(fp);
3229 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
3230 return REDIS_ERR;
3231 }
f78fd11b 3232 rdbver = atoi(buf+5);
c78a8ccc 3233 if (rdbver != 1) {
f78fd11b 3234 fclose(fp);
3235 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
3236 return REDIS_ERR;
3237 }
ed9b544e 3238 while(1) {
3239 robj *o;
3240
3241 /* Read type. */
f78fd11b 3242 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 3243 if (type == REDIS_EXPIRETIME) {
3244 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
3245 /* We read the time so we need to read the object type again */
3246 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
3247 }
ed9b544e 3248 if (type == REDIS_EOF) break;
3249 /* Handle SELECT DB opcode as a special case */
3250 if (type == REDIS_SELECTDB) {
c78a8ccc 3251 if ((dbid = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR)
e3566d4b 3252 goto eoferr;
ed9b544e 3253 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 3254 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 3255 exit(1);
3256 }
bb32ede5 3257 db = server.db+dbid;
3258 d = db->dict;
ed9b544e 3259 continue;
3260 }
3261 /* Read key */
c78a8ccc 3262 if ((keyobj = rdbLoadStringObject(fp)) == NULL) goto eoferr;
3263 /* Read value */
3264 if ((o = rdbLoadObject(type,fp)) == NULL) goto eoferr;
ed9b544e 3265 /* Add the new object in the hash table */
f78fd11b 3266 retval = dictAdd(d,keyobj,o);
ed9b544e 3267 if (retval == DICT_ERR) {
f78fd11b 3268 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
ed9b544e 3269 exit(1);
3270 }
bb32ede5 3271 /* Set the expire time if needed */
3272 if (expiretime != -1) {
3273 setExpire(db,keyobj,expiretime);
3274 /* Delete this key if already expired */
3275 if (expiretime < now) deleteKey(db,keyobj);
3276 expiretime = -1;
3277 }
f78fd11b 3278 keyobj = o = NULL;
b492cf00 3279 /* Handle swapping while loading big datasets when VM is on */
3280 loadedkeys++;
3281 if (server.vm_enabled && (loadedkeys % 5000) == 0) {
3282 while (zmalloc_used_memory() > server.vm_max_memory) {
3283 if (vmSwapOneObject() == REDIS_ERR) break;
3284 }
3285 }
ed9b544e 3286 }
3287 fclose(fp);
3288 return REDIS_OK;
3289
3290eoferr: /* unexpected end of file is handled here with a fatal exit */
e3566d4b 3291 if (keyobj) decrRefCount(keyobj);
f80dff62 3292 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
ed9b544e 3293 exit(1);
3294 return REDIS_ERR; /* Just to avoid warning */
3295}
3296
3297/*================================== Commands =============================== */
3298
abcb223e 3299static void authCommand(redisClient *c) {
2e77c2ee 3300 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
3301 c->authenticated = 1;
3302 addReply(c,shared.ok);
3303 } else {
3304 c->authenticated = 0;
fa4c0aba 3305 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
3306 }
3307}
3308
ed9b544e 3309static void pingCommand(redisClient *c) {
3310 addReply(c,shared.pong);
3311}
3312
3313static void echoCommand(redisClient *c) {
942a3961 3314 addReplyBulkLen(c,c->argv[1]);
ed9b544e 3315 addReply(c,c->argv[1]);
3316 addReply(c,shared.crlf);
3317}
3318
3319/*=================================== Strings =============================== */
3320
3321static void setGenericCommand(redisClient *c, int nx) {
3322 int retval;
3323
333fd216 3324 if (nx) deleteIfVolatile(c->db,c->argv[1]);
3305306f 3325 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 3326 if (retval == DICT_ERR) {
3327 if (!nx) {
1b03836c 3328 /* If the key is about a swapped value, we want a new key object
3329 * to overwrite the old. So we delete the old key in the database.
3330 * This will also make sure that swap pages about the old object
3331 * will be marked as free. */
3332 if (deleteIfSwapped(c->db,c->argv[1]))
3333 incrRefCount(c->argv[1]);
3305306f 3334 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 3335 incrRefCount(c->argv[2]);
3336 } else {
c937aa89 3337 addReply(c,shared.czero);
ed9b544e 3338 return;
3339 }
3340 } else {
3341 incrRefCount(c->argv[1]);
3342 incrRefCount(c->argv[2]);
3343 }
3344 server.dirty++;
3305306f 3345 removeExpire(c->db,c->argv[1]);
c937aa89 3346 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 3347}
3348
3349static void setCommand(redisClient *c) {
a4d1ba9a 3350 setGenericCommand(c,0);
ed9b544e 3351}
3352
3353static void setnxCommand(redisClient *c) {
a4d1ba9a 3354 setGenericCommand(c,1);
ed9b544e 3355}
3356
322fc7d8 3357static int getGenericCommand(redisClient *c) {
3305306f 3358 robj *o = lookupKeyRead(c->db,c->argv[1]);
3359
3360 if (o == NULL) {
c937aa89 3361 addReply(c,shared.nullbulk);
322fc7d8 3362 return REDIS_OK;
ed9b544e 3363 } else {
ed9b544e 3364 if (o->type != REDIS_STRING) {
c937aa89 3365 addReply(c,shared.wrongtypeerr);
322fc7d8 3366 return REDIS_ERR;
ed9b544e 3367 } else {
942a3961 3368 addReplyBulkLen(c,o);
ed9b544e 3369 addReply(c,o);
3370 addReply(c,shared.crlf);
322fc7d8 3371 return REDIS_OK;
ed9b544e 3372 }
3373 }
3374}
3375
322fc7d8 3376static void getCommand(redisClient *c) {
3377 getGenericCommand(c);
3378}
3379
f6b141c5 3380static void getsetCommand(redisClient *c) {
322fc7d8 3381 if (getGenericCommand(c) == REDIS_ERR) return;
a431eb74 3382 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
3383 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
3384 } else {
3385 incrRefCount(c->argv[1]);
3386 }
3387 incrRefCount(c->argv[2]);
3388 server.dirty++;
3389 removeExpire(c->db,c->argv[1]);
3390}
3391
70003d28 3392static void mgetCommand(redisClient *c) {
70003d28 3393 int j;
3394
c937aa89 3395 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 3396 for (j = 1; j < c->argc; j++) {
3305306f 3397 robj *o = lookupKeyRead(c->db,c->argv[j]);
3398 if (o == NULL) {
c937aa89 3399 addReply(c,shared.nullbulk);
70003d28 3400 } else {
70003d28 3401 if (o->type != REDIS_STRING) {
c937aa89 3402 addReply(c,shared.nullbulk);
70003d28 3403 } else {
942a3961 3404 addReplyBulkLen(c,o);
70003d28 3405 addReply(c,o);
3406 addReply(c,shared.crlf);
3407 }
3408 }
3409 }
3410}
3411
6c446631 3412static void msetGenericCommand(redisClient *c, int nx) {
906573e7 3413 int j, busykeys = 0;
6c446631 3414
3415 if ((c->argc % 2) == 0) {
454d4e43 3416 addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
6c446631 3417 return;
3418 }
3419 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3420 * set nothing at all if at least one already key exists. */
3421 if (nx) {
3422 for (j = 1; j < c->argc; j += 2) {
906573e7 3423 if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
3424 busykeys++;
6c446631 3425 }
3426 }
3427 }
906573e7 3428 if (busykeys) {
3429 addReply(c, shared.czero);
3430 return;
3431 }
6c446631 3432
3433 for (j = 1; j < c->argc; j += 2) {
3434 int retval;
3435
17511391 3436 tryObjectEncoding(c->argv[j+1]);
6c446631 3437 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
3438 if (retval == DICT_ERR) {
3439 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
3440 incrRefCount(c->argv[j+1]);
3441 } else {
3442 incrRefCount(c->argv[j]);
3443 incrRefCount(c->argv[j+1]);
3444 }
3445 removeExpire(c->db,c->argv[j]);
3446 }
3447 server.dirty += (c->argc-1)/2;
3448 addReply(c, nx ? shared.cone : shared.ok);
3449}
3450
3451static void msetCommand(redisClient *c) {
3452 msetGenericCommand(c,0);
3453}
3454
3455static void msetnxCommand(redisClient *c) {
3456 msetGenericCommand(c,1);
3457}
3458
d68ed120 3459static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 3460 long long value;
3461 int retval;
3462 robj *o;
3463
3305306f 3464 o = lookupKeyWrite(c->db,c->argv[1]);
3465 if (o == NULL) {
ed9b544e 3466 value = 0;
3467 } else {
ed9b544e 3468 if (o->type != REDIS_STRING) {
3469 value = 0;
3470 } else {
3471 char *eptr;
3472
942a3961 3473 if (o->encoding == REDIS_ENCODING_RAW)
3474 value = strtoll(o->ptr, &eptr, 10);
3475 else if (o->encoding == REDIS_ENCODING_INT)
3476 value = (long)o->ptr;
3477 else
dfc5e96c 3478 redisAssert(1 != 1);
ed9b544e 3479 }
3480 }
3481
3482 value += incr;
3483 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
942a3961 3484 tryObjectEncoding(o);
3305306f 3485 retval = dictAdd(c->db->dict,c->argv[1],o);
ed9b544e 3486 if (retval == DICT_ERR) {
3305306f 3487 dictReplace(c->db->dict,c->argv[1],o);
3488 removeExpire(c->db,c->argv[1]);
ed9b544e 3489 } else {
3490 incrRefCount(c->argv[1]);
3491 }
3492 server.dirty++;
c937aa89 3493 addReply(c,shared.colon);
ed9b544e 3494 addReply(c,o);
3495 addReply(c,shared.crlf);
3496}
3497
3498static void incrCommand(redisClient *c) {
a4d1ba9a 3499 incrDecrCommand(c,1);
ed9b544e 3500}
3501
3502static void decrCommand(redisClient *c) {
a4d1ba9a 3503 incrDecrCommand(c,-1);
ed9b544e 3504}
3505
3506static void incrbyCommand(redisClient *c) {
d68ed120 3507 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3508 incrDecrCommand(c,incr);
ed9b544e 3509}
3510
3511static void decrbyCommand(redisClient *c) {
d68ed120 3512 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3513 incrDecrCommand(c,-incr);
ed9b544e 3514}
3515
3516/* ========================= Type agnostic commands ========================= */
3517
3518static void delCommand(redisClient *c) {
5109cdff 3519 int deleted = 0, j;
3520
3521 for (j = 1; j < c->argc; j++) {
3522 if (deleteKey(c->db,c->argv[j])) {
3523 server.dirty++;
3524 deleted++;
3525 }
3526 }
3527 switch(deleted) {
3528 case 0:
c937aa89 3529 addReply(c,shared.czero);
5109cdff 3530 break;
3531 case 1:
3532 addReply(c,shared.cone);
3533 break;
3534 default:
3535 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
3536 break;
ed9b544e 3537 }
3538}
3539
3540static void existsCommand(redisClient *c) {
3305306f 3541 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
ed9b544e 3542}
3543
3544static void selectCommand(redisClient *c) {
3545 int id = atoi(c->argv[1]->ptr);
3546
3547 if (selectDb(c,id) == REDIS_ERR) {
774e3047 3548 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 3549 } else {
3550 addReply(c,shared.ok);
3551 }
3552}
3553
3554static void randomkeyCommand(redisClient *c) {
3555 dictEntry *de;
3305306f 3556
3557 while(1) {
3558 de = dictGetRandomKey(c->db->dict);
ce7bef07 3559 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3305306f 3560 }
ed9b544e 3561 if (de == NULL) {
ce7bef07 3562 addReply(c,shared.plus);
ed9b544e 3563 addReply(c,shared.crlf);
3564 } else {
c937aa89 3565 addReply(c,shared.plus);
ed9b544e 3566 addReply(c,dictGetEntryKey(de));
3567 addReply(c,shared.crlf);
3568 }
3569}
3570
3571static void keysCommand(redisClient *c) {
3572 dictIterator *di;
3573 dictEntry *de;
3574 sds pattern = c->argv[1]->ptr;
3575 int plen = sdslen(pattern);
682ac724 3576 unsigned long numkeys = 0, keyslen = 0;
ed9b544e 3577 robj *lenobj = createObject(REDIS_STRING,NULL);
3578
3305306f 3579 di = dictGetIterator(c->db->dict);
ed9b544e 3580 addReply(c,lenobj);
3581 decrRefCount(lenobj);
3582 while((de = dictNext(di)) != NULL) {
3583 robj *keyobj = dictGetEntryKey(de);
3305306f 3584
ed9b544e 3585 sds key = keyobj->ptr;
3586 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3587 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3305306f 3588 if (expireIfNeeded(c->db,keyobj) == 0) {
3589 if (numkeys != 0)
3590 addReply(c,shared.space);
3591 addReply(c,keyobj);
3592 numkeys++;
3593 keyslen += sdslen(key);
3594 }
ed9b544e 3595 }
3596 }
3597 dictReleaseIterator(di);
c937aa89 3598 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
ed9b544e 3599 addReply(c,shared.crlf);
3600}
3601
3602static void dbsizeCommand(redisClient *c) {
3603 addReplySds(c,
3305306f 3604 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 3605}
3606
3607static void lastsaveCommand(redisClient *c) {
3608 addReplySds(c,
c937aa89 3609 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 3610}
3611
3612static void typeCommand(redisClient *c) {
3305306f 3613 robj *o;
ed9b544e 3614 char *type;
3305306f 3615
3616 o = lookupKeyRead(c->db,c->argv[1]);
3617 if (o == NULL) {
c937aa89 3618 type = "+none";
ed9b544e 3619 } else {
ed9b544e 3620 switch(o->type) {
c937aa89 3621 case REDIS_STRING: type = "+string"; break;
3622 case REDIS_LIST: type = "+list"; break;
3623 case REDIS_SET: type = "+set"; break;
412a8bce 3624 case REDIS_ZSET: type = "+zset"; break;
ed9b544e 3625 default: type = "unknown"; break;
3626 }
3627 }
3628 addReplySds(c,sdsnew(type));
3629 addReply(c,shared.crlf);
3630}
3631
3632static void saveCommand(redisClient *c) {
9d65a1bb 3633 if (server.bgsavechildpid != -1) {
05557f6d 3634 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3635 return;
3636 }
f78fd11b 3637 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 3638 addReply(c,shared.ok);
3639 } else {
3640 addReply(c,shared.err);
3641 }
3642}
3643
3644static void bgsaveCommand(redisClient *c) {
9d65a1bb 3645 if (server.bgsavechildpid != -1) {
ed9b544e 3646 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3647 return;
3648 }
f78fd11b 3649 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
49b99ab4 3650 char *status = "+Background saving started\r\n";
3651 addReplySds(c,sdsnew(status));
ed9b544e 3652 } else {
3653 addReply(c,shared.err);
3654 }
3655}
3656
3657static void shutdownCommand(redisClient *c) {
3658 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
a3b21203 3659 /* Kill the saving child if there is a background saving in progress.
3660 We want to avoid race conditions, for instance our saving child may
3661 overwrite the synchronous saving did by SHUTDOWN. */
9d65a1bb 3662 if (server.bgsavechildpid != -1) {
9f3c422c 3663 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3664 kill(server.bgsavechildpid,SIGKILL);
a3b21203 3665 rdbRemoveTempFile(server.bgsavechildpid);
9f3c422c 3666 }
ac945e2d 3667 if (server.appendonly) {
3668 /* Append only file: fsync() the AOF and exit */
3669 fsync(server.appendfd);
3670 exit(0);
ed9b544e 3671 } else {
ac945e2d 3672 /* Snapshotting. Perform a SYNC SAVE and exit */
3673 if (rdbSave(server.dbfilename) == REDIS_OK) {
3674 if (server.daemonize)
3675 unlink(server.pidfile);
3676 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
3677 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3678 exit(0);
3679 } else {
3680 /* Ooops.. error saving! The best we can do is to continue operating.
3681 * Note that if there was a background saving process, in the next
3682 * cron() Redis will be notified that the background saving aborted,
3683 * handling special stuff like slaves pending for synchronization... */
3684 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3685 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3686 }
ed9b544e 3687 }
3688}
3689
3690static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 3691 robj *o;
3692
3693 /* To use the same key as src and dst is probably an error */
3694 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 3695 addReply(c,shared.sameobjecterr);
ed9b544e 3696 return;
3697 }
3698
3305306f 3699 o = lookupKeyWrite(c->db,c->argv[1]);
3700 if (o == NULL) {
c937aa89 3701 addReply(c,shared.nokeyerr);
ed9b544e 3702 return;
3703 }
ed9b544e 3704 incrRefCount(o);
3305306f 3705 deleteIfVolatile(c->db,c->argv[2]);
3706 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
ed9b544e 3707 if (nx) {
3708 decrRefCount(o);
c937aa89 3709 addReply(c,shared.czero);
ed9b544e 3710 return;
3711 }
3305306f 3712 dictReplace(c->db->dict,c->argv[2],o);
ed9b544e 3713 } else {
3714 incrRefCount(c->argv[2]);
3715 }
3305306f 3716 deleteKey(c->db,c->argv[1]);
ed9b544e 3717 server.dirty++;
c937aa89 3718 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 3719}
3720
3721static void renameCommand(redisClient *c) {
3722 renameGenericCommand(c,0);
3723}
3724
3725static void renamenxCommand(redisClient *c) {
3726 renameGenericCommand(c,1);
3727}
3728
3729static void moveCommand(redisClient *c) {
3305306f 3730 robj *o;
3731 redisDb *src, *dst;
ed9b544e 3732 int srcid;
3733
3734 /* Obtain source and target DB pointers */
3305306f 3735 src = c->db;
3736 srcid = c->db->id;
ed9b544e 3737 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 3738 addReply(c,shared.outofrangeerr);
ed9b544e 3739 return;
3740 }
3305306f 3741 dst = c->db;
3742 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 3743
3744 /* If the user is moving using as target the same
3745 * DB as the source DB it is probably an error. */
3746 if (src == dst) {
c937aa89 3747 addReply(c,shared.sameobjecterr);
ed9b544e 3748 return;
3749 }
3750
3751 /* Check if the element exists and get a reference */
3305306f 3752 o = lookupKeyWrite(c->db,c->argv[1]);
3753 if (!o) {
c937aa89 3754 addReply(c,shared.czero);
ed9b544e 3755 return;
3756 }
3757
3758 /* Try to add the element to the target DB */
3305306f 3759 deleteIfVolatile(dst,c->argv[1]);
3760 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
c937aa89 3761 addReply(c,shared.czero);
ed9b544e 3762 return;
3763 }
3305306f 3764 incrRefCount(c->argv[1]);
ed9b544e 3765 incrRefCount(o);
3766
3767 /* OK! key moved, free the entry in the source DB */
3305306f 3768 deleteKey(src,c->argv[1]);
ed9b544e 3769 server.dirty++;
c937aa89 3770 addReply(c,shared.cone);
ed9b544e 3771}
3772
3773/* =================================== Lists ================================ */
3774static void pushGenericCommand(redisClient *c, int where) {
3775 robj *lobj;
ed9b544e 3776 list *list;
3305306f 3777
3778 lobj = lookupKeyWrite(c->db,c->argv[1]);
3779 if (lobj == NULL) {
95242ab5 3780 if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
3781 addReply(c,shared.ok);
3782 return;
3783 }
ed9b544e 3784 lobj = createListObject();
3785 list = lobj->ptr;
3786 if (where == REDIS_HEAD) {
6b47e12e 3787 listAddNodeHead(list,c->argv[2]);
ed9b544e 3788 } else {
6b47e12e 3789 listAddNodeTail(list,c->argv[2]);
ed9b544e 3790 }
3305306f 3791 dictAdd(c->db->dict,c->argv[1],lobj);
ed9b544e 3792 incrRefCount(c->argv[1]);
3793 incrRefCount(c->argv[2]);
3794 } else {
ed9b544e 3795 if (lobj->type != REDIS_LIST) {
3796 addReply(c,shared.wrongtypeerr);
3797 return;
3798 }
95242ab5 3799 if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
3800 addReply(c,shared.ok);
3801 return;
3802 }
ed9b544e 3803 list = lobj->ptr;
3804 if (where == REDIS_HEAD) {
6b47e12e 3805 listAddNodeHead(list,c->argv[2]);
ed9b544e 3806 } else {
6b47e12e 3807 listAddNodeTail(list,c->argv[2]);
ed9b544e 3808 }
3809 incrRefCount(c->argv[2]);
3810 }
3811 server.dirty++;
3812 addReply(c,shared.ok);
3813}
3814
3815static void lpushCommand(redisClient *c) {
3816 pushGenericCommand(c,REDIS_HEAD);
3817}
3818
3819static void rpushCommand(redisClient *c) {
3820 pushGenericCommand(c,REDIS_TAIL);
3821}
3822
3823static void llenCommand(redisClient *c) {
3305306f 3824 robj *o;
ed9b544e 3825 list *l;
3826
3305306f 3827 o = lookupKeyRead(c->db,c->argv[1]);
3828 if (o == NULL) {
c937aa89 3829 addReply(c,shared.czero);
ed9b544e 3830 return;
3831 } else {
ed9b544e 3832 if (o->type != REDIS_LIST) {
c937aa89 3833 addReply(c,shared.wrongtypeerr);
ed9b544e 3834 } else {
3835 l = o->ptr;
c937aa89 3836 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
ed9b544e 3837 }
3838 }
3839}
3840
3841static void lindexCommand(redisClient *c) {
3305306f 3842 robj *o;
ed9b544e 3843 int index = atoi(c->argv[2]->ptr);
3844
3305306f 3845 o = lookupKeyRead(c->db,c->argv[1]);
3846 if (o == NULL) {
c937aa89 3847 addReply(c,shared.nullbulk);
ed9b544e 3848 } else {
ed9b544e 3849 if (o->type != REDIS_LIST) {
c937aa89 3850 addReply(c,shared.wrongtypeerr);
ed9b544e 3851 } else {
3852 list *list = o->ptr;
3853 listNode *ln;
3854
3855 ln = listIndex(list, index);
3856 if (ln == NULL) {
c937aa89 3857 addReply(c,shared.nullbulk);
ed9b544e 3858 } else {
3859 robj *ele = listNodeValue(ln);
942a3961 3860 addReplyBulkLen(c,ele);
ed9b544e 3861 addReply(c,ele);
3862 addReply(c,shared.crlf);
3863 }
3864 }
3865 }
3866}
3867
3868static void lsetCommand(redisClient *c) {
3305306f 3869 robj *o;
ed9b544e 3870 int index = atoi(c->argv[2]->ptr);
3871
3305306f 3872 o = lookupKeyWrite(c->db,c->argv[1]);
3873 if (o == NULL) {
ed9b544e 3874 addReply(c,shared.nokeyerr);
3875 } else {
ed9b544e 3876 if (o->type != REDIS_LIST) {
3877 addReply(c,shared.wrongtypeerr);
3878 } else {
3879 list *list = o->ptr;
3880 listNode *ln;
3881
3882 ln = listIndex(list, index);
3883 if (ln == NULL) {
c937aa89 3884 addReply(c,shared.outofrangeerr);
ed9b544e 3885 } else {
3886 robj *ele = listNodeValue(ln);
3887
3888 decrRefCount(ele);
3889 listNodeValue(ln) = c->argv[3];
3890 incrRefCount(c->argv[3]);
3891 addReply(c,shared.ok);
3892 server.dirty++;
3893 }
3894 }
3895 }
3896}
3897
3898static void popGenericCommand(redisClient *c, int where) {
3305306f 3899 robj *o;
3900
3901 o = lookupKeyWrite(c->db,c->argv[1]);
3902 if (o == NULL) {
c937aa89 3903 addReply(c,shared.nullbulk);
ed9b544e 3904 } else {
ed9b544e 3905 if (o->type != REDIS_LIST) {
c937aa89 3906 addReply(c,shared.wrongtypeerr);
ed9b544e 3907 } else {
3908 list *list = o->ptr;
3909 listNode *ln;
3910
3911 if (where == REDIS_HEAD)
3912 ln = listFirst(list);
3913 else
3914 ln = listLast(list);
3915
3916 if (ln == NULL) {
c937aa89 3917 addReply(c,shared.nullbulk);
ed9b544e 3918 } else {
3919 robj *ele = listNodeValue(ln);
942a3961 3920 addReplyBulkLen(c,ele);
ed9b544e 3921 addReply(c,ele);
3922 addReply(c,shared.crlf);
3923 listDelNode(list,ln);
3924 server.dirty++;
3925 }
3926 }
3927 }
3928}
3929
3930static void lpopCommand(redisClient *c) {
3931 popGenericCommand(c,REDIS_HEAD);
3932}
3933
3934static void rpopCommand(redisClient *c) {
3935 popGenericCommand(c,REDIS_TAIL);
3936}
3937
3938static void lrangeCommand(redisClient *c) {
3305306f 3939 robj *o;
ed9b544e 3940 int start = atoi(c->argv[2]->ptr);
3941 int end = atoi(c->argv[3]->ptr);
3305306f 3942
3943 o = lookupKeyRead(c->db,c->argv[1]);
3944 if (o == NULL) {
c937aa89 3945 addReply(c,shared.nullmultibulk);
ed9b544e 3946 } else {
ed9b544e 3947 if (o->type != REDIS_LIST) {
c937aa89 3948 addReply(c,shared.wrongtypeerr);
ed9b544e 3949 } else {
3950 list *list = o->ptr;
3951 listNode *ln;
3952 int llen = listLength(list);
3953 int rangelen, j;
3954 robj *ele;
3955
3956 /* convert negative indexes */
3957 if (start < 0) start = llen+start;
3958 if (end < 0) end = llen+end;
3959 if (start < 0) start = 0;
3960 if (end < 0) end = 0;
3961
3962 /* indexes sanity checks */
3963 if (start > end || start >= llen) {
3964 /* Out of range start or start > end result in empty list */
c937aa89 3965 addReply(c,shared.emptymultibulk);
ed9b544e 3966 return;
3967 }
3968 if (end >= llen) end = llen-1;
3969 rangelen = (end-start)+1;
3970
3971 /* Return the result in form of a multi-bulk reply */
3972 ln = listIndex(list, start);
c937aa89 3973 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
ed9b544e 3974 for (j = 0; j < rangelen; j++) {
3975 ele = listNodeValue(ln);
942a3961 3976 addReplyBulkLen(c,ele);
ed9b544e 3977 addReply(c,ele);
3978 addReply(c,shared.crlf);
3979 ln = ln->next;
3980 }
3981 }
3982 }
3983}
3984
3985static void ltrimCommand(redisClient *c) {
3305306f 3986 robj *o;
ed9b544e 3987 int start = atoi(c->argv[2]->ptr);
3988 int end = atoi(c->argv[3]->ptr);
3989
3305306f 3990 o = lookupKeyWrite(c->db,c->argv[1]);
3991 if (o == NULL) {
ab9d4cb1 3992 addReply(c,shared.ok);
ed9b544e 3993 } else {
ed9b544e 3994 if (o->type != REDIS_LIST) {
3995 addReply(c,shared.wrongtypeerr);
3996 } else {
3997 list *list = o->ptr;
3998 listNode *ln;
3999 int llen = listLength(list);
4000 int j, ltrim, rtrim;
4001
4002 /* convert negative indexes */
4003 if (start < 0) start = llen+start;
4004 if (end < 0) end = llen+end;
4005 if (start < 0) start = 0;
4006 if (end < 0) end = 0;
4007
4008 /* indexes sanity checks */
4009 if (start > end || start >= llen) {
4010 /* Out of range start or start > end result in empty list */
4011 ltrim = llen;
4012 rtrim = 0;
4013 } else {
4014 if (end >= llen) end = llen-1;
4015 ltrim = start;
4016 rtrim = llen-end-1;
4017 }
4018
4019 /* Remove list elements to perform the trim */
4020 for (j = 0; j < ltrim; j++) {
4021 ln = listFirst(list);
4022 listDelNode(list,ln);
4023 }
4024 for (j = 0; j < rtrim; j++) {
4025 ln = listLast(list);
4026 listDelNode(list,ln);
4027 }
ed9b544e 4028 server.dirty++;
e59229a2 4029 addReply(c,shared.ok);
ed9b544e 4030 }
4031 }
4032}
4033
4034static void lremCommand(redisClient *c) {
3305306f 4035 robj *o;
ed9b544e 4036
3305306f 4037 o = lookupKeyWrite(c->db,c->argv[1]);
4038 if (o == NULL) {
33c08b39 4039 addReply(c,shared.czero);
ed9b544e 4040 } else {
ed9b544e 4041 if (o->type != REDIS_LIST) {
c937aa89 4042 addReply(c,shared.wrongtypeerr);
ed9b544e 4043 } else {
4044 list *list = o->ptr;
4045 listNode *ln, *next;
4046 int toremove = atoi(c->argv[2]->ptr);
4047 int removed = 0;
4048 int fromtail = 0;
4049
4050 if (toremove < 0) {
4051 toremove = -toremove;
4052 fromtail = 1;
4053 }
4054 ln = fromtail ? list->tail : list->head;
4055 while (ln) {
ed9b544e 4056 robj *ele = listNodeValue(ln);
a4d1ba9a 4057
4058 next = fromtail ? ln->prev : ln->next;
724a51b1 4059 if (compareStringObjects(ele,c->argv[3]) == 0) {
ed9b544e 4060 listDelNode(list,ln);
4061 server.dirty++;
4062 removed++;
4063 if (toremove && removed == toremove) break;
4064 }
4065 ln = next;
4066 }
c937aa89 4067 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 4068 }
4069 }
4070}
4071
/* These are the semantics of this command:
 *  RPOPLPUSH srclist dstlist:
 *   IF LLEN(srclist) > 0
 *     element = RPOP srclist
 *     LPUSH dstlist element
 *     RETURN element
 *   ELSE
 *     RETURN nil
 *   END
 *
 * The idea is to be able to get an element from a list in a reliable way,
 * since the element is not just returned but also pushed onto another list.
 * This command was originally proposed by Ezra Zygmuntowicz.
 */
0f5f7e9a 4087static void rpoplpushcommand(redisClient *c) {
12f9d551 4088 robj *sobj;
4089
4090 sobj = lookupKeyWrite(c->db,c->argv[1]);
4091 if (sobj == NULL) {
4092 addReply(c,shared.nullbulk);
4093 } else {
4094 if (sobj->type != REDIS_LIST) {
4095 addReply(c,shared.wrongtypeerr);
4096 } else {
4097 list *srclist = sobj->ptr;
4098 listNode *ln = listLast(srclist);
4099
4100 if (ln == NULL) {
4101 addReply(c,shared.nullbulk);
4102 } else {
4103 robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
4104 robj *ele = listNodeValue(ln);
4105 list *dstlist;
4106
e20fb74f 4107 if (dobj && dobj->type != REDIS_LIST) {
12f9d551 4108 addReply(c,shared.wrongtypeerr);
4109 return;
4110 }
e20fb74f 4111
4112 /* Add the element to the target list (unless it's directly
4113 * passed to some BLPOP-ing client */
4114 if (!handleClientsWaitingListPush(c,c->argv[2],ele)) {
4115 if (dobj == NULL) {
4116 /* Create the list if the key does not exist */
4117 dobj = createListObject();
4118 dictAdd(c->db->dict,c->argv[2],dobj);
4119 incrRefCount(c->argv[2]);
4120 }
4121 dstlist = dobj->ptr;
4122 listAddNodeHead(dstlist,ele);
4123 incrRefCount(ele);
4124 }
12f9d551 4125
4126 /* Send the element to the client as reply as well */
4127 addReplyBulkLen(c,ele);
4128 addReply(c,ele);
4129 addReply(c,shared.crlf);
4130
4131 /* Finally remove the element from the source list */
4132 listDelNode(srclist,ln);
4133 server.dirty++;
4134 }
4135 }
4136 }
4137}
4138
4139
ed9b544e 4140/* ==================================== Sets ================================ */
4141
4142static void saddCommand(redisClient *c) {
ed9b544e 4143 robj *set;
4144
3305306f 4145 set = lookupKeyWrite(c->db,c->argv[1]);
4146 if (set == NULL) {
ed9b544e 4147 set = createSetObject();
3305306f 4148 dictAdd(c->db->dict,c->argv[1],set);
ed9b544e 4149 incrRefCount(c->argv[1]);
4150 } else {
ed9b544e 4151 if (set->type != REDIS_SET) {
c937aa89 4152 addReply(c,shared.wrongtypeerr);
ed9b544e 4153 return;
4154 }
4155 }
4156 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
4157 incrRefCount(c->argv[2]);
4158 server.dirty++;
c937aa89 4159 addReply(c,shared.cone);
ed9b544e 4160 } else {
c937aa89 4161 addReply(c,shared.czero);
ed9b544e 4162 }
4163}
4164
4165static void sremCommand(redisClient *c) {
3305306f 4166 robj *set;
ed9b544e 4167
3305306f 4168 set = lookupKeyWrite(c->db,c->argv[1]);
4169 if (set == NULL) {
c937aa89 4170 addReply(c,shared.czero);
ed9b544e 4171 } else {
ed9b544e 4172 if (set->type != REDIS_SET) {
c937aa89 4173 addReply(c,shared.wrongtypeerr);
ed9b544e 4174 return;
4175 }
4176 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
4177 server.dirty++;
12fea928 4178 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
c937aa89 4179 addReply(c,shared.cone);
ed9b544e 4180 } else {
c937aa89 4181 addReply(c,shared.czero);
ed9b544e 4182 }
4183 }
4184}
4185
a4460ef4 4186static void smoveCommand(redisClient *c) {
4187 robj *srcset, *dstset;
4188
4189 srcset = lookupKeyWrite(c->db,c->argv[1]);
4190 dstset = lookupKeyWrite(c->db,c->argv[2]);
4191
4192 /* If the source key does not exist return 0, if it's of the wrong type
4193 * raise an error */
4194 if (srcset == NULL || srcset->type != REDIS_SET) {
4195 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
4196 return;
4197 }
4198 /* Error if the destination key is not a set as well */
4199 if (dstset && dstset->type != REDIS_SET) {
4200 addReply(c,shared.wrongtypeerr);
4201 return;
4202 }
4203 /* Remove the element from the source set */
4204 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
4205 /* Key not found in the src set! return zero */
4206 addReply(c,shared.czero);
4207 return;
4208 }
4209 server.dirty++;
4210 /* Add the element to the destination set */
4211 if (!dstset) {
4212 dstset = createSetObject();
4213 dictAdd(c->db->dict,c->argv[2],dstset);
4214 incrRefCount(c->argv[2]);
4215 }
4216 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
4217 incrRefCount(c->argv[3]);
4218 addReply(c,shared.cone);
4219}
4220
ed9b544e 4221static void sismemberCommand(redisClient *c) {
3305306f 4222 robj *set;
ed9b544e 4223
3305306f 4224 set = lookupKeyRead(c->db,c->argv[1]);
4225 if (set == NULL) {
c937aa89 4226 addReply(c,shared.czero);
ed9b544e 4227 } else {
ed9b544e 4228 if (set->type != REDIS_SET) {
c937aa89 4229 addReply(c,shared.wrongtypeerr);
ed9b544e 4230 return;
4231 }
4232 if (dictFind(set->ptr,c->argv[2]))
c937aa89 4233 addReply(c,shared.cone);
ed9b544e 4234 else
c937aa89 4235 addReply(c,shared.czero);
ed9b544e 4236 }
4237}
4238
4239static void scardCommand(redisClient *c) {
3305306f 4240 robj *o;
ed9b544e 4241 dict *s;
4242
3305306f 4243 o = lookupKeyRead(c->db,c->argv[1]);
4244 if (o == NULL) {
c937aa89 4245 addReply(c,shared.czero);
ed9b544e 4246 return;
4247 } else {
ed9b544e 4248 if (o->type != REDIS_SET) {
c937aa89 4249 addReply(c,shared.wrongtypeerr);
ed9b544e 4250 } else {
4251 s = o->ptr;
682ac724 4252 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
3305306f 4253 dictSize(s)));
ed9b544e 4254 }
4255 }
4256}
4257
12fea928 4258static void spopCommand(redisClient *c) {
4259 robj *set;
4260 dictEntry *de;
4261
4262 set = lookupKeyWrite(c->db,c->argv[1]);
4263 if (set == NULL) {
4264 addReply(c,shared.nullbulk);
4265 } else {
4266 if (set->type != REDIS_SET) {
4267 addReply(c,shared.wrongtypeerr);
4268 return;
4269 }
4270 de = dictGetRandomKey(set->ptr);
4271 if (de == NULL) {
4272 addReply(c,shared.nullbulk);
4273 } else {
4274 robj *ele = dictGetEntryKey(de);
4275
942a3961 4276 addReplyBulkLen(c,ele);
12fea928 4277 addReply(c,ele);
4278 addReply(c,shared.crlf);
4279 dictDelete(set->ptr,ele);
4280 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
4281 server.dirty++;
4282 }
4283 }
4284}
4285
2abb95a9 4286static void srandmemberCommand(redisClient *c) {
4287 robj *set;
4288 dictEntry *de;
4289
4290 set = lookupKeyRead(c->db,c->argv[1]);
4291 if (set == NULL) {
4292 addReply(c,shared.nullbulk);
4293 } else {
4294 if (set->type != REDIS_SET) {
4295 addReply(c,shared.wrongtypeerr);
4296 return;
4297 }
4298 de = dictGetRandomKey(set->ptr);
4299 if (de == NULL) {
4300 addReply(c,shared.nullbulk);
4301 } else {
4302 robj *ele = dictGetEntryKey(de);
4303
4304 addReplyBulkLen(c,ele);
4305 addReply(c,ele);
4306 addReply(c,shared.crlf);
4307 }
4308 }
4309}
4310
ed9b544e 4311static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
4312 dict **d1 = (void*) s1, **d2 = (void*) s2;
4313
3305306f 4314 return dictSize(*d1)-dictSize(*d2);
ed9b544e 4315}
4316
682ac724 4317static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) {
ed9b544e 4318 dict **dv = zmalloc(sizeof(dict*)*setsnum);
4319 dictIterator *di;
4320 dictEntry *de;
4321 robj *lenobj = NULL, *dstset = NULL;
682ac724 4322 unsigned long j, cardinality = 0;
ed9b544e 4323
ed9b544e 4324 for (j = 0; j < setsnum; j++) {
4325 robj *setobj;
3305306f 4326
4327 setobj = dstkey ?
4328 lookupKeyWrite(c->db,setskeys[j]) :
4329 lookupKeyRead(c->db,setskeys[j]);
4330 if (!setobj) {
ed9b544e 4331 zfree(dv);
5faa6025 4332 if (dstkey) {
fdcaae84 4333 if (deleteKey(c->db,dstkey))
4334 server.dirty++;
0d36ded0 4335 addReply(c,shared.czero);
5faa6025 4336 } else {
4337 addReply(c,shared.nullmultibulk);
4338 }
ed9b544e 4339 return;
4340 }
ed9b544e 4341 if (setobj->type != REDIS_SET) {
4342 zfree(dv);
c937aa89 4343 addReply(c,shared.wrongtypeerr);
ed9b544e 4344 return;
4345 }
4346 dv[j] = setobj->ptr;
4347 }
4348 /* Sort sets from the smallest to largest, this will improve our
4349 * algorithm's performace */
4350 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
4351
4352 /* The first thing we should output is the total number of elements...
4353 * since this is a multi-bulk write, but at this stage we don't know
4354 * the intersection set size, so we use a trick, append an empty object
4355 * to the output list and save the pointer to later modify it with the
4356 * right length */
4357 if (!dstkey) {
4358 lenobj = createObject(REDIS_STRING,NULL);
4359 addReply(c,lenobj);
4360 decrRefCount(lenobj);
4361 } else {
4362 /* If we have a target key where to store the resulting set
4363 * create this key with an empty set inside */
4364 dstset = createSetObject();
ed9b544e 4365 }
4366
4367 /* Iterate all the elements of the first (smallest) set, and test
4368 * the element against all the other sets, if at least one set does
4369 * not include the element it is discarded */
4370 di = dictGetIterator(dv[0]);
ed9b544e 4371
4372 while((de = dictNext(di)) != NULL) {
4373 robj *ele;
4374
4375 for (j = 1; j < setsnum; j++)
4376 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
4377 if (j != setsnum)
4378 continue; /* at least one set does not contain the member */
4379 ele = dictGetEntryKey(de);
4380 if (!dstkey) {
942a3961 4381 addReplyBulkLen(c,ele);
ed9b544e 4382 addReply(c,ele);
4383 addReply(c,shared.crlf);
4384 cardinality++;
4385 } else {
4386 dictAdd(dstset->ptr,ele,NULL);
4387 incrRefCount(ele);
4388 }
4389 }
4390 dictReleaseIterator(di);
4391
83cdfe18
AG
4392 if (dstkey) {
4393 /* Store the resulting set into the target */
4394 deleteKey(c->db,dstkey);
4395 dictAdd(c->db->dict,dstkey,dstset);
4396 incrRefCount(dstkey);
4397 }
4398
40d224a9 4399 if (!dstkey) {
682ac724 4400 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality);
40d224a9 4401 } else {
682ac724 4402 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
03fd01c7 4403 dictSize((dict*)dstset->ptr)));
40d224a9 4404 server.dirty++;
4405 }
ed9b544e 4406 zfree(dv);
4407}
4408
4409static void sinterCommand(redisClient *c) {
4410 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
4411}
4412
4413static void sinterstoreCommand(redisClient *c) {
4414 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
4415}
4416
f4f56e1d 4417#define REDIS_OP_UNION 0
4418#define REDIS_OP_DIFF 1
4419
4420static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 4421 dict **dv = zmalloc(sizeof(dict*)*setsnum);
4422 dictIterator *di;
4423 dictEntry *de;
f4f56e1d 4424 robj *dstset = NULL;
40d224a9 4425 int j, cardinality = 0;
4426
40d224a9 4427 for (j = 0; j < setsnum; j++) {
4428 robj *setobj;
4429
4430 setobj = dstkey ?
4431 lookupKeyWrite(c->db,setskeys[j]) :
4432 lookupKeyRead(c->db,setskeys[j]);
4433 if (!setobj) {
4434 dv[j] = NULL;
4435 continue;
4436 }
4437 if (setobj->type != REDIS_SET) {
4438 zfree(dv);
4439 addReply(c,shared.wrongtypeerr);
4440 return;
4441 }
4442 dv[j] = setobj->ptr;
4443 }
4444
4445 /* We need a temp set object to store our union. If the dstkey
4446 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4447 * this set object will be the resulting object to set into the target key*/
4448 dstset = createSetObject();
4449
40d224a9 4450 /* Iterate all the elements of all the sets, add every element a single
4451 * time to the result set */
4452 for (j = 0; j < setsnum; j++) {
51829ed3 4453 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 4454 if (!dv[j]) continue; /* non existing keys are like empty sets */
4455
4456 di = dictGetIterator(dv[j]);
40d224a9 4457
4458 while((de = dictNext(di)) != NULL) {
4459 robj *ele;
4460
4461 /* dictAdd will not add the same element multiple times */
4462 ele = dictGetEntryKey(de);
f4f56e1d 4463 if (op == REDIS_OP_UNION || j == 0) {
4464 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
4465 incrRefCount(ele);
40d224a9 4466 cardinality++;
4467 }
f4f56e1d 4468 } else if (op == REDIS_OP_DIFF) {
4469 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
4470 cardinality--;
4471 }
40d224a9 4472 }
4473 }
4474 dictReleaseIterator(di);
51829ed3
AG
4475
4476 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
40d224a9 4477 }
4478
f4f56e1d 4479 /* Output the content of the resulting set, if not in STORE mode */
4480 if (!dstkey) {
4481 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
4482 di = dictGetIterator(dstset->ptr);
f4f56e1d 4483 while((de = dictNext(di)) != NULL) {
4484 robj *ele;
4485
4486 ele = dictGetEntryKey(de);
942a3961 4487 addReplyBulkLen(c,ele);
f4f56e1d 4488 addReply(c,ele);
4489 addReply(c,shared.crlf);
4490 }
4491 dictReleaseIterator(di);
83cdfe18
AG
4492 } else {
4493 /* If we have a target key where to store the resulting set
4494 * create this key with the result set inside */
4495 deleteKey(c->db,dstkey);
4496 dictAdd(c->db->dict,dstkey,dstset);
4497 incrRefCount(dstkey);
f4f56e1d 4498 }
4499
4500 /* Cleanup */
40d224a9 4501 if (!dstkey) {
40d224a9 4502 decrRefCount(dstset);
4503 } else {
682ac724 4504 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
03fd01c7 4505 dictSize((dict*)dstset->ptr)));
40d224a9 4506 server.dirty++;
4507 }
4508 zfree(dv);
4509}
4510
4511static void sunionCommand(redisClient *c) {
f4f56e1d 4512 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 4513}
4514
4515static void sunionstoreCommand(redisClient *c) {
f4f56e1d 4516 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
4517}
4518
4519static void sdiffCommand(redisClient *c) {
4520 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
4521}
4522
4523static void sdiffstoreCommand(redisClient *c) {
4524 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 4525}
4526
6b47e12e 4527/* ==================================== ZSets =============================== */
4528
/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */
4536
/* This skiplist implementation is almost a C translation of the original
 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
 * Alternative to Balanced Trees", modified in three ways:
 * a) this implementation allows for repeated values.
 * b) the comparison is not just by key (our 'score') but by satellite data.
 * c) there is a back pointer, so it's a doubly linked list with the back
 * pointers being only at "level 1". This makes it possible to traverse the
 * list from tail to head, which is useful for ZREVRANGE. */
4545
4546static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
4547 zskiplistNode *zn = zmalloc(sizeof(*zn));
4548
4549 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
4550 zn->score = score;
4551 zn->obj = obj;
4552 return zn;
4553}
4554
4555static zskiplist *zslCreate(void) {
4556 int j;
4557 zskiplist *zsl;
4558
4559 zsl = zmalloc(sizeof(*zsl));
4560 zsl->level = 1;
cc812361 4561 zsl->length = 0;
6b47e12e 4562 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
4563 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
4564 zsl->header->forward[j] = NULL;
e3870fab 4565 zsl->header->backward = NULL;
4566 zsl->tail = NULL;
6b47e12e 4567 return zsl;
4568}
4569
fd8ccf44 4570static void zslFreeNode(zskiplistNode *node) {
4571 decrRefCount(node->obj);
ad807e6f 4572 zfree(node->forward);
fd8ccf44 4573 zfree(node);
4574}
4575
4576static void zslFree(zskiplist *zsl) {
ad807e6f 4577 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 4578
ad807e6f 4579 zfree(zsl->header->forward);
4580 zfree(zsl->header);
fd8ccf44 4581 while(node) {
599379dd 4582 next = node->forward[0];
fd8ccf44 4583 zslFreeNode(node);
4584 node = next;
4585 }
ad807e6f 4586 zfree(zsl);
fd8ccf44 4587}
4588
6b47e12e 4589static int zslRandomLevel(void) {
4590 int level = 1;
4591 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
4592 level += 1;
4593 return level;
4594}
4595
4596static void zslInsert(zskiplist *zsl, double score, robj *obj) {
4597 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4598 int i, level;
4599
4600 x = zsl->header;
4601 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4602 while (x->forward[i] &&
4603 (x->forward[i]->score < score ||
4604 (x->forward[i]->score == score &&
4605 compareStringObjects(x->forward[i]->obj,obj) < 0)))
6b47e12e 4606 x = x->forward[i];
4607 update[i] = x;
4608 }
6b47e12e 4609 /* we assume the key is not already inside, since we allow duplicated
4610 * scores, and the re-insertion of score and redis object should never
4611 * happpen since the caller of zslInsert() should test in the hash table
4612 * if the element is already inside or not. */
4613 level = zslRandomLevel();
4614 if (level > zsl->level) {
4615 for (i = zsl->level; i < level; i++)
4616 update[i] = zsl->header;
4617 zsl->level = level;
4618 }
4619 x = zslCreateNode(level,score,obj);
4620 for (i = 0; i < level; i++) {
4621 x->forward[i] = update[i]->forward[i];
4622 update[i]->forward[i] = x;
4623 }
bb975144 4624 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 4625 if (x->forward[0])
4626 x->forward[0]->backward = x;
4627 else
4628 zsl->tail = x;
cc812361 4629 zsl->length++;
6b47e12e 4630}
4631
50c55df5 4632/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 4633static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 4634 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4635 int i;
4636
4637 x = zsl->header;
4638 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4639 while (x->forward[i] &&
4640 (x->forward[i]->score < score ||
4641 (x->forward[i]->score == score &&
4642 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 4643 x = x->forward[i];
4644 update[i] = x;
4645 }
4646 /* We may have multiple elements with the same score, what we need
4647 * is to find the element with both the right score and object. */
4648 x = x->forward[0];
50c55df5 4649 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
9d60e6e4 4650 for (i = 0; i < zsl->level; i++) {
4651 if (update[i]->forward[i] != x) break;
4652 update[i]->forward[i] = x->forward[i];
4653 }
4654 if (x->forward[0]) {
4655 x->forward[0]->backward = (x->backward == zsl->header) ?
4656 NULL : x->backward;
e197b441 4657 } else {
9d60e6e4 4658 zsl->tail = x->backward;
e197b441 4659 }
9d60e6e4 4660 zslFreeNode(x);
4661 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4662 zsl->level--;
4663 zsl->length--;
4664 return 1;
4665 } else {
4666 return 0; /* not found */
e197b441 4667 }
4668 return 0; /* not found */
fd8ccf44 4669}
4670
1807985b 4671/* Delete all the elements with score between min and max from the skiplist.
4672 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4673 * Note that this function takes the reference to the hash table view of the
4674 * sorted set, in order to remove the elements from the hash table too. */
4675static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4676 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4677 unsigned long removed = 0;
4678 int i;
4679
4680 x = zsl->header;
4681 for (i = zsl->level-1; i >= 0; i--) {
4682 while (x->forward[i] && x->forward[i]->score < min)
4683 x = x->forward[i];
4684 update[i] = x;
4685 }
4686 /* We may have multiple elements with the same score, what we need
4687 * is to find the element with both the right score and object. */
4688 x = x->forward[0];
4689 while (x && x->score <= max) {
4690 zskiplistNode *next;
4691
4692 for (i = 0; i < zsl->level; i++) {
4693 if (update[i]->forward[i] != x) break;
4694 update[i]->forward[i] = x->forward[i];
4695 }
4696 if (x->forward[0]) {
4697 x->forward[0]->backward = (x->backward == zsl->header) ?
4698 NULL : x->backward;
4699 } else {
4700 zsl->tail = x->backward;
4701 }
4702 next = x->forward[0];
4703 dictDelete(dict,x->obj);
4704 zslFreeNode(x);
4705 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4706 zsl->level--;
4707 zsl->length--;
4708 removed++;
4709 x = next;
4710 }
4711 return removed; /* not found */
4712}
4713
50c55df5 4714/* Find the first node having a score equal or greater than the specified one.
4715 * Returns NULL if there is no match. */
4716static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4717 zskiplistNode *x;
4718 int i;
4719
4720 x = zsl->header;
4721 for (i = zsl->level-1; i >= 0; i--) {
4722 while (x->forward[i] && x->forward[i]->score < score)
4723 x = x->forward[i];
4724 }
4725 /* We may have multiple elements with the same score, what we need
4726 * is to find the element with both the right score and object. */
4727 return x->forward[0];
4728}
4729
fd8ccf44 4730/* The actual Z-commands implementations */
4731
7db723ad 4732/* This generic command implements both ZADD and ZINCRBY.
e2665397 4733 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
7db723ad 4734 * the increment if the operation is a ZINCRBY (doincrement == 1). */
e2665397 4735static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
fd8ccf44 4736 robj *zsetobj;
4737 zset *zs;
4738 double *score;
4739
e2665397 4740 zsetobj = lookupKeyWrite(c->db,key);
fd8ccf44 4741 if (zsetobj == NULL) {
4742 zsetobj = createZsetObject();
e2665397 4743 dictAdd(c->db->dict,key,zsetobj);
4744 incrRefCount(key);
fd8ccf44 4745 } else {
4746 if (zsetobj->type != REDIS_ZSET) {
4747 addReply(c,shared.wrongtypeerr);
4748 return;
4749 }
4750 }
fd8ccf44 4751 zs = zsetobj->ptr;
e2665397 4752
7db723ad 4753 /* Ok now since we implement both ZADD and ZINCRBY here the code
e2665397 4754 * needs to handle the two different conditions. It's all about setting
4755 * '*score', that is, the new score to set, to the right value. */
4756 score = zmalloc(sizeof(double));
4757 if (doincrement) {
4758 dictEntry *de;
4759
4760 /* Read the old score. If the element was not present starts from 0 */
4761 de = dictFind(zs->dict,ele);
4762 if (de) {
4763 double *oldscore = dictGetEntryVal(de);
4764 *score = *oldscore + scoreval;
4765 } else {
4766 *score = scoreval;
4767 }
4768 } else {
4769 *score = scoreval;
4770 }
4771
4772 /* What follows is a simple remove and re-insert operation that is common
7db723ad 4773 * to both ZADD and ZINCRBY... */
e2665397 4774 if (dictAdd(zs->dict,ele,score) == DICT_OK) {
fd8ccf44 4775 /* case 1: New element */
e2665397 4776 incrRefCount(ele); /* added to hash */
4777 zslInsert(zs->zsl,*score,ele);
4778 incrRefCount(ele); /* added to skiplist */
fd8ccf44 4779 server.dirty++;
e2665397 4780 if (doincrement)
e2665397 4781 addReplyDouble(c,*score);
91d71bfc 4782 else
4783 addReply(c,shared.cone);
fd8ccf44 4784 } else {
4785 dictEntry *de;
4786 double *oldscore;
4787
4788 /* case 2: Score update operation */
e2665397 4789 de = dictFind(zs->dict,ele);
dfc5e96c 4790 redisAssert(de != NULL);
fd8ccf44 4791 oldscore = dictGetEntryVal(de);
4792 if (*score != *oldscore) {
4793 int deleted;
4794
e2665397 4795 /* Remove and insert the element in the skip list with new score */
4796 deleted = zslDelete(zs->zsl,*oldscore,ele);
dfc5e96c 4797 redisAssert(deleted != 0);
e2665397 4798 zslInsert(zs->zsl,*score,ele);
4799 incrRefCount(ele);
4800 /* Update the score in the hash table */
4801 dictReplace(zs->dict,ele,score);
fd8ccf44 4802 server.dirty++;
2161a965 4803 } else {
4804 zfree(score);
fd8ccf44 4805 }
e2665397 4806 if (doincrement)
4807 addReplyDouble(c,*score);
4808 else
4809 addReply(c,shared.czero);
fd8ccf44 4810 }
4811}
4812
e2665397 4813static void zaddCommand(redisClient *c) {
4814 double scoreval;
4815
4816 scoreval = strtod(c->argv[2]->ptr,NULL);
4817 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
4818}
4819
7db723ad 4820static void zincrbyCommand(redisClient *c) {
e2665397 4821 double scoreval;
4822
4823 scoreval = strtod(c->argv[2]->ptr,NULL);
4824 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
4825}
4826
1b7106e7 4827static void zremCommand(redisClient *c) {
4828 robj *zsetobj;
4829 zset *zs;
4830
4831 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4832 if (zsetobj == NULL) {
4833 addReply(c,shared.czero);
4834 } else {
4835 dictEntry *de;
4836 double *oldscore;
4837 int deleted;
4838
4839 if (zsetobj->type != REDIS_ZSET) {
4840 addReply(c,shared.wrongtypeerr);
4841 return;
4842 }
4843 zs = zsetobj->ptr;
4844 de = dictFind(zs->dict,c->argv[2]);
4845 if (de == NULL) {
4846 addReply(c,shared.czero);
4847 return;
4848 }
4849 /* Delete from the skiplist */
4850 oldscore = dictGetEntryVal(de);
4851 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
dfc5e96c 4852 redisAssert(deleted != 0);
1b7106e7 4853
4854 /* Delete from the hash table */
4855 dictDelete(zs->dict,c->argv[2]);
4856 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4857 server.dirty++;
4858 addReply(c,shared.cone);
4859 }
4860}
4861
1807985b 4862static void zremrangebyscoreCommand(redisClient *c) {
4863 double min = strtod(c->argv[2]->ptr,NULL);
4864 double max = strtod(c->argv[3]->ptr,NULL);
4865 robj *zsetobj;
4866 zset *zs;
4867
4868 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4869 if (zsetobj == NULL) {
4870 addReply(c,shared.czero);
4871 } else {
4872 long deleted;
4873
4874 if (zsetobj->type != REDIS_ZSET) {
4875 addReply(c,shared.wrongtypeerr);
4876 return;
4877 }
4878 zs = zsetobj->ptr;
4879 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4880 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4881 server.dirty += deleted;
4882 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4883 }
4884}
4885
e3870fab 4886static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 4887 robj *o;
4888 int start = atoi(c->argv[2]->ptr);
4889 int end = atoi(c->argv[3]->ptr);
752da584 4890 int withscores = 0;
4891
4892 if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
4893 withscores = 1;
4894 } else if (c->argc >= 5) {
4895 addReply(c,shared.syntaxerr);
4896 return;
4897 }
cc812361 4898
4899 o = lookupKeyRead(c->db,c->argv[1]);
4900 if (o == NULL) {
4901 addReply(c,shared.nullmultibulk);
4902 } else {
4903 if (o->type != REDIS_ZSET) {
4904 addReply(c,shared.wrongtypeerr);
4905 } else {
4906 zset *zsetobj = o->ptr;
4907 zskiplist *zsl = zsetobj->zsl;
4908 zskiplistNode *ln;
4909
4910 int llen = zsl->length;
4911 int rangelen, j;
4912 robj *ele;
4913
4914 /* convert negative indexes */
4915 if (start < 0) start = llen+start;
4916 if (end < 0) end = llen+end;
4917 if (start < 0) start = 0;
4918 if (end < 0) end = 0;
4919
4920 /* indexes sanity checks */
4921 if (start > end || start >= llen) {
4922 /* Out of range start or start > end result in empty list */
4923 addReply(c,shared.emptymultibulk);
4924 return;
4925 }
4926 if (end >= llen) end = llen-1;
4927 rangelen = (end-start)+1;
4928
4929 /* Return the result in form of a multi-bulk reply */
e3870fab 4930 if (reverse) {
4931 ln = zsl->tail;
4932 while (start--)
4933 ln = ln->backward;
4934 } else {
4935 ln = zsl->header->forward[0];
4936 while (start--)
4937 ln = ln->forward[0];
4938 }
cc812361 4939
752da584 4940 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
4941 withscores ? (rangelen*2) : rangelen));
cc812361 4942 for (j = 0; j < rangelen; j++) {
0aad7a19 4943 ele = ln->obj;
cc812361 4944 addReplyBulkLen(c,ele);
4945 addReply(c,ele);
4946 addReply(c,shared.crlf);
752da584 4947 if (withscores)
4948 addReplyDouble(c,ln->score);
e3870fab 4949 ln = reverse ? ln->backward : ln->forward[0];
cc812361 4950 }
4951 }
4952 }
4953}
4954
e3870fab 4955static void zrangeCommand(redisClient *c) {
4956 zrangeGenericCommand(c,0);
4957}
4958
4959static void zrevrangeCommand(redisClient *c) {
4960 zrangeGenericCommand(c,1);
4961}
4962
50c55df5 4963static void zrangebyscoreCommand(redisClient *c) {
4964 robj *o;
4965 double min = strtod(c->argv[2]->ptr,NULL);
4966 double max = strtod(c->argv[3]->ptr,NULL);
80181f78 4967 int offset = 0, limit = -1;
4968
4969 if (c->argc != 4 && c->argc != 7) {
454d4e43 4970 addReplySds(c,
4971 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
80181f78 4972 return;
4973 } else if (c->argc == 7 && strcasecmp(c->argv[4]->ptr,"limit")) {
4974 addReply(c,shared.syntaxerr);
4975 return;
4976 } else if (c->argc == 7) {
4977 offset = atoi(c->argv[5]->ptr);
4978 limit = atoi(c->argv[6]->ptr);
0b13687c 4979 if (offset < 0) offset = 0;
80181f78 4980 }
50c55df5 4981
4982 o = lookupKeyRead(c->db,c->argv[1]);
4983 if (o == NULL) {
4984 addReply(c,shared.nullmultibulk);
4985 } else {
4986 if (o->type != REDIS_ZSET) {
4987 addReply(c,shared.wrongtypeerr);
4988 } else {
4989 zset *zsetobj = o->ptr;
4990 zskiplist *zsl = zsetobj->zsl;
4991 zskiplistNode *ln;
4992 robj *ele, *lenobj;
4993 unsigned int rangelen = 0;
4994
4995 /* Get the first node with the score >= min */
4996 ln = zslFirstWithScore(zsl,min);
4997 if (ln == NULL) {
4998 /* No element matching the speciifed interval */
4999 addReply(c,shared.emptymultibulk);
5000 return;
5001 }
5002
5003 /* We don't know in advance how many matching elements there
5004 * are in the list, so we push this object that will represent
5005 * the multi-bulk length in the output buffer, and will "fix"
5006 * it later */
5007 lenobj = createObject(REDIS_STRING,NULL);
5008 addReply(c,lenobj);
c74e7c77 5009 decrRefCount(lenobj);
50c55df5 5010
dbbc7285 5011 while(ln && ln->score <= max) {
80181f78 5012 if (offset) {
5013 offset--;
5014 ln = ln->forward[0];
5015 continue;
5016 }
5017 if (limit == 0) break;
50c55df5 5018 ele = ln->obj;
5019 addReplyBulkLen(c,ele);
5020 addReply(c,ele);
5021 addReply(c,shared.crlf);
5022 ln = ln->forward[0];
5023 rangelen++;
80181f78 5024 if (limit > 0) limit--;
50c55df5 5025 }
5026 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
5027 }
5028 }
5029}
5030
3c41331e 5031static void zcardCommand(redisClient *c) {
e197b441 5032 robj *o;
5033 zset *zs;
5034
5035 o = lookupKeyRead(c->db,c->argv[1]);
5036 if (o == NULL) {
5037 addReply(c,shared.czero);
5038 return;
5039 } else {
5040 if (o->type != REDIS_ZSET) {
5041 addReply(c,shared.wrongtypeerr);
5042 } else {
5043 zs = o->ptr;
682ac724 5044 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",zs->zsl->length));
e197b441 5045 }
5046 }
5047}
5048
6e333bbe 5049static void zscoreCommand(redisClient *c) {
5050 robj *o;
5051 zset *zs;
5052
5053 o = lookupKeyRead(c->db,c->argv[1]);
5054 if (o == NULL) {
96d8b4ee 5055 addReply(c,shared.nullbulk);
6e333bbe 5056 return;
5057 } else {
5058 if (o->type != REDIS_ZSET) {
5059 addReply(c,shared.wrongtypeerr);
5060 } else {
5061 dictEntry *de;
5062
5063 zs = o->ptr;
5064 de = dictFind(zs->dict,c->argv[2]);
5065 if (!de) {
5066 addReply(c,shared.nullbulk);
5067 } else {
6e333bbe 5068 double *score = dictGetEntryVal(de);
5069
e2665397 5070 addReplyDouble(c,*score);
6e333bbe 5071 }
5072 }
5073 }
5074}
5075
6b47e12e 5076/* ========================= Non type-specific commands ==================== */
5077
ed9b544e 5078static void flushdbCommand(redisClient *c) {
ca37e9cd 5079 server.dirty += dictSize(c->db->dict);
3305306f 5080 dictEmpty(c->db->dict);
5081 dictEmpty(c->db->expires);
ed9b544e 5082 addReply(c,shared.ok);
ed9b544e 5083}
5084
5085static void flushallCommand(redisClient *c) {
ca37e9cd 5086 server.dirty += emptyDb();
ed9b544e 5087 addReply(c,shared.ok);
f78fd11b 5088 rdbSave(server.dbfilename);
ca37e9cd 5089 server.dirty++;
ed9b544e 5090}
5091
56906eef 5092static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 5093 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 5094 so->type = type;
5095 so->pattern = pattern;
5096 return so;
5097}
5098
5099/* Return the value associated to the key with a name obtained
5100 * substituting the first occurence of '*' in 'pattern' with 'subst' */
56906eef 5101static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
ed9b544e 5102 char *p;
5103 sds spat, ssub;
5104 robj keyobj;
5105 int prefixlen, sublen, postfixlen;
ed9b544e 5106 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
5107 struct {
f1017b3f 5108 long len;
5109 long free;
ed9b544e 5110 char buf[REDIS_SORTKEY_MAX+1];
5111 } keyname;
5112
28173a49 5113 /* If the pattern is "#" return the substitution object itself in order
5114 * to implement the "SORT ... GET #" feature. */
5115 spat = pattern->ptr;
5116 if (spat[0] == '#' && spat[1] == '\0') {
5117 return subst;
5118 }
5119
5120 /* The substitution object may be specially encoded. If so we create
9d65a1bb 5121 * a decoded object on the fly. Otherwise getDecodedObject will just
5122 * increment the ref count, that we'll decrement later. */
5123 subst = getDecodedObject(subst);
942a3961 5124
ed9b544e 5125 ssub = subst->ptr;
5126 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
5127 p = strchr(spat,'*');
ed5a857a 5128 if (!p) {
5129 decrRefCount(subst);
5130 return NULL;
5131 }
ed9b544e 5132
5133 prefixlen = p-spat;
5134 sublen = sdslen(ssub);
5135 postfixlen = sdslen(spat)-(prefixlen+1);
5136 memcpy(keyname.buf,spat,prefixlen);
5137 memcpy(keyname.buf+prefixlen,ssub,sublen);
5138 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
5139 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
5140 keyname.len = prefixlen+sublen+postfixlen;
5141
dfc5e96c 5142 initStaticStringObject(keyobj,((char*)&keyname)+(sizeof(long)*2))
942a3961 5143 decrRefCount(subst);
5144
a4d1ba9a 5145 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3305306f 5146 return lookupKeyRead(db,&keyobj);
ed9b544e 5147}
5148
5149/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
5150 * the additional parameter is not standard but a BSD-specific we have to
5151 * pass sorting parameters via the global 'server' structure */
5152static int sortCompare(const void *s1, const void *s2) {
5153 const redisSortObject *so1 = s1, *so2 = s2;
5154 int cmp;
5155
5156 if (!server.sort_alpha) {
5157 /* Numeric sorting. Here it's trivial as we precomputed scores */
5158 if (so1->u.score > so2->u.score) {
5159 cmp = 1;
5160 } else if (so1->u.score < so2->u.score) {
5161 cmp = -1;
5162 } else {
5163 cmp = 0;
5164 }
5165 } else {
5166 /* Alphanumeric sorting */
5167 if (server.sort_bypattern) {
5168 if (!so1->u.cmpobj || !so2->u.cmpobj) {
5169 /* At least one compare object is NULL */
5170 if (so1->u.cmpobj == so2->u.cmpobj)
5171 cmp = 0;
5172 else if (so1->u.cmpobj == NULL)
5173 cmp = -1;
5174 else
5175 cmp = 1;
5176 } else {
5177 /* We have both the objects, use strcoll */
5178 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
5179 }
5180 } else {
5181 /* Compare elements directly */
9d65a1bb 5182 robj *dec1, *dec2;
5183
5184 dec1 = getDecodedObject(so1->obj);
5185 dec2 = getDecodedObject(so2->obj);
5186 cmp = strcoll(dec1->ptr,dec2->ptr);
5187 decrRefCount(dec1);
5188 decrRefCount(dec2);
ed9b544e 5189 }
5190 }
5191 return server.sort_desc ? -cmp : cmp;
5192}
5193
5194/* The SORT command is the most complex command in Redis. Warning: this code
5195 * is optimized for speed and a bit less for readability */
/* Outline of the function:
 *   1. look up argv[1]; it must hold a list, a set or a sorted set;
 *   2. parse the options ASC, DESC, ALPHA, LIMIT <start> <count>,
 *      STORE <key>, BY <pattern> and GET <pattern> (case insensitive);
 *   3. copy the elements into 'vector' and, unless sorting is skipped,
 *      load a numeric score or a comparison object for each of them;
 *   4. sort with qsort(), or pqsort() when BY is used on a sub-range;
 *   5. either reply with the selected range, or with STORE save it as a
 *      list under 'storekey' and reply with the number of stored items. */
5196static void sortCommand(redisClient *c) {
ed9b544e 5197 list *operations;
5198 int outputlen = 0;
5199 int desc = 0, alpha = 0;
5200 int limit_start = 0, limit_count = -1, start, end;
5201 int j, dontsort = 0, vectorlen;
5202 int getop = 0; /* GET operation counter */
443c6409 5203 robj *sortval, *sortby = NULL, *storekey = NULL;
ed9b544e 5204 redisSortObject *vector; /* Resulting vector to sort */
5205
5206 /* Lookup the key to sort. It must be of the right types */
3305306f 5207 sortval = lookupKeyRead(c->db,c->argv[1]);
5208 if (sortval == NULL) {
d922ae65 5209 addReply(c,shared.nullmultibulk);
ed9b544e 5210 return;
5211 }
a5eb649b 5212 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
5213 sortval->type != REDIS_ZSET)
5214 {
c937aa89 5215 addReply(c,shared.wrongtypeerr);
ed9b544e 5216 return;
5217 }
5218
5219 /* Create a list of operations to perform for every sorted element.
5220 * Operations can be GET/DEL/INCR/DECR */
/* Only REDIS_SORT_GET operations are created by the parser below. */
5221 operations = listCreate();
092dac2a 5222 listSetFreeMethod(operations,zfree);
ed9b544e 5223 j = 2;
5224
5225 /* Now we need to protect sortval incrementing its count, in the future
5226 * SORT may have options able to overwrite/delete keys during the sorting
5227 * and the sorted key itself may get destroyed */
5228 incrRefCount(sortval);
5229
5230 /* The SORT command has an SQL-alike syntax, parse it */
5231 while(j < c->argc) {
5232 int leftargs = c->argc-j-1;
5233 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
5234 desc = 0;
5235 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
5236 desc = 1;
5237 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
5238 alpha = 1;
5239 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
5240 limit_start = atoi(c->argv[j+1]->ptr);
5241 limit_count = atoi(c->argv[j+2]->ptr);
5242 j+=2;
443c6409 5243 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
5244 storekey = c->argv[j+1];
5245 j++;
ed9b544e 5246 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
5247 sortby = c->argv[j+1];
5248 /* If the BY pattern does not contain '*', i.e. it is constant,
5249 * we don't need to sort nor to lookup the weight keys. */
5250 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
5251 j++;
5252 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
5253 listAddNodeTail(operations,createSortOperation(
5254 REDIS_SORT_GET,c->argv[j+1]));
5255 getop++;
5256 j++;
ed9b544e 5257 } else {
/* Unknown option, or an option missing its arguments: undo the
 * reference taken on sortval and release the operations list. */
5258 decrRefCount(sortval);
5259 listRelease(operations);
c937aa89 5260 addReply(c,shared.syntaxerr);
ed9b544e 5261 return;
5262 }
5263 j++;
5264 }
5265
5266 /* Load the sorting vector with all the objects to sort */
a5eb649b 5267 switch(sortval->type) {
5268 case REDIS_LIST: vectorlen = listLength((list*)sortval->ptr); break;
5269 case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break;
5270 case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
dfc5e96c 5271 default: vectorlen = 0; redisAssert(0); /* Avoid GCC warning */
a5eb649b 5272 }
ed9b544e 5273 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 5274 j = 0;
a5eb649b 5275
ed9b544e 5276 if (sortval->type == REDIS_LIST) {
5277 list *list = sortval->ptr;
6208b3a7 5278 listNode *ln;
5279
5280 listRewind(list);
5281 while((ln = listYield(list))) {
ed9b544e 5282 robj *ele = ln->value;
5283 vector[j].obj = ele;
5284 vector[j].u.score = 0;
5285 vector[j].u.cmpobj = NULL;
ed9b544e 5286 j++;
5287 }
5288 } else {
a5eb649b 5289 dict *set;
ed9b544e 5290 dictIterator *di;
5291 dictEntry *setele;
5292
/* For a sorted set the elements are taken from its hash table view. */
a5eb649b 5293 if (sortval->type == REDIS_SET) {
5294 set = sortval->ptr;
5295 } else {
5296 zset *zs = sortval->ptr;
5297 set = zs->dict;
5298 }
5299
ed9b544e 5300 di = dictGetIterator(set);
ed9b544e 5301 while((setele = dictNext(di)) != NULL) {
5302 vector[j].obj = dictGetEntryKey(setele);
5303 vector[j].u.score = 0;
5304 vector[j].u.cmpobj = NULL;
5305 j++;
5306 }
5307 dictReleaseIterator(di);
5308 }
dfc5e96c 5309 redisAssert(j == vectorlen);
ed9b544e 5310
5311 /* Now it's time to load the right scores in the sorting vector */
5312 if (dontsort == 0) {
5313 for (j = 0; j < vectorlen; j++) {
5314 if (sortby) {
5315 robj *byval;
5316
3305306f 5317 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
/* A missing or non-string weight key leaves the element with the
 * initial score 0 / NULL comparison object set above. */
ed9b544e 5318 if (!byval || byval->type != REDIS_STRING) continue;
5319 if (alpha) {
9d65a1bb 5320 vector[j].u.cmpobj = getDecodedObject(byval);
ed9b544e 5321 } else {
942a3961 5322 if (byval->encoding == REDIS_ENCODING_RAW) {
5323 vector[j].u.score = strtod(byval->ptr,NULL);
5324 } else {
9d65a1bb 5325 /* Don't need to decode the object if it's
5326 * integer-encoded (the only encoding supported) so
5327 * far. We can just cast it */
f1017b3f 5328 if (byval->encoding == REDIS_ENCODING_INT) {
942a3961 5329 vector[j].u.score = (long)byval->ptr;
f1017b3f 5330 } else
dfc5e96c 5331 redisAssert(1 != 1);
942a3961 5332 }
ed9b544e 5333 }
5334 } else {
942a3961 5335 if (!alpha) {
5336 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
5337 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
5338 else {
5339 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
5340 vector[j].u.score = (long) vector[j].obj->ptr;
5341 else
dfc5e96c 5342 redisAssert(1 != 1);
942a3961 5343 }
5344 }
ed9b544e 5345 }
5346 }
5347 }
5348
5349 /* We are ready to sort the vector... perform a bit of sanity check
5350 * on the LIMIT option too. We'll use a partial version of quicksort. */
5351 start = (limit_start < 0) ? 0 : limit_start;
5352 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
/* A start past the last element is turned into an empty range:
 * end-start+1 evaluates to 0, so the output loops below do not run. */
5353 if (start >= vectorlen) {
5354 start = vectorlen-1;
5355 end = vectorlen-2;
5356 }
5357 if (end >= vectorlen) end = vectorlen-1;
5358
5359 if (dontsort == 0) {
5360 server.sort_desc = desc;
5361 server.sort_alpha = alpha;
5362 server.sort_bypattern = sortby ? 1 : 0;
/* pqsort() is used only with BY when a sub-range was requested;
 * every other case goes through the plain qsort(). */
5f5b9840 5363 if (sortby && (start != 0 || end != vectorlen-1))
5364 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
5365 else
5366 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 5367 }
5368
5369 /* Send command output to the output buffer, performing the specified
5370 * GET/DEL/INCR/DECR operations if any. */
5371 outputlen = getop ? getop*(end-start+1) : end-start+1;
443c6409 5372 if (storekey == NULL) {
5373 /* STORE option not specified, send the sorting result to client */
5374 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
5375 for (j = start; j <= end; j++) {
5376 listNode *ln;
5377 if (!getop) {
5378 addReplyBulkLen(c,vector[j].obj);
5379 addReply(c,vector[j].obj);
5380 addReply(c,shared.crlf);
5381 }
5382 listRewind(operations);
5383 while((ln = listYield(operations))) {
5384 redisSortOperation *sop = ln->value;
5385 robj *val = lookupKeyByPattern(c->db,sop->pattern,
5386 vector[j].obj);
5387
5388 if (sop->type == REDIS_SORT_GET) {
5389 if (!val || val->type != REDIS_STRING) {
5390 addReply(c,shared.nullbulk);
5391 } else {
5392 addReplyBulkLen(c,val);
5393 addReply(c,val);
5394 addReply(c,shared.crlf);
5395 }
5396 } else {
dfc5e96c 5397 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
443c6409 5398 }
5399 }
ed9b544e 5400 }
443c6409 5401 } else {
5402 robj *listObject = createListObject();
5403 list *listPtr = (list*) listObject->ptr;
5404
5405 /* STORE option specified, set the sorting result as a List object */
5406 for (j = start; j <= end; j++) {
5407 listNode *ln;
5408 if (!getop) {
5409 listAddNodeTail(listPtr,vector[j].obj);
5410 incrRefCount(vector[j].obj);
5411 }
5412 listRewind(operations);
5413 while((ln = listYield(operations))) {
5414 redisSortOperation *sop = ln->value;
5415 robj *val = lookupKeyByPattern(c->db,sop->pattern,
5416 vector[j].obj);
5417
5418 if (sop->type == REDIS_SORT_GET) {
/* A missing or non-string value is stored as an empty string. */
5419 if (!val || val->type != REDIS_STRING) {
5420 listAddNodeTail(listPtr,createStringObject("",0));
5421 } else {
5422 listAddNodeTail(listPtr,val);
5423 incrRefCount(val);
5424 }
ed9b544e 5425 } else {
dfc5e96c 5426 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
ed9b544e 5427 }
ed9b544e 5428 }
ed9b544e 5429 }
/* NOTE(review): the key reference is taken only when dictReplace()
 * returns non-zero, presumably meaning a new entry was added rather
 * than an existing one overwritten -- confirm against dictReplace(). */
121796f7 5430 if (dictReplace(c->db->dict,storekey,listObject)) {
5431 incrRefCount(storekey);
5432 }
443c6409 5433 /* Note: we add 1 because the DB is dirty anyway since even if the
5434 * SORT result is empty a new key is set and maybe the old content
5435 * replaced. */
5436 server.dirty += 1+outputlen;
5437 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
ed9b544e 5438 }
5439
5440 /* Cleanup */
5441 decrRefCount(sortval);
5442 listRelease(operations);
/* Comparison objects are obtained with getDecodedObject() only in the
 * BY + ALPHA case, so only then is there a reference to give back. */
5443 for (j = 0; j < vectorlen; j++) {
5444 if (sortby && alpha && vector[j].u.cmpobj)
5445 decrRefCount(vector[j].u.cmpobj);
5446 }
5447 zfree(vector);
5448}
5449
/* Convert an amount of bytes into a human readable string in the form
 * of 100B, 4.00K, 100.00M, 2.00G, 1.50T, and so forth.
 *
 * 's' is the output buffer, always written and NUL terminated. Values
 * below 1024 are printed as an integer followed by 'B'; larger values are
 * scaled by powers of 1024 and printed with two decimals. Anything of one
 * terabyte or more uses the 'T' suffix. The longest possible output
 * (2^64-1 bytes, "16777216.00T") takes 13 bytes including the
 * terminator, so the caller's buffer must be at least that large. */
static void bytesToHuman(char *s, unsigned long long n) {
    double d;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
    } else if (n < (1024*1024)) {
        d = (double)n/(1024);
        sprintf(s,"%.2fK",d);
    } else if (n < (1024LL*1024*1024)) {
        d = (double)n/(1024*1024);
        sprintf(s,"%.2fM",d);
    } else if (n < (1024LL*1024*1024*1024)) {
        d = (double)n/(1024LL*1024*1024);
        sprintf(s,"%.2fG",d);
    } else {
        d = (double)n/(1024LL*1024*1024*1024);
        sprintf(s,"%.2fT",d);
    }
}
5470
1c85b79f 5471/* Create the string returned by the INFO command. This is decoupled
5472 * by the INFO command itself as we need to report the same information
5473 * on memory corruption problems. */
5474static sds genRedisInfoString(void) {
ed9b544e 5475 sds info;
5476 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 5477 int j;
ec6c7a1d 5478 char hmem[64];
5479
5480 bytesToHuman(hmem,server.usedmemory);
ed9b544e 5481 info = sdscatprintf(sdsempty(),
5482 "redis_version:%s\r\n"
f1017b3f 5483 "arch_bits:%s\r\n"
7a932b74 5484 "multiplexing_api:%s\r\n"
0d7170a4 5485 "process_id:%ld\r\n"
682ac724 5486 "uptime_in_seconds:%ld\r\n"
5487 "uptime_in_days:%ld\r\n"
ed9b544e 5488 "connected_clients:%d\r\n"
5489 "connected_slaves:%d\r\n"
f86a74e9 5490 "blocked_clients:%d\r\n"
5fba9f71 5491 "used_memory:%zu\r\n"
ec6c7a1d 5492 "used_memory_human:%s\r\n"
ed9b544e 5493 "changes_since_last_save:%lld\r\n"
be2bb6b0 5494 "bgsave_in_progress:%d\r\n"
682ac724 5495 "last_save_time:%ld\r\n"
b3fad521 5496 "bgrewriteaof_in_progress:%d\r\n"
ed9b544e 5497 "total_connections_received:%lld\r\n"
5498 "total_commands_processed:%lld\r\n"
7d98e08c 5499 "vm_enabled:%d\r\n"
a0f643ea 5500 "role:%s\r\n"
ed9b544e 5501 ,REDIS_VERSION,
f1017b3f 5502 (sizeof(long) == 8) ? "64" : "32",
7a932b74 5503 aeGetApiName(),
0d7170a4 5504 (long) getpid(),
a0f643ea 5505 uptime,
5506 uptime/(3600*24),
ed9b544e 5507 listLength(server.clients)-listLength(server.slaves),
5508 listLength(server.slaves),
f86a74e9 5509 server.blockedclients,
ed9b544e 5510 server.usedmemory,
ec6c7a1d 5511 hmem,
ed9b544e 5512 server.dirty,
9d65a1bb 5513 server.bgsavechildpid != -1,
ed9b544e 5514 server.lastsave,
b3fad521 5515 server.bgrewritechildpid != -1,
ed9b544e 5516 server.stat_numconnections,
5517 server.stat_numcommands,
7d98e08c 5518 server.vm_enabled != 0,
a0f643ea 5519 server.masterhost == NULL ? "master" : "slave"
ed9b544e 5520 );
a0f643ea 5521 if (server.masterhost) {
5522 info = sdscatprintf(info,
5523 "master_host:%s\r\n"
5524 "master_port:%d\r\n"
5525 "master_link_status:%s\r\n"
5526 "master_last_io_seconds_ago:%d\r\n"
5527 ,server.masterhost,
5528 server.masterport,
5529 (server.replstate == REDIS_REPL_CONNECTED) ?
5530 "up" : "down",
f72b934d 5531 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 5532 );
5533 }
7d98e08c 5534 if (server.vm_enabled) {
5535 info = sdscatprintf(info,
5536 "vm_conf_max_memory:%llu\r\n"
5537 "vm_conf_page_size:%llu\r\n"
5538 "vm_conf_pages:%llu\r\n"
5539 "vm_stats_used_pages:%llu\r\n"
5540 "vm_stats_swapped_objects:%llu\r\n"
5541 "vm_stats_swappin_count:%llu\r\n"
5542 "vm_stats_swappout_count:%llu\r\n"
5543 ,(unsigned long long) server.vm_max_memory,
5544 (unsigned long long) server.vm_page_size,
5545 (unsigned long long) server.vm_pages,
5546 (unsigned long long) server.vm_stats_used_pages,
5547 (unsigned long long) server.vm_stats_swapped_objects,
5548 (unsigned long long) server.vm_stats_swapins,
5549 (unsigned long long) server.vm_stats_swapouts
5550 );
5551 }
c3cb078d 5552 for (j = 0; j < server.dbnum; j++) {
5553 long long keys, vkeys;
5554
5555 keys = dictSize(server.db[j].dict);
5556 vkeys = dictSize(server.db[j].expires);
5557 if (keys || vkeys) {
9d65a1bb 5558 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
c3cb078d 5559 j, keys, vkeys);
5560 }
5561 }
1c85b79f 5562 return info;
5563}
5564
5565static void infoCommand(redisClient *c) {
5566 sds info = genRedisInfoString();
83c6a618 5567 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
5568 (unsigned long)sdslen(info)));
ed9b544e 5569 addReplySds(c,info);
70003d28 5570 addReply(c,shared.crlf);
ed9b544e 5571}
5572
3305306f 5573static void monitorCommand(redisClient *c) {
5574 /* ignore MONITOR if aleady slave or in monitor mode */
5575 if (c->flags & REDIS_SLAVE) return;
5576
5577 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
5578 c->slaveseldb = 0;
6b47e12e 5579 listAddNodeTail(server.monitors,c);
3305306f 5580 addReply(c,shared.ok);
5581}
5582
5583/* ================================= Expire ================================= */
5584static int removeExpire(redisDb *db, robj *key) {
5585 if (dictDelete(db->expires,key) == DICT_OK) {
5586 return 1;
5587 } else {
5588 return 0;
5589 }
5590}
5591
5592static int setExpire(redisDb *db, robj *key, time_t when) {
5593 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
5594 return 0;
5595 } else {
5596 incrRefCount(key);
5597 return 1;
5598 }
5599}
5600
bb32ede5 5601/* Return the expire time of the specified key, or -1 if no expire
5602 * is associated with this key (i.e. the key is non volatile) */
5603static time_t getExpire(redisDb *db, robj *key) {
5604 dictEntry *de;
5605
5606 /* No expire? return ASAP */
5607 if (dictSize(db->expires) == 0 ||
5608 (de = dictFind(db->expires,key)) == NULL) return -1;
5609
5610 return (time_t) dictGetEntryVal(de);
5611}
5612
3305306f 5613static int expireIfNeeded(redisDb *db, robj *key) {
5614 time_t when;
5615 dictEntry *de;
5616
5617 /* No expire? return ASAP */
5618 if (dictSize(db->expires) == 0 ||
5619 (de = dictFind(db->expires,key)) == NULL) return 0;
5620
5621 /* Lookup the expire */
5622 when = (time_t) dictGetEntryVal(de);
5623 if (time(NULL) <= when) return 0;
5624
5625 /* Delete the key */
5626 dictDelete(db->expires,key);
5627 return dictDelete(db->dict,key) == DICT_OK;
5628}
5629
5630static int deleteIfVolatile(redisDb *db, robj *key) {
5631 dictEntry *de;
5632
5633 /* No expire? return ASAP */
5634 if (dictSize(db->expires) == 0 ||
5635 (de = dictFind(db->expires,key)) == NULL) return 0;
5636
5637 /* Delete the key */
0c66a471 5638 server.dirty++;
3305306f 5639 dictDelete(db->expires,key);
5640 return dictDelete(db->dict,key) == DICT_OK;
5641}
5642
802e8373 5643static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
3305306f 5644 dictEntry *de;
3305306f 5645
802e8373 5646 de = dictFind(c->db->dict,key);
3305306f 5647 if (de == NULL) {
5648 addReply(c,shared.czero);
5649 return;
5650 }
43e5ccdf 5651 if (seconds < 0) {
5652 if (deleteKey(c->db,key)) server.dirty++;
5653 addReply(c, shared.cone);
3305306f 5654 return;
5655 } else {
5656 time_t when = time(NULL)+seconds;
802e8373 5657 if (setExpire(c->db,key,when)) {
3305306f 5658 addReply(c,shared.cone);
77423026 5659 server.dirty++;
5660 } else {
3305306f 5661 addReply(c,shared.czero);
77423026 5662 }
3305306f 5663 return;
5664 }
5665}
5666
802e8373 5667static void expireCommand(redisClient *c) {
5668 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
5669}
5670
5671static void expireatCommand(redisClient *c) {
5672 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
5673}
5674
fd88489a 5675static void ttlCommand(redisClient *c) {
5676 time_t expire;
5677 int ttl = -1;
5678
5679 expire = getExpire(c->db,c->argv[1]);
5680 if (expire != -1) {
5681 ttl = (int) (expire-time(NULL));
5682 if (ttl < 0) ttl = -1;
5683 }
5684 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
5685}
5686
6e469882 5687/* ================================ MULTI/EXEC ============================== */
5688
5689/* Client state initialization for MULTI/EXEC */
5690static void initClientMultiState(redisClient *c) {
5691 c->mstate.commands = NULL;
5692 c->mstate.count = 0;
5693}
5694
5695/* Release all the resources associated with MULTI/EXEC state */
5696static void freeClientMultiState(redisClient *c) {
5697 int j;
5698
5699 for (j = 0; j < c->mstate.count; j++) {
5700 int i;
5701 multiCmd *mc = c->mstate.commands+j;
5702
5703 for (i = 0; i < mc->argc; i++)
5704 decrRefCount(mc->argv[i]);
5705 zfree(mc->argv);
5706 }
5707 zfree(c->mstate.commands);
5708}
5709
5710/* Add a new command into the MULTI commands queue */
5711static void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
5712 multiCmd *mc;
5713 int j;
5714
5715 c->mstate.commands = zrealloc(c->mstate.commands,
5716 sizeof(multiCmd)*(c->mstate.count+1));
5717 mc = c->mstate.commands+c->mstate.count;
5718 mc->cmd = cmd;
5719 mc->argc = c->argc;
5720 mc->argv = zmalloc(sizeof(robj*)*c->argc);
5721 memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
5722 for (j = 0; j < c->argc; j++)
5723 incrRefCount(mc->argv[j]);
5724 c->mstate.count++;
5725}
5726
5727static void multiCommand(redisClient *c) {
5728 c->flags |= REDIS_MULTI;
36c548f0 5729 addReply(c,shared.ok);
6e469882 5730}
5731
5732static void execCommand(redisClient *c) {
5733 int j;
5734 robj **orig_argv;
5735 int orig_argc;
5736
5737 if (!(c->flags & REDIS_MULTI)) {
5738 addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
5739 return;
5740 }
5741
5742 orig_argv = c->argv;
5743 orig_argc = c->argc;
5744 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
5745 for (j = 0; j < c->mstate.count; j++) {
5746 c->argc = c->mstate.commands[j].argc;
5747 c->argv = c->mstate.commands[j].argv;
5748 call(c,c->mstate.commands[j].cmd);
5749 }
5750 c->argv = orig_argv;
5751 c->argc = orig_argc;
5752 freeClientMultiState(c);
5753 initClientMultiState(c);
5754 c->flags &= (~REDIS_MULTI);
5755}
5756
4409877e 5757/* =========================== Blocking Operations ========================= */
5758
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, using BLPOP as an example:
 * - If the user calls BLPOP and the key exists and contains a non empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   when there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocked on these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocked client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
5787
5788/* Set a client in blocking mode for the specified key, with the specified
5789 * timeout */
b177fd30 5790static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
4409877e 5791 dictEntry *de;
5792 list *l;
b177fd30 5793 int j;
4409877e 5794
b177fd30 5795 c->blockingkeys = zmalloc(sizeof(robj*)*numkeys);
5796 c->blockingkeysnum = numkeys;
4409877e 5797 c->blockingto = timeout;
b177fd30 5798 for (j = 0; j < numkeys; j++) {
5799 /* Add the key in the client structure, to map clients -> keys */
5800 c->blockingkeys[j] = keys[j];
5801 incrRefCount(keys[j]);
4409877e 5802
b177fd30 5803 /* And in the other "side", to map keys -> clients */
5804 de = dictFind(c->db->blockingkeys,keys[j]);
5805 if (de == NULL) {
5806 int retval;
5807
5808 /* For every key we take a list of clients blocked for it */
5809 l = listCreate();
5810 retval = dictAdd(c->db->blockingkeys,keys[j],l);
5811 incrRefCount(keys[j]);
5812 assert(retval == DICT_OK);
5813 } else {
5814 l = dictGetEntryVal(de);
5815 }
5816 listAddNodeTail(l,c);
4409877e 5817 }
b177fd30 5818 /* Mark the client as a blocked client */
4409877e 5819 c->flags |= REDIS_BLOCKED;
5820 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
f86a74e9 5821 server.blockedclients++;
4409877e 5822}
5823
5824/* Unblock a client that's waiting in a blocking operation such as BLPOP */
5825static void unblockClient(redisClient *c) {
5826 dictEntry *de;
5827 list *l;
b177fd30 5828 int j;
4409877e 5829
b177fd30 5830 assert(c->blockingkeys != NULL);
5831 /* The client may wait for multiple keys, so unblock it for every key. */
5832 for (j = 0; j < c->blockingkeysnum; j++) {
5833 /* Remove this client from the list of clients waiting for this key. */
5834 de = dictFind(c->db->blockingkeys,c->blockingkeys[j]);
5835 assert(de != NULL);
5836 l = dictGetEntryVal(de);
5837 listDelNode(l,listSearchKey(l,c));
5838 /* If the list is empty we need to remove it to avoid wasting memory */
5839 if (listLength(l) == 0)
5840 dictDelete(c->db->blockingkeys,c->blockingkeys[j]);
5841 decrRefCount(c->blockingkeys[j]);
5842 }
5843 /* Cleanup the client structure */
5844 zfree(c->blockingkeys);
5845 c->blockingkeys = NULL;
4409877e 5846 c->flags &= (~REDIS_BLOCKED);
f86a74e9 5847 server.blockedclients--;
4409877e 5848 /* Ok now we are ready to get read events from socket, note that we
5849 * can't trap errors here as it's possible that unblockClients() is
5850 * called from freeClient() itself, and the only thing we can do
5851 * if we failed to register the READABLE event is to kill the client.
5852 * Still the following function should never fail in the real world as
5853 * we are sure the file descriptor is sane, and we exit on out of mem. */
5854 aeCreateFileEvent(server.el, c->fd, AE_READABLE, readQueryFromClient, c);
5855 /* As a final step we want to process data if there is some command waiting
5856 * in the input buffer. Note that this is safe even if unblockClient()
5857 * gets called from freeClient() because freeClient() will be smart
5858 * enough to call this function *after* c->querybuf was set to NULL. */
5859 if (c->querybuf && sdslen(c->querybuf) > 0) processInputBuffer(c);
5860}
5861
5862/* This should be called from any function PUSHing into lists.
5863 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
5864 * 'ele' is the element pushed.
5865 *
5866 * If the function returns 0 there was no client waiting for a list push
5867 * against this key.
5868 *
5869 * If the function returns 1 there was a client waiting for a list push
5870 * against this key, the element was passed to this client thus it's not
5871 * needed to actually add it to the list and the caller should return asap. */
5872static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
5873 struct dictEntry *de;
5874 redisClient *receiver;
5875 list *l;
5876 listNode *ln;
5877
5878 de = dictFind(c->db->blockingkeys,key);
5879 if (de == NULL) return 0;
5880 l = dictGetEntryVal(de);
5881 ln = listFirst(l);
5882 assert(ln != NULL);
5883 receiver = ln->value;
4409877e 5884
b177fd30 5885 addReplySds(receiver,sdsnew("*2\r\n"));
5886 addReplyBulkLen(receiver,key);
5887 addReply(receiver,key);
5888 addReply(receiver,shared.crlf);
4409877e 5889 addReplyBulkLen(receiver,ele);
5890 addReply(receiver,ele);
5891 addReply(receiver,shared.crlf);
5892 unblockClient(receiver);
5893 return 1;
5894}
5895
5896/* Blocking RPOP/LPOP */
5897static void blockingPopGenericCommand(redisClient *c, int where) {
5898 robj *o;
5899 time_t timeout;
b177fd30 5900 int j;
4409877e 5901
b177fd30 5902 for (j = 1; j < c->argc-1; j++) {
5903 o = lookupKeyWrite(c->db,c->argv[j]);
5904 if (o != NULL) {
5905 if (o->type != REDIS_LIST) {
5906 addReply(c,shared.wrongtypeerr);
4409877e 5907 return;
b177fd30 5908 } else {
5909 list *list = o->ptr;
5910 if (listLength(list) != 0) {
5911 /* If the list contains elements fall back to the usual
5912 * non-blocking POP operation */
5913 robj *argv[2], **orig_argv;
5914 int orig_argc;
5915
5916 /* We need to alter the command arguments before to call
5917 * popGenericCommand() as the command takes a single key. */
5918 orig_argv = c->argv;
5919 orig_argc = c->argc;
5920 argv[1] = c->argv[j];
5921 c->argv = argv;
5922 c->argc = 2;
5923
5924 /* Also the return value is different, we need to output
5925 * the multi bulk reply header and the key name. The
5926 * "real" command will add the last element (the value)
5927 * for us. If this souds like an hack to you it's just
5928 * because it is... */
5929 addReplySds(c,sdsnew("*2\r\n"));
5930 addReplyBulkLen(c,argv[1]);
5931 addReply(c,argv[1]);
5932 addReply(c,shared.crlf);
5933 popGenericCommand(c,where);
5934
5935 /* Fix the client structure with the original stuff */
5936 c->argv = orig_argv;
5937 c->argc = orig_argc;
5938 return;
5939 }
4409877e 5940 }
5941 }
5942 }
5943 /* If the list is empty or the key does not exists we must block */
b177fd30 5944 timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
4409877e 5945 if (timeout > 0) timeout += time(NULL);
b177fd30 5946 blockForKeys(c,c->argv+1,c->argc-2,timeout);
4409877e 5947}
5948
5949static void blpopCommand(redisClient *c) {
5950 blockingPopGenericCommand(c,REDIS_HEAD);
5951}
5952
5953static void brpopCommand(redisClient *c) {
5954 blockingPopGenericCommand(c,REDIS_TAIL);
5955}
5956
ed9b544e 5957/* =============================== Replication ============================= */
5958
a4d1ba9a 5959static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5960 ssize_t nwritten, ret = size;
5961 time_t start = time(NULL);
5962
5963 timeout++;
5964 while(size) {
5965 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
5966 nwritten = write(fd,ptr,size);
5967 if (nwritten == -1) return -1;
5968 ptr += nwritten;
5969 size -= nwritten;
5970 }
5971 if ((time(NULL)-start) > timeout) {
5972 errno = ETIMEDOUT;
5973 return -1;
5974 }
5975 }
5976 return ret;
5977}
5978
a4d1ba9a 5979static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5980 ssize_t nread, totread = 0;
5981 time_t start = time(NULL);
5982
5983 timeout++;
5984 while(size) {
5985 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
5986 nread = read(fd,ptr,size);
5987 if (nread == -1) return -1;
5988 ptr += nread;
5989 size -= nread;
5990 totread += nread;
5991 }
5992 if ((time(NULL)-start) > timeout) {
5993 errno = ETIMEDOUT;
5994 return -1;
5995 }
5996 }
5997 return totread;
5998}
5999
/* Read a line from 'fd' one byte at a time into the 'size' bytes buffer
 * 'ptr'. The trailing "\n" (and a "\r" right before it, if any) is not
 * stored and the buffer is nul terminated whenever at least one byte is
 * stored.
 *
 * Returns the number of characters read before the newline (the "\r", if
 * present, is counted), or -1 if syncRead() fails. If the buffer fills up
 * before a newline is seen, reading stops and the size-1 characters stored
 * so far are returned. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve room for the nul terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* One less free byte in the buffer: without this the loop would
         * keep storing bytes past the end of the buffer on long lines. */
        size--;
    }
    return nread;
}
6020
6021static void syncCommand(redisClient *c) {
40d224a9 6022 /* ignore SYNC if aleady slave or in monitor mode */
6023 if (c->flags & REDIS_SLAVE) return;
6024
6025 /* SYNC can't be issued when the server has pending data to send to
6026 * the client about already issued commands. We need a fresh reply
6027 * buffer registering the differences between the BGSAVE and the current
6028 * dataset, so that we can copy to other slaves if needed. */
6029 if (listLength(c->reply) != 0) {
6030 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
6031 return;
6032 }
6033
6034 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
6035 /* Here we need to check if there is a background saving operation
6036 * in progress, or if it is required to start one */
9d65a1bb 6037 if (server.bgsavechildpid != -1) {
40d224a9 6038 /* Ok a background save is in progress. Let's check if it is a good
6039 * one for replication, i.e. if there is another slave that is
6040 * registering differences since the server forked to save */
6041 redisClient *slave;
6042 listNode *ln;
6043
6208b3a7 6044 listRewind(server.slaves);
6045 while((ln = listYield(server.slaves))) {
40d224a9 6046 slave = ln->value;
6047 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 6048 }
6049 if (ln) {
6050 /* Perfect, the server is already registering differences for
6051 * another slave. Set the right state, and copy the buffer. */
6052 listRelease(c->reply);
6053 c->reply = listDup(slave->reply);
40d224a9 6054 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
6055 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
6056 } else {
6057 /* No way, we need to wait for the next BGSAVE in order to
6058 * register differences */
6059 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
6060 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
6061 }
6062 } else {
6063 /* Ok we don't have a BGSAVE in progress, let's start one */
6064 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
6065 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
6066 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
6067 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
6068 return;
6069 }
6070 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
6071 }
6208b3a7 6072 c->repldbfd = -1;
40d224a9 6073 c->flags |= REDIS_SLAVE;
6074 c->slaveseldb = 0;
6b47e12e 6075 listAddNodeTail(server.slaves,c);
40d224a9 6076 return;
6077}
6078
6208b3a7 6079static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
6080 redisClient *slave = privdata;
6081 REDIS_NOTUSED(el);
6082 REDIS_NOTUSED(mask);
6083 char buf[REDIS_IOBUF_LEN];
6084 ssize_t nwritten, buflen;
6085
6086 if (slave->repldboff == 0) {
6087 /* Write the bulk write count before to transfer the DB. In theory here
6088 * we don't know how much room there is in the output buffer of the
6089 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
6090 * operations) will never be smaller than the few bytes we need. */
6091 sds bulkcount;
6092
6093 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
6094 slave->repldbsize);
6095 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
6096 {
6097 sdsfree(bulkcount);
6098 freeClient(slave);
6099 return;
6100 }
6101 sdsfree(bulkcount);
6102 }
6103 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
6104 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
6105 if (buflen <= 0) {
6106 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
6107 (buflen == 0) ? "premature EOF" : strerror(errno));
6108 freeClient(slave);
6109 return;
6110 }
6111 if ((nwritten = write(fd,buf,buflen)) == -1) {
f870935d 6112 redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
6208b3a7 6113 strerror(errno));
6114 freeClient(slave);
6115 return;
6116 }
6117 slave->repldboff += nwritten;
6118 if (slave->repldboff == slave->repldbsize) {
6119 close(slave->repldbfd);
6120 slave->repldbfd = -1;
6121 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
6122 slave->replstate = REDIS_REPL_ONLINE;
6123 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
266373b2 6124 sendReplyToClient, slave) == AE_ERR) {
6208b3a7 6125 freeClient(slave);
6126 return;
6127 }
6128 addReplySds(slave,sdsempty());
6129 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
6130 }
6131}
ed9b544e 6132
a3b21203 6133/* This function is called at the end of every backgrond saving.
6134 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
6135 * otherwise REDIS_ERR is passed to the function.
6136 *
6137 * The goal of this function is to handle slaves waiting for a successful
6138 * background saving in order to perform non-blocking synchronization. */
6139static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 6140 listNode *ln;
6141 int startbgsave = 0;
ed9b544e 6142
6208b3a7 6143 listRewind(server.slaves);
6144 while((ln = listYield(server.slaves))) {
6145 redisClient *slave = ln->value;
ed9b544e 6146
6208b3a7 6147 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
6148 startbgsave = 1;
6149 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
6150 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 6151 struct redis_stat buf;
6208b3a7 6152
6153 if (bgsaveerr != REDIS_OK) {
6154 freeClient(slave);
6155 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
6156 continue;
6157 }
6158 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 6159 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 6160 freeClient(slave);
6161 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
6162 continue;
6163 }
6164 slave->repldboff = 0;
6165 slave->repldbsize = buf.st_size;
6166 slave->replstate = REDIS_REPL_SEND_BULK;
6167 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
266373b2 6168 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
6208b3a7 6169 freeClient(slave);
6170 continue;
6171 }
6172 }
ed9b544e 6173 }
6208b3a7 6174 if (startbgsave) {
6175 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
6176 listRewind(server.slaves);
6177 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
6178 while((ln = listYield(server.slaves))) {
6179 redisClient *slave = ln->value;
ed9b544e 6180
6208b3a7 6181 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
6182 freeClient(slave);
6183 }
6184 }
6185 }
ed9b544e 6186}
6187
6188static int syncWithMaster(void) {
d0ccebcf 6189 char buf[1024], tmpfile[256], authcmd[1024];
ed9b544e 6190 int dumpsize;
6191 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
6192 int dfd;
6193
6194 if (fd == -1) {
6195 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
6196 strerror(errno));
6197 return REDIS_ERR;
6198 }
d0ccebcf 6199
6200 /* AUTH with the master if required. */
6201 if(server.masterauth) {
6202 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
6203 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
6204 close(fd);
6205 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
6206 strerror(errno));
6207 return REDIS_ERR;
6208 }
6209 /* Read the AUTH result. */
6210 if (syncReadLine(fd,buf,1024,3600) == -1) {
6211 close(fd);
6212 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
6213 strerror(errno));
6214 return REDIS_ERR;
6215 }
6216 if (buf[0] != '+') {
6217 close(fd);
6218 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
6219 return REDIS_ERR;
6220 }
6221 }
6222
ed9b544e 6223 /* Issue the SYNC command */
6224 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
6225 close(fd);
6226 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
6227 strerror(errno));
6228 return REDIS_ERR;
6229 }
6230 /* Read the bulk write count */
8c4d91fc 6231 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 6232 close(fd);
6233 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
6234 strerror(errno));
6235 return REDIS_ERR;
6236 }
4aa701c1 6237 if (buf[0] != '$') {
6238 close(fd);
6239 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
6240 return REDIS_ERR;
6241 }
c937aa89 6242 dumpsize = atoi(buf+1);
ed9b544e 6243 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
6244 /* Read the bulk write data on a temp file */
6245 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
6246 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
6247 if (dfd == -1) {
6248 close(fd);
6249 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
6250 return REDIS_ERR;
6251 }
6252 while(dumpsize) {
6253 int nread, nwritten;
6254
6255 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
6256 if (nread == -1) {
6257 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
6258 strerror(errno));
6259 close(fd);
6260 close(dfd);
6261 return REDIS_ERR;
6262 }
6263 nwritten = write(dfd,buf,nread);
6264 if (nwritten == -1) {
6265 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
6266 close(fd);
6267 close(dfd);
6268 return REDIS_ERR;
6269 }
6270 dumpsize -= nread;
6271 }
6272 close(dfd);
6273 if (rename(tmpfile,server.dbfilename) == -1) {
6274 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
6275 unlink(tmpfile);
6276 close(fd);
6277 return REDIS_ERR;
6278 }
6279 emptyDb();
f78fd11b 6280 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 6281 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
6282 close(fd);
6283 return REDIS_ERR;
6284 }
6285 server.master = createClient(fd);
6286 server.master->flags |= REDIS_MASTER;
179b3952 6287 server.master->authenticated = 1;
ed9b544e 6288 server.replstate = REDIS_REPL_CONNECTED;
6289 return REDIS_OK;
6290}
6291
321b0e13 6292static void slaveofCommand(redisClient *c) {
6293 if (!strcasecmp(c->argv[1]->ptr,"no") &&
6294 !strcasecmp(c->argv[2]->ptr,"one")) {
6295 if (server.masterhost) {
6296 sdsfree(server.masterhost);
6297 server.masterhost = NULL;
6298 if (server.master) freeClient(server.master);
6299 server.replstate = REDIS_REPL_NONE;
6300 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
6301 }
6302 } else {
6303 sdsfree(server.masterhost);
6304 server.masterhost = sdsdup(c->argv[1]->ptr);
6305 server.masterport = atoi(c->argv[2]->ptr);
6306 if (server.master) freeClient(server.master);
6307 server.replstate = REDIS_REPL_CONNECT;
6308 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
6309 server.masterhost, server.masterport);
6310 }
6311 addReply(c,shared.ok);
6312}
6313
3fd78bcd 6314/* ============================ Maxmemory directive ======================== */
6315
f870935d 6316/* Free one object form the pre-allocated objects free list. This is useful
6317 * under low mem conditions as by default we take 1 million free objects
6318 * allocated. */
6319static void freeOneObjectFromFreelist(void) {
6320 robj *o;
6321
6322 listNode *head = listFirst(server.objfreelist);
6323 o = listNodeValue(head);
6324 listDelNode(server.objfreelist,head);
6325 zfree(o);
6326}
6327
3fd78bcd 6328/* This function gets called when 'maxmemory' is set on the config file to limit
6329 * the max memory used by the server, and we are out of memory.
6330 * This function will try to, in order:
6331 *
6332 * - Free objects from the free list
6333 * - Try to remove keys with an EXPIRE set
6334 *
6335 * It is not possible to free enough memory to reach used-memory < maxmemory
6336 * the server will start refusing commands that will enlarge even more the
6337 * memory usage.
6338 */
6339static void freeMemoryIfNeeded(void) {
6340 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
6341 if (listLength(server.objfreelist)) {
f870935d 6342 freeOneObjectFromFreelist();
3fd78bcd 6343 } else {
6344 int j, k, freed = 0;
6345
6346 for (j = 0; j < server.dbnum; j++) {
6347 int minttl = -1;
6348 robj *minkey = NULL;
6349 struct dictEntry *de;
6350
6351 if (dictSize(server.db[j].expires)) {
6352 freed = 1;
6353 /* From a sample of three keys drop the one nearest to
6354 * the natural expire */
6355 for (k = 0; k < 3; k++) {
6356 time_t t;
6357
6358 de = dictGetRandomKey(server.db[j].expires);
6359 t = (time_t) dictGetEntryVal(de);
6360 if (minttl == -1 || t < minttl) {
6361 minkey = dictGetEntryKey(de);
6362 minttl = t;
6363 }
6364 }
6365 deleteKey(server.db+j,minkey);
6366 }
6367 }
6368 if (!freed) return; /* nothing to free... */
6369 }
6370 }
6371}
6372
f80dff62 6373/* ============================== Append Only file ========================== */
6374
6375static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6376 sds buf = sdsempty();
6377 int j;
6378 ssize_t nwritten;
6379 time_t now;
6380 robj *tmpargv[3];
6381
6382 /* The DB this command was targetting is not the same as the last command
6383 * we appendend. To issue a SELECT command is needed. */
6384 if (dictid != server.appendseldb) {
6385 char seldb[64];
6386
6387 snprintf(seldb,sizeof(seldb),"%d",dictid);
682ac724 6388 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
83c6a618 6389 (unsigned long)strlen(seldb),seldb);
f80dff62 6390 server.appendseldb = dictid;
6391 }
6392
6393 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
6394 * EXPIREs into EXPIREATs calls */
6395 if (cmd->proc == expireCommand) {
6396 long when;
6397
6398 tmpargv[0] = createStringObject("EXPIREAT",8);
6399 tmpargv[1] = argv[1];
6400 incrRefCount(argv[1]);
6401 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
6402 tmpargv[2] = createObject(REDIS_STRING,
6403 sdscatprintf(sdsempty(),"%ld",when));
6404 argv = tmpargv;
6405 }
6406
6407 /* Append the actual command */
6408 buf = sdscatprintf(buf,"*%d\r\n",argc);
6409 for (j = 0; j < argc; j++) {
6410 robj *o = argv[j];
6411
9d65a1bb 6412 o = getDecodedObject(o);
83c6a618 6413 buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
f80dff62 6414 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
6415 buf = sdscatlen(buf,"\r\n",2);
9d65a1bb 6416 decrRefCount(o);
f80dff62 6417 }
6418
6419 /* Free the objects from the modified argv for EXPIREAT */
6420 if (cmd->proc == expireCommand) {
6421 for (j = 0; j < 3; j++)
6422 decrRefCount(argv[j]);
6423 }
6424
6425 /* We want to perform a single write. This should be guaranteed atomic
6426 * at least if the filesystem we are writing is a real physical one.
6427 * While this will save us against the server being killed I don't think
6428 * there is much to do about the whole server stopping for power problems
6429 * or alike */
6430 nwritten = write(server.appendfd,buf,sdslen(buf));
6431 if (nwritten != (signed)sdslen(buf)) {
6432 /* Ooops, we are in troubles. The best thing to do for now is
6433 * to simply exit instead to give the illusion that everything is
6434 * working as expected. */
6435 if (nwritten == -1) {
6436 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
6437 } else {
6438 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
6439 }
6440 exit(1);
6441 }
85a83172 6442 /* If a background append only file rewriting is in progress we want to
6443 * accumulate the differences between the child DB and the current one
6444 * in a buffer, so that when the child process will do its work we
6445 * can append the differences to the new append only file. */
6446 if (server.bgrewritechildpid != -1)
6447 server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
6448
6449 sdsfree(buf);
f80dff62 6450 now = time(NULL);
6451 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
6452 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
6453 now-server.lastfsync > 1))
6454 {
6455 fsync(server.appendfd); /* Let's try to get this data on the disk */
6456 server.lastfsync = now;
6457 }
6458}
6459
6460/* In Redis commands are always executed in the context of a client, so in
6461 * order to load the append only file we need to create a fake client. */
6462static struct redisClient *createFakeClient(void) {
6463 struct redisClient *c = zmalloc(sizeof(*c));
6464
6465 selectDb(c,0);
6466 c->fd = -1;
6467 c->querybuf = sdsempty();
6468 c->argc = 0;
6469 c->argv = NULL;
6470 c->flags = 0;
9387d17d 6471 /* We set the fake client as a slave waiting for the synchronization
6472 * so that Redis will not try to send replies to this client. */
6473 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
f80dff62 6474 c->reply = listCreate();
6475 listSetFreeMethod(c->reply,decrRefCount);
6476 listSetDupMethod(c->reply,dupClientReplyValue);
6477 return c;
6478}
6479
6480static void freeFakeClient(struct redisClient *c) {
6481 sdsfree(c->querybuf);
6482 listRelease(c->reply);
6483 zfree(c);
6484}
6485
6486/* Replay the append log file. On error REDIS_OK is returned. On non fatal
6487 * error (the append only file is zero-length) REDIS_ERR is returned. On
6488 * fatal error an error message is logged and the program exists. */
6489int loadAppendOnlyFile(char *filename) {
6490 struct redisClient *fakeClient;
6491 FILE *fp = fopen(filename,"r");
6492 struct redis_stat sb;
b492cf00 6493 unsigned long long loadedkeys = 0;
f80dff62 6494
6495 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
6496 return REDIS_ERR;
6497
6498 if (fp == NULL) {
6499 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
6500 exit(1);
6501 }
6502
6503 fakeClient = createFakeClient();
6504 while(1) {
6505 int argc, j;
6506 unsigned long len;
6507 robj **argv;
6508 char buf[128];
6509 sds argsds;
6510 struct redisCommand *cmd;
6511
6512 if (fgets(buf,sizeof(buf),fp) == NULL) {
6513 if (feof(fp))
6514 break;
6515 else
6516 goto readerr;
6517 }
6518 if (buf[0] != '*') goto fmterr;
6519 argc = atoi(buf+1);
6520 argv = zmalloc(sizeof(robj*)*argc);
6521 for (j = 0; j < argc; j++) {
6522 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
6523 if (buf[0] != '$') goto fmterr;
6524 len = strtol(buf+1,NULL,10);
6525 argsds = sdsnewlen(NULL,len);
0f151ef1 6526 if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
f80dff62 6527 argv[j] = createObject(REDIS_STRING,argsds);
6528 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
6529 }
6530
6531 /* Command lookup */
6532 cmd = lookupCommand(argv[0]->ptr);
6533 if (!cmd) {
6534 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
6535 exit(1);
6536 }
6537 /* Try object sharing and encoding */
6538 if (server.shareobjects) {
6539 int j;
6540 for(j = 1; j < argc; j++)
6541 argv[j] = tryObjectSharing(argv[j]);
6542 }
6543 if (cmd->flags & REDIS_CMD_BULK)
6544 tryObjectEncoding(argv[argc-1]);
6545 /* Run the command in the context of a fake client */
6546 fakeClient->argc = argc;
6547 fakeClient->argv = argv;
6548 cmd->proc(fakeClient);
6549 /* Discard the reply objects list from the fake client */
6550 while(listLength(fakeClient->reply))
6551 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
6552 /* Clean up, ready for the next command */
6553 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
6554 zfree(argv);
b492cf00 6555 /* Handle swapping while loading big datasets when VM is on */
6556 loadedkeys++;
6557 if (server.vm_enabled && (loadedkeys % 5000) == 0) {
6558 while (zmalloc_used_memory() > server.vm_max_memory) {
6559 if (vmSwapOneObject() == REDIS_ERR) break;
6560 }
6561 }
f80dff62 6562 }
6563 fclose(fp);
6564 freeFakeClient(fakeClient);
6565 return REDIS_OK;
6566
6567readerr:
6568 if (feof(fp)) {
6569 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
6570 } else {
6571 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
6572 }
6573 exit(1);
6574fmterr:
6575 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
6576 exit(1);
6577}
6578
9d65a1bb 6579/* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
6580static int fwriteBulk(FILE *fp, robj *obj) {
6581 char buf[128];
6582 obj = getDecodedObject(obj);
6583 snprintf(buf,sizeof(buf),"$%ld\r\n",(long)sdslen(obj->ptr));
6584 if (fwrite(buf,strlen(buf),1,fp) == 0) goto err;
e96e4fbf 6585 if (sdslen(obj->ptr) && fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0)
6586 goto err;
9d65a1bb 6587 if (fwrite("\r\n",2,1,fp) == 0) goto err;
6588 decrRefCount(obj);
6589 return 1;
6590err:
6591 decrRefCount(obj);
6592 return 0;
6593}
6594
/* Write the double 'd' on 'fp' using the bulk format
 * $<count>\r\n<payload>\r\n, where the payload is the "%.17g" representation
 * of the value. Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is rendered together with its trailing CRLF, that is not
     * part of the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
6605
/* Write the long 'l' on 'fp' using the bulk format
 * $<count>\r\n<payload>\r\n, where the payload is the decimal representation
 * of the value. Returns 1 on success, 0 on write error. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t paylen;

    /* The payload is rendered together with its trailing CRLF, that is not
     * part of the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    paylen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)paylen-2);
    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,paylen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
6616
6617/* Write a sequence of commands able to fully rebuild the dataset into
6618 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
6619static int rewriteAppendOnlyFile(char *filename) {
6620 dictIterator *di = NULL;
6621 dictEntry *de;
6622 FILE *fp;
6623 char tmpfile[256];
6624 int j;
6625 time_t now = time(NULL);
6626
6627 /* Note that we have to use a different temp name here compared to the
6628 * one used by rewriteAppendOnlyFileBackground() function. */
6629 snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
6630 fp = fopen(tmpfile,"w");
6631 if (!fp) {
6632 redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
6633 return REDIS_ERR;
6634 }
6635 for (j = 0; j < server.dbnum; j++) {
6636 char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
6637 redisDb *db = server.db+j;
6638 dict *d = db->dict;
6639 if (dictSize(d) == 0) continue;
6640 di = dictGetIterator(d);
6641 if (!di) {
6642 fclose(fp);
6643 return REDIS_ERR;
6644 }
6645
6646 /* SELECT the new DB */
6647 if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
85a83172 6648 if (fwriteBulkLong(fp,j) == 0) goto werr;
9d65a1bb 6649
6650 /* Iterate this DB writing every entry */
6651 while((de = dictNext(di)) != NULL) {
e7546c63 6652 robj *key, *o;
6653 time_t expiretime;
6654 int swapped;
6655
6656 key = dictGetEntryKey(de);
38823f08 6657 if (!server.vm_enabled || key->storage == REDIS_VM_MEMORY) {
e7546c63 6658 o = dictGetEntryVal(de);
6659 swapped = 0;
6660 } else {
6661 o = vmPreviewObject(key);
6662 key = dupStringObject(key);
6663 swapped = 1;
6664 }
6665 expiretime = getExpire(db,key);
9d65a1bb 6666
6667 /* Save the key and associated value */
9d65a1bb 6668 if (o->type == REDIS_STRING) {
6669 /* Emit a SET command */
6670 char cmd[]="*3\r\n$3\r\nSET\r\n";
6671 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6672 /* Key and value */
6673 if (fwriteBulk(fp,key) == 0) goto werr;
6674 if (fwriteBulk(fp,o) == 0) goto werr;
6675 } else if (o->type == REDIS_LIST) {
6676 /* Emit the RPUSHes needed to rebuild the list */
6677 list *list = o->ptr;
6678 listNode *ln;
6679
6680 listRewind(list);
6681 while((ln = listYield(list))) {
6682 char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
6683 robj *eleobj = listNodeValue(ln);
6684
6685 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6686 if (fwriteBulk(fp,key) == 0) goto werr;
6687 if (fwriteBulk(fp,eleobj) == 0) goto werr;
6688 }
6689 } else if (o->type == REDIS_SET) {
6690 /* Emit the SADDs needed to rebuild the set */
6691 dict *set = o->ptr;
6692 dictIterator *di = dictGetIterator(set);
6693 dictEntry *de;
6694
6695 while((de = dictNext(di)) != NULL) {
6696 char cmd[]="*3\r\n$4\r\nSADD\r\n";
6697 robj *eleobj = dictGetEntryKey(de);
6698
6699 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6700 if (fwriteBulk(fp,key) == 0) goto werr;
6701 if (fwriteBulk(fp,eleobj) == 0) goto werr;
6702 }
6703 dictReleaseIterator(di);
6704 } else if (o->type == REDIS_ZSET) {
6705 /* Emit the ZADDs needed to rebuild the sorted set */
6706 zset *zs = o->ptr;
6707 dictIterator *di = dictGetIterator(zs->dict);
6708 dictEntry *de;
6709
6710 while((de = dictNext(di)) != NULL) {
6711 char cmd[]="*4\r\n$4\r\nZADD\r\n";
6712 robj *eleobj = dictGetEntryKey(de);
6713 double *score = dictGetEntryVal(de);
6714
6715 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6716 if (fwriteBulk(fp,key) == 0) goto werr;
6717 if (fwriteBulkDouble(fp,*score) == 0) goto werr;
6718 if (fwriteBulk(fp,eleobj) == 0) goto werr;
6719 }
6720 dictReleaseIterator(di);
6721 } else {
dfc5e96c 6722 redisAssert(0 != 0);
9d65a1bb 6723 }
6724 /* Save the expire time */
6725 if (expiretime != -1) {
e96e4fbf 6726 char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
9d65a1bb 6727 /* If this key is already expired skip it */
6728 if (expiretime < now) continue;
6729 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6730 if (fwriteBulk(fp,key) == 0) goto werr;
6731 if (fwriteBulkLong(fp,expiretime) == 0) goto werr;
6732 }
e7546c63 6733 /* We created a few temp objects if the key->value pair
6734 * was about a swapped out object. Free both. */
6735 if (swapped) {
6736 decrRefCount(key);
6737 decrRefCount(o);
6738 }
9d65a1bb 6739 }
6740 dictReleaseIterator(di);
6741 }
6742
6743 /* Make sure data will not remain on the OS's output buffers */
6744 fflush(fp);
6745 fsync(fileno(fp));
6746 fclose(fp);
6747
6748 /* Use RENAME to make sure the DB file is changed atomically only
6749 * if the generate DB file is ok. */
6750 if (rename(tmpfile,filename) == -1) {
6751 redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
6752 unlink(tmpfile);
6753 return REDIS_ERR;
6754 }
6755 redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
6756 return REDIS_OK;
6757
6758werr:
6759 fclose(fp);
6760 unlink(tmpfile);
e96e4fbf 6761 redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
9d65a1bb 6762 if (di) dictReleaseIterator(di);
6763 return REDIS_ERR;
6764}
6765
6766/* This is how rewriting of the append only file in background works:
6767 *
6768 * 1) The user calls BGREWRITEAOF
6769 * 2) Redis calls this function, that forks():
6770 * 2a) the child rewrite the append only file in a temp file.
6771 * 2b) the parent accumulates differences in server.bgrewritebuf.
6772 * 3) When the child finished '2a' exists.
6773 * 4) The parent will trap the exit code, if it's OK, will append the
6774 * data accumulated into server.bgrewritebuf into the temp file, and
6775 * finally will rename(2) the temp file in the actual file name.
6776 * The the new file is reopened as the new append only file. Profit!
6777 */
6778static int rewriteAppendOnlyFileBackground(void) {
6779 pid_t childpid;
6780
6781 if (server.bgrewritechildpid != -1) return REDIS_ERR;
6782 if ((childpid = fork()) == 0) {
6783 /* Child */
6784 char tmpfile[256];
6785 close(server.fd);
6786
6787 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
6788 if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
6789 exit(0);
6790 } else {
6791 exit(1);
6792 }
6793 } else {
6794 /* Parent */
6795 if (childpid == -1) {
6796 redisLog(REDIS_WARNING,
6797 "Can't rewrite append only file in background: fork: %s",
6798 strerror(errno));
6799 return REDIS_ERR;
6800 }
6801 redisLog(REDIS_NOTICE,
6802 "Background append only file rewriting started by pid %d",childpid);
6803 server.bgrewritechildpid = childpid;
85a83172 6804 /* We set appendseldb to -1 in order to force the next call to the
6805 * feedAppendOnlyFile() to issue a SELECT command, so the differences
6806 * accumulated by the parent into server.bgrewritebuf will start
6807 * with a SELECT statement and it will be safe to merge. */
6808 server.appendseldb = -1;
9d65a1bb 6809 return REDIS_OK;
6810 }
6811 return REDIS_OK; /* unreached */
6812}
6813
6814static void bgrewriteaofCommand(redisClient *c) {
6815 if (server.bgrewritechildpid != -1) {
6816 addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
6817 return;
6818 }
6819 if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
49b99ab4 6820 char *status = "+Background append only file rewriting started\r\n";
6821 addReplySds(c,sdsnew(status));
9d65a1bb 6822 } else {
6823 addReply(c,shared.err);
6824 }
6825}
6826
/* Remove the temp file used by the background append only file rewrite
 * performed by the child with pid 'childpid'. The file name is built the
 * same way as in rewriteAppendOnlyFileBackground(). */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
6833
75680a3c 6834/* =============================== Virtual Memory =========================== */
6835static void vmInit(void) {
6836 off_t totsize;
6837
6838 server.vm_fp = fopen("/tmp/redisvm","w+b");
6839 if (server.vm_fp == NULL) {
6840 redisLog(REDIS_WARNING,"Impossible to open the swap file. Exiting.");
6841 exit(1);
6842 }
6843 server.vm_fd = fileno(server.vm_fp);
6844 server.vm_next_page = 0;
6845 server.vm_near_pages = 0;
7d98e08c 6846 server.vm_stats_used_pages = 0;
6847 server.vm_stats_swapped_objects = 0;
6848 server.vm_stats_swapouts = 0;
6849 server.vm_stats_swapins = 0;
75680a3c 6850 totsize = server.vm_pages*server.vm_page_size;
6851 redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",totsize);
6852 if (ftruncate(server.vm_fd,totsize) == -1) {
6853 redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
6854 strerror(errno));
6855 exit(1);
6856 } else {
6857 redisLog(REDIS_NOTICE,"Swap file allocated with success");
6858 }
7d30035d 6859 server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
f870935d 6860 redisLog(REDIS_VERBOSE,"Allocated %lld bytes page table for %lld pages",
4ef8de8a 6861 (long long) (server.vm_pages+7)/8, server.vm_pages);
7d30035d 6862 memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
75680a3c 6863 /* Try to remove the swap file, so the OS will really delete it from the
6864 * file system when Redis exists. */
6865 unlink("/tmp/redisvm");
92f8e882 6866
6867 /* Initialize threaded I/O */
6868 server.io_jobs = listCreate();
6869 server.io_done = listCreate();
6870 server.io_clients = listCreate();
6871 pthread_mutex_init(&server.io_mutex,NULL);
6872 server.io_active_threads = 0;
75680a3c 6873}
6874
06224fec 6875/* Mark the page as used */
6876static void vmMarkPageUsed(off_t page) {
6877 off_t byte = page/8;
6878 int bit = page&7;
6879 server.vm_bitmap[byte] |= 1<<bit;
f870935d 6880 redisLog(REDIS_DEBUG,"Mark used: %lld (byte:%lld bit:%d)\n",
6881 (long long)page, (long long)byte, bit);
06224fec 6882}
6883
6884/* Mark N contiguous pages as used, with 'page' being the first. */
6885static void vmMarkPagesUsed(off_t page, off_t count) {
6886 off_t j;
6887
6888 for (j = 0; j < count; j++)
7d30035d 6889 vmMarkPageUsed(page+j);
7d98e08c 6890 server.vm_stats_used_pages += count;
06224fec 6891}
6892
6893/* Mark the page as free */
6894static void vmMarkPageFree(off_t page) {
6895 off_t byte = page/8;
6896 int bit = page&7;
6897 server.vm_bitmap[byte] &= ~(1<<bit);
6898}
6899
6900/* Mark N contiguous pages as free, with 'page' being the first. */
6901static void vmMarkPagesFree(off_t page, off_t count) {
6902 off_t j;
6903
6904 for (j = 0; j < count; j++)
7d30035d 6905 vmMarkPageFree(page+j);
7d98e08c 6906 server.vm_stats_used_pages -= count;
06224fec 6907}
6908
6909/* Test if the page is free */
6910static int vmFreePage(off_t page) {
6911 off_t byte = page/8;
6912 int bit = page&7;
7d30035d 6913 return (server.vm_bitmap[byte] & (1<<bit)) == 0;
06224fec 6914}
6915
6916/* Find N contiguous free pages storing the first page of the cluster in *first.
3a66edc7 6917 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
6918 * REDIS_ERR is returned.
06224fec 6919 *
6920 * This function uses a simple algorithm: we try to allocate
6921 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
6922 * again from the start of the swap file searching for free spaces.
6923 *
6924 * If it looks pretty clear that there are no free pages near our offset
6925 * we try to find less populated places doing a forward jump of
6926 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
6927 * without hurry, and then we jump again and so forth...
6928 *
6929 * This function can be improved using a free list to avoid to guess
6930 * too much, since we could collect data about freed pages.
6931 *
6932 * note: I implemented this function just after watching an episode of
6933 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
6934 */
6935static int vmFindContiguousPages(off_t *first, int n) {
6936 off_t base, offset = 0, since_jump = 0, numfree = 0;
6937
6938 if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
6939 server.vm_near_pages = 0;
6940 server.vm_next_page = 0;
6941 }
6942 server.vm_near_pages++; /* Yet another try for pages near to the old ones */
6943 base = server.vm_next_page;
6944
6945 while(offset < server.vm_pages) {
6946 off_t this = base+offset;
6947
f870935d 6948 redisLog(REDIS_DEBUG, "THIS: %lld (%c)\n", (long long) this, vmFreePage(this) ? 'F' : 'X');
06224fec 6949 /* If we overflow, restart from page zero */
6950 if (this >= server.vm_pages) {
6951 this -= server.vm_pages;
6952 if (this == 0) {
6953 /* Just overflowed, what we found on tail is no longer
6954 * interesting, as it's no longer contiguous. */
6955 numfree = 0;
6956 }
6957 }
6958 if (vmFreePage(this)) {
6959 /* This is a free page */
6960 numfree++;
6961 /* Already got N free pages? Return to the caller, with success */
6962 if (numfree == n) {
7d30035d 6963 *first = this-(n-1);
6964 server.vm_next_page = this+1;
3a66edc7 6965 return REDIS_OK;
06224fec 6966 }
6967 } else {
6968 /* The current one is not a free page */
6969 numfree = 0;
6970 }
6971
6972 /* Fast-forward if the current page is not free and we already
6973 * searched enough near this place. */
6974 since_jump++;
6975 if (!numfree && since_jump >= REDIS_VM_MAX_RANDOM_JUMP/4) {
6976 offset += random() % REDIS_VM_MAX_RANDOM_JUMP;
6977 since_jump = 0;
6978 /* Note that even if we rewind after the jump, we are don't need
6979 * to make sure numfree is set to zero as we only jump *if* it
6980 * is set to zero. */
6981 } else {
6982 /* Otherwise just check the next page */
6983 offset++;
6984 }
6985 }
3a66edc7 6986 return REDIS_ERR;
6987}
6988
6989/* Swap the 'val' object relative to 'key' into disk. Store all the information
6990 * needed to later retrieve the object into the key object.
6991 * If we can't find enough contiguous empty pages to swap the object on disk
6992 * REDIS_ERR is returned. */
6993static int vmSwapObject(robj *key, robj *val) {
6994 off_t pages = rdbSavedObjectPages(val);
6995 off_t page;
6996
6997 assert(key->storage == REDIS_VM_MEMORY);
4ef8de8a 6998 assert(key->refcount == 1);
3a66edc7 6999 if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return REDIS_ERR;
7000 if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
7001 redisLog(REDIS_WARNING,
7002 "Critical VM problem in vmSwapObject(): can't seek: %s",
7003 strerror(errno));
7004 return REDIS_ERR;
7005 }
7006 rdbSaveObject(server.vm_fp,val);
7007 key->vm.page = page;
7008 key->vm.usedpages = pages;
7009 key->storage = REDIS_VM_SWAPPED;
d894161b 7010 key->vtype = val->type;
3a66edc7 7011 decrRefCount(val); /* Deallocate the object from memory. */
7012 vmMarkPagesUsed(page,pages);
7d30035d 7013 redisLog(REDIS_DEBUG,"VM: object %s swapped out at %lld (%lld pages)",
7014 (unsigned char*) key->ptr,
7015 (unsigned long long) page, (unsigned long long) pages);
7d98e08c 7016 server.vm_stats_swapped_objects++;
7017 server.vm_stats_swapouts++;
0841cc92 7018 fflush(server.vm_fp);
3a66edc7 7019 return REDIS_OK;
7020}
7021
7022/* Load the value object relative to the 'key' object from swap to memory.
7e69548d 7023 * The newly allocated object is returned.
7024 *
7025 * If preview is true the unserialized object is returned to the caller but
7026 * no changes are made to the key object, nor the pages are marked as freed */
7027static robj *vmGenericLoadObject(robj *key, int preview) {
3a66edc7 7028 robj *val;
7029
38823f08 7030 redisAssert(key->storage == REDIS_VM_SWAPPED);
3a66edc7 7031 if (fseeko(server.vm_fp,key->vm.page*server.vm_page_size,SEEK_SET) == -1) {
7032 redisLog(REDIS_WARNING,
7033 "Unrecoverable VM problem in vmLoadObject(): can't seek: %s",
7034 strerror(errno));
7035 exit(1);
7036 }
d894161b 7037 val = rdbLoadObject(key->vtype,server.vm_fp);
3a66edc7 7038 if (val == NULL) {
7039 redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmLoadObject(): can't load object from swap file: %s", strerror(errno));
7040 exit(1);
7041 }
7e69548d 7042 if (!preview) {
7043 key->storage = REDIS_VM_MEMORY;
7044 key->vm.atime = server.unixtime;
7045 vmMarkPagesFree(key->vm.page,key->vm.usedpages);
7046 redisLog(REDIS_DEBUG, "VM: object %s loaded from disk",
7047 (unsigned char*) key->ptr);
7d98e08c 7048 server.vm_stats_swapped_objects--;
38aba9a1 7049 } else {
7050 redisLog(REDIS_DEBUG, "VM: object %s previewed from disk",
7051 (unsigned char*) key->ptr);
7e69548d 7052 }
7d98e08c 7053 server.vm_stats_swapins++;
3a66edc7 7054 return val;
06224fec 7055}
7056
7e69548d 7057/* Plain object loading, from swap to memory */
7058static robj *vmLoadObject(robj *key) {
7059 return vmGenericLoadObject(key,0);
7060}
7061
7062/* Just load the value on disk, without to modify the key.
7063 * This is useful when we want to perform some operation on the value
7064 * without to really bring it from swap to memory, like while saving the
7065 * dataset or rewriting the append only log. */
7066static robj *vmPreviewObject(robj *key) {
7067 return vmGenericLoadObject(key,1);
7068}
7069
4ef8de8a 7070/* How a good candidate is this object for swapping?
7071 * The better candidate it is, the greater the returned value.
7072 *
7073 * Currently we try to perform a fast estimation of the object size in
7074 * memory, and combine it with aging informations.
7075 *
7076 * Basically swappability = idle-time * log(estimated size)
7077 *
7078 * Bigger objects are preferred over smaller objects, but not
7079 * proportionally, this is why we use the logarithm. This algorithm is
7080 * just a first try and will probably be tuned later. */
7081static double computeObjectSwappability(robj *o) {
7082 time_t age = server.unixtime - o->vm.atime;
7083 long asize = 0;
7084 list *l;
7085 dict *d;
7086 struct dictEntry *de;
7087 int z;
7088
7089 if (age <= 0) return 0;
7090 switch(o->type) {
7091 case REDIS_STRING:
7092 if (o->encoding != REDIS_ENCODING_RAW) {
7093 asize = sizeof(*o);
7094 } else {
7095 asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
7096 }
7097 break;
7098 case REDIS_LIST:
7099 l = o->ptr;
7100 listNode *ln = listFirst(l);
7101
7102 asize = sizeof(list);
7103 if (ln) {
7104 robj *ele = ln->value;
7105 long elesize;
7106
7107 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
7108 (sizeof(*o)+sdslen(ele->ptr)) :
7109 sizeof(*o);
7110 asize += (sizeof(listNode)+elesize)*listLength(l);
7111 }
7112 break;
7113 case REDIS_SET:
7114 case REDIS_ZSET:
7115 z = (o->type == REDIS_ZSET);
7116 d = z ? ((zset*)o->ptr)->dict : o->ptr;
7117
7118 asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
7119 if (z) asize += sizeof(zset)-sizeof(dict);
7120 if (dictSize(d)) {
7121 long elesize;
7122 robj *ele;
7123
7124 de = dictGetRandomKey(d);
7125 ele = dictGetEntryKey(de);
7126 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
7127 (sizeof(*o)+sdslen(ele->ptr)) :
7128 sizeof(*o);
7129 asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
7130 if (z) asize += sizeof(zskiplistNode)*dictSize(d);
7131 }
7132 break;
7133 }
7134 return (double)asize*log(1+asize);
7135}
7136
7137/* Try to swap an object that's a good candidate for swapping.
7138 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
7139 * to swap any object at all. */
7140static int vmSwapOneObject(void) {
7141 int j, i;
7142 struct dictEntry *best = NULL;
7143 double best_swappability = 0;
7144 robj *key, *val;
7145
7146 for (j = 0; j < server.dbnum; j++) {
7147 redisDb *db = server.db+j;
e3cadb8a 7148 int maxtries = 1000;
4ef8de8a 7149
7150 if (dictSize(db->dict) == 0) continue;
7151 for (i = 0; i < 5; i++) {
7152 dictEntry *de;
7153 double swappability;
7154
e3cadb8a 7155 if (maxtries) maxtries--;
4ef8de8a 7156 de = dictGetRandomKey(db->dict);
7157 key = dictGetEntryKey(de);
7158 val = dictGetEntryVal(de);
e3cadb8a 7159 if (key->storage != REDIS_VM_MEMORY) {
7160 if (maxtries) i--; /* don't count this try */
7161 continue;
7162 }
4ef8de8a 7163 swappability = computeObjectSwappability(val);
7164 if (!best || swappability > best_swappability) {
7165 best = de;
7166 best_swappability = swappability;
7167 }
7168 }
7169 }
e3cadb8a 7170 if (best == NULL) {
7171 redisLog(REDIS_DEBUG,"No swappable key found!");
7172 return REDIS_ERR;
7173 }
4ef8de8a 7174 key = dictGetEntryKey(best);
7175 val = dictGetEntryVal(best);
7176
e3cadb8a 7177 redisLog(REDIS_DEBUG,"Key with best swappability: %s, %f",
4ef8de8a 7178 key->ptr, best_swappability);
7179
7180 /* Unshare the key if needed */
7181 if (key->refcount > 1) {
7182 robj *newkey = dupStringObject(key);
7183 decrRefCount(key);
7184 key = dictGetEntryKey(best) = newkey;
7185 }
7186 /* Swap it */
7187 if (vmSwapObject(key,val) == REDIS_OK) {
7188 dictGetEntryVal(best) = NULL;
7189 return REDIS_OK;
7190 } else {
7191 return REDIS_ERR;
7192 }
7193}
7194
7e69548d 7195/* Return true if it's safe to swap out objects in a given moment.
7196 * Basically we don't want to swap objects out while there is a BGSAVE
7197 * or a BGAEOREWRITE running in backgroud. */
7198static int vmCanSwapOut(void) {
7199 return (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1);
7200}
7201
1b03836c 7202/* Delete a key if swapped. Returns 1 if the key was found, was swapped
7203 * and was deleted. Otherwise 0 is returned. */
7204static int deleteIfSwapped(redisDb *db, robj *key) {
7205 dictEntry *de;
7206 robj *foundkey;
7207
7208 if ((de = dictFind(db->dict,key)) == NULL) return 0;
7209 foundkey = dictGetEntryKey(de);
7210 if (foundkey->storage == REDIS_VM_MEMORY) return 0;
7211 deleteKey(db,key);
7212 return 1;
7213}
7214
7f957c92 7215/* ================================= Debugging ============================== */
7216
7217static void debugCommand(redisClient *c) {
7218 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
7219 *((char*)-1) = 'x';
210e29f7 7220 } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
7221 if (rdbSave(server.dbfilename) != REDIS_OK) {
7222 addReply(c,shared.err);
7223 return;
7224 }
7225 emptyDb();
7226 if (rdbLoad(server.dbfilename) != REDIS_OK) {
7227 addReply(c,shared.err);
7228 return;
7229 }
7230 redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
7231 addReply(c,shared.ok);
71c2b467 7232 } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
7233 emptyDb();
7234 if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
7235 addReply(c,shared.err);
7236 return;
7237 }
7238 redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
7239 addReply(c,shared.ok);
333298da 7240 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
7241 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
7242 robj *key, *val;
7243
7244 if (!de) {
7245 addReply(c,shared.nokeyerr);
7246 return;
7247 }
7248 key = dictGetEntryKey(de);
7249 val = dictGetEntryVal(de);
ace06542 7250 if (server.vm_enabled && key->storage == REDIS_VM_MEMORY) {
7251 addReplySds(c,sdscatprintf(sdsempty(),
7252 "+Key at:%p refcount:%d, value at:%p refcount:%d "
7253 "encoding:%d serializedlength:%lld\r\n",
682ac724 7254 (void*)key, key->refcount, (void*)val, val->refcount,
06233c45 7255 val->encoding, rdbSavedObjectLen(val)));
ace06542 7256 } else {
7257 addReplySds(c,sdscatprintf(sdsempty(),
7258 "+Key at:%p refcount:%d, value swapped at: page %llu "
7259 "using %llu pages\r\n",
7260 (void*)key, key->refcount, (unsigned long long) key->vm.page,
7261 (unsigned long long) key->vm.usedpages));
7262 }
7d30035d 7263 } else if (!strcasecmp(c->argv[1]->ptr,"swapout") && c->argc == 3) {
7264 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
7265 robj *key, *val;
7266
7267 if (!server.vm_enabled) {
7268 addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n"));
7269 return;
7270 }
7271 if (!de) {
7272 addReply(c,shared.nokeyerr);
7273 return;
7274 }
7275 key = dictGetEntryKey(de);
7276 val = dictGetEntryVal(de);
4ef8de8a 7277 /* If the key is shared we want to create a copy */
7278 if (key->refcount > 1) {
7279 robj *newkey = dupStringObject(key);
7280 decrRefCount(key);
7281 key = dictGetEntryKey(de) = newkey;
7282 }
7283 /* Swap it */
7d30035d 7284 if (key->storage != REDIS_VM_MEMORY) {
7285 addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
7286 } else if (vmSwapObject(key,val) == REDIS_OK) {
7287 dictGetEntryVal(de) = NULL;
7288 addReply(c,shared.ok);
7289 } else {
7290 addReply(c,shared.err);
7291 }
7f957c92 7292 } else {
333298da 7293 addReplySds(c,sdsnew(
7d30035d 7294 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
7f957c92 7295 }
7296}
56906eef 7297
dfc5e96c 7298static void _redisAssert(char *estr) {
7299 redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
7300 redisLog(REDIS_WARNING,"==> %s\n",estr);
7301#ifdef HAVE_BACKTRACE
7302 redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
7303 *((char*)-1) = 'x';
7304#endif
7305}
7306
bcfc686d 7307/* =================================== Main! ================================ */
56906eef 7308
bcfc686d 7309#ifdef __linux__
/* Return the integer value found in /proc/sys/vm/overcommit_memory, or -1
 * if the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char line[64];
    int value = -1;

    if (fp == NULL) return value;
    if (fgets(line,sizeof(line),fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
7323
7324void linuxOvercommitMemoryWarning(void) {
7325 if (linuxOvercommitMemoryValue() == 0) {
7326 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
7327 }
7328}
7329#endif /* __linux__ */
7330
7331static void daemonize(void) {
7332 int fd;
7333 FILE *fp;
7334
7335 if (fork() != 0) exit(0); /* parent exits */
71c54b21 7336 printf("New pid: %d\n", getpid());
bcfc686d 7337 setsid(); /* create a new session */
7338
7339 /* Every output goes to /dev/null. If Redis is daemonized but
7340 * the 'logfile' is set to 'stdout' in the configuration file
7341 * it will not log at all. */
7342 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
7343 dup2(fd, STDIN_FILENO);
7344 dup2(fd, STDOUT_FILENO);
7345 dup2(fd, STDERR_FILENO);
7346 if (fd > STDERR_FILENO) close(fd);
7347 }
7348 /* Try to write the pid file */
7349 fp = fopen(server.pidfile,"w");
7350 if (fp) {
7351 fprintf(fp,"%d\n",getpid());
7352 fclose(fp);
56906eef 7353 }
56906eef 7354}
7355
bcfc686d 7356int main(int argc, char **argv) {
7357 initServerConfig();
7358 if (argc == 2) {
7359 resetServerSaveParams();
7360 loadServerConfig(argv[1]);
7361 } else if (argc > 2) {
7362 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
7363 exit(1);
7364 } else {
7365 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
7366 }
bcfc686d 7367 if (server.daemonize) daemonize();
71c54b21 7368 initServer();
bcfc686d 7369 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
7370#ifdef __linux__
7371 linuxOvercommitMemoryWarning();
7372#endif
7373 if (server.appendonly) {
7374 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
7375 redisLog(REDIS_NOTICE,"DB loaded from append only file");
7376 } else {
7377 if (rdbLoad(server.dbfilename) == REDIS_OK)
7378 redisLog(REDIS_NOTICE,"DB loaded from disk");
7379 }
7380 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
266373b2 7381 acceptHandler, NULL) == AE_ERR) oom("creating file event");
bcfc686d 7382 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
7383 aeMain(server.el);
7384 aeDeleteEventLoop(server.el);
7385 return 0;
7386}
7387
7388/* ============================= Backtrace support ========================= */
7389
7390#ifdef HAVE_BACKTRACE
7391static char *findFuncName(void *pointer, unsigned long *offset);
7392
56906eef 7393static void *getMcontextEip(ucontext_t *uc) {
7394#if defined(__FreeBSD__)
7395 return (void*) uc->uc_mcontext.mc_eip;
7396#elif defined(__dietlibc__)
7397 return (void*) uc->uc_mcontext.eip;
06db1f50 7398#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
da0a1620 7399 #if __x86_64__
7400 return (void*) uc->uc_mcontext->__ss.__rip;
7401 #else
56906eef 7402 return (void*) uc->uc_mcontext->__ss.__eip;
da0a1620 7403 #endif
06db1f50 7404#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
cb7e07cc 7405 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
06db1f50 7406 return (void*) uc->uc_mcontext->__ss.__rip;
cbc59b38 7407 #else
7408 return (void*) uc->uc_mcontext->__ss.__eip;
7409 #endif
c04c9ac9 7410#elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
7411 return (void*) uc->uc_mcontext.gregs[REG_EIP]; /* Linux 32/64 bit */
b91cf5ef 7412#elif defined(__ia64__) /* Linux IA64 */
7413 return (void*) uc->uc_mcontext.sc_ip;
7414#else
7415 return NULL;
56906eef 7416#endif
7417}
7418
7419static void segvHandler(int sig, siginfo_t *info, void *secret) {
7420 void *trace[100];
7421 char **messages = NULL;
7422 int i, trace_size = 0;
7423 unsigned long offset=0;
56906eef 7424 ucontext_t *uc = (ucontext_t*) secret;
1c85b79f 7425 sds infostring;
56906eef 7426 REDIS_NOTUSED(info);
7427
7428 redisLog(REDIS_WARNING,
7429 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
1c85b79f 7430 infostring = genRedisInfoString();
7431 redisLog(REDIS_WARNING, "%s",infostring);
7432 /* It's not safe to sdsfree() the returned string under memory
7433 * corruption conditions. Let it leak as we are going to abort */
56906eef 7434
7435 trace_size = backtrace(trace, 100);
de96dbfe 7436 /* overwrite sigaction with caller's address */
b91cf5ef 7437 if (getMcontextEip(uc) != NULL) {
7438 trace[1] = getMcontextEip(uc);
7439 }
56906eef 7440 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 7441
d76412d1 7442 for (i=1; i<trace_size; ++i) {
56906eef 7443 char *fn = findFuncName(trace[i], &offset), *p;
7444
7445 p = strchr(messages[i],'+');
7446 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
7447 redisLog(REDIS_WARNING,"%s", messages[i]);
7448 } else {
7449 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
7450 }
7451 }
b177fd30 7452 /* free(messages); Don't call free() with possibly corrupted memory. */
56906eef 7453 exit(0);
fe3bbfbe 7454}
56906eef 7455
7456static void setupSigSegvAction(void) {
7457 struct sigaction act;
7458
7459 sigemptyset (&act.sa_mask);
7460 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
7461 * is used. Otherwise, sa_handler is used */
7462 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
7463 act.sa_sigaction = segvHandler;
7464 sigaction (SIGSEGV, &act, NULL);
7465 sigaction (SIGBUS, &act, NULL);
12fea928 7466 sigaction (SIGFPE, &act, NULL);
7467 sigaction (SIGILL, &act, NULL);
7468 sigaction (SIGBUS, &act, NULL);
e65fdc78 7469 return;
56906eef 7470}
e65fdc78 7471
bcfc686d 7472#include "staticsymbols.h"
7473/* This function try to convert a pointer into a function name. It's used in
7474 * oreder to provide a backtrace under segmentation fault that's able to
7475 * display functions declared as static (otherwise the backtrace is useless). */
7476static char *findFuncName(void *pointer, unsigned long *offset){
7477 int i, ret = -1;
7478 unsigned long off, minoff = 0;
ed9b544e 7479
bcfc686d 7480 /* Try to match against the Symbol with the smallest offset */
7481 for (i=0; symsTable[i].pointer; i++) {
7482 unsigned long lp = (unsigned long) pointer;
0bc03378 7483
bcfc686d 7484 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
7485 off=lp-symsTable[i].pointer;
7486 if (ret < 0 || off < minoff) {
7487 minoff=off;
7488 ret=i;
7489 }
7490 }
0bc03378 7491 }
bcfc686d 7492 if (ret == -1) return NULL;
7493 *offset = minoff;
7494 return symsTable[ret].name;
0bc03378 7495}
bcfc686d 7496#else /* HAVE_BACKTRACE */
/* Variant compiled when HAVE_BACKTRACE is not defined: intentionally a
 * no-op, so no handler is installed by this function. */
static void setupSigSegvAction(void) {
}
bcfc686d 7499#endif /* HAVE_BACKTRACE */
0bc03378 7500
ed9b544e 7501
ed9b544e 7502
bcfc686d 7503/* The End */
7504
7505
ed9b544e 7506