]> git.saurik.com Git - redis.git/blame - redis.c
version is now 1.1.92
[redis.git] / redis.c
CommitLineData
ed9b544e 1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
8879cbfe 30#define REDIS_VERSION "1.1.92"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
c9468bcf 40#define __USE_POSIX199309
ed9b544e 41#include <signal.h>
fbf9bcdb 42
43#ifdef HAVE_BACKTRACE
c9468bcf 44#include <execinfo.h>
45#include <ucontext.h>
fbf9bcdb 46#endif /* HAVE_BACKTRACE */
47
ed9b544e 48#include <sys/wait.h>
49#include <errno.h>
50#include <assert.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <inttypes.h>
54#include <arpa/inet.h>
55#include <sys/stat.h>
56#include <fcntl.h>
57#include <sys/time.h>
58#include <sys/resource.h>
2895e862 59#include <sys/uio.h>
f78fd11b 60#include <limits.h>
a7866db6 61#include <math.h>
0bc1b2f6 62
63#if defined(__sun)
5043dff3 64#include "solarisfixes.h"
65#endif
ed9b544e 66
c9468bcf 67#include "redis.h"
ed9b544e 68#include "ae.h" /* Event driven programming library */
69#include "sds.h" /* Dynamic safe strings */
70#include "anet.h" /* Networking the easy way */
71#include "dict.h" /* Hash tables */
72#include "adlist.h" /* Linked lists */
73#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 74#include "lzf.h" /* LZF compression library */
75#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ed9b544e 76
77/* Error codes */
78#define REDIS_OK 0
79#define REDIS_ERR -1
80
81/* Static server configuration */
82#define REDIS_SERVERPORT 6379 /* TCP port */
83#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
6208b3a7 84#define REDIS_IOBUF_LEN 1024
ed9b544e 85#define REDIS_LOADBUF_LEN 1024
93ea3759 86#define REDIS_STATIC_ARGS 4
ed9b544e 87#define REDIS_DEFAULT_DBNUM 16
88#define REDIS_CONFIGLINE_MAX 1024
89#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
90#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94754ccc 91#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
6f376729 92#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
2895e862 93#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
94
95/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
96#define REDIS_WRITEV_THRESHOLD 3
97/* Max number of iovecs used for each writev call */
98#define REDIS_WRITEV_IOVEC_COUNT 256
ed9b544e 99
100/* Hash table parameters */
101#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
ed9b544e 102
103/* Command flags */
3fd78bcd 104#define REDIS_CMD_BULK 1 /* Bulk write command */
105#define REDIS_CMD_INLINE 2 /* Inline command */
106/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
107 this flag will return an error when the 'maxmemory' option is set in the
108 config file and the server is using more than maxmemory bytes of memory.
109 In short these commands are denied on low memory conditions. */
110#define REDIS_CMD_DENYOOM 4
ed9b544e 111
112/* Object types */
113#define REDIS_STRING 0
114#define REDIS_LIST 1
115#define REDIS_SET 2
1812e024 116#define REDIS_ZSET 3
117#define REDIS_HASH 4
f78fd11b 118
942a3961 119/* Objects encoding */
120#define REDIS_ENCODING_RAW 0 /* Raw representation */
121#define REDIS_ENCODING_INT 1 /* Encoded as integer */
122
f78fd11b 123/* Object types only used for dumping to disk */
bb32ede5 124#define REDIS_EXPIRETIME 253
ed9b544e 125#define REDIS_SELECTDB 254
126#define REDIS_EOF 255
127
f78fd11b 128/* Defines related to the dump file format. To store 32 bits lengths for short
129 * keys requires a lot of space, so we check the most significant 2 bits of
130 * the first byte to interpret the length:
131 *
132 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
133 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
134 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
a4d1ba9a 135 * 11|000000 this means: specially encoded object will follow. The six bits
136 * number specify the kind of object that follows.
137 * See the REDIS_RDB_ENC_* defines.
f78fd11b 138 *
10c43610 139 * Lengths up to 63 are stored using a single byte, most DB keys, and many
140 * values, will fit inside. */
f78fd11b 141#define REDIS_RDB_6BITLEN 0
142#define REDIS_RDB_14BITLEN 1
143#define REDIS_RDB_32BITLEN 2
17be1a4a 144#define REDIS_RDB_ENCVAL 3
f78fd11b 145#define REDIS_RDB_LENERR UINT_MAX
146
a4d1ba9a 147/* When a length of a string object stored on disk has the first two bits
148 * set, the remaining six bits specify a special encoding for the object
149 * accordingly to the following defines: */
150#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
151#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
152#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
774e3047 153#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
a4d1ba9a 154
ed9b544e 155/* Client flags */
156#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
157#define REDIS_SLAVE 2 /* This client is a slave server */
158#define REDIS_MASTER 4 /* This client is a master server */
87eca727 159#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
ed9b544e 160
40d224a9 161/* Slave replication state - slave side */
ed9b544e 162#define REDIS_REPL_NONE 0 /* No active replication */
163#define REDIS_REPL_CONNECT 1 /* Must connect to master */
164#define REDIS_REPL_CONNECTED 2 /* Connected to master */
165
40d224a9 166/* Slave replication state - from the point of view of master
167 * Note that in SEND_BULK and ONLINE state the slave receives new updates
168 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
169 * to start the next background saving in order to send updates to it. */
170#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
171#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
172#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
173#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
174
ed9b544e 175/* List related stuff */
176#define REDIS_HEAD 0
177#define REDIS_TAIL 1
178
179/* Sort operations */
180#define REDIS_SORT_GET 0
443c6409 181#define REDIS_SORT_ASC 1
182#define REDIS_SORT_DESC 2
ed9b544e 183#define REDIS_SORTKEY_MAX 1024
184
185/* Log levels */
186#define REDIS_DEBUG 0
187#define REDIS_NOTICE 1
188#define REDIS_WARNING 2
189
190/* Anti-warning macro... */
191#define REDIS_NOTUSED(V) ((void) V)
192
6b47e12e 193#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
194#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
ed9b544e 195
48f0308a 196/* Append only defines */
197#define APPENDFSYNC_NO 0
198#define APPENDFSYNC_ALWAYS 1
199#define APPENDFSYNC_EVERYSEC 2
200
dfc5e96c 201/* We can print the stacktrace, so our assert is defined this way: */
202#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e),exit(1)))
203static void _redisAssert(char *estr);
204
ed9b544e 205/*================================= Data types ============================== */
206
207/* A redis object: a reference-counted container whose 'ptr' holds the
 * payload of one of the REDIS_* object types (string / list / set / ...). */
208typedef struct redisObject {
ed9b544e 209 void *ptr; /* payload; its meaning depends on 'type' and 'encoding' */
942a3961 210 unsigned char type; /* one of the "Object types" defines, e.g. REDIS_STRING */
211 unsigned char encoding; /* one of the REDIS_ENCODING_* defines */
212 unsigned char notused[2];
ed9b544e 213 int refcount; /* see incrRefCount() / decrRefCount() */
214} robj;
215
/* Macro used to initialize a Redis object allocated on the stack.
 * Note that this macro is taken near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85 introduced exactly in this way.
 *
 * _var is the robj lvalue to fill (not a pointer), _ptr the string payload.
 * The object is set up as a raw-encoded REDIS_STRING with refcount 1.
 *
 * The expansion deliberately has no trailing semicolon: the caller supplies
 * it, so the macro behaves like a single statement and can be used as the
 * body of an if/else. Arguments are parenthesized so that any expression
 * can be passed safely. */
#define initStaticStringObject(_var,_ptr) do { \
    (_var).refcount = 1; \
    (_var).type = REDIS_STRING; \
    (_var).encoding = REDIS_ENCODING_RAW; \
    (_var).ptr = (_ptr); \
} while(0)
226
3305306f 227typedef struct redisDb {
228 dict *dict;
229 dict *expires;
230 int id;
231} redisDb;
232
ed9b544e 233/* With multiplexing we need to take per-client state.
234 * Clients are kept in a linked list. */
235typedef struct redisClient {
236 int fd;
3305306f 237 redisDb *db;
ed9b544e 238 int dictid;
239 sds querybuf;
e8a74421 240 robj **argv, **mbargv;
241 int argc, mbargc;
40d224a9 242 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 243 int multibulk; /* multi bulk command format active */
ed9b544e 244 list *reply;
245 int sentlen;
246 time_t lastinteraction; /* time of the last interaction, used for timeout */
40d224a9 247 int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MASTER | REDIS_MONITOR */
248 int slaveseldb; /* slave selected db, if this client is a slave */
249 int authenticated; /* when requirepass is non-NULL */
250 int replstate; /* replication state if this is a slave */
251 int repldbfd; /* replication DB file descriptor */
6208b3a7 252 long repldboff; /* replication DB file offset */
40d224a9 253 off_t repldbsize; /* replication DB file size */
ed9b544e 254} redisClient;
255
256struct saveparam {
257 time_t seconds;
258 int changes;
259};
260
261/* Global server state structure */
262struct redisServer {
263 int port;
264 int fd;
3305306f 265 redisDb *db; /* array of databases, indexed 0 .. dbnum-1 */
10c43610 266 dict *sharingpool;
267 unsigned int sharingpoolsize;
ed9b544e 268 long long dirty; /* changes to DB from the last save */
269 list *clients; /* list of redisClient, scanned for idle timeouts */
87eca727 270 list *slaves, *monitors;
ed9b544e 271 char neterr[ANET_ERR_LEN];
272 aeEventLoop *el;
273 int cronloops; /* number of times the cron function has run */
274 list *objfreelist; /* A list of freed objects to avoid malloc() */
275 time_t lastsave; /* Unix time of the last successful save */
5fba9f71 276 size_t usedmemory; /* Used memory in megabytes */
ed9b544e 277 /* Fields used only for stats */
278 time_t stat_starttime; /* server start time */
279 long long stat_numcommands; /* number of processed commands */
280 long long stat_numconnections; /* number of connections received */
281 /* Configuration */
282 int verbosity; /* log messages with a level below this are dropped */
283 int glueoutputbuf;
284 int maxidletime; /* clients idle for longer than this are closed */
285 int dbnum; /* number of entries in 'db' */
286 int daemonize;
44b38ef4 287 int appendonly;
48f0308a 288 int appendfsync; /* one of the APPENDFSYNC_* defines */
289 time_t lastfsync;
44b38ef4 290 int appendfd;
291 int appendseldb;
ed329fcf 292 char *pidfile;
9f3c422c 293 pid_t bgsavechildpid; /* set to -1 once the BGSAVE child has terminated */
9d65a1bb 294 pid_t bgrewritechildpid;
295 sds bgrewritebuf; /* buffer taken by parent during append only rewrite */
ed9b544e 296 struct saveparam *saveparams;
297 int saveparamslen;
298 char *logfile; /* NULL means log to stdout */
299 char *bindaddr;
300 char *dbfilename;
44b38ef4 301 char *appendfilename;
abcb223e 302 char *requirepass;
10c43610 303 int shareobjects;
ed9b544e 304 /* Replication related */
305 int isslave;
d0ccebcf 306 char *masterauth;
ed9b544e 307 char *masterhost;
308 int masterport;
40d224a9 309 redisClient *master; /* client that is master for this slave */
ed9b544e 310 int replstate;
285add55 311 unsigned int maxclients;
d4465900 312 unsigned long maxmemory;
ed9b544e 313 /* Sort parameters - qsort_r() is only available under BSD so we
314 * have to take this state global, in order to pass it to sortCompare() */
315 int sort_desc;
316 int sort_alpha;
317 int sort_bypattern;
318};
319
320typedef void redisCommandProc(redisClient *c);
321struct redisCommand {
322 char *name;
323 redisCommandProc *proc;
324 int arity;
325 int flags;
326};
327
de96dbfe 328struct redisFunctionSym {
329 char *name;
56906eef 330 unsigned long pointer;
de96dbfe 331};
332
ed9b544e 333typedef struct _redisSortObject {
334 robj *obj;
335 union {
336 double score;
337 robj *cmpobj;
338 } u;
339} redisSortObject;
340
341typedef struct _redisSortOperation {
342 int type;
343 robj *pattern;
344} redisSortOperation;
345
6b47e12e 346/* ZSETs use a specialized version of Skiplists */
347
348typedef struct zskiplistNode {
349 struct zskiplistNode **forward;
e3870fab 350 struct zskiplistNode *backward;
6b47e12e 351 double score;
352 robj *obj;
353} zskiplistNode;
354
355typedef struct zskiplist {
e3870fab 356 struct zskiplistNode *header, *tail;
d13f767c 357 unsigned long length;
6b47e12e 358 int level;
359} zskiplist;
360
1812e024 361typedef struct zset {
362 dict *dict;
6b47e12e 363 zskiplist *zsl;
1812e024 364} zset;
365
6b47e12e 366/* Our shared "common" objects */
367
ed9b544e 368struct sharedObjectsStruct {
c937aa89 369 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
7b45bfb2 370 *colon, *nullbulk, *nullmultibulk,
c937aa89 371 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
372 *outofrangeerr, *plus,
ed9b544e 373 *select0, *select1, *select2, *select3, *select4,
374 *select5, *select6, *select7, *select8, *select9;
375} shared;
376
a7866db6 377/* Global vars that are actually used as constants. The following double
378 * values are used for double on-disk serialization, and are initialized
379 * at runtime to avoid strange compiler optimizations. */
380
381static double R_Zero, R_PosInf, R_NegInf, R_Nan;
382
ed9b544e 383/*================================ Prototypes =============================== */
384
385static void freeStringObject(robj *o);
386static void freeListObject(robj *o);
387static void freeSetObject(robj *o);
388static void decrRefCount(void *o);
389static robj *createObject(int type, void *ptr);
390static void freeClient(redisClient *c);
f78fd11b 391static int rdbLoad(char *filename);
ed9b544e 392static void addReply(redisClient *c, robj *obj);
393static void addReplySds(redisClient *c, sds s);
394static void incrRefCount(robj *o);
f78fd11b 395static int rdbSaveBackground(char *filename);
ed9b544e 396static robj *createStringObject(char *ptr, size_t len);
87eca727 397static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
44b38ef4 398static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 399static int syncWithMaster(void);
10c43610 400static robj *tryObjectSharing(robj *o);
942a3961 401static int tryObjectEncoding(robj *o);
9d65a1bb 402static robj *getDecodedObject(robj *o);
3305306f 403static int removeExpire(redisDb *db, robj *key);
404static int expireIfNeeded(redisDb *db, robj *key);
405static int deleteIfVolatile(redisDb *db, robj *key);
94754ccc 406static int deleteKey(redisDb *db, robj *key);
bb32ede5 407static time_t getExpire(redisDb *db, robj *key);
408static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 409static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 410static void freeMemoryIfNeeded(void);
de96dbfe 411static int processCommand(redisClient *c);
56906eef 412static void setupSigSegvAction(void);
a3b21203 413static void rdbRemoveTempFile(pid_t childpid);
9d65a1bb 414static void aofRemoveTempFile(pid_t childpid);
0ea663ea 415static size_t stringObjectLen(robj *o);
638e42ac 416static void processInputBuffer(redisClient *c);
6b47e12e 417static zskiplist *zslCreate(void);
fd8ccf44 418static void zslFree(zskiplist *zsl);
2b59cfdf 419static void zslInsert(zskiplist *zsl, double score, robj *obj);
2895e862 420static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
ed9b544e 421
abcb223e 422static void authCommand(redisClient *c);
ed9b544e 423static void pingCommand(redisClient *c);
424static void echoCommand(redisClient *c);
425static void setCommand(redisClient *c);
426static void setnxCommand(redisClient *c);
427static void getCommand(redisClient *c);
428static void delCommand(redisClient *c);
429static void existsCommand(redisClient *c);
430static void incrCommand(redisClient *c);
431static void decrCommand(redisClient *c);
432static void incrbyCommand(redisClient *c);
433static void decrbyCommand(redisClient *c);
434static void selectCommand(redisClient *c);
435static void randomkeyCommand(redisClient *c);
436static void keysCommand(redisClient *c);
437static void dbsizeCommand(redisClient *c);
438static void lastsaveCommand(redisClient *c);
439static void saveCommand(redisClient *c);
440static void bgsaveCommand(redisClient *c);
9d65a1bb 441static void bgrewriteaofCommand(redisClient *c);
ed9b544e 442static void shutdownCommand(redisClient *c);
443static void moveCommand(redisClient *c);
444static void renameCommand(redisClient *c);
445static void renamenxCommand(redisClient *c);
446static void lpushCommand(redisClient *c);
447static void rpushCommand(redisClient *c);
448static void lpopCommand(redisClient *c);
449static void rpopCommand(redisClient *c);
450static void llenCommand(redisClient *c);
451static void lindexCommand(redisClient *c);
452static void lrangeCommand(redisClient *c);
453static void ltrimCommand(redisClient *c);
454static void typeCommand(redisClient *c);
455static void lsetCommand(redisClient *c);
456static void saddCommand(redisClient *c);
457static void sremCommand(redisClient *c);
a4460ef4 458static void smoveCommand(redisClient *c);
ed9b544e 459static void sismemberCommand(redisClient *c);
460static void scardCommand(redisClient *c);
12fea928 461static void spopCommand(redisClient *c);
2abb95a9 462static void srandmemberCommand(redisClient *c);
ed9b544e 463static void sinterCommand(redisClient *c);
464static void sinterstoreCommand(redisClient *c);
40d224a9 465static void sunionCommand(redisClient *c);
466static void sunionstoreCommand(redisClient *c);
f4f56e1d 467static void sdiffCommand(redisClient *c);
468static void sdiffstoreCommand(redisClient *c);
ed9b544e 469static void syncCommand(redisClient *c);
470static void flushdbCommand(redisClient *c);
471static void flushallCommand(redisClient *c);
472static void sortCommand(redisClient *c);
473static void lremCommand(redisClient *c);
0f5f7e9a 474static void rpoplpushcommand(redisClient *c);
ed9b544e 475static void infoCommand(redisClient *c);
70003d28 476static void mgetCommand(redisClient *c);
87eca727 477static void monitorCommand(redisClient *c);
3305306f 478static void expireCommand(redisClient *c);
802e8373 479static void expireatCommand(redisClient *c);
f6b141c5 480static void getsetCommand(redisClient *c);
fd88489a 481static void ttlCommand(redisClient *c);
321b0e13 482static void slaveofCommand(redisClient *c);
7f957c92 483static void debugCommand(redisClient *c);
f6b141c5 484static void msetCommand(redisClient *c);
485static void msetnxCommand(redisClient *c);
fd8ccf44 486static void zaddCommand(redisClient *c);
7db723ad 487static void zincrbyCommand(redisClient *c);
cc812361 488static void zrangeCommand(redisClient *c);
50c55df5 489static void zrangebyscoreCommand(redisClient *c);
e3870fab 490static void zrevrangeCommand(redisClient *c);
3c41331e 491static void zcardCommand(redisClient *c);
1b7106e7 492static void zremCommand(redisClient *c);
6e333bbe 493static void zscoreCommand(redisClient *c);
1807985b 494static void zremrangebyscoreCommand(redisClient *c);
f6b141c5 495
ed9b544e 496/*================================= Globals ================================= */
497
498/* Global vars */
499static struct redisServer server; /* server global state */
500static struct redisCommand cmdTable[] = {
501 {"get",getCommand,2,REDIS_CMD_INLINE},
3fd78bcd 502 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
503 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
5109cdff 504 {"del",delCommand,-2,REDIS_CMD_INLINE},
ed9b544e 505 {"exists",existsCommand,2,REDIS_CMD_INLINE},
3fd78bcd 506 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
507 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
70003d28 508 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
3fd78bcd 509 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
510 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 511 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
512 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
513 {"llen",llenCommand,2,REDIS_CMD_INLINE},
514 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
3fd78bcd 515 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 516 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
517 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
518 {"lrem",lremCommand,4,REDIS_CMD_BULK},
0b13687c 519 {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 520 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 521 {"srem",sremCommand,3,REDIS_CMD_BULK},
a4460ef4 522 {"smove",smoveCommand,4,REDIS_CMD_BULK},
ed9b544e 523 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
524 {"scard",scardCommand,2,REDIS_CMD_INLINE},
12fea928 525 {"spop",spopCommand,2,REDIS_CMD_INLINE},
2abb95a9 526 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
3fd78bcd 527 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
528 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
529 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
530 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
531 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
532 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 533 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
fd8ccf44 534 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
7db723ad 535 {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
1b7106e7 536 {"zrem",zremCommand,3,REDIS_CMD_BULK},
1807985b 537 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
cc812361 538 {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
80181f78 539 {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE},
e3870fab 540 {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
3c41331e 541 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
6e333bbe 542 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 543 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
544 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
f6b141c5 545 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
546 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
547 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 548 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
549 {"select",selectCommand,2,REDIS_CMD_INLINE},
550 {"move",moveCommand,3,REDIS_CMD_INLINE},
551 {"rename",renameCommand,3,REDIS_CMD_INLINE},
552 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
321b0e13 553 {"expire",expireCommand,3,REDIS_CMD_INLINE},
802e8373 554 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
ed9b544e 555 {"keys",keysCommand,2,REDIS_CMD_INLINE},
556 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
abcb223e 557 {"auth",authCommand,2,REDIS_CMD_INLINE},
ed9b544e 558 {"ping",pingCommand,1,REDIS_CMD_INLINE},
559 {"echo",echoCommand,2,REDIS_CMD_BULK},
560 {"save",saveCommand,1,REDIS_CMD_INLINE},
561 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
9d65a1bb 562 {"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE},
ed9b544e 563 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
564 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
565 {"type",typeCommand,2,REDIS_CMD_INLINE},
566 {"sync",syncCommand,1,REDIS_CMD_INLINE},
567 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
568 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
3fd78bcd 569 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 570 {"info",infoCommand,1,REDIS_CMD_INLINE},
87eca727 571 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
fd88489a 572 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
321b0e13 573 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
7f957c92 574 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
ed9b544e 575 {NULL,NULL,0,0}
576};
bcfc686d 577
ed9b544e 578/*============================ Utility functions ============================ */
579
/* Glob-style pattern matching.
 *
 * Matches the first 'stringLen' bytes of 'string' against the first
 * 'patternLen' bytes of 'pattern'. Neither buffer needs to be
 * NUL-terminated: no byte past the given lengths is ever inspected.
 *
 * Supported syntax:
 *   *        any sequence of bytes, including the empty one
 *   ?        exactly one byte
 *   [abc]    one byte out of the listed ones
 *   [^abc]   one byte not in the list
 *   [a-z]    one byte inside the inclusive range (bounds may be swapped)
 *   \x       the byte x taken literally
 *
 * An unterminated class ("[abc") is handled as if it were closed at the
 * end of the pattern. When 'nocase' is non-zero, literals and ranges are
 * compared through tolower(); escaped bytes inside a class are always
 * compared exactly.
 *
 * Returns 1 on match, 0 otherwise. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while(patternLen) {
        switch(pattern[0]) {
        case '*':
            /* Collapse runs of '*', never looking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try to match the rest of the pattern at every position. */
            while(stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            /* A class always consumes one byte of the string. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            not = patternLen && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back so that the common
                     * pattern++ below lands exactly on the pattern end. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    /* Plain class member (also a trailing backslash). */
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((int)pattern[0]) == tolower((int)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            /* A literal needs one byte of the string to compare with. */
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((int)pattern[0]) != tolower((int)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is over: only trailing '*' may still match. */
            while(patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
702
56906eef 703static void redisLog(int level, const char *fmt, ...) {
ed9b544e 704 va_list ap;
705 FILE *fp;
706
707 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
708 if (!fp) return;
709
710 va_start(ap, fmt);
711 if (level >= server.verbosity) {
712 char *c = ".-*";
1904ecc1 713 char buf[64];
714 time_t now;
715
716 now = time(NULL);
6c9385e0 717 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
1904ecc1 718 fprintf(fp,"%s %c ",buf,c[level]);
ed9b544e 719 vfprintf(fp, fmt, ap);
720 fprintf(fp,"\n");
721 fflush(fp);
722 }
723 va_end(ap);
724
725 if (server.logfile) fclose(fp);
726}
727
728/*====================== Hash table type implementation ==================== */
729
730/* This is a hash table type that uses the SDS dynamic strings library as
731 * keys and redis objects as values (objects can hold SDS strings,
732 * lists, sets). */
733
/* Dict value destructor that simply releases the value with zfree().
 * The private data pointer is not used. */
static void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    zfree(val);
}
739
ed9b544e 740static int sdsDictKeyCompare(void *privdata, const void *key1,
741 const void *key2)
742{
743 int l1,l2;
744 DICT_NOTUSED(privdata);
745
746 l1 = sdslen((sds)key1);
747 l2 = sdslen((sds)key2);
748 if (l1 != l2) return 0;
749 return memcmp(key1, key2, l1) == 0;
750}
751
/* Dict destructor for entries holding redis objects: drops one reference
 * through decrRefCount(). The private data pointer is not used. */
static void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    decrRefCount(val);
}
758
942a3961 759static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 760 const void *key2)
761{
762 const robj *o1 = key1, *o2 = key2;
763 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
764}
765
942a3961 766static unsigned int dictObjHash(const void *key) {
ed9b544e 767 const robj *o = key;
768 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
769}
770
942a3961 771static int dictEncObjKeyCompare(void *privdata, const void *key1,
772 const void *key2)
773{
9d65a1bb 774 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
775 int cmp;
942a3961 776
9d65a1bb 777 o1 = getDecodedObject(o1);
778 o2 = getDecodedObject(o2);
779 cmp = sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
780 decrRefCount(o1);
781 decrRefCount(o2);
782 return cmp;
942a3961 783}
784
785static unsigned int dictEncObjHash(const void *key) {
9d65a1bb 786 robj *o = (robj*) key;
942a3961 787
9d65a1bb 788 o = getDecodedObject(o);
789 unsigned int hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
790 decrRefCount(o);
791 return hash;
942a3961 792}
793
ed9b544e 794static dictType setDictType = {
942a3961 795 dictEncObjHash, /* hash function */
ed9b544e 796 NULL, /* key dup */
797 NULL, /* val dup */
942a3961 798 dictEncObjKeyCompare, /* key compare */
ed9b544e 799 dictRedisObjectDestructor, /* key destructor */
800 NULL /* val destructor */
801};
802
1812e024 803static dictType zsetDictType = {
804 dictEncObjHash, /* hash function */
805 NULL, /* key dup */
806 NULL, /* val dup */
807 dictEncObjKeyCompare, /* key compare */
808 dictRedisObjectDestructor, /* key destructor */
da0a1620 809 dictVanillaFree /* val destructor of malloc(sizeof(double)) */
1812e024 810};
811
ed9b544e 812static dictType hashDictType = {
942a3961 813 dictObjHash, /* hash function */
ed9b544e 814 NULL, /* key dup */
815 NULL, /* val dup */
942a3961 816 dictObjKeyCompare, /* key compare */
ed9b544e 817 dictRedisObjectDestructor, /* key destructor */
818 dictRedisObjectDestructor /* val destructor */
819};
820
821/* ========================= Random utility functions ======================= */
822
823/* Redis generally does not try to recover from out of memory conditions
824 * when allocating objects or strings, it is not clear if it will be possible
825 * to report this condition to the client since the networking layer itself
826 * is based on heap allocation for send buffers, so we simply abort.
827 * At least the code will be simpler to read... */
828static void oom(const char *msg) {
71c54b21 829 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
ed9b544e 830 sleep(1);
831 abort();
832}
833
834/* ====================== Redis server networking stuff ===================== */
56906eef 835static void closeTimedoutClients(void) {
ed9b544e 836 redisClient *c;
ed9b544e 837 listNode *ln;
838 time_t now = time(NULL);
839
6208b3a7 840 listRewind(server.clients);
841 while ((ln = listYield(server.clients)) != NULL) {
ed9b544e 842 c = listNodeValue(ln);
843 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 844 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
ed9b544e 845 (now - c->lastinteraction > server.maxidletime)) {
846 redisLog(REDIS_DEBUG,"Closing idle client");
847 freeClient(c);
848 }
849 }
ed9b544e 850}
851
12fea928 852static int htNeedsResize(dict *dict) {
853 long long size, used;
854
855 size = dictSlots(dict);
856 used = dictSize(dict);
857 return (size && used && size > DICT_HT_INITIAL_SIZE &&
858 (used*100/size < REDIS_HT_MINFILL));
859}
860
0bc03378 861/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
862 * we resize the hash table to save memory */
56906eef 863static void tryResizeHashTables(void) {
0bc03378 864 int j;
865
866 for (j = 0; j < server.dbnum; j++) {
12fea928 867 if (htNeedsResize(server.db[j].dict)) {
868 redisLog(REDIS_DEBUG,"The hash table %d is too sparse, resize it...",j);
0bc03378 869 dictResize(server.db[j].dict);
12fea928 870 redisLog(REDIS_DEBUG,"Hash table %d resized.",j);
0bc03378 871 }
12fea928 872 if (htNeedsResize(server.db[j].expires))
873 dictResize(server.db[j].expires);
0bc03378 874 }
875}
876
9d65a1bb 877/* A background saving child (BGSAVE) terminated its work. Handle this. */
878void backgroundSaveDoneHandler(int statloc) {
879 int exitcode = WEXITSTATUS(statloc);
880 int bysignal = WIFSIGNALED(statloc);
881
882 if (!bysignal && exitcode == 0) {
883 redisLog(REDIS_NOTICE,
884 "Background saving terminated with success");
885 server.dirty = 0;
886 server.lastsave = time(NULL);
887 } else if (!bysignal && exitcode != 0) {
888 redisLog(REDIS_WARNING, "Background saving error");
889 } else {
890 redisLog(REDIS_WARNING,
891 "Background saving terminated by signal");
892 rdbRemoveTempFile(server.bgsavechildpid);
893 }
894 server.bgsavechildpid = -1;
895 /* Possibly there are slaves waiting for a BGSAVE in order to be served
896 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
897 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
898}
899
900/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
901 * Handle this. */
902void backgroundRewriteDoneHandler(int statloc) {
903 int exitcode = WEXITSTATUS(statloc);
904 int bysignal = WIFSIGNALED(statloc);
905
906 if (!bysignal && exitcode == 0) {
907 int fd;
908 char tmpfile[256];
909
910 redisLog(REDIS_NOTICE,
911 "Background append only file rewriting terminated with success");
912 /* Now it's time to flush the differences accumulated by the parent */
913 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
914 fd = open(tmpfile,O_WRONLY|O_APPEND);
915 if (fd == -1) {
916 redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
917 goto cleanup;
918 }
919 /* Flush our data... */
920 if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
921 (signed) sdslen(server.bgrewritebuf)) {
922 redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
923 close(fd);
924 goto cleanup;
925 }
b32627cd 926 redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
9d65a1bb 927 /* Now our work is to rename the temp file into the stable file. And
928 * switch the file descriptor used by the server for append only. */
929 if (rename(tmpfile,server.appendfilename) == -1) {
930 redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
931 close(fd);
932 goto cleanup;
933 }
934 /* Mission completed... almost */
935 redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
936 if (server.appendfd != -1) {
937 /* If append only is actually enabled... */
938 close(server.appendfd);
939 server.appendfd = fd;
940 fsync(fd);
85a83172 941 server.appendseldb = -1; /* Make sure it will issue SELECT */
9d65a1bb 942 redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
943 } else {
944 /* If append only is disabled we just generate a dump in this
945 * format. Why not? */
946 close(fd);
947 }
948 } else if (!bysignal && exitcode != 0) {
949 redisLog(REDIS_WARNING, "Background append only file rewriting error");
950 } else {
951 redisLog(REDIS_WARNING,
952 "Background append only file rewriting terminated by signal");
953 }
954cleanup:
955 sdsfree(server.bgrewritebuf);
956 server.bgrewritebuf = sdsempty();
957 aofRemoveTempFile(server.bgrewritechildpid);
958 server.bgrewritechildpid = -1;
959}
960
56906eef 961static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 962 int j, loops = server.cronloops++;
ed9b544e 963 REDIS_NOTUSED(eventLoop);
964 REDIS_NOTUSED(id);
965 REDIS_NOTUSED(clientData);
966
967 /* Update the global state with the amount of used memory */
968 server.usedmemory = zmalloc_used_memory();
969
0bc03378 970 /* Show some info about non-empty databases */
ed9b544e 971 for (j = 0; j < server.dbnum; j++) {
dec423d9 972 long long size, used, vkeys;
94754ccc 973
3305306f 974 size = dictSlots(server.db[j].dict);
975 used = dictSize(server.db[j].dict);
94754ccc 976 vkeys = dictSize(server.db[j].expires);
c3cb078d 977 if (!(loops % 5) && (used || vkeys)) {
978 redisLog(REDIS_DEBUG,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 979 /* dictPrintStats(server.dict); */
ed9b544e 980 }
ed9b544e 981 }
982
0bc03378 983 /* We don't want to resize the hash tables while a bacground saving
984 * is in progress: the saving child is created using fork() that is
985 * implemented with a copy-on-write semantic in most modern systems, so
986 * if we resize the HT while there is the saving child at work actually
987 * a lot of memory movements in the parent will cause a lot of pages
988 * copied. */
9d65a1bb 989 if (server.bgsavechildpid == -1) tryResizeHashTables();
0bc03378 990
ed9b544e 991 /* Show information about connected clients */
992 if (!(loops % 5)) {
21aecf4b 993 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
ed9b544e 994 listLength(server.clients)-listLength(server.slaves),
995 listLength(server.slaves),
10c43610 996 server.usedmemory,
3305306f 997 dictSize(server.sharingpool));
ed9b544e 998 }
999
1000 /* Close connections of timedout clients */
0150db36 1001 if (server.maxidletime && !(loops % 10))
ed9b544e 1002 closeTimedoutClients();
1003
9d65a1bb 1004 /* Check if a background saving or AOF rewrite in progress terminated */
1005 if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
ed9b544e 1006 int statloc;
9d65a1bb 1007 pid_t pid;
1008
1009 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
1010 if (pid == server.bgsavechildpid) {
1011 backgroundSaveDoneHandler(statloc);
ed9b544e 1012 } else {
9d65a1bb 1013 backgroundRewriteDoneHandler(statloc);
ed9b544e 1014 }
ed9b544e 1015 }
1016 } else {
1017 /* If there is not a background saving in progress check if
1018 * we have to save now */
1019 time_t now = time(NULL);
1020 for (j = 0; j < server.saveparamslen; j++) {
1021 struct saveparam *sp = server.saveparams+j;
1022
1023 if (server.dirty >= sp->changes &&
1024 now-server.lastsave > sp->seconds) {
1025 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
1026 sp->changes, sp->seconds);
f78fd11b 1027 rdbSaveBackground(server.dbfilename);
ed9b544e 1028 break;
1029 }
1030 }
1031 }
94754ccc 1032
f2324293 1033 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1034 * will use few CPU cycles if there are few expiring keys, otherwise
1035 * it will get more aggressive to avoid that too much memory is used by
1036 * keys that can be removed from the keyspace. */
94754ccc 1037 for (j = 0; j < server.dbnum; j++) {
f2324293 1038 int expired;
94754ccc 1039 redisDb *db = server.db+j;
94754ccc 1040
f2324293 1041 /* Continue to expire if at the end of the cycle more than 25%
1042 * of the keys were expired. */
1043 do {
1044 int num = dictSize(db->expires);
94754ccc 1045 time_t now = time(NULL);
1046
f2324293 1047 expired = 0;
94754ccc 1048 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
1049 num = REDIS_EXPIRELOOKUPS_PER_CRON;
1050 while (num--) {
1051 dictEntry *de;
1052 time_t t;
1053
1054 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
1055 t = (time_t) dictGetEntryVal(de);
1056 if (now > t) {
1057 deleteKey(db,dictGetEntryKey(de));
f2324293 1058 expired++;
94754ccc 1059 }
1060 }
f2324293 1061 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
94754ccc 1062 }
1063
ed9b544e 1064 /* Check if we should connect to a MASTER */
1065 if (server.replstate == REDIS_REPL_CONNECT) {
1066 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
1067 if (syncWithMaster() == REDIS_OK) {
1068 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
1069 }
1070 }
1071 return 1000;
1072}
1073
1074static void createSharedObjects(void) {
1075 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1076 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1077 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 1078 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1079 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1080 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1081 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1082 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1083 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 1084 /* no such key */
ed9b544e 1085 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1086 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1087 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 1088 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1089 "-ERR no such key\r\n"));
ed9b544e 1090 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1091 "-ERR syntax error\r\n"));
c937aa89 1092 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1093 "-ERR source and destination objects are the same\r\n"));
1094 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1095 "-ERR index out of range\r\n"));
ed9b544e 1096 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1097 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1098 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1099 shared.select0 = createStringObject("select 0\r\n",10);
1100 shared.select1 = createStringObject("select 1\r\n",10);
1101 shared.select2 = createStringObject("select 2\r\n",10);
1102 shared.select3 = createStringObject("select 3\r\n",10);
1103 shared.select4 = createStringObject("select 4\r\n",10);
1104 shared.select5 = createStringObject("select 5\r\n",10);
1105 shared.select6 = createStringObject("select 6\r\n",10);
1106 shared.select7 = createStringObject("select 7\r\n",10);
1107 shared.select8 = createStringObject("select 8\r\n",10);
1108 shared.select9 = createStringObject("select 9\r\n",10);
1109}
1110
1111static void appendServerSaveParams(time_t seconds, int changes) {
1112 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1113 server.saveparams[server.saveparamslen].seconds = seconds;
1114 server.saveparams[server.saveparamslen].changes = changes;
1115 server.saveparamslen++;
1116}
1117
bcfc686d 1118static void resetServerSaveParams() {
ed9b544e 1119 zfree(server.saveparams);
1120 server.saveparams = NULL;
1121 server.saveparamslen = 0;
1122}
1123
1124static void initServerConfig() {
1125 server.dbnum = REDIS_DEFAULT_DBNUM;
1126 server.port = REDIS_SERVERPORT;
1127 server.verbosity = REDIS_DEBUG;
1128 server.maxidletime = REDIS_MAXIDLETIME;
1129 server.saveparams = NULL;
1130 server.logfile = NULL; /* NULL = log on standard output */
1131 server.bindaddr = NULL;
1132 server.glueoutputbuf = 1;
1133 server.daemonize = 0;
44b38ef4 1134 server.appendonly = 0;
4e141d5a 1135 server.appendfsync = APPENDFSYNC_ALWAYS;
48f0308a 1136 server.lastfsync = time(NULL);
44b38ef4 1137 server.appendfd = -1;
1138 server.appendseldb = -1; /* Make sure the first time will not match */
ed329fcf 1139 server.pidfile = "/var/run/redis.pid";
ed9b544e 1140 server.dbfilename = "dump.rdb";
9d65a1bb 1141 server.appendfilename = "appendonly.aof";
abcb223e 1142 server.requirepass = NULL;
10c43610 1143 server.shareobjects = 0;
21aecf4b 1144 server.sharingpoolsize = 1024;
285add55 1145 server.maxclients = 0;
3fd78bcd 1146 server.maxmemory = 0;
bcfc686d 1147 resetServerSaveParams();
ed9b544e 1148
1149 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1150 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1151 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1152 /* Replication related */
1153 server.isslave = 0;
d0ccebcf 1154 server.masterauth = NULL;
ed9b544e 1155 server.masterhost = NULL;
1156 server.masterport = 6379;
1157 server.master = NULL;
1158 server.replstate = REDIS_REPL_NONE;
a7866db6 1159
1160 /* Double constants initialization */
1161 R_Zero = 0.0;
1162 R_PosInf = 1.0/R_Zero;
1163 R_NegInf = -1.0/R_Zero;
1164 R_Nan = R_Zero/R_Zero;
ed9b544e 1165}
1166
1167static void initServer() {
1168 int j;
1169
1170 signal(SIGHUP, SIG_IGN);
1171 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1172 setupSigSegvAction();
ed9b544e 1173
1174 server.clients = listCreate();
1175 server.slaves = listCreate();
87eca727 1176 server.monitors = listCreate();
ed9b544e 1177 server.objfreelist = listCreate();
1178 createSharedObjects();
1179 server.el = aeCreateEventLoop();
3305306f 1180 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
10c43610 1181 server.sharingpool = dictCreate(&setDictType,NULL);
ed9b544e 1182 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1183 if (server.fd == -1) {
1184 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1185 exit(1);
1186 }
3305306f 1187 for (j = 0; j < server.dbnum; j++) {
1188 server.db[j].dict = dictCreate(&hashDictType,NULL);
1189 server.db[j].expires = dictCreate(&setDictType,NULL);
1190 server.db[j].id = j;
1191 }
ed9b544e 1192 server.cronloops = 0;
9f3c422c 1193 server.bgsavechildpid = -1;
9d65a1bb 1194 server.bgrewritechildpid = -1;
1195 server.bgrewritebuf = sdsempty();
ed9b544e 1196 server.lastsave = time(NULL);
1197 server.dirty = 0;
1198 server.usedmemory = 0;
1199 server.stat_numcommands = 0;
1200 server.stat_numconnections = 0;
1201 server.stat_starttime = time(NULL);
d8f8b666 1202 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
44b38ef4 1203
1204 if (server.appendonly) {
71eba477 1205 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
44b38ef4 1206 if (server.appendfd == -1) {
1207 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1208 strerror(errno));
1209 exit(1);
1210 }
1211 }
ed9b544e 1212}
1213
1214/* Empty the whole database */
ca37e9cd 1215static long long emptyDb() {
ed9b544e 1216 int j;
ca37e9cd 1217 long long removed = 0;
ed9b544e 1218
3305306f 1219 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1220 removed += dictSize(server.db[j].dict);
3305306f 1221 dictEmpty(server.db[j].dict);
1222 dictEmpty(server.db[j].expires);
1223 }
ca37e9cd 1224 return removed;
ed9b544e 1225}
1226
/* Convert a "yes" / "no" string (compared case-insensitively) into 1 / 0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    return (strcasecmp(s,"no") == 0) ? 0 : -1;
}
1232
ed9b544e 1233/* I agree, this is a very rudimental way to load a configuration...
1234 will improve later if the config gets more complex */
1235static void loadServerConfig(char *filename) {
c9a111ac 1236 FILE *fp;
ed9b544e 1237 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1238 int linenum = 0;
1239 sds line = NULL;
c9a111ac 1240
1241 if (filename[0] == '-' && filename[1] == '\0')
1242 fp = stdin;
1243 else {
1244 if ((fp = fopen(filename,"r")) == NULL) {
1245 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1246 exit(1);
1247 }
ed9b544e 1248 }
c9a111ac 1249
ed9b544e 1250 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1251 sds *argv;
1252 int argc, j;
1253
1254 linenum++;
1255 line = sdsnew(buf);
1256 line = sdstrim(line," \t\r\n");
1257
1258 /* Skip comments and blank lines*/
1259 if (line[0] == '#' || line[0] == '\0') {
1260 sdsfree(line);
1261 continue;
1262 }
1263
1264 /* Split into arguments */
1265 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1266 sdstolower(argv[0]);
1267
1268 /* Execute config directives */
bb0b03a3 1269 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1270 server.maxidletime = atoi(argv[1]);
0150db36 1271 if (server.maxidletime < 0) {
ed9b544e 1272 err = "Invalid timeout value"; goto loaderr;
1273 }
bb0b03a3 1274 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1275 server.port = atoi(argv[1]);
1276 if (server.port < 1 || server.port > 65535) {
1277 err = "Invalid port"; goto loaderr;
1278 }
bb0b03a3 1279 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1280 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1281 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1282 int seconds = atoi(argv[1]);
1283 int changes = atoi(argv[2]);
1284 if (seconds < 1 || changes < 0) {
1285 err = "Invalid save parameters"; goto loaderr;
1286 }
1287 appendServerSaveParams(seconds,changes);
bb0b03a3 1288 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1289 if (chdir(argv[1]) == -1) {
1290 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1291 argv[1], strerror(errno));
1292 exit(1);
1293 }
bb0b03a3 1294 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1295 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
1296 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1297 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1298 else {
1299 err = "Invalid log level. Must be one of debug, notice, warning";
1300 goto loaderr;
1301 }
bb0b03a3 1302 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1303 FILE *logfp;
ed9b544e 1304
1305 server.logfile = zstrdup(argv[1]);
bb0b03a3 1306 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1307 zfree(server.logfile);
1308 server.logfile = NULL;
1309 }
1310 if (server.logfile) {
1311 /* Test if we are able to open the file. The server will not
1312 * be able to abort just for this problem later... */
c9a111ac 1313 logfp = fopen(server.logfile,"a");
1314 if (logfp == NULL) {
ed9b544e 1315 err = sdscatprintf(sdsempty(),
1316 "Can't open the log file: %s", strerror(errno));
1317 goto loaderr;
1318 }
c9a111ac 1319 fclose(logfp);
ed9b544e 1320 }
bb0b03a3 1321 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1322 server.dbnum = atoi(argv[1]);
1323 if (server.dbnum < 1) {
1324 err = "Invalid number of databases"; goto loaderr;
1325 }
285add55 1326 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1327 server.maxclients = atoi(argv[1]);
3fd78bcd 1328 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
d4465900 1329 server.maxmemory = strtoll(argv[1], NULL, 10);
bb0b03a3 1330 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1331 server.masterhost = sdsnew(argv[1]);
1332 server.masterport = atoi(argv[2]);
1333 server.replstate = REDIS_REPL_CONNECT;
d0ccebcf 1334 } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
1335 server.masterauth = zstrdup(argv[1]);
bb0b03a3 1336 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1337 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1338 err = "argument must be 'yes' or 'no'"; goto loaderr;
1339 }
bb0b03a3 1340 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
85dd2f3a 1341 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
10c43610 1342 err = "argument must be 'yes' or 'no'"; goto loaderr;
1343 }
e52c65b9 1344 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1345 server.sharingpoolsize = atoi(argv[1]);
1346 if (server.sharingpoolsize < 1) {
1347 err = "invalid object sharing pool size"; goto loaderr;
1348 }
bb0b03a3 1349 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1350 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1351 err = "argument must be 'yes' or 'no'"; goto loaderr;
1352 }
44b38ef4 1353 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1354 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1355 err = "argument must be 'yes' or 'no'"; goto loaderr;
1356 }
48f0308a 1357 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1766c6da 1358 if (!strcasecmp(argv[1],"no")) {
48f0308a 1359 server.appendfsync = APPENDFSYNC_NO;
1766c6da 1360 } else if (!strcasecmp(argv[1],"always")) {
48f0308a 1361 server.appendfsync = APPENDFSYNC_ALWAYS;
1766c6da 1362 } else if (!strcasecmp(argv[1],"everysec")) {
48f0308a 1363 server.appendfsync = APPENDFSYNC_EVERYSEC;
1364 } else {
1365 err = "argument must be 'no', 'always' or 'everysec'";
1366 goto loaderr;
1367 }
bb0b03a3 1368 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
abcb223e 1369 server.requirepass = zstrdup(argv[1]);
bb0b03a3 1370 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
ed329fcf 1371 server.pidfile = zstrdup(argv[1]);
bb0b03a3 1372 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
b8b553c8 1373 server.dbfilename = zstrdup(argv[1]);
ed9b544e 1374 } else {
1375 err = "Bad directive or wrong number of arguments"; goto loaderr;
1376 }
1377 for (j = 0; j < argc; j++)
1378 sdsfree(argv[j]);
1379 zfree(argv);
1380 sdsfree(line);
1381 }
c9a111ac 1382 if (fp != stdin) fclose(fp);
ed9b544e 1383 return;
1384
1385loaderr:
1386 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1387 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1388 fprintf(stderr, ">>> '%s'\n", line);
1389 fprintf(stderr, "%s\n", err);
1390 exit(1);
1391}
1392
1393static void freeClientArgv(redisClient *c) {
1394 int j;
1395
1396 for (j = 0; j < c->argc; j++)
1397 decrRefCount(c->argv[j]);
e8a74421 1398 for (j = 0; j < c->mbargc; j++)
1399 decrRefCount(c->mbargv[j]);
ed9b544e 1400 c->argc = 0;
e8a74421 1401 c->mbargc = 0;
ed9b544e 1402}
1403
1404static void freeClient(redisClient *c) {
1405 listNode *ln;
1406
1407 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1408 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1409 sdsfree(c->querybuf);
1410 listRelease(c->reply);
1411 freeClientArgv(c);
1412 close(c->fd);
1413 ln = listSearchKey(server.clients,c);
dfc5e96c 1414 redisAssert(ln != NULL);
ed9b544e 1415 listDelNode(server.clients,ln);
1416 if (c->flags & REDIS_SLAVE) {
6208b3a7 1417 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1418 close(c->repldbfd);
87eca727 1419 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1420 ln = listSearchKey(l,c);
dfc5e96c 1421 redisAssert(ln != NULL);
87eca727 1422 listDelNode(l,ln);
ed9b544e 1423 }
1424 if (c->flags & REDIS_MASTER) {
1425 server.master = NULL;
1426 server.replstate = REDIS_REPL_CONNECT;
1427 }
93ea3759 1428 zfree(c->argv);
e8a74421 1429 zfree(c->mbargv);
ed9b544e 1430 zfree(c);
1431}
1432
cc30e368 1433#define GLUEREPLY_UP_TO (1024)
ed9b544e 1434static void glueReplyBuffersIfNeeded(redisClient *c) {
c28b42ac 1435 int copylen = 0;
1436 char buf[GLUEREPLY_UP_TO];
6208b3a7 1437 listNode *ln;
ed9b544e 1438 robj *o;
1439
6208b3a7 1440 listRewind(c->reply);
1441 while((ln = listYield(c->reply))) {
c28b42ac 1442 int objlen;
1443
ed9b544e 1444 o = ln->value;
c28b42ac 1445 objlen = sdslen(o->ptr);
1446 if (copylen + objlen <= GLUEREPLY_UP_TO) {
1447 memcpy(buf+copylen,o->ptr,objlen);
1448 copylen += objlen;
ed9b544e 1449 listDelNode(c->reply,ln);
c28b42ac 1450 } else {
1451 if (copylen == 0) return;
1452 break;
ed9b544e 1453 }
ed9b544e 1454 }
c28b42ac 1455 /* Now the output buffer is empty, add the new single element */
1456 o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
1457 listAddNodeHead(c->reply,o);
ed9b544e 1458}
1459
1460static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1461 redisClient *c = privdata;
1462 int nwritten = 0, totwritten = 0, objlen;
1463 robj *o;
1464 REDIS_NOTUSED(el);
1465 REDIS_NOTUSED(mask);
1466
2895e862 1467 /* Use writev() if we have enough buffers to send */
7ea870c0 1468 if (!server.glueoutputbuf &&
1469 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
1470 !(c->flags & REDIS_MASTER))
2895e862 1471 {
1472 sendReplyToClientWritev(el, fd, privdata, mask);
1473 return;
1474 }
2895e862 1475
ed9b544e 1476 while(listLength(c->reply)) {
c28b42ac 1477 if (server.glueoutputbuf && listLength(c->reply) > 1)
1478 glueReplyBuffersIfNeeded(c);
1479
ed9b544e 1480 o = listNodeValue(listFirst(c->reply));
1481 objlen = sdslen(o->ptr);
1482
1483 if (objlen == 0) {
1484 listDelNode(c->reply,listFirst(c->reply));
1485 continue;
1486 }
1487
1488 if (c->flags & REDIS_MASTER) {
6f376729 1489 /* Don't reply to a master */
ed9b544e 1490 nwritten = objlen - c->sentlen;
1491 } else {
a4d1ba9a 1492 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 1493 if (nwritten <= 0) break;
1494 }
1495 c->sentlen += nwritten;
1496 totwritten += nwritten;
1497 /* If we fully sent the object on head go to the next one */
1498 if (c->sentlen == objlen) {
1499 listDelNode(c->reply,listFirst(c->reply));
1500 c->sentlen = 0;
1501 }
6f376729 1502 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
12f9d551 1503 * bytes, in a single threaded server it's a good idea to serve
6f376729 1504 * other clients as well, even if a very large request comes from
1505 * super fast link that is always able to accept data (in real world
12f9d551 1506 * scenario think about 'KEYS *' against the loopback interfae) */
6f376729 1507 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 1508 }
1509 if (nwritten == -1) {
1510 if (errno == EAGAIN) {
1511 nwritten = 0;
1512 } else {
1513 redisLog(REDIS_DEBUG,
1514 "Error writing to client: %s", strerror(errno));
1515 freeClient(c);
1516 return;
1517 }
1518 }
1519 if (totwritten > 0) c->lastinteraction = time(NULL);
1520 if (listLength(c->reply) == 0) {
1521 c->sentlen = 0;
1522 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1523 }
1524}
1525
2895e862 1526static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
1527{
1528 redisClient *c = privdata;
1529 int nwritten = 0, totwritten = 0, objlen, willwrite;
1530 robj *o;
1531 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
1532 int offset, ion = 0;
1533 REDIS_NOTUSED(el);
1534 REDIS_NOTUSED(mask);
1535
1536 listNode *node;
1537 while (listLength(c->reply)) {
1538 offset = c->sentlen;
1539 ion = 0;
1540 willwrite = 0;
1541
1542 /* fill-in the iov[] array */
1543 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
1544 o = listNodeValue(node);
1545 objlen = sdslen(o->ptr);
1546
1547 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
1548 break;
1549
1550 if(ion == REDIS_WRITEV_IOVEC_COUNT)
1551 break; /* no more iovecs */
1552
1553 iov[ion].iov_base = ((char*)o->ptr) + offset;
1554 iov[ion].iov_len = objlen - offset;
1555 willwrite += objlen - offset;
1556 offset = 0; /* just for the first item */
1557 ion++;
1558 }
1559
1560 if(willwrite == 0)
1561 break;
1562
1563 /* write all collected blocks at once */
1564 if((nwritten = writev(fd, iov, ion)) < 0) {
1565 if (errno != EAGAIN) {
1566 redisLog(REDIS_DEBUG,
1567 "Error writing to client: %s", strerror(errno));
1568 freeClient(c);
1569 return;
1570 }
1571 break;
1572 }
1573
1574 totwritten += nwritten;
1575 offset = c->sentlen;
1576
1577 /* remove written robjs from c->reply */
1578 while (nwritten && listLength(c->reply)) {
1579 o = listNodeValue(listFirst(c->reply));
1580 objlen = sdslen(o->ptr);
1581
1582 if(nwritten >= objlen - offset) {
1583 listDelNode(c->reply, listFirst(c->reply));
1584 nwritten -= objlen - offset;
1585 c->sentlen = 0;
1586 } else {
1587 /* partial write */
1588 c->sentlen += nwritten;
1589 break;
1590 }
1591 offset = 0;
1592 }
1593 }
1594
1595 if (totwritten > 0)
1596 c->lastinteraction = time(NULL);
1597
1598 if (listLength(c->reply) == 0) {
1599 c->sentlen = 0;
1600 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1601 }
1602}
1603
ed9b544e 1604static struct redisCommand *lookupCommand(char *name) {
1605 int j = 0;
1606 while(cmdTable[j].name != NULL) {
bb0b03a3 1607 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
ed9b544e 1608 j++;
1609 }
1610 return NULL;
1611}
1612
1613/* resetClient prepare the client to process the next command */
1614static void resetClient(redisClient *c) {
1615 freeClientArgv(c);
1616 c->bulklen = -1;
e8a74421 1617 c->multibulk = 0;
ed9b544e 1618}
1619
1620/* If this function gets called we already read a whole
1621 * command, arguments are in the client argv/argc fields.
1622 * processCommand() executes the command or prepares the
1623 * server for a bulk read from the client.
1624 *
1625 * If 1 is returned the client is still alive and valid and
1626 * other operations can be performed by the caller. Otherwise
1627 * if 0 is returned the client was destroyed (i.e. after QUIT). */
1628static int processCommand(redisClient *c) {
1629 struct redisCommand *cmd;
1630 long long dirty;
1631
3fd78bcd 1632 /* Free some memory if needed (maxmemory setting) */
1633 if (server.maxmemory) freeMemoryIfNeeded();
1634
e8a74421 1635 /* Handle the multi bulk command type. This is an alternative protocol
1636 * supported by Redis in order to receive commands that are composed of
1637 * multiple binary-safe "bulk" arguments. The latency of processing is
1638 * a bit higher but this allows things like multi-sets, so if this
1639 * protocol is used only for MSET and similar commands this is a big win. */
1640 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1641 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1642 if (c->multibulk <= 0) {
1643 resetClient(c);
1644 return 1;
1645 } else {
1646 decrRefCount(c->argv[c->argc-1]);
1647 c->argc--;
1648 return 1;
1649 }
1650 } else if (c->multibulk) {
1651 if (c->bulklen == -1) {
1652 if (((char*)c->argv[0]->ptr)[0] != '$') {
1653 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1654 resetClient(c);
1655 return 1;
1656 } else {
1657 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1658 decrRefCount(c->argv[0]);
1659 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1660 c->argc--;
1661 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1662 resetClient(c);
1663 return 1;
1664 }
1665 c->argc--;
1666 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1667 return 1;
1668 }
1669 } else {
1670 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1671 c->mbargv[c->mbargc] = c->argv[0];
1672 c->mbargc++;
1673 c->argc--;
1674 c->multibulk--;
1675 if (c->multibulk == 0) {
1676 robj **auxargv;
1677 int auxargc;
1678
1679 /* Here we need to swap the multi-bulk argc/argv with the
1680 * normal argc/argv of the client structure. */
1681 auxargv = c->argv;
1682 c->argv = c->mbargv;
1683 c->mbargv = auxargv;
1684
1685 auxargc = c->argc;
1686 c->argc = c->mbargc;
1687 c->mbargc = auxargc;
1688
1689 /* We need to set bulklen to something different than -1
1690 * in order for the code below to process the command without
1691 * to try to read the last argument of a bulk command as
1692 * a special argument. */
1693 c->bulklen = 0;
1694 /* continue below and process the command */
1695 } else {
1696 c->bulklen = -1;
1697 return 1;
1698 }
1699 }
1700 }
1701 /* -- end of multi bulk commands processing -- */
1702
ed9b544e 1703 /* The QUIT command is handled as a special case. Normal command
1704 * procs are unable to close the client connection safely */
bb0b03a3 1705 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 1706 freeClient(c);
1707 return 0;
1708 }
1709 cmd = lookupCommand(c->argv[0]->ptr);
1710 if (!cmd) {
1711 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1712 resetClient(c);
1713 return 1;
1714 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1715 (c->argc < -cmd->arity)) {
454d4e43 1716 addReplySds(c,
1717 sdscatprintf(sdsempty(),
1718 "-ERR wrong number of arguments for '%s' command\r\n",
1719 cmd->name));
ed9b544e 1720 resetClient(c);
1721 return 1;
3fd78bcd 1722 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1723 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1724 resetClient(c);
1725 return 1;
ed9b544e 1726 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1727 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1728
1729 decrRefCount(c->argv[c->argc-1]);
1730 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1731 c->argc--;
1732 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1733 resetClient(c);
1734 return 1;
1735 }
1736 c->argc--;
1737 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1738 /* It is possible that the bulk read is already in the
8d0490e7 1739 * buffer. Check this condition and handle it accordingly.
1740 * This is just a fast path, alternative to call processInputBuffer().
1741 * It's a good idea since the code is small and this condition
1742 * happens most of the times. */
ed9b544e 1743 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1744 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1745 c->argc++;
1746 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1747 } else {
1748 return 1;
1749 }
1750 }
10c43610 1751 /* Let's try to share objects on the command arguments vector */
1752 if (server.shareobjects) {
1753 int j;
1754 for(j = 1; j < c->argc; j++)
1755 c->argv[j] = tryObjectSharing(c->argv[j]);
1756 }
942a3961 1757 /* Let's try to encode the bulk object to save space. */
1758 if (cmd->flags & REDIS_CMD_BULK)
1759 tryObjectEncoding(c->argv[c->argc-1]);
1760
e63943a4 1761 /* Check if the user is authenticated */
1762 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1763 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1764 resetClient(c);
1765 return 1;
1766 }
1767
ed9b544e 1768 /* Exec the command */
1769 dirty = server.dirty;
1770 cmd->proc(c);
33ed1a42 1771 if (server.appendonly && server.dirty-dirty)
44b38ef4 1772 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
33ed1a42 1773 if (server.dirty-dirty && listLength(server.slaves))
3305306f 1774 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
87eca727 1775 if (listLength(server.monitors))
3305306f 1776 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
ed9b544e 1777 server.stat_numcommands++;
1778
1779 /* Prepare the client for the next command */
1780 if (c->flags & REDIS_CLOSE) {
1781 freeClient(c);
1782 return 0;
1783 }
1784 resetClient(c);
1785 return 1;
1786}
1787
87eca727 1788static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6208b3a7 1789 listNode *ln;
ed9b544e 1790 int outc = 0, j;
93ea3759 1791 robj **outv;
1792 /* (args*2)+1 is enough room for args, spaces, newlines */
1793 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1794
1795 if (argc <= REDIS_STATIC_ARGS) {
1796 outv = static_outv;
1797 } else {
1798 outv = zmalloc(sizeof(robj*)*(argc*2+1));
93ea3759 1799 }
ed9b544e 1800
1801 for (j = 0; j < argc; j++) {
1802 if (j != 0) outv[outc++] = shared.space;
1803 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1804 robj *lenobj;
1805
1806 lenobj = createObject(REDIS_STRING,
682ac724 1807 sdscatprintf(sdsempty(),"%lu\r\n",
83c6a618 1808 (unsigned long) stringObjectLen(argv[j])));
ed9b544e 1809 lenobj->refcount = 0;
1810 outv[outc++] = lenobj;
1811 }
1812 outv[outc++] = argv[j];
1813 }
1814 outv[outc++] = shared.crlf;
1815
40d224a9 1816 /* Increment all the refcounts at start and decrement at end in order to
1817 * be sure to free objects if there is no slave in a replication state
1818 * able to be feed with commands */
1819 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
6208b3a7 1820 listRewind(slaves);
1821 while((ln = listYield(slaves))) {
ed9b544e 1822 redisClient *slave = ln->value;
40d224a9 1823
1824 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 1825 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 1826
1827 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 1828 if (slave->slaveseldb != dictid) {
1829 robj *selectcmd;
1830
1831 switch(dictid) {
1832 case 0: selectcmd = shared.select0; break;
1833 case 1: selectcmd = shared.select1; break;
1834 case 2: selectcmd = shared.select2; break;
1835 case 3: selectcmd = shared.select3; break;
1836 case 4: selectcmd = shared.select4; break;
1837 case 5: selectcmd = shared.select5; break;
1838 case 6: selectcmd = shared.select6; break;
1839 case 7: selectcmd = shared.select7; break;
1840 case 8: selectcmd = shared.select8; break;
1841 case 9: selectcmd = shared.select9; break;
1842 default:
1843 selectcmd = createObject(REDIS_STRING,
1844 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1845 selectcmd->refcount = 0;
1846 break;
1847 }
1848 addReply(slave,selectcmd);
1849 slave->slaveseldb = dictid;
1850 }
1851 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 1852 }
40d224a9 1853 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 1854 if (outv != static_outv) zfree(outv);
ed9b544e 1855}
1856
638e42ac 1857static void processInputBuffer(redisClient *c) {
ed9b544e 1858again:
1859 if (c->bulklen == -1) {
1860 /* Read the first line of the query */
1861 char *p = strchr(c->querybuf,'\n');
1862 size_t querylen;
644fafa3 1863
ed9b544e 1864 if (p) {
1865 sds query, *argv;
1866 int argc, j;
1867
1868 query = c->querybuf;
1869 c->querybuf = sdsempty();
1870 querylen = 1+(p-(query));
1871 if (sdslen(query) > querylen) {
1872 /* leave data after the first line of the query in the buffer */
1873 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1874 }
1875 *p = '\0'; /* remove "\n" */
1876 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1877 sdsupdatelen(query);
1878
1879 /* Now we can split the query in arguments */
ed9b544e 1880 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 1881 sdsfree(query);
1882
1883 if (c->argv) zfree(c->argv);
1884 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 1885
1886 for (j = 0; j < argc; j++) {
ed9b544e 1887 if (sdslen(argv[j])) {
1888 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1889 c->argc++;
1890 } else {
1891 sdsfree(argv[j]);
1892 }
1893 }
1894 zfree(argv);
7c49733c 1895 if (c->argc) {
1896 /* Execute the command. If the client is still valid
1897 * after processCommand() return and there is something
1898 * on the query buffer try to process the next command. */
1899 if (processCommand(c) && sdslen(c->querybuf)) goto again;
1900 } else {
1901 /* Nothing to process, argc == 0. Just process the query
1902 * buffer if it's not empty or return to the caller */
1903 if (sdslen(c->querybuf)) goto again;
1904 }
ed9b544e 1905 return;
644fafa3 1906 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
ed9b544e 1907 redisLog(REDIS_DEBUG, "Client protocol error");
1908 freeClient(c);
1909 return;
1910 }
1911 } else {
1912 /* Bulk read handling. Note that if we are at this point
1913 the client already sent a command terminated with a newline,
1914 we are reading the bulk data that is actually the last
1915 argument of the command. */
1916 int qbl = sdslen(c->querybuf);
1917
1918 if (c->bulklen <= qbl) {
1919 /* Copy everything but the final CRLF as final argument */
1920 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1921 c->argc++;
1922 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 1923 /* Process the command. If the client is still valid after
1924 * the processing and there is more data in the buffer
1925 * try to parse it. */
1926 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1927 return;
1928 }
1929 }
1930}
1931
638e42ac 1932static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1933 redisClient *c = (redisClient*) privdata;
1934 char buf[REDIS_IOBUF_LEN];
1935 int nread;
1936 REDIS_NOTUSED(el);
1937 REDIS_NOTUSED(mask);
1938
1939 nread = read(fd, buf, REDIS_IOBUF_LEN);
1940 if (nread == -1) {
1941 if (errno == EAGAIN) {
1942 nread = 0;
1943 } else {
1944 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1945 freeClient(c);
1946 return;
1947 }
1948 } else if (nread == 0) {
1949 redisLog(REDIS_DEBUG, "Client closed connection");
1950 freeClient(c);
1951 return;
1952 }
1953 if (nread) {
1954 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1955 c->lastinteraction = time(NULL);
1956 } else {
1957 return;
1958 }
1959 processInputBuffer(c);
1960}
1961
ed9b544e 1962static int selectDb(redisClient *c, int id) {
1963 if (id < 0 || id >= server.dbnum)
1964 return REDIS_ERR;
3305306f 1965 c->db = &server.db[id];
ed9b544e 1966 return REDIS_OK;
1967}
1968
40d224a9 1969static void *dupClientReplyValue(void *o) {
1970 incrRefCount((robj*)o);
1971 return 0;
1972}
1973
ed9b544e 1974static redisClient *createClient(int fd) {
1975 redisClient *c = zmalloc(sizeof(*c));
1976
1977 anetNonBlock(NULL,fd);
1978 anetTcpNoDelay(NULL,fd);
1979 if (!c) return NULL;
1980 selectDb(c,0);
1981 c->fd = fd;
1982 c->querybuf = sdsempty();
1983 c->argc = 0;
93ea3759 1984 c->argv = NULL;
ed9b544e 1985 c->bulklen = -1;
e8a74421 1986 c->multibulk = 0;
1987 c->mbargc = 0;
1988 c->mbargv = NULL;
ed9b544e 1989 c->sentlen = 0;
1990 c->flags = 0;
1991 c->lastinteraction = time(NULL);
abcb223e 1992 c->authenticated = 0;
40d224a9 1993 c->replstate = REDIS_REPL_NONE;
6b47e12e 1994 c->reply = listCreate();
ed9b544e 1995 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 1996 listSetDupMethod(c->reply,dupClientReplyValue);
ed9b544e 1997 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
266373b2 1998 readQueryFromClient, c) == AE_ERR) {
ed9b544e 1999 freeClient(c);
2000 return NULL;
2001 }
6b47e12e 2002 listAddNodeTail(server.clients,c);
ed9b544e 2003 return c;
2004}
2005
2006static void addReply(redisClient *c, robj *obj) {
2007 if (listLength(c->reply) == 0 &&
6208b3a7 2008 (c->replstate == REDIS_REPL_NONE ||
2009 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 2010 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
266373b2 2011 sendReplyToClient, c) == AE_ERR) return;
9d65a1bb 2012 listAddNodeTail(c->reply,getDecodedObject(obj));
ed9b544e 2013}
2014
2015static void addReplySds(redisClient *c, sds s) {
2016 robj *o = createObject(REDIS_STRING,s);
2017 addReply(c,o);
2018 decrRefCount(o);
2019}
2020
e2665397 2021static void addReplyDouble(redisClient *c, double d) {
2022 char buf[128];
2023
2024 snprintf(buf,sizeof(buf),"%.17g",d);
682ac724 2025 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
83c6a618 2026 (unsigned long) strlen(buf),buf));
e2665397 2027}
2028
942a3961 2029static void addReplyBulkLen(redisClient *c, robj *obj) {
2030 size_t len;
2031
2032 if (obj->encoding == REDIS_ENCODING_RAW) {
2033 len = sdslen(obj->ptr);
2034 } else {
2035 long n = (long)obj->ptr;
2036
e054afda 2037 /* Compute how many bytes will take this integer as a radix 10 string */
942a3961 2038 len = 1;
2039 if (n < 0) {
2040 len++;
2041 n = -n;
2042 }
2043 while((n = n/10) != 0) {
2044 len++;
2045 }
2046 }
83c6a618 2047 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len));
942a3961 2048}
2049
ed9b544e 2050static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
2051 int cport, cfd;
2052 char cip[128];
285add55 2053 redisClient *c;
ed9b544e 2054 REDIS_NOTUSED(el);
2055 REDIS_NOTUSED(mask);
2056 REDIS_NOTUSED(privdata);
2057
2058 cfd = anetAccept(server.neterr, fd, cip, &cport);
2059 if (cfd == AE_ERR) {
2060 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
2061 return;
2062 }
2063 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
285add55 2064 if ((c = createClient(cfd)) == NULL) {
ed9b544e 2065 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
2066 close(cfd); /* May be already closed, just ingore errors */
2067 return;
2068 }
285add55 2069 /* If maxclient directive is set and this is one client more... close the
2070 * connection. Note that we create the client instead to check before
2071 * for this condition, since now the socket is already set in nonblocking
2072 * mode and we can send an error for free using the Kernel I/O */
2073 if (server.maxclients && listLength(server.clients) > server.maxclients) {
2074 char *err = "-ERR max number of clients reached\r\n";
2075
2076 /* That's a best effort error message, don't check write errors */
fee803ba 2077 if (write(c->fd,err,strlen(err)) == -1) {
2078 /* Nothing to do, Just to avoid the warning... */
2079 }
285add55 2080 freeClient(c);
2081 return;
2082 }
ed9b544e 2083 server.stat_numconnections++;
2084}
2085
2086/* ======================= Redis objects implementation ===================== */
2087
2088static robj *createObject(int type, void *ptr) {
2089 robj *o;
2090
2091 if (listLength(server.objfreelist)) {
2092 listNode *head = listFirst(server.objfreelist);
2093 o = listNodeValue(head);
2094 listDelNode(server.objfreelist,head);
2095 } else {
2096 o = zmalloc(sizeof(*o));
2097 }
ed9b544e 2098 o->type = type;
942a3961 2099 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 2100 o->ptr = ptr;
2101 o->refcount = 1;
2102 return o;
2103}
2104
2105static robj *createStringObject(char *ptr, size_t len) {
2106 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
2107}
2108
2109static robj *createListObject(void) {
2110 list *l = listCreate();
2111
ed9b544e 2112 listSetFreeMethod(l,decrRefCount);
2113 return createObject(REDIS_LIST,l);
2114}
2115
2116static robj *createSetObject(void) {
2117 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 2118 return createObject(REDIS_SET,d);
2119}
2120
1812e024 2121static robj *createZsetObject(void) {
6b47e12e 2122 zset *zs = zmalloc(sizeof(*zs));
2123
2124 zs->dict = dictCreate(&zsetDictType,NULL);
2125 zs->zsl = zslCreate();
2126 return createObject(REDIS_ZSET,zs);
1812e024 2127}
2128
ed9b544e 2129static void freeStringObject(robj *o) {
942a3961 2130 if (o->encoding == REDIS_ENCODING_RAW) {
2131 sdsfree(o->ptr);
2132 }
ed9b544e 2133}
2134
2135static void freeListObject(robj *o) {
2136 listRelease((list*) o->ptr);
2137}
2138
2139static void freeSetObject(robj *o) {
2140 dictRelease((dict*) o->ptr);
2141}
2142
fd8ccf44 2143static void freeZsetObject(robj *o) {
2144 zset *zs = o->ptr;
2145
2146 dictRelease(zs->dict);
2147 zslFree(zs->zsl);
2148 zfree(zs);
2149}
2150
ed9b544e 2151static void freeHashObject(robj *o) {
2152 dictRelease((dict*) o->ptr);
2153}
2154
2155static void incrRefCount(robj *o) {
2156 o->refcount++;
94754ccc 2157#ifdef DEBUG_REFCOUNT
2158 if (o->type == REDIS_STRING)
2159 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
2160#endif
ed9b544e 2161}
2162
2163static void decrRefCount(void *obj) {
2164 robj *o = obj;
94754ccc 2165
2166#ifdef DEBUG_REFCOUNT
2167 if (o->type == REDIS_STRING)
2168 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
2169#endif
ed9b544e 2170 if (--(o->refcount) == 0) {
2171 switch(o->type) {
2172 case REDIS_STRING: freeStringObject(o); break;
2173 case REDIS_LIST: freeListObject(o); break;
2174 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 2175 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 2176 case REDIS_HASH: freeHashObject(o); break;
dfc5e96c 2177 default: redisAssert(0 != 0); break;
ed9b544e 2178 }
2179 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2180 !listAddNodeHead(server.objfreelist,o))
2181 zfree(o);
2182 }
2183}
2184
942a3961 2185static robj *lookupKey(redisDb *db, robj *key) {
2186 dictEntry *de = dictFind(db->dict,key);
2187 return de ? dictGetEntryVal(de) : NULL;
2188}
2189
2190static robj *lookupKeyRead(redisDb *db, robj *key) {
2191 expireIfNeeded(db,key);
2192 return lookupKey(db,key);
2193}
2194
2195static robj *lookupKeyWrite(redisDb *db, robj *key) {
2196 deleteIfVolatile(db,key);
2197 return lookupKey(db,key);
2198}
2199
2200static int deleteKey(redisDb *db, robj *key) {
2201 int retval;
2202
2203 /* We need to protect key from destruction: after the first dictDelete()
2204 * it may happen that 'key' is no longer valid if we don't increment
2205 * it's count. This may happen when we get the object reference directly
2206 * from the hash table with dictRandomKey() or dict iterators */
2207 incrRefCount(key);
2208 if (dictSize(db->expires)) dictDelete(db->expires,key);
2209 retval = dictDelete(db->dict,key);
2210 decrRefCount(key);
2211
2212 return retval == DICT_OK;
2213}
2214
10c43610 2215/* Try to share an object against the shared objects pool */
2216static robj *tryObjectSharing(robj *o) {
2217 struct dictEntry *de;
2218 unsigned long c;
2219
3305306f 2220 if (o == NULL || server.shareobjects == 0) return o;
10c43610 2221
dfc5e96c 2222 redisAssert(o->type == REDIS_STRING);
10c43610 2223 de = dictFind(server.sharingpool,o);
2224 if (de) {
2225 robj *shared = dictGetEntryKey(de);
2226
2227 c = ((unsigned long) dictGetEntryVal(de))+1;
2228 dictGetEntryVal(de) = (void*) c;
2229 incrRefCount(shared);
2230 decrRefCount(o);
2231 return shared;
2232 } else {
2233 /* Here we are using a stream algorihtm: Every time an object is
2234 * shared we increment its count, everytime there is a miss we
2235 * recrement the counter of a random object. If this object reaches
2236 * zero we remove the object and put the current object instead. */
3305306f 2237 if (dictSize(server.sharingpool) >=
10c43610 2238 server.sharingpoolsize) {
2239 de = dictGetRandomKey(server.sharingpool);
dfc5e96c 2240 redisAssert(de != NULL);
10c43610 2241 c = ((unsigned long) dictGetEntryVal(de))-1;
2242 dictGetEntryVal(de) = (void*) c;
2243 if (c == 0) {
2244 dictDelete(server.sharingpool,de->key);
2245 }
2246 } else {
2247 c = 0; /* If the pool is empty we want to add this object */
2248 }
2249 if (c == 0) {
2250 int retval;
2251
2252 retval = dictAdd(server.sharingpool,o,(void*)1);
dfc5e96c 2253 redisAssert(retval == DICT_OK);
10c43610 2254 incrRefCount(o);
2255 }
2256 return o;
2257 }
2258}
2259
724a51b1 2260/* Check if the nul-terminated string 's' can be represented by a long
2261 * (that is, is a number that fits into long without any other space or
2262 * character before or after the digits).
2263 *
2264 * If so, the function returns REDIS_OK and *longval is set to the value
2265 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 2266static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 2267 char buf[32], *endptr;
2268 long value;
2269 int slen;
2270
2271 value = strtol(s, &endptr, 10);
2272 if (endptr[0] != '\0') return REDIS_ERR;
2273 slen = snprintf(buf,32,"%ld",value);
2274
2275 /* If the number converted back into a string is not identical
2276 * then it's not possible to encode the string as integer */
f69f2cba 2277 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 2278 if (longval) *longval = value;
2279 return REDIS_OK;
2280}
2281
942a3961 2282/* Try to encode a string object in order to save space */
2283static int tryObjectEncoding(robj *o) {
2284 long value;
942a3961 2285 sds s = o->ptr;
3305306f 2286
942a3961 2287 if (o->encoding != REDIS_ENCODING_RAW)
2288 return REDIS_ERR; /* Already encoded */
3305306f 2289
942a3961 2290 /* It's not save to encode shared objects: shared objects can be shared
2291 * everywhere in the "object space" of Redis. Encoded objects can only
2292 * appear as "values" (and not, for instance, as keys) */
2293 if (o->refcount > 1) return REDIS_ERR;
3305306f 2294
942a3961 2295 /* Currently we try to encode only strings */
dfc5e96c 2296 redisAssert(o->type == REDIS_STRING);
94754ccc 2297
724a51b1 2298 /* Check if we can represent this string as a long integer */
2299 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
942a3961 2300
2301 /* Ok, this object can be encoded */
2302 o->encoding = REDIS_ENCODING_INT;
2303 sdsfree(o->ptr);
2304 o->ptr = (void*) value;
2305 return REDIS_OK;
2306}
2307
9d65a1bb 2308/* Get a decoded version of an encoded object (returned as a new object).
2309 * If the object is already raw-encoded just increment the ref count. */
2310static robj *getDecodedObject(robj *o) {
942a3961 2311 robj *dec;
2312
9d65a1bb 2313 if (o->encoding == REDIS_ENCODING_RAW) {
2314 incrRefCount(o);
2315 return o;
2316 }
942a3961 2317 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2318 char buf[32];
2319
2320 snprintf(buf,32,"%ld",(long)o->ptr);
2321 dec = createStringObject(buf,strlen(buf));
2322 return dec;
2323 } else {
dfc5e96c 2324 redisAssert(1 != 1);
942a3961 2325 }
3305306f 2326}
2327
d7f43c08 2328/* Compare two string objects via strcmp() or alike.
2329 * Note that the objects may be integer-encoded. In such a case we
2330 * use snprintf() to get a string representation of the numbers on the stack
1fd9bc8a 2331 * and compare the strings, it's much faster than calling getDecodedObject().
2332 *
2333 * Important note: if objects are not integer encoded, but binary-safe strings,
2334 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
2335 * binary safe. */
724a51b1 2336static int compareStringObjects(robj *a, robj *b) {
dfc5e96c 2337 redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 2338 char bufa[128], bufb[128], *astr, *bstr;
2339 int bothsds = 1;
724a51b1 2340
e197b441 2341 if (a == b) return 0;
d7f43c08 2342 if (a->encoding != REDIS_ENCODING_RAW) {
2343 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2344 astr = bufa;
2345 bothsds = 0;
724a51b1 2346 } else {
d7f43c08 2347 astr = a->ptr;
724a51b1 2348 }
d7f43c08 2349 if (b->encoding != REDIS_ENCODING_RAW) {
2350 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2351 bstr = bufb;
2352 bothsds = 0;
2353 } else {
2354 bstr = b->ptr;
2355 }
2356 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 2357}
2358
0ea663ea 2359static size_t stringObjectLen(robj *o) {
dfc5e96c 2360 redisAssert(o->type == REDIS_STRING);
0ea663ea 2361 if (o->encoding == REDIS_ENCODING_RAW) {
2362 return sdslen(o->ptr);
2363 } else {
2364 char buf[32];
2365
2366 return snprintf(buf,32,"%ld",(long)o->ptr);
2367 }
2368}
2369
ed9b544e 2370/*============================ DB saving/loading ============================ */
2371
/* Write the one byte 'type' on 'fp'. Returns 0 on success, -1 on write
 * error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    size_t written = fwrite(&type,1,1,fp);

    return (written == 0) ? -1 : 0;
}
2376
/* Write the time 't' on 'fp' as a 32 bit signed integer, in the byte order
 * of the host. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp;

    stamp = (int32_t) t;
    return (fwrite(&stamp,4,1,fp) == 0) ? -1 : 0;
}
2382
e3566d4b 2383/* check rdbLoadLen() comments for more info */
f78fd11b 2384static int rdbSaveLen(FILE *fp, uint32_t len) {
2385 unsigned char buf[2];
2386
2387 if (len < (1<<6)) {
2388 /* Save a 6 bit len */
10c43610 2389 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 2390 if (fwrite(buf,1,1,fp) == 0) return -1;
2391 } else if (len < (1<<14)) {
2392 /* Save a 14 bit len */
10c43610 2393 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 2394 buf[1] = len&0xFF;
17be1a4a 2395 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 2396 } else {
2397 /* Save a 32 bit len */
10c43610 2398 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 2399 if (fwrite(buf,1,1,fp) == 0) return -1;
2400 len = htonl(len);
2401 if (fwrite(&len,4,1,fp) == 0) return -1;
2402 }
2403 return 0;
2404}
2405
e3566d4b 2406/* String objects in the form "2391" "-100" without any space and with a
2407 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2408 * encoded as integers to save space */
56906eef 2409static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
e3566d4b 2410 long long value;
2411 char *endptr, buf[32];
2412
2413 /* Check if it's possible to encode this value as a number */
2414 value = strtoll(s, &endptr, 10);
2415 if (endptr[0] != '\0') return 0;
2416 snprintf(buf,32,"%lld",value);
2417
2418 /* If the number converted back into a string is not identical
2419 * then it's not possible to encode the string as integer */
2420 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2421
2422 /* Finally check if it fits in our ranges */
2423 if (value >= -(1<<7) && value <= (1<<7)-1) {
2424 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2425 enc[1] = value&0xFF;
2426 return 2;
2427 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2428 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2429 enc[1] = value&0xFF;
2430 enc[2] = (value>>8)&0xFF;
2431 return 3;
2432 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2433 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2434 enc[1] = value&0xFF;
2435 enc[2] = (value>>8)&0xFF;
2436 enc[3] = (value>>16)&0xFF;
2437 enc[4] = (value>>24)&0xFF;
2438 return 5;
2439 } else {
2440 return 0;
2441 }
2442}
2443
774e3047 2444static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2445 unsigned int comprlen, outlen;
2446 unsigned char byte;
2447 void *out;
2448
2449 /* We require at least four bytes compression for this to be worth it */
2450 outlen = sdslen(obj->ptr)-4;
2451 if (outlen <= 0) return 0;
3a2694c4 2452 if ((out = zmalloc(outlen+1)) == NULL) return 0;
774e3047 2453 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2454 if (comprlen == 0) {
88e85998 2455 zfree(out);
774e3047 2456 return 0;
2457 }
2458 /* Data compressed! Let's save it on disk */
2459 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2460 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2461 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2462 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2463 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 2464 zfree(out);
774e3047 2465 return comprlen;
2466
2467writeerr:
88e85998 2468 zfree(out);
774e3047 2469 return -1;
2470}
2471
e3566d4b 2472/* Save a string objet as [len][data] on disk. If the object is a string
2473 * representation of an integer value we try to safe it in a special form */
942a3961 2474static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2475 size_t len;
e3566d4b 2476 int enclen;
10c43610 2477
942a3961 2478 len = sdslen(obj->ptr);
2479
774e3047 2480 /* Try integer encoding */
e3566d4b 2481 if (len <= 11) {
2482 unsigned char buf[5];
2483 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2484 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2485 return 0;
2486 }
2487 }
774e3047 2488
2489 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 2490 * aaaaaaaaaaaaaaaaaa so skip it */
942a3961 2491 if (len > 20) {
774e3047 2492 int retval;
2493
2494 retval = rdbSaveLzfStringObject(fp,obj);
2495 if (retval == -1) return -1;
2496 if (retval > 0) return 0;
2497 /* retval == 0 means data can't be compressed, save the old way */
2498 }
2499
2500 /* Store verbatim */
10c43610 2501 if (rdbSaveLen(fp,len) == -1) return -1;
2502 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2503 return 0;
2504}
2505
942a3961 2506/* Like rdbSaveStringObjectRaw() but handle encoded objects */
2507static int rdbSaveStringObject(FILE *fp, robj *obj) {
2508 int retval;
942a3961 2509
9d65a1bb 2510 obj = getDecodedObject(obj);
2511 retval = rdbSaveStringObjectRaw(fp,obj);
2512 decrRefCount(obj);
2513 return retval;
942a3961 2514}
2515
/* Save a double value. Doubles are saved as strings ("%.17g" format)
 * prefixed by an unsigned 8 bit integer specifying the length of the
 * representation. Three values of the prefix are special and are not
 * followed by any string:
 *
 *   253: not a number
 *   254: + inf
 *   255: - inf
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char rec[128];
    int reclen = 1;

    if (isnan(val)) {
        rec[0] = 253;
    } else if (!isfinite(val)) {
        rec[0] = (val < 0) ? 255 : 254;
    } else {
        char *text = (char*)rec+1;

        snprintf(text,sizeof(rec)-1,"%.17g",val);
        rec[0] = strlen(text);
        reclen = rec[0]+1;
    }
    return (fwrite(rec,reclen,1,fp) == 0) ? -1 : 0;
}
2542
ed9b544e 2543/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 2544static int rdbSave(char *filename) {
ed9b544e 2545 dictIterator *di = NULL;
2546 dictEntry *de;
ed9b544e 2547 FILE *fp;
2548 char tmpfile[256];
2549 int j;
bb32ede5 2550 time_t now = time(NULL);
ed9b544e 2551
a3b21203 2552 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 2553 fp = fopen(tmpfile,"w");
2554 if (!fp) {
2555 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2556 return REDIS_ERR;
2557 }
f78fd11b 2558 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 2559 for (j = 0; j < server.dbnum; j++) {
bb32ede5 2560 redisDb *db = server.db+j;
2561 dict *d = db->dict;
3305306f 2562 if (dictSize(d) == 0) continue;
ed9b544e 2563 di = dictGetIterator(d);
2564 if (!di) {
2565 fclose(fp);
2566 return REDIS_ERR;
2567 }
2568
2569 /* Write the SELECT DB opcode */
f78fd11b 2570 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2571 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 2572
2573 /* Iterate this DB writing every entry */
2574 while((de = dictNext(di)) != NULL) {
2575 robj *key = dictGetEntryKey(de);
2576 robj *o = dictGetEntryVal(de);
bb32ede5 2577 time_t expiretime = getExpire(db,key);
2578
2579 /* Save the expire time */
2580 if (expiretime != -1) {
2581 /* If this key is already expired skip it */
2582 if (expiretime < now) continue;
2583 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2584 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2585 }
2586 /* Save the key and associated value */
f78fd11b 2587 if (rdbSaveType(fp,o->type) == -1) goto werr;
10c43610 2588 if (rdbSaveStringObject(fp,key) == -1) goto werr;
f78fd11b 2589 if (o->type == REDIS_STRING) {
ed9b544e 2590 /* Save a string value */
10c43610 2591 if (rdbSaveStringObject(fp,o) == -1) goto werr;
f78fd11b 2592 } else if (o->type == REDIS_LIST) {
ed9b544e 2593 /* Save a list value */
2594 list *list = o->ptr;
6208b3a7 2595 listNode *ln;
ed9b544e 2596
6208b3a7 2597 listRewind(list);
f78fd11b 2598 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
6208b3a7 2599 while((ln = listYield(list))) {
ed9b544e 2600 robj *eleobj = listNodeValue(ln);
f78fd11b 2601
10c43610 2602 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2603 }
f78fd11b 2604 } else if (o->type == REDIS_SET) {
ed9b544e 2605 /* Save a set value */
2606 dict *set = o->ptr;
2607 dictIterator *di = dictGetIterator(set);
2608 dictEntry *de;
2609
3305306f 2610 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
ed9b544e 2611 while((de = dictNext(di)) != NULL) {
10c43610 2612 robj *eleobj = dictGetEntryKey(de);
ed9b544e 2613
10c43610 2614 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2615 }
2616 dictReleaseIterator(di);
2b59cfdf 2617 } else if (o->type == REDIS_ZSET) {
2618 /* Save a set value */
2619 zset *zs = o->ptr;
2620 dictIterator *di = dictGetIterator(zs->dict);
2621 dictEntry *de;
2622
2623 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
2624 while((de = dictNext(di)) != NULL) {
2625 robj *eleobj = dictGetEntryKey(de);
2626 double *score = dictGetEntryVal(de);
2627
2628 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2629 if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
2630 }
2631 dictReleaseIterator(di);
ed9b544e 2632 } else {
dfc5e96c 2633 redisAssert(0 != 0);
ed9b544e 2634 }
2635 }
2636 dictReleaseIterator(di);
2637 }
2638 /* EOF opcode */
f78fd11b 2639 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2640
2641 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 2642 fflush(fp);
2643 fsync(fileno(fp));
2644 fclose(fp);
2645
2646 /* Use RENAME to make sure the DB file is changed atomically only
2647 * if the generate DB file is ok. */
2648 if (rename(tmpfile,filename) == -1) {
325d1eb4 2649 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 2650 unlink(tmpfile);
2651 return REDIS_ERR;
2652 }
2653 redisLog(REDIS_NOTICE,"DB saved on disk");
2654 server.dirty = 0;
2655 server.lastsave = time(NULL);
2656 return REDIS_OK;
2657
2658werr:
2659 fclose(fp);
2660 unlink(tmpfile);
2661 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2662 if (di) dictReleaseIterator(di);
2663 return REDIS_ERR;
2664}
2665
f78fd11b 2666static int rdbSaveBackground(char *filename) {
ed9b544e 2667 pid_t childpid;
2668
9d65a1bb 2669 if (server.bgsavechildpid != -1) return REDIS_ERR;
ed9b544e 2670 if ((childpid = fork()) == 0) {
2671 /* Child */
2672 close(server.fd);
f78fd11b 2673 if (rdbSave(filename) == REDIS_OK) {
ed9b544e 2674 exit(0);
2675 } else {
2676 exit(1);
2677 }
2678 } else {
2679 /* Parent */
5a7c647e 2680 if (childpid == -1) {
2681 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2682 strerror(errno));
2683 return REDIS_ERR;
2684 }
ed9b544e 2685 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
9f3c422c 2686 server.bgsavechildpid = childpid;
ed9b544e 2687 return REDIS_OK;
2688 }
2689 return REDIS_OK; /* unreached */
2690}
2691
/* Unlink the file named "temp-<childpid>.rdb" (relative to the current
 * working directory). The result of unlink() is ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
2698
/* Read one byte from 'fp' and return it as an int in the range 0-255.
 * Returns -1 if the byte cannot be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
2704
/* Read a 4 byte signed integer (host byte order) from 'fp' and return it
 * as a time_t. Returns -1 if the 4 bytes cannot be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t raw;

    if (fread(&raw,sizeof(raw),1,fp) != 1) return -1;
    return (time_t) raw;
}
2710
e3566d4b 2711/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
2712 * of this file for a description of how this are stored on disk.
2713 *
2714 * isencoded is set to 1 if the readed length is not actually a length but
2715 * an "encoding type", check the above comments for more info */
2716static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
f78fd11b 2717 unsigned char buf[2];
2718 uint32_t len;
2719
e3566d4b 2720 if (isencoded) *isencoded = 0;
f78fd11b 2721 if (rdbver == 0) {
2722 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2723 return ntohl(len);
2724 } else {
17be1a4a 2725 int type;
2726
f78fd11b 2727 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
17be1a4a 2728 type = (buf[0]&0xC0)>>6;
2729 if (type == REDIS_RDB_6BITLEN) {
f78fd11b 2730 /* Read a 6 bit len */
e3566d4b 2731 return buf[0]&0x3F;
2732 } else if (type == REDIS_RDB_ENCVAL) {
2733 /* Read a 6 bit len encoding type */
2734 if (isencoded) *isencoded = 1;
2735 return buf[0]&0x3F;
17be1a4a 2736 } else if (type == REDIS_RDB_14BITLEN) {
f78fd11b 2737 /* Read a 14 bit len */
2738 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
2739 return ((buf[0]&0x3F)<<8)|buf[1];
2740 } else {
2741 /* Read a 32 bit len */
2742 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2743 return ntohl(len);
2744 }
2745 }
f78fd11b 2746}
2747
e3566d4b 2748static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
2749 unsigned char enc[4];
2750 long long val;
2751
2752 if (enctype == REDIS_RDB_ENC_INT8) {
2753 if (fread(enc,1,1,fp) == 0) return NULL;
2754 val = (signed char)enc[0];
2755 } else if (enctype == REDIS_RDB_ENC_INT16) {
2756 uint16_t v;
2757 if (fread(enc,2,1,fp) == 0) return NULL;
2758 v = enc[0]|(enc[1]<<8);
2759 val = (int16_t)v;
2760 } else if (enctype == REDIS_RDB_ENC_INT32) {
2761 uint32_t v;
2762 if (fread(enc,4,1,fp) == 0) return NULL;
2763 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2764 val = (int32_t)v;
2765 } else {
2766 val = 0; /* anti-warning */
dfc5e96c 2767 redisAssert(0!=0);
e3566d4b 2768 }
2769 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2770}
2771
88e85998 2772static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2773 unsigned int len, clen;
2774 unsigned char *c = NULL;
2775 sds val = NULL;
2776
2777 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2778 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2779 if ((c = zmalloc(clen)) == NULL) goto err;
2780 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2781 if (fread(c,clen,1,fp) == 0) goto err;
2782 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 2783 zfree(c);
88e85998 2784 return createObject(REDIS_STRING,val);
2785err:
2786 zfree(c);
2787 sdsfree(val);
2788 return NULL;
2789}
2790
e3566d4b 2791static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2792 int isencoded;
2793 uint32_t len;
f78fd11b 2794 sds val;
2795
e3566d4b 2796 len = rdbLoadLen(fp,rdbver,&isencoded);
2797 if (isencoded) {
2798 switch(len) {
2799 case REDIS_RDB_ENC_INT8:
2800 case REDIS_RDB_ENC_INT16:
2801 case REDIS_RDB_ENC_INT32:
3305306f 2802 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
88e85998 2803 case REDIS_RDB_ENC_LZF:
2804 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
e3566d4b 2805 default:
dfc5e96c 2806 redisAssert(0!=0);
e3566d4b 2807 }
2808 }
2809
f78fd11b 2810 if (len == REDIS_RDB_LENERR) return NULL;
2811 val = sdsnewlen(NULL,len);
2812 if (len && fread(val,len,1,fp) == 0) {
2813 sdsfree(val);
2814 return NULL;
2815 }
10c43610 2816 return tryObjectSharing(createObject(REDIS_STRING,val));
f78fd11b 2817}
2818
a7866db6 2819/* For information about double serialization check rdbSaveDoubleValue() */
2820static int rdbLoadDoubleValue(FILE *fp, double *val) {
2821 char buf[128];
2822 unsigned char len;
2823
2824 if (fread(&len,1,1,fp) == 0) return -1;
2825 switch(len) {
2826 case 255: *val = R_NegInf; return 0;
2827 case 254: *val = R_PosInf; return 0;
2828 case 253: *val = R_Nan; return 0;
2829 default:
2830 if (fread(buf,len,1,fp) == 0) return -1;
231d758e 2831 buf[len] = '\0';
a7866db6 2832 sscanf(buf, "%lg", val);
2833 return 0;
2834 }
2835}
2836
f78fd11b 2837static int rdbLoad(char *filename) {
ed9b544e 2838 FILE *fp;
f78fd11b 2839 robj *keyobj = NULL;
2840 uint32_t dbid;
bb32ede5 2841 int type, retval, rdbver;
3305306f 2842 dict *d = server.db[0].dict;
bb32ede5 2843 redisDb *db = server.db+0;
f78fd11b 2844 char buf[1024];
bb32ede5 2845 time_t expiretime = -1, now = time(NULL);
2846
ed9b544e 2847 fp = fopen(filename,"r");
2848 if (!fp) return REDIS_ERR;
2849 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 2850 buf[9] = '\0';
2851 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 2852 fclose(fp);
2853 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
2854 return REDIS_ERR;
2855 }
f78fd11b 2856 rdbver = atoi(buf+5);
2857 if (rdbver > 1) {
2858 fclose(fp);
2859 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
2860 return REDIS_ERR;
2861 }
ed9b544e 2862 while(1) {
2863 robj *o;
2864
2865 /* Read type. */
f78fd11b 2866 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 2867 if (type == REDIS_EXPIRETIME) {
2868 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
2869 /* We read the time so we need to read the object type again */
2870 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2871 }
ed9b544e 2872 if (type == REDIS_EOF) break;
2873 /* Handle SELECT DB opcode as a special case */
2874 if (type == REDIS_SELECTDB) {
e3566d4b 2875 if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2876 goto eoferr;
ed9b544e 2877 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 2878 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 2879 exit(1);
2880 }
bb32ede5 2881 db = server.db+dbid;
2882 d = db->dict;
ed9b544e 2883 continue;
2884 }
2885 /* Read key */
f78fd11b 2886 if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
ed9b544e 2887
2888 if (type == REDIS_STRING) {
2889 /* Read string value */
f78fd11b 2890 if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2891 tryObjectEncoding(o);
ed9b544e 2892 } else if (type == REDIS_LIST || type == REDIS_SET) {
2893 /* Read list/set value */
2894 uint32_t listlen;
f78fd11b 2895
e3566d4b 2896 if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
f78fd11b 2897 goto eoferr;
ed9b544e 2898 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
2899 /* Load every single element of the list/set */
2900 while(listlen--) {
2901 robj *ele;
2902
f78fd11b 2903 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2904 tryObjectEncoding(ele);
ed9b544e 2905 if (type == REDIS_LIST) {
6b47e12e 2906 listAddNodeTail((list*)o->ptr,ele);
ed9b544e 2907 } else {
6b47e12e 2908 dictAdd((dict*)o->ptr,ele,NULL);
ed9b544e 2909 }
ed9b544e 2910 }
2b59cfdf 2911 } else if (type == REDIS_ZSET) {
2912 /* Read list/set value */
2913 uint32_t zsetlen;
2914 zset *zs;
2915
2916 if ((zsetlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2917 goto eoferr;
2918 o = createZsetObject();
2919 zs = o->ptr;
2920 /* Load every single element of the list/set */
2921 while(zsetlen--) {
2922 robj *ele;
2923 double *score = zmalloc(sizeof(double));
2924
2925 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2926 tryObjectEncoding(ele);
2927 if (rdbLoadDoubleValue(fp,score) == -1) goto eoferr;
2928 dictAdd(zs->dict,ele,score);
2929 zslInsert(zs->zsl,*score,ele);
2930 incrRefCount(ele); /* added to skiplist */
2931 }
ed9b544e 2932 } else {
dfc5e96c 2933 redisAssert(0 != 0);
ed9b544e 2934 }
2935 /* Add the new object in the hash table */
f78fd11b 2936 retval = dictAdd(d,keyobj,o);
ed9b544e 2937 if (retval == DICT_ERR) {
f78fd11b 2938 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
ed9b544e 2939 exit(1);
2940 }
bb32ede5 2941 /* Set the expire time if needed */
2942 if (expiretime != -1) {
2943 setExpire(db,keyobj,expiretime);
2944 /* Delete this key if already expired */
2945 if (expiretime < now) deleteKey(db,keyobj);
2946 expiretime = -1;
2947 }
f78fd11b 2948 keyobj = o = NULL;
ed9b544e 2949 }
2950 fclose(fp);
2951 return REDIS_OK;
2952
2953eoferr: /* unexpected end of file is handled here with a fatal exit */
e3566d4b 2954 if (keyobj) decrRefCount(keyobj);
f80dff62 2955 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
ed9b544e 2956 exit(1);
2957 return REDIS_ERR; /* Just to avoid warning */
2958}
2959
2960/*================================== Commands =============================== */
2961
abcb223e 2962static void authCommand(redisClient *c) {
2e77c2ee 2963 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
2964 c->authenticated = 1;
2965 addReply(c,shared.ok);
2966 } else {
2967 c->authenticated = 0;
fa4c0aba 2968 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
2969 }
2970}
2971
ed9b544e 2972static void pingCommand(redisClient *c) {
2973 addReply(c,shared.pong);
2974}
2975
2976static void echoCommand(redisClient *c) {
942a3961 2977 addReplyBulkLen(c,c->argv[1]);
ed9b544e 2978 addReply(c,c->argv[1]);
2979 addReply(c,shared.crlf);
2980}
2981
2982/*=================================== Strings =============================== */
2983
2984static void setGenericCommand(redisClient *c, int nx) {
2985 int retval;
2986
333fd216 2987 if (nx) deleteIfVolatile(c->db,c->argv[1]);
3305306f 2988 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2989 if (retval == DICT_ERR) {
2990 if (!nx) {
3305306f 2991 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2992 incrRefCount(c->argv[2]);
2993 } else {
c937aa89 2994 addReply(c,shared.czero);
ed9b544e 2995 return;
2996 }
2997 } else {
2998 incrRefCount(c->argv[1]);
2999 incrRefCount(c->argv[2]);
3000 }
3001 server.dirty++;
3305306f 3002 removeExpire(c->db,c->argv[1]);
c937aa89 3003 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 3004}
3005
3006static void setCommand(redisClient *c) {
a4d1ba9a 3007 setGenericCommand(c,0);
ed9b544e 3008}
3009
3010static void setnxCommand(redisClient *c) {
a4d1ba9a 3011 setGenericCommand(c,1);
ed9b544e 3012}
3013
3014static void getCommand(redisClient *c) {
3305306f 3015 robj *o = lookupKeyRead(c->db,c->argv[1]);
3016
3017 if (o == NULL) {
c937aa89 3018 addReply(c,shared.nullbulk);
ed9b544e 3019 } else {
ed9b544e 3020 if (o->type != REDIS_STRING) {
c937aa89 3021 addReply(c,shared.wrongtypeerr);
ed9b544e 3022 } else {
942a3961 3023 addReplyBulkLen(c,o);
ed9b544e 3024 addReply(c,o);
3025 addReply(c,shared.crlf);
3026 }
3027 }
3028}
3029
f6b141c5 3030static void getsetCommand(redisClient *c) {
a431eb74 3031 getCommand(c);
3032 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
3033 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
3034 } else {
3035 incrRefCount(c->argv[1]);
3036 }
3037 incrRefCount(c->argv[2]);
3038 server.dirty++;
3039 removeExpire(c->db,c->argv[1]);
3040}
3041
70003d28 3042static void mgetCommand(redisClient *c) {
70003d28 3043 int j;
3044
c937aa89 3045 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 3046 for (j = 1; j < c->argc; j++) {
3305306f 3047 robj *o = lookupKeyRead(c->db,c->argv[j]);
3048 if (o == NULL) {
c937aa89 3049 addReply(c,shared.nullbulk);
70003d28 3050 } else {
70003d28 3051 if (o->type != REDIS_STRING) {
c937aa89 3052 addReply(c,shared.nullbulk);
70003d28 3053 } else {
942a3961 3054 addReplyBulkLen(c,o);
70003d28 3055 addReply(c,o);
3056 addReply(c,shared.crlf);
3057 }
3058 }
3059 }
3060}
3061
6c446631 3062static void msetGenericCommand(redisClient *c, int nx) {
906573e7 3063 int j, busykeys = 0;
6c446631 3064
3065 if ((c->argc % 2) == 0) {
454d4e43 3066 addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
6c446631 3067 return;
3068 }
3069 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3070 * set nothing at all if at least one already key exists. */
3071 if (nx) {
3072 for (j = 1; j < c->argc; j += 2) {
906573e7 3073 if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
3074 busykeys++;
6c446631 3075 }
3076 }
3077 }
906573e7 3078 if (busykeys) {
3079 addReply(c, shared.czero);
3080 return;
3081 }
6c446631 3082
3083 for (j = 1; j < c->argc; j += 2) {
3084 int retval;
3085
17511391 3086 tryObjectEncoding(c->argv[j+1]);
6c446631 3087 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
3088 if (retval == DICT_ERR) {
3089 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
3090 incrRefCount(c->argv[j+1]);
3091 } else {
3092 incrRefCount(c->argv[j]);
3093 incrRefCount(c->argv[j+1]);
3094 }
3095 removeExpire(c->db,c->argv[j]);
3096 }
3097 server.dirty += (c->argc-1)/2;
3098 addReply(c, nx ? shared.cone : shared.ok);
3099}
3100
3101static void msetCommand(redisClient *c) {
3102 msetGenericCommand(c,0);
3103}
3104
3105static void msetnxCommand(redisClient *c) {
3106 msetGenericCommand(c,1);
3107}
3108
d68ed120 3109static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 3110 long long value;
3111 int retval;
3112 robj *o;
3113
3305306f 3114 o = lookupKeyWrite(c->db,c->argv[1]);
3115 if (o == NULL) {
ed9b544e 3116 value = 0;
3117 } else {
ed9b544e 3118 if (o->type != REDIS_STRING) {
3119 value = 0;
3120 } else {
3121 char *eptr;
3122
942a3961 3123 if (o->encoding == REDIS_ENCODING_RAW)
3124 value = strtoll(o->ptr, &eptr, 10);
3125 else if (o->encoding == REDIS_ENCODING_INT)
3126 value = (long)o->ptr;
3127 else
dfc5e96c 3128 redisAssert(1 != 1);
ed9b544e 3129 }
3130 }
3131
3132 value += incr;
3133 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
942a3961 3134 tryObjectEncoding(o);
3305306f 3135 retval = dictAdd(c->db->dict,c->argv[1],o);
ed9b544e 3136 if (retval == DICT_ERR) {
3305306f 3137 dictReplace(c->db->dict,c->argv[1],o);
3138 removeExpire(c->db,c->argv[1]);
ed9b544e 3139 } else {
3140 incrRefCount(c->argv[1]);
3141 }
3142 server.dirty++;
c937aa89 3143 addReply(c,shared.colon);
ed9b544e 3144 addReply(c,o);
3145 addReply(c,shared.crlf);
3146}
3147
3148static void incrCommand(redisClient *c) {
a4d1ba9a 3149 incrDecrCommand(c,1);
ed9b544e 3150}
3151
3152static void decrCommand(redisClient *c) {
a4d1ba9a 3153 incrDecrCommand(c,-1);
ed9b544e 3154}
3155
3156static void incrbyCommand(redisClient *c) {
d68ed120 3157 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3158 incrDecrCommand(c,incr);
ed9b544e 3159}
3160
3161static void decrbyCommand(redisClient *c) {
d68ed120 3162 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3163 incrDecrCommand(c,-incr);
ed9b544e 3164}
3165
3166/* ========================= Type agnostic commands ========================= */
3167
3168static void delCommand(redisClient *c) {
5109cdff 3169 int deleted = 0, j;
3170
3171 for (j = 1; j < c->argc; j++) {
3172 if (deleteKey(c->db,c->argv[j])) {
3173 server.dirty++;
3174 deleted++;
3175 }
3176 }
3177 switch(deleted) {
3178 case 0:
c937aa89 3179 addReply(c,shared.czero);
5109cdff 3180 break;
3181 case 1:
3182 addReply(c,shared.cone);
3183 break;
3184 default:
3185 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
3186 break;
ed9b544e 3187 }
3188}
3189
3190static void existsCommand(redisClient *c) {
3305306f 3191 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
ed9b544e 3192}
3193
3194static void selectCommand(redisClient *c) {
3195 int id = atoi(c->argv[1]->ptr);
3196
3197 if (selectDb(c,id) == REDIS_ERR) {
774e3047 3198 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 3199 } else {
3200 addReply(c,shared.ok);
3201 }
3202}
3203
3204static void randomkeyCommand(redisClient *c) {
3205 dictEntry *de;
3305306f 3206
3207 while(1) {
3208 de = dictGetRandomKey(c->db->dict);
ce7bef07 3209 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3305306f 3210 }
ed9b544e 3211 if (de == NULL) {
ce7bef07 3212 addReply(c,shared.plus);
ed9b544e 3213 addReply(c,shared.crlf);
3214 } else {
c937aa89 3215 addReply(c,shared.plus);
ed9b544e 3216 addReply(c,dictGetEntryKey(de));
3217 addReply(c,shared.crlf);
3218 }
3219}
3220
3221static void keysCommand(redisClient *c) {
3222 dictIterator *di;
3223 dictEntry *de;
3224 sds pattern = c->argv[1]->ptr;
3225 int plen = sdslen(pattern);
682ac724 3226 unsigned long numkeys = 0, keyslen = 0;
ed9b544e 3227 robj *lenobj = createObject(REDIS_STRING,NULL);
3228
3305306f 3229 di = dictGetIterator(c->db->dict);
ed9b544e 3230 addReply(c,lenobj);
3231 decrRefCount(lenobj);
3232 while((de = dictNext(di)) != NULL) {
3233 robj *keyobj = dictGetEntryKey(de);
3305306f 3234
ed9b544e 3235 sds key = keyobj->ptr;
3236 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3237 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3305306f 3238 if (expireIfNeeded(c->db,keyobj) == 0) {
3239 if (numkeys != 0)
3240 addReply(c,shared.space);
3241 addReply(c,keyobj);
3242 numkeys++;
3243 keyslen += sdslen(key);
3244 }
ed9b544e 3245 }
3246 }
3247 dictReleaseIterator(di);
c937aa89 3248 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
ed9b544e 3249 addReply(c,shared.crlf);
3250}
3251
3252static void dbsizeCommand(redisClient *c) {
3253 addReplySds(c,
3305306f 3254 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 3255}
3256
3257static void lastsaveCommand(redisClient *c) {
3258 addReplySds(c,
c937aa89 3259 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 3260}
3261
3262static void typeCommand(redisClient *c) {
3305306f 3263 robj *o;
ed9b544e 3264 char *type;
3305306f 3265
3266 o = lookupKeyRead(c->db,c->argv[1]);
3267 if (o == NULL) {
c937aa89 3268 type = "+none";
ed9b544e 3269 } else {
ed9b544e 3270 switch(o->type) {
c937aa89 3271 case REDIS_STRING: type = "+string"; break;
3272 case REDIS_LIST: type = "+list"; break;
3273 case REDIS_SET: type = "+set"; break;
412a8bce 3274 case REDIS_ZSET: type = "+zset"; break;
ed9b544e 3275 default: type = "unknown"; break;
3276 }
3277 }
3278 addReplySds(c,sdsnew(type));
3279 addReply(c,shared.crlf);
3280}
3281
3282static void saveCommand(redisClient *c) {
9d65a1bb 3283 if (server.bgsavechildpid != -1) {
05557f6d 3284 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3285 return;
3286 }
f78fd11b 3287 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 3288 addReply(c,shared.ok);
3289 } else {
3290 addReply(c,shared.err);
3291 }
3292}
3293
3294static void bgsaveCommand(redisClient *c) {
9d65a1bb 3295 if (server.bgsavechildpid != -1) {
ed9b544e 3296 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3297 return;
3298 }
f78fd11b 3299 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
49b99ab4 3300 char *status = "+Background saving started\r\n";
3301 addReplySds(c,sdsnew(status));
ed9b544e 3302 } else {
3303 addReply(c,shared.err);
3304 }
3305}
3306
3307static void shutdownCommand(redisClient *c) {
3308 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
a3b21203 3309 /* Kill the saving child if there is a background saving in progress.
3310 We want to avoid race conditions, for instance our saving child may
3311 overwrite the synchronous saving did by SHUTDOWN. */
9d65a1bb 3312 if (server.bgsavechildpid != -1) {
9f3c422c 3313 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3314 kill(server.bgsavechildpid,SIGKILL);
a3b21203 3315 rdbRemoveTempFile(server.bgsavechildpid);
9f3c422c 3316 }
a3b21203 3317 /* SYNC SAVE */
f78fd11b 3318 if (rdbSave(server.dbfilename) == REDIS_OK) {
9f3c422c 3319 if (server.daemonize)
b284af55 3320 unlink(server.pidfile);
b284af55 3321 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
ed9b544e 3322 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3323 exit(1);
3324 } else {
a3b21203 3325 /* Ooops.. error saving! The best we can do is to continue operating.
3326 * Note that if there was a background saving process, in the next
3327 * cron() Redis will be notified that the background saving aborted,
3328 * handling special stuff like slaves pending for synchronization... */
ed9b544e 3329 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3330 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3331 }
3332}
3333
3334static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 3335 robj *o;
3336
3337 /* To use the same key as src and dst is probably an error */
3338 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 3339 addReply(c,shared.sameobjecterr);
ed9b544e 3340 return;
3341 }
3342
3305306f 3343 o = lookupKeyWrite(c->db,c->argv[1]);
3344 if (o == NULL) {
c937aa89 3345 addReply(c,shared.nokeyerr);
ed9b544e 3346 return;
3347 }
ed9b544e 3348 incrRefCount(o);
3305306f 3349 deleteIfVolatile(c->db,c->argv[2]);
3350 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
ed9b544e 3351 if (nx) {
3352 decrRefCount(o);
c937aa89 3353 addReply(c,shared.czero);
ed9b544e 3354 return;
3355 }
3305306f 3356 dictReplace(c->db->dict,c->argv[2],o);
ed9b544e 3357 } else {
3358 incrRefCount(c->argv[2]);
3359 }
3305306f 3360 deleteKey(c->db,c->argv[1]);
ed9b544e 3361 server.dirty++;
c937aa89 3362 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 3363}
3364
3365static void renameCommand(redisClient *c) {
3366 renameGenericCommand(c,0);
3367}
3368
3369static void renamenxCommand(redisClient *c) {
3370 renameGenericCommand(c,1);
3371}
3372
3373static void moveCommand(redisClient *c) {
3305306f 3374 robj *o;
3375 redisDb *src, *dst;
ed9b544e 3376 int srcid;
3377
3378 /* Obtain source and target DB pointers */
3305306f 3379 src = c->db;
3380 srcid = c->db->id;
ed9b544e 3381 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 3382 addReply(c,shared.outofrangeerr);
ed9b544e 3383 return;
3384 }
3305306f 3385 dst = c->db;
3386 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 3387
3388 /* If the user is moving using as target the same
3389 * DB as the source DB it is probably an error. */
3390 if (src == dst) {
c937aa89 3391 addReply(c,shared.sameobjecterr);
ed9b544e 3392 return;
3393 }
3394
3395 /* Check if the element exists and get a reference */
3305306f 3396 o = lookupKeyWrite(c->db,c->argv[1]);
3397 if (!o) {
c937aa89 3398 addReply(c,shared.czero);
ed9b544e 3399 return;
3400 }
3401
3402 /* Try to add the element to the target DB */
3305306f 3403 deleteIfVolatile(dst,c->argv[1]);
3404 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
c937aa89 3405 addReply(c,shared.czero);
ed9b544e 3406 return;
3407 }
3305306f 3408 incrRefCount(c->argv[1]);
ed9b544e 3409 incrRefCount(o);
3410
3411 /* OK! key moved, free the entry in the source DB */
3305306f 3412 deleteKey(src,c->argv[1]);
ed9b544e 3413 server.dirty++;
c937aa89 3414 addReply(c,shared.cone);
ed9b544e 3415}
3416
3417/* =================================== Lists ================================ */
3418static void pushGenericCommand(redisClient *c, int where) {
3419 robj *lobj;
ed9b544e 3420 list *list;
3305306f 3421
3422 lobj = lookupKeyWrite(c->db,c->argv[1]);
3423 if (lobj == NULL) {
ed9b544e 3424 lobj = createListObject();
3425 list = lobj->ptr;
3426 if (where == REDIS_HEAD) {
6b47e12e 3427 listAddNodeHead(list,c->argv[2]);
ed9b544e 3428 } else {
6b47e12e 3429 listAddNodeTail(list,c->argv[2]);
ed9b544e 3430 }
3305306f 3431 dictAdd(c->db->dict,c->argv[1],lobj);
ed9b544e 3432 incrRefCount(c->argv[1]);
3433 incrRefCount(c->argv[2]);
3434 } else {
ed9b544e 3435 if (lobj->type != REDIS_LIST) {
3436 addReply(c,shared.wrongtypeerr);
3437 return;
3438 }
3439 list = lobj->ptr;
3440 if (where == REDIS_HEAD) {
6b47e12e 3441 listAddNodeHead(list,c->argv[2]);
ed9b544e 3442 } else {
6b47e12e 3443 listAddNodeTail(list,c->argv[2]);
ed9b544e 3444 }
3445 incrRefCount(c->argv[2]);
3446 }
3447 server.dirty++;
3448 addReply(c,shared.ok);
3449}
3450
3451static void lpushCommand(redisClient *c) {
3452 pushGenericCommand(c,REDIS_HEAD);
3453}
3454
3455static void rpushCommand(redisClient *c) {
3456 pushGenericCommand(c,REDIS_TAIL);
3457}
3458
3459static void llenCommand(redisClient *c) {
3305306f 3460 robj *o;
ed9b544e 3461 list *l;
3462
3305306f 3463 o = lookupKeyRead(c->db,c->argv[1]);
3464 if (o == NULL) {
c937aa89 3465 addReply(c,shared.czero);
ed9b544e 3466 return;
3467 } else {
ed9b544e 3468 if (o->type != REDIS_LIST) {
c937aa89 3469 addReply(c,shared.wrongtypeerr);
ed9b544e 3470 } else {
3471 l = o->ptr;
c937aa89 3472 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
ed9b544e 3473 }
3474 }
3475}
3476
3477static void lindexCommand(redisClient *c) {
3305306f 3478 robj *o;
ed9b544e 3479 int index = atoi(c->argv[2]->ptr);
3480
3305306f 3481 o = lookupKeyRead(c->db,c->argv[1]);
3482 if (o == NULL) {
c937aa89 3483 addReply(c,shared.nullbulk);
ed9b544e 3484 } else {
ed9b544e 3485 if (o->type != REDIS_LIST) {
c937aa89 3486 addReply(c,shared.wrongtypeerr);
ed9b544e 3487 } else {
3488 list *list = o->ptr;
3489 listNode *ln;
3490
3491 ln = listIndex(list, index);
3492 if (ln == NULL) {
c937aa89 3493 addReply(c,shared.nullbulk);
ed9b544e 3494 } else {
3495 robj *ele = listNodeValue(ln);
942a3961 3496 addReplyBulkLen(c,ele);
ed9b544e 3497 addReply(c,ele);
3498 addReply(c,shared.crlf);
3499 }
3500 }
3501 }
3502}
3503
3504static void lsetCommand(redisClient *c) {
3305306f 3505 robj *o;
ed9b544e 3506 int index = atoi(c->argv[2]->ptr);
3507
3305306f 3508 o = lookupKeyWrite(c->db,c->argv[1]);
3509 if (o == NULL) {
ed9b544e 3510 addReply(c,shared.nokeyerr);
3511 } else {
ed9b544e 3512 if (o->type != REDIS_LIST) {
3513 addReply(c,shared.wrongtypeerr);
3514 } else {
3515 list *list = o->ptr;
3516 listNode *ln;
3517
3518 ln = listIndex(list, index);
3519 if (ln == NULL) {
c937aa89 3520 addReply(c,shared.outofrangeerr);
ed9b544e 3521 } else {
3522 robj *ele = listNodeValue(ln);
3523
3524 decrRefCount(ele);
3525 listNodeValue(ln) = c->argv[3];
3526 incrRefCount(c->argv[3]);
3527 addReply(c,shared.ok);
3528 server.dirty++;
3529 }
3530 }
3531 }
3532}
3533
3534static void popGenericCommand(redisClient *c, int where) {
3305306f 3535 robj *o;
3536
3537 o = lookupKeyWrite(c->db,c->argv[1]);
3538 if (o == NULL) {
c937aa89 3539 addReply(c,shared.nullbulk);
ed9b544e 3540 } else {
ed9b544e 3541 if (o->type != REDIS_LIST) {
c937aa89 3542 addReply(c,shared.wrongtypeerr);
ed9b544e 3543 } else {
3544 list *list = o->ptr;
3545 listNode *ln;
3546
3547 if (where == REDIS_HEAD)
3548 ln = listFirst(list);
3549 else
3550 ln = listLast(list);
3551
3552 if (ln == NULL) {
c937aa89 3553 addReply(c,shared.nullbulk);
ed9b544e 3554 } else {
3555 robj *ele = listNodeValue(ln);
942a3961 3556 addReplyBulkLen(c,ele);
ed9b544e 3557 addReply(c,ele);
3558 addReply(c,shared.crlf);
3559 listDelNode(list,ln);
3560 server.dirty++;
3561 }
3562 }
3563 }
3564}
3565
3566static void lpopCommand(redisClient *c) {
3567 popGenericCommand(c,REDIS_HEAD);
3568}
3569
3570static void rpopCommand(redisClient *c) {
3571 popGenericCommand(c,REDIS_TAIL);
3572}
3573
3574static void lrangeCommand(redisClient *c) {
3305306f 3575 robj *o;
ed9b544e 3576 int start = atoi(c->argv[2]->ptr);
3577 int end = atoi(c->argv[3]->ptr);
3305306f 3578
3579 o = lookupKeyRead(c->db,c->argv[1]);
3580 if (o == NULL) {
c937aa89 3581 addReply(c,shared.nullmultibulk);
ed9b544e 3582 } else {
ed9b544e 3583 if (o->type != REDIS_LIST) {
c937aa89 3584 addReply(c,shared.wrongtypeerr);
ed9b544e 3585 } else {
3586 list *list = o->ptr;
3587 listNode *ln;
3588 int llen = listLength(list);
3589 int rangelen, j;
3590 robj *ele;
3591
3592 /* convert negative indexes */
3593 if (start < 0) start = llen+start;
3594 if (end < 0) end = llen+end;
3595 if (start < 0) start = 0;
3596 if (end < 0) end = 0;
3597
3598 /* indexes sanity checks */
3599 if (start > end || start >= llen) {
3600 /* Out of range start or start > end result in empty list */
c937aa89 3601 addReply(c,shared.emptymultibulk);
ed9b544e 3602 return;
3603 }
3604 if (end >= llen) end = llen-1;
3605 rangelen = (end-start)+1;
3606
3607 /* Return the result in form of a multi-bulk reply */
3608 ln = listIndex(list, start);
c937aa89 3609 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
ed9b544e 3610 for (j = 0; j < rangelen; j++) {
3611 ele = listNodeValue(ln);
942a3961 3612 addReplyBulkLen(c,ele);
ed9b544e 3613 addReply(c,ele);
3614 addReply(c,shared.crlf);
3615 ln = ln->next;
3616 }
3617 }
3618 }
3619}
3620
3621static void ltrimCommand(redisClient *c) {
3305306f 3622 robj *o;
ed9b544e 3623 int start = atoi(c->argv[2]->ptr);
3624 int end = atoi(c->argv[3]->ptr);
3625
3305306f 3626 o = lookupKeyWrite(c->db,c->argv[1]);
3627 if (o == NULL) {
ed9b544e 3628 addReply(c,shared.nokeyerr);
3629 } else {
ed9b544e 3630 if (o->type != REDIS_LIST) {
3631 addReply(c,shared.wrongtypeerr);
3632 } else {
3633 list *list = o->ptr;
3634 listNode *ln;
3635 int llen = listLength(list);
3636 int j, ltrim, rtrim;
3637
3638 /* convert negative indexes */
3639 if (start < 0) start = llen+start;
3640 if (end < 0) end = llen+end;
3641 if (start < 0) start = 0;
3642 if (end < 0) end = 0;
3643
3644 /* indexes sanity checks */
3645 if (start > end || start >= llen) {
3646 /* Out of range start or start > end result in empty list */
3647 ltrim = llen;
3648 rtrim = 0;
3649 } else {
3650 if (end >= llen) end = llen-1;
3651 ltrim = start;
3652 rtrim = llen-end-1;
3653 }
3654
3655 /* Remove list elements to perform the trim */
3656 for (j = 0; j < ltrim; j++) {
3657 ln = listFirst(list);
3658 listDelNode(list,ln);
3659 }
3660 for (j = 0; j < rtrim; j++) {
3661 ln = listLast(list);
3662 listDelNode(list,ln);
3663 }
ed9b544e 3664 server.dirty++;
e59229a2 3665 addReply(c,shared.ok);
ed9b544e 3666 }
3667 }
3668}
3669
3670static void lremCommand(redisClient *c) {
3305306f 3671 robj *o;
ed9b544e 3672
3305306f 3673 o = lookupKeyWrite(c->db,c->argv[1]);
3674 if (o == NULL) {
33c08b39 3675 addReply(c,shared.czero);
ed9b544e 3676 } else {
ed9b544e 3677 if (o->type != REDIS_LIST) {
c937aa89 3678 addReply(c,shared.wrongtypeerr);
ed9b544e 3679 } else {
3680 list *list = o->ptr;
3681 listNode *ln, *next;
3682 int toremove = atoi(c->argv[2]->ptr);
3683 int removed = 0;
3684 int fromtail = 0;
3685
3686 if (toremove < 0) {
3687 toremove = -toremove;
3688 fromtail = 1;
3689 }
3690 ln = fromtail ? list->tail : list->head;
3691 while (ln) {
ed9b544e 3692 robj *ele = listNodeValue(ln);
a4d1ba9a 3693
3694 next = fromtail ? ln->prev : ln->next;
724a51b1 3695 if (compareStringObjects(ele,c->argv[3]) == 0) {
ed9b544e 3696 listDelNode(list,ln);
3697 server.dirty++;
3698 removed++;
3699 if (toremove && removed == toremove) break;
3700 }
3701 ln = next;
3702 }
c937aa89 3703 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 3704 }
3705 }
3706}
3707
12f9d551 3708/* This is the semantic of this command:
0f5f7e9a 3709 * RPOPLPUSH srclist dstlist:
12f9d551 3710 * IF LLEN(srclist) > 0
3711 * element = RPOP srclist
3712 * LPUSH dstlist element
3713 * RETURN element
3714 * ELSE
3715 * RETURN nil
3716 * END
3717 * END
3718 *
3719 * The idea is to be able to get an element from a list in a reliable way
3720 * since the element is not just returned but pushed against another list
3721 * as well. This command was originally proposed by Ezra Zygmuntowicz.
3722 */
0f5f7e9a 3723static void rpoplpushcommand(redisClient *c) {
12f9d551 3724 robj *sobj;
3725
3726 sobj = lookupKeyWrite(c->db,c->argv[1]);
3727 if (sobj == NULL) {
3728 addReply(c,shared.nullbulk);
3729 } else {
3730 if (sobj->type != REDIS_LIST) {
3731 addReply(c,shared.wrongtypeerr);
3732 } else {
3733 list *srclist = sobj->ptr;
3734 listNode *ln = listLast(srclist);
3735
3736 if (ln == NULL) {
3737 addReply(c,shared.nullbulk);
3738 } else {
3739 robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
3740 robj *ele = listNodeValue(ln);
3741 list *dstlist;
3742
3743 if (dobj == NULL) {
3744
3745 /* Create the list if the key does not exist */
3746 dobj = createListObject();
3747 dictAdd(c->db->dict,c->argv[2],dobj);
3748 incrRefCount(c->argv[2]);
3749 } else if (dobj->type != REDIS_LIST) {
3750 addReply(c,shared.wrongtypeerr);
3751 return;
3752 }
3753 /* Add the element to the target list */
3754 dstlist = dobj->ptr;
3755 listAddNodeHead(dstlist,ele);
3756 incrRefCount(ele);
3757
3758 /* Send the element to the client as reply as well */
3759 addReplyBulkLen(c,ele);
3760 addReply(c,ele);
3761 addReply(c,shared.crlf);
3762
3763 /* Finally remove the element from the source list */
3764 listDelNode(srclist,ln);
3765 server.dirty++;
3766 }
3767 }
3768 }
3769}
3770
3771
ed9b544e 3772/* ==================================== Sets ================================ */
3773
3774static void saddCommand(redisClient *c) {
ed9b544e 3775 robj *set;
3776
3305306f 3777 set = lookupKeyWrite(c->db,c->argv[1]);
3778 if (set == NULL) {
ed9b544e 3779 set = createSetObject();
3305306f 3780 dictAdd(c->db->dict,c->argv[1],set);
ed9b544e 3781 incrRefCount(c->argv[1]);
3782 } else {
ed9b544e 3783 if (set->type != REDIS_SET) {
c937aa89 3784 addReply(c,shared.wrongtypeerr);
ed9b544e 3785 return;
3786 }
3787 }
3788 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
3789 incrRefCount(c->argv[2]);
3790 server.dirty++;
c937aa89 3791 addReply(c,shared.cone);
ed9b544e 3792 } else {
c937aa89 3793 addReply(c,shared.czero);
ed9b544e 3794 }
3795}
3796
3797static void sremCommand(redisClient *c) {
3305306f 3798 robj *set;
ed9b544e 3799
3305306f 3800 set = lookupKeyWrite(c->db,c->argv[1]);
3801 if (set == NULL) {
c937aa89 3802 addReply(c,shared.czero);
ed9b544e 3803 } else {
ed9b544e 3804 if (set->type != REDIS_SET) {
c937aa89 3805 addReply(c,shared.wrongtypeerr);
ed9b544e 3806 return;
3807 }
3808 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
3809 server.dirty++;
12fea928 3810 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
c937aa89 3811 addReply(c,shared.cone);
ed9b544e 3812 } else {
c937aa89 3813 addReply(c,shared.czero);
ed9b544e 3814 }
3815 }
3816}
3817
a4460ef4 3818static void smoveCommand(redisClient *c) {
3819 robj *srcset, *dstset;
3820
3821 srcset = lookupKeyWrite(c->db,c->argv[1]);
3822 dstset = lookupKeyWrite(c->db,c->argv[2]);
3823
3824 /* If the source key does not exist return 0, if it's of the wrong type
3825 * raise an error */
3826 if (srcset == NULL || srcset->type != REDIS_SET) {
3827 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
3828 return;
3829 }
3830 /* Error if the destination key is not a set as well */
3831 if (dstset && dstset->type != REDIS_SET) {
3832 addReply(c,shared.wrongtypeerr);
3833 return;
3834 }
3835 /* Remove the element from the source set */
3836 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
3837 /* Key not found in the src set! return zero */
3838 addReply(c,shared.czero);
3839 return;
3840 }
3841 server.dirty++;
3842 /* Add the element to the destination set */
3843 if (!dstset) {
3844 dstset = createSetObject();
3845 dictAdd(c->db->dict,c->argv[2],dstset);
3846 incrRefCount(c->argv[2]);
3847 }
3848 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
3849 incrRefCount(c->argv[3]);
3850 addReply(c,shared.cone);
3851}
3852
ed9b544e 3853static void sismemberCommand(redisClient *c) {
3305306f 3854 robj *set;
ed9b544e 3855
3305306f 3856 set = lookupKeyRead(c->db,c->argv[1]);
3857 if (set == NULL) {
c937aa89 3858 addReply(c,shared.czero);
ed9b544e 3859 } else {
ed9b544e 3860 if (set->type != REDIS_SET) {
c937aa89 3861 addReply(c,shared.wrongtypeerr);
ed9b544e 3862 return;
3863 }
3864 if (dictFind(set->ptr,c->argv[2]))
c937aa89 3865 addReply(c,shared.cone);
ed9b544e 3866 else
c937aa89 3867 addReply(c,shared.czero);
ed9b544e 3868 }
3869}
3870
3871static void scardCommand(redisClient *c) {
3305306f 3872 robj *o;
ed9b544e 3873 dict *s;
3874
3305306f 3875 o = lookupKeyRead(c->db,c->argv[1]);
3876 if (o == NULL) {
c937aa89 3877 addReply(c,shared.czero);
ed9b544e 3878 return;
3879 } else {
ed9b544e 3880 if (o->type != REDIS_SET) {
c937aa89 3881 addReply(c,shared.wrongtypeerr);
ed9b544e 3882 } else {
3883 s = o->ptr;
682ac724 3884 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
3305306f 3885 dictSize(s)));
ed9b544e 3886 }
3887 }
3888}
3889
12fea928 3890static void spopCommand(redisClient *c) {
3891 robj *set;
3892 dictEntry *de;
3893
3894 set = lookupKeyWrite(c->db,c->argv[1]);
3895 if (set == NULL) {
3896 addReply(c,shared.nullbulk);
3897 } else {
3898 if (set->type != REDIS_SET) {
3899 addReply(c,shared.wrongtypeerr);
3900 return;
3901 }
3902 de = dictGetRandomKey(set->ptr);
3903 if (de == NULL) {
3904 addReply(c,shared.nullbulk);
3905 } else {
3906 robj *ele = dictGetEntryKey(de);
3907
942a3961 3908 addReplyBulkLen(c,ele);
12fea928 3909 addReply(c,ele);
3910 addReply(c,shared.crlf);
3911 dictDelete(set->ptr,ele);
3912 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3913 server.dirty++;
3914 }
3915 }
3916}
3917
2abb95a9 3918static void srandmemberCommand(redisClient *c) {
3919 robj *set;
3920 dictEntry *de;
3921
3922 set = lookupKeyRead(c->db,c->argv[1]);
3923 if (set == NULL) {
3924 addReply(c,shared.nullbulk);
3925 } else {
3926 if (set->type != REDIS_SET) {
3927 addReply(c,shared.wrongtypeerr);
3928 return;
3929 }
3930 de = dictGetRandomKey(set->ptr);
3931 if (de == NULL) {
3932 addReply(c,shared.nullbulk);
3933 } else {
3934 robj *ele = dictGetEntryKey(de);
3935
3936 addReplyBulkLen(c,ele);
3937 addReply(c,ele);
3938 addReply(c,shared.crlf);
3939 }
3940 }
3941}
3942
ed9b544e 3943static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
3944 dict **d1 = (void*) s1, **d2 = (void*) s2;
3945
3305306f 3946 return dictSize(*d1)-dictSize(*d2);
ed9b544e 3947}
3948
682ac724 3949static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) {
ed9b544e 3950 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3951 dictIterator *di;
3952 dictEntry *de;
3953 robj *lenobj = NULL, *dstset = NULL;
682ac724 3954 unsigned long j, cardinality = 0;
ed9b544e 3955
ed9b544e 3956 for (j = 0; j < setsnum; j++) {
3957 robj *setobj;
3305306f 3958
3959 setobj = dstkey ?
3960 lookupKeyWrite(c->db,setskeys[j]) :
3961 lookupKeyRead(c->db,setskeys[j]);
3962 if (!setobj) {
ed9b544e 3963 zfree(dv);
5faa6025 3964 if (dstkey) {
3965 deleteKey(c->db,dstkey);
0d36ded0 3966 addReply(c,shared.czero);
5faa6025 3967 } else {
3968 addReply(c,shared.nullmultibulk);
3969 }
ed9b544e 3970 return;
3971 }
ed9b544e 3972 if (setobj->type != REDIS_SET) {
3973 zfree(dv);
c937aa89 3974 addReply(c,shared.wrongtypeerr);
ed9b544e 3975 return;
3976 }
3977 dv[j] = setobj->ptr;
3978 }
3979 /* Sort sets from the smallest to largest, this will improve our
3980 * algorithm's performace */
3981 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3982
3983 /* The first thing we should output is the total number of elements...
3984 * since this is a multi-bulk write, but at this stage we don't know
3985 * the intersection set size, so we use a trick, append an empty object
3986 * to the output list and save the pointer to later modify it with the
3987 * right length */
3988 if (!dstkey) {
3989 lenobj = createObject(REDIS_STRING,NULL);
3990 addReply(c,lenobj);
3991 decrRefCount(lenobj);
3992 } else {
3993 /* If we have a target key where to store the resulting set
3994 * create this key with an empty set inside */
3995 dstset = createSetObject();
ed9b544e 3996 }
3997
3998 /* Iterate all the elements of the first (smallest) set, and test
3999 * the element against all the other sets, if at least one set does
4000 * not include the element it is discarded */
4001 di = dictGetIterator(dv[0]);
ed9b544e 4002
4003 while((de = dictNext(di)) != NULL) {
4004 robj *ele;
4005
4006 for (j = 1; j < setsnum; j++)
4007 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
4008 if (j != setsnum)
4009 continue; /* at least one set does not contain the member */
4010 ele = dictGetEntryKey(de);
4011 if (!dstkey) {
942a3961 4012 addReplyBulkLen(c,ele);
ed9b544e 4013 addReply(c,ele);
4014 addReply(c,shared.crlf);
4015 cardinality++;
4016 } else {
4017 dictAdd(dstset->ptr,ele,NULL);
4018 incrRefCount(ele);
4019 }
4020 }
4021 dictReleaseIterator(di);
4022
83cdfe18
AG
4023 if (dstkey) {
4024 /* Store the resulting set into the target */
4025 deleteKey(c->db,dstkey);
4026 dictAdd(c->db->dict,dstkey,dstset);
4027 incrRefCount(dstkey);
4028 }
4029
40d224a9 4030 if (!dstkey) {
682ac724 4031 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality);
40d224a9 4032 } else {
682ac724 4033 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
03fd01c7 4034 dictSize((dict*)dstset->ptr)));
40d224a9 4035 server.dirty++;
4036 }
ed9b544e 4037 zfree(dv);
4038}
4039
4040static void sinterCommand(redisClient *c) {
4041 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
4042}
4043
4044static void sinterstoreCommand(redisClient *c) {
4045 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
4046}
4047
f4f56e1d 4048#define REDIS_OP_UNION 0
4049#define REDIS_OP_DIFF 1
4050
4051static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 4052 dict **dv = zmalloc(sizeof(dict*)*setsnum);
4053 dictIterator *di;
4054 dictEntry *de;
f4f56e1d 4055 robj *dstset = NULL;
40d224a9 4056 int j, cardinality = 0;
4057
40d224a9 4058 for (j = 0; j < setsnum; j++) {
4059 robj *setobj;
4060
4061 setobj = dstkey ?
4062 lookupKeyWrite(c->db,setskeys[j]) :
4063 lookupKeyRead(c->db,setskeys[j]);
4064 if (!setobj) {
4065 dv[j] = NULL;
4066 continue;
4067 }
4068 if (setobj->type != REDIS_SET) {
4069 zfree(dv);
4070 addReply(c,shared.wrongtypeerr);
4071 return;
4072 }
4073 dv[j] = setobj->ptr;
4074 }
4075
4076 /* We need a temp set object to store our union. If the dstkey
4077 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4078 * this set object will be the resulting object to set into the target key*/
4079 dstset = createSetObject();
4080
40d224a9 4081 /* Iterate all the elements of all the sets, add every element a single
4082 * time to the result set */
4083 for (j = 0; j < setsnum; j++) {
51829ed3 4084 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 4085 if (!dv[j]) continue; /* non existing keys are like empty sets */
4086
4087 di = dictGetIterator(dv[j]);
40d224a9 4088
4089 while((de = dictNext(di)) != NULL) {
4090 robj *ele;
4091
4092 /* dictAdd will not add the same element multiple times */
4093 ele = dictGetEntryKey(de);
f4f56e1d 4094 if (op == REDIS_OP_UNION || j == 0) {
4095 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
4096 incrRefCount(ele);
40d224a9 4097 cardinality++;
4098 }
f4f56e1d 4099 } else if (op == REDIS_OP_DIFF) {
4100 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
4101 cardinality--;
4102 }
40d224a9 4103 }
4104 }
4105 dictReleaseIterator(di);
51829ed3
AG
4106
4107 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
40d224a9 4108 }
4109
f4f56e1d 4110 /* Output the content of the resulting set, if not in STORE mode */
4111 if (!dstkey) {
4112 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
4113 di = dictGetIterator(dstset->ptr);
f4f56e1d 4114 while((de = dictNext(di)) != NULL) {
4115 robj *ele;
4116
4117 ele = dictGetEntryKey(de);
942a3961 4118 addReplyBulkLen(c,ele);
f4f56e1d 4119 addReply(c,ele);
4120 addReply(c,shared.crlf);
4121 }
4122 dictReleaseIterator(di);
83cdfe18
AG
4123 } else {
4124 /* If we have a target key where to store the resulting set
4125 * create this key with the result set inside */
4126 deleteKey(c->db,dstkey);
4127 dictAdd(c->db->dict,dstkey,dstset);
4128 incrRefCount(dstkey);
f4f56e1d 4129 }
4130
4131 /* Cleanup */
40d224a9 4132 if (!dstkey) {
40d224a9 4133 decrRefCount(dstset);
4134 } else {
682ac724 4135 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
03fd01c7 4136 dictSize((dict*)dstset->ptr)));
40d224a9 4137 server.dirty++;
4138 }
4139 zfree(dv);
4140}
4141
4142static void sunionCommand(redisClient *c) {
f4f56e1d 4143 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 4144}
4145
4146static void sunionstoreCommand(redisClient *c) {
f4f56e1d 4147 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
4148}
4149
4150static void sdiffCommand(redisClient *c) {
4151 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
4152}
4153
4154static void sdiffstoreCommand(redisClient *c) {
4155 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 4156}
4157
6b47e12e 4158/* ==================================== ZSets =============================== */
4159
/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */
4167
/* This skiplist implementation is almost a C translation of the original
 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
 * Alternative to Balanced Trees", modified in three ways:
 * a) this implementation allows for repeated values.
 * b) the comparison is not just by key (our 'score') but by satellite data.
 * c) there is a back pointer, so it's a doubly linked list with the back
 * pointers being only at "level 1". This allows traversing the list
 * from tail to head, useful for ZREVRANGE. */
4176
4177static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
4178 zskiplistNode *zn = zmalloc(sizeof(*zn));
4179
4180 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
4181 zn->score = score;
4182 zn->obj = obj;
4183 return zn;
4184}
4185
4186static zskiplist *zslCreate(void) {
4187 int j;
4188 zskiplist *zsl;
4189
4190 zsl = zmalloc(sizeof(*zsl));
4191 zsl->level = 1;
cc812361 4192 zsl->length = 0;
6b47e12e 4193 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
4194 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
4195 zsl->header->forward[j] = NULL;
e3870fab 4196 zsl->header->backward = NULL;
4197 zsl->tail = NULL;
6b47e12e 4198 return zsl;
4199}
4200
fd8ccf44 4201static void zslFreeNode(zskiplistNode *node) {
4202 decrRefCount(node->obj);
ad807e6f 4203 zfree(node->forward);
fd8ccf44 4204 zfree(node);
4205}
4206
4207static void zslFree(zskiplist *zsl) {
ad807e6f 4208 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 4209
ad807e6f 4210 zfree(zsl->header->forward);
4211 zfree(zsl->header);
fd8ccf44 4212 while(node) {
599379dd 4213 next = node->forward[0];
fd8ccf44 4214 zslFreeNode(node);
4215 node = next;
4216 }
ad807e6f 4217 zfree(zsl);
fd8ccf44 4218}
4219
6b47e12e 4220static int zslRandomLevel(void) {
4221 int level = 1;
4222 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
4223 level += 1;
4224 return level;
4225}
4226
4227static void zslInsert(zskiplist *zsl, double score, robj *obj) {
4228 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4229 int i, level;
4230
4231 x = zsl->header;
4232 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4233 while (x->forward[i] &&
4234 (x->forward[i]->score < score ||
4235 (x->forward[i]->score == score &&
4236 compareStringObjects(x->forward[i]->obj,obj) < 0)))
6b47e12e 4237 x = x->forward[i];
4238 update[i] = x;
4239 }
6b47e12e 4240 /* we assume the key is not already inside, since we allow duplicated
4241 * scores, and the re-insertion of score and redis object should never
4242 * happpen since the caller of zslInsert() should test in the hash table
4243 * if the element is already inside or not. */
4244 level = zslRandomLevel();
4245 if (level > zsl->level) {
4246 for (i = zsl->level; i < level; i++)
4247 update[i] = zsl->header;
4248 zsl->level = level;
4249 }
4250 x = zslCreateNode(level,score,obj);
4251 for (i = 0; i < level; i++) {
4252 x->forward[i] = update[i]->forward[i];
4253 update[i]->forward[i] = x;
4254 }
bb975144 4255 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 4256 if (x->forward[0])
4257 x->forward[0]->backward = x;
4258 else
4259 zsl->tail = x;
cc812361 4260 zsl->length++;
6b47e12e 4261}
4262
50c55df5 4263/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 4264static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 4265 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4266 int i;
4267
4268 x = zsl->header;
4269 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4270 while (x->forward[i] &&
4271 (x->forward[i]->score < score ||
4272 (x->forward[i]->score == score &&
4273 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 4274 x = x->forward[i];
4275 update[i] = x;
4276 }
4277 /* We may have multiple elements with the same score, what we need
4278 * is to find the element with both the right score and object. */
4279 x = x->forward[0];
50c55df5 4280 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
9d60e6e4 4281 for (i = 0; i < zsl->level; i++) {
4282 if (update[i]->forward[i] != x) break;
4283 update[i]->forward[i] = x->forward[i];
4284 }
4285 if (x->forward[0]) {
4286 x->forward[0]->backward = (x->backward == zsl->header) ?
4287 NULL : x->backward;
e197b441 4288 } else {
9d60e6e4 4289 zsl->tail = x->backward;
e197b441 4290 }
9d60e6e4 4291 zslFreeNode(x);
4292 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4293 zsl->level--;
4294 zsl->length--;
4295 return 1;
4296 } else {
4297 return 0; /* not found */
e197b441 4298 }
4299 return 0; /* not found */
fd8ccf44 4300}
4301
1807985b 4302/* Delete all the elements with score between min and max from the skiplist.
4303 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4304 * Note that this function takes the reference to the hash table view of the
4305 * sorted set, in order to remove the elements from the hash table too. */
4306static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4307 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4308 unsigned long removed = 0;
4309 int i;
4310
4311 x = zsl->header;
4312 for (i = zsl->level-1; i >= 0; i--) {
4313 while (x->forward[i] && x->forward[i]->score < min)
4314 x = x->forward[i];
4315 update[i] = x;
4316 }
4317 /* We may have multiple elements with the same score, what we need
4318 * is to find the element with both the right score and object. */
4319 x = x->forward[0];
4320 while (x && x->score <= max) {
4321 zskiplistNode *next;
4322
4323 for (i = 0; i < zsl->level; i++) {
4324 if (update[i]->forward[i] != x) break;
4325 update[i]->forward[i] = x->forward[i];
4326 }
4327 if (x->forward[0]) {
4328 x->forward[0]->backward = (x->backward == zsl->header) ?
4329 NULL : x->backward;
4330 } else {
4331 zsl->tail = x->backward;
4332 }
4333 next = x->forward[0];
4334 dictDelete(dict,x->obj);
4335 zslFreeNode(x);
4336 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4337 zsl->level--;
4338 zsl->length--;
4339 removed++;
4340 x = next;
4341 }
4342 return removed; /* not found */
4343}
4344
50c55df5 4345/* Find the first node having a score equal or greater than the specified one.
4346 * Returns NULL if there is no match. */
4347static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4348 zskiplistNode *x;
4349 int i;
4350
4351 x = zsl->header;
4352 for (i = zsl->level-1; i >= 0; i--) {
4353 while (x->forward[i] && x->forward[i]->score < score)
4354 x = x->forward[i];
4355 }
4356 /* We may have multiple elements with the same score, what we need
4357 * is to find the element with both the right score and object. */
4358 return x->forward[0];
4359}
4360
fd8ccf44 4361/* The actual Z-commands implementations */
4362
7db723ad 4363/* This generic command implements both ZADD and ZINCRBY.
e2665397 4364 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
7db723ad 4365 * the increment if the operation is a ZINCRBY (doincrement == 1). */
e2665397 4366static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
fd8ccf44 4367 robj *zsetobj;
4368 zset *zs;
4369 double *score;
4370
e2665397 4371 zsetobj = lookupKeyWrite(c->db,key);
fd8ccf44 4372 if (zsetobj == NULL) {
4373 zsetobj = createZsetObject();
e2665397 4374 dictAdd(c->db->dict,key,zsetobj);
4375 incrRefCount(key);
fd8ccf44 4376 } else {
4377 if (zsetobj->type != REDIS_ZSET) {
4378 addReply(c,shared.wrongtypeerr);
4379 return;
4380 }
4381 }
fd8ccf44 4382 zs = zsetobj->ptr;
e2665397 4383
7db723ad 4384 /* Ok now since we implement both ZADD and ZINCRBY here the code
e2665397 4385 * needs to handle the two different conditions. It's all about setting
4386 * '*score', that is, the new score to set, to the right value. */
4387 score = zmalloc(sizeof(double));
4388 if (doincrement) {
4389 dictEntry *de;
4390
4391 /* Read the old score. If the element was not present starts from 0 */
4392 de = dictFind(zs->dict,ele);
4393 if (de) {
4394 double *oldscore = dictGetEntryVal(de);
4395 *score = *oldscore + scoreval;
4396 } else {
4397 *score = scoreval;
4398 }
4399 } else {
4400 *score = scoreval;
4401 }
4402
4403 /* What follows is a simple remove and re-insert operation that is common
7db723ad 4404 * to both ZADD and ZINCRBY... */
e2665397 4405 if (dictAdd(zs->dict,ele,score) == DICT_OK) {
fd8ccf44 4406 /* case 1: New element */
e2665397 4407 incrRefCount(ele); /* added to hash */
4408 zslInsert(zs->zsl,*score,ele);
4409 incrRefCount(ele); /* added to skiplist */
fd8ccf44 4410 server.dirty++;
e2665397 4411 if (doincrement)
e2665397 4412 addReplyDouble(c,*score);
91d71bfc 4413 else
4414 addReply(c,shared.cone);
fd8ccf44 4415 } else {
4416 dictEntry *de;
4417 double *oldscore;
4418
4419 /* case 2: Score update operation */
e2665397 4420 de = dictFind(zs->dict,ele);
dfc5e96c 4421 redisAssert(de != NULL);
fd8ccf44 4422 oldscore = dictGetEntryVal(de);
4423 if (*score != *oldscore) {
4424 int deleted;
4425
e2665397 4426 /* Remove and insert the element in the skip list with new score */
4427 deleted = zslDelete(zs->zsl,*oldscore,ele);
dfc5e96c 4428 redisAssert(deleted != 0);
e2665397 4429 zslInsert(zs->zsl,*score,ele);
4430 incrRefCount(ele);
4431 /* Update the score in the hash table */
4432 dictReplace(zs->dict,ele,score);
fd8ccf44 4433 server.dirty++;
2161a965 4434 } else {
4435 zfree(score);
fd8ccf44 4436 }
e2665397 4437 if (doincrement)
4438 addReplyDouble(c,*score);
4439 else
4440 addReply(c,shared.czero);
fd8ccf44 4441 }
4442}
4443
e2665397 4444static void zaddCommand(redisClient *c) {
4445 double scoreval;
4446
4447 scoreval = strtod(c->argv[2]->ptr,NULL);
4448 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
4449}
4450
7db723ad 4451static void zincrbyCommand(redisClient *c) {
e2665397 4452 double scoreval;
4453
4454 scoreval = strtod(c->argv[2]->ptr,NULL);
4455 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
4456}
4457
1b7106e7 4458static void zremCommand(redisClient *c) {
4459 robj *zsetobj;
4460 zset *zs;
4461
4462 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4463 if (zsetobj == NULL) {
4464 addReply(c,shared.czero);
4465 } else {
4466 dictEntry *de;
4467 double *oldscore;
4468 int deleted;
4469
4470 if (zsetobj->type != REDIS_ZSET) {
4471 addReply(c,shared.wrongtypeerr);
4472 return;
4473 }
4474 zs = zsetobj->ptr;
4475 de = dictFind(zs->dict,c->argv[2]);
4476 if (de == NULL) {
4477 addReply(c,shared.czero);
4478 return;
4479 }
4480 /* Delete from the skiplist */
4481 oldscore = dictGetEntryVal(de);
4482 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
dfc5e96c 4483 redisAssert(deleted != 0);
1b7106e7 4484
4485 /* Delete from the hash table */
4486 dictDelete(zs->dict,c->argv[2]);
4487 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4488 server.dirty++;
4489 addReply(c,shared.cone);
4490 }
4491}
4492
1807985b 4493static void zremrangebyscoreCommand(redisClient *c) {
4494 double min = strtod(c->argv[2]->ptr,NULL);
4495 double max = strtod(c->argv[3]->ptr,NULL);
4496 robj *zsetobj;
4497 zset *zs;
4498
4499 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4500 if (zsetobj == NULL) {
4501 addReply(c,shared.czero);
4502 } else {
4503 long deleted;
4504
4505 if (zsetobj->type != REDIS_ZSET) {
4506 addReply(c,shared.wrongtypeerr);
4507 return;
4508 }
4509 zs = zsetobj->ptr;
4510 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4511 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4512 server.dirty += deleted;
4513 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4514 }
4515}
4516
e3870fab 4517static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 4518 robj *o;
4519 int start = atoi(c->argv[2]->ptr);
4520 int end = atoi(c->argv[3]->ptr);
4521
4522 o = lookupKeyRead(c->db,c->argv[1]);
4523 if (o == NULL) {
4524 addReply(c,shared.nullmultibulk);
4525 } else {
4526 if (o->type != REDIS_ZSET) {
4527 addReply(c,shared.wrongtypeerr);
4528 } else {
4529 zset *zsetobj = o->ptr;
4530 zskiplist *zsl = zsetobj->zsl;
4531 zskiplistNode *ln;
4532
4533 int llen = zsl->length;
4534 int rangelen, j;
4535 robj *ele;
4536
4537 /* convert negative indexes */
4538 if (start < 0) start = llen+start;
4539 if (end < 0) end = llen+end;
4540 if (start < 0) start = 0;
4541 if (end < 0) end = 0;
4542
4543 /* indexes sanity checks */
4544 if (start > end || start >= llen) {
4545 /* Out of range start or start > end result in empty list */
4546 addReply(c,shared.emptymultibulk);
4547 return;
4548 }
4549 if (end >= llen) end = llen-1;
4550 rangelen = (end-start)+1;
4551
4552 /* Return the result in form of a multi-bulk reply */
e3870fab 4553 if (reverse) {
4554 ln = zsl->tail;
4555 while (start--)
4556 ln = ln->backward;
4557 } else {
4558 ln = zsl->header->forward[0];
4559 while (start--)
4560 ln = ln->forward[0];
4561 }
cc812361 4562
4563 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
4564 for (j = 0; j < rangelen; j++) {
0aad7a19 4565 ele = ln->obj;
cc812361 4566 addReplyBulkLen(c,ele);
4567 addReply(c,ele);
4568 addReply(c,shared.crlf);
e3870fab 4569 ln = reverse ? ln->backward : ln->forward[0];
cc812361 4570 }
4571 }
4572 }
4573}
4574
e3870fab 4575static void zrangeCommand(redisClient *c) {
4576 zrangeGenericCommand(c,0);
4577}
4578
4579static void zrevrangeCommand(redisClient *c) {
4580 zrangeGenericCommand(c,1);
4581}
4582
50c55df5 4583static void zrangebyscoreCommand(redisClient *c) {
4584 robj *o;
4585 double min = strtod(c->argv[2]->ptr,NULL);
4586 double max = strtod(c->argv[3]->ptr,NULL);
80181f78 4587 int offset = 0, limit = -1;
4588
4589 if (c->argc != 4 && c->argc != 7) {
454d4e43 4590 addReplySds(c,
4591 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
80181f78 4592 return;
4593 } else if (c->argc == 7 && strcasecmp(c->argv[4]->ptr,"limit")) {
4594 addReply(c,shared.syntaxerr);
4595 return;
4596 } else if (c->argc == 7) {
4597 offset = atoi(c->argv[5]->ptr);
4598 limit = atoi(c->argv[6]->ptr);
0b13687c 4599 if (offset < 0) offset = 0;
80181f78 4600 }
50c55df5 4601
4602 o = lookupKeyRead(c->db,c->argv[1]);
4603 if (o == NULL) {
4604 addReply(c,shared.nullmultibulk);
4605 } else {
4606 if (o->type != REDIS_ZSET) {
4607 addReply(c,shared.wrongtypeerr);
4608 } else {
4609 zset *zsetobj = o->ptr;
4610 zskiplist *zsl = zsetobj->zsl;
4611 zskiplistNode *ln;
4612 robj *ele, *lenobj;
4613 unsigned int rangelen = 0;
4614
4615 /* Get the first node with the score >= min */
4616 ln = zslFirstWithScore(zsl,min);
4617 if (ln == NULL) {
4618 /* No element matching the speciifed interval */
4619 addReply(c,shared.emptymultibulk);
4620 return;
4621 }
4622
4623 /* We don't know in advance how many matching elements there
4624 * are in the list, so we push this object that will represent
4625 * the multi-bulk length in the output buffer, and will "fix"
4626 * it later */
4627 lenobj = createObject(REDIS_STRING,NULL);
4628 addReply(c,lenobj);
c74e7c77 4629 decrRefCount(lenobj);
50c55df5 4630
dbbc7285 4631 while(ln && ln->score <= max) {
80181f78 4632 if (offset) {
4633 offset--;
4634 ln = ln->forward[0];
4635 continue;
4636 }
4637 if (limit == 0) break;
50c55df5 4638 ele = ln->obj;
4639 addReplyBulkLen(c,ele);
4640 addReply(c,ele);
4641 addReply(c,shared.crlf);
4642 ln = ln->forward[0];
4643 rangelen++;
80181f78 4644 if (limit > 0) limit--;
50c55df5 4645 }
4646 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4647 }
4648 }
4649}
4650
3c41331e 4651static void zcardCommand(redisClient *c) {
e197b441 4652 robj *o;
4653 zset *zs;
4654
4655 o = lookupKeyRead(c->db,c->argv[1]);
4656 if (o == NULL) {
4657 addReply(c,shared.czero);
4658 return;
4659 } else {
4660 if (o->type != REDIS_ZSET) {
4661 addReply(c,shared.wrongtypeerr);
4662 } else {
4663 zs = o->ptr;
682ac724 4664 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",zs->zsl->length));
e197b441 4665 }
4666 }
4667}
4668
6e333bbe 4669static void zscoreCommand(redisClient *c) {
4670 robj *o;
4671 zset *zs;
4672
4673 o = lookupKeyRead(c->db,c->argv[1]);
4674 if (o == NULL) {
96d8b4ee 4675 addReply(c,shared.nullbulk);
6e333bbe 4676 return;
4677 } else {
4678 if (o->type != REDIS_ZSET) {
4679 addReply(c,shared.wrongtypeerr);
4680 } else {
4681 dictEntry *de;
4682
4683 zs = o->ptr;
4684 de = dictFind(zs->dict,c->argv[2]);
4685 if (!de) {
4686 addReply(c,shared.nullbulk);
4687 } else {
6e333bbe 4688 double *score = dictGetEntryVal(de);
4689
e2665397 4690 addReplyDouble(c,*score);
6e333bbe 4691 }
4692 }
4693 }
4694}
4695
6b47e12e 4696/* ========================= Non type-specific commands ==================== */
4697
ed9b544e 4698static void flushdbCommand(redisClient *c) {
ca37e9cd 4699 server.dirty += dictSize(c->db->dict);
3305306f 4700 dictEmpty(c->db->dict);
4701 dictEmpty(c->db->expires);
ed9b544e 4702 addReply(c,shared.ok);
ed9b544e 4703}
4704
4705static void flushallCommand(redisClient *c) {
ca37e9cd 4706 server.dirty += emptyDb();
ed9b544e 4707 addReply(c,shared.ok);
f78fd11b 4708 rdbSave(server.dbfilename);
ca37e9cd 4709 server.dirty++;
ed9b544e 4710}
4711
56906eef 4712static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 4713 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 4714 so->type = type;
4715 so->pattern = pattern;
4716 return so;
4717}
4718
4719/* Return the value associated to the key with a name obtained
4720 * substituting the first occurence of '*' in 'pattern' with 'subst' */
56906eef 4721static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
ed9b544e 4722 char *p;
4723 sds spat, ssub;
4724 robj keyobj;
4725 int prefixlen, sublen, postfixlen;
ed9b544e 4726 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
4727 struct {
f1017b3f 4728 long len;
4729 long free;
ed9b544e 4730 char buf[REDIS_SORTKEY_MAX+1];
4731 } keyname;
4732
28173a49 4733 /* If the pattern is "#" return the substitution object itself in order
4734 * to implement the "SORT ... GET #" feature. */
4735 spat = pattern->ptr;
4736 if (spat[0] == '#' && spat[1] == '\0') {
4737 return subst;
4738 }
4739
4740 /* The substitution object may be specially encoded. If so we create
9d65a1bb 4741 * a decoded object on the fly. Otherwise getDecodedObject will just
4742 * increment the ref count, that we'll decrement later. */
4743 subst = getDecodedObject(subst);
942a3961 4744
ed9b544e 4745 ssub = subst->ptr;
4746 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
4747 p = strchr(spat,'*');
ed5a857a 4748 if (!p) {
4749 decrRefCount(subst);
4750 return NULL;
4751 }
ed9b544e 4752
4753 prefixlen = p-spat;
4754 sublen = sdslen(ssub);
4755 postfixlen = sdslen(spat)-(prefixlen+1);
4756 memcpy(keyname.buf,spat,prefixlen);
4757 memcpy(keyname.buf+prefixlen,ssub,sublen);
4758 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
4759 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
4760 keyname.len = prefixlen+sublen+postfixlen;
4761
dfc5e96c 4762 initStaticStringObject(keyobj,((char*)&keyname)+(sizeof(long)*2))
942a3961 4763 decrRefCount(subst);
4764
a4d1ba9a 4765 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3305306f 4766 return lookupKeyRead(db,&keyobj);
ed9b544e 4767}
4768
4769/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
4770 * the additional parameter is not standard but a BSD-specific we have to
4771 * pass sorting parameters via the global 'server' structure */
4772static int sortCompare(const void *s1, const void *s2) {
4773 const redisSortObject *so1 = s1, *so2 = s2;
4774 int cmp;
4775
4776 if (!server.sort_alpha) {
4777 /* Numeric sorting. Here it's trivial as we precomputed scores */
4778 if (so1->u.score > so2->u.score) {
4779 cmp = 1;
4780 } else if (so1->u.score < so2->u.score) {
4781 cmp = -1;
4782 } else {
4783 cmp = 0;
4784 }
4785 } else {
4786 /* Alphanumeric sorting */
4787 if (server.sort_bypattern) {
4788 if (!so1->u.cmpobj || !so2->u.cmpobj) {
4789 /* At least one compare object is NULL */
4790 if (so1->u.cmpobj == so2->u.cmpobj)
4791 cmp = 0;
4792 else if (so1->u.cmpobj == NULL)
4793 cmp = -1;
4794 else
4795 cmp = 1;
4796 } else {
4797 /* We have both the objects, use strcoll */
4798 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
4799 }
4800 } else {
4801 /* Compare elements directly */
9d65a1bb 4802 robj *dec1, *dec2;
4803
4804 dec1 = getDecodedObject(so1->obj);
4805 dec2 = getDecodedObject(so2->obj);
4806 cmp = strcoll(dec1->ptr,dec2->ptr);
4807 decrRefCount(dec1);
4808 decrRefCount(dec2);
ed9b544e 4809 }
4810 }
4811 return server.sort_desc ? -cmp : cmp;
4812}
4813
4814/* The SORT command is the most complex command in Redis. Warning: this code
4815 * is optimized for speed and a bit less for readability */
4816static void sortCommand(redisClient *c) {
ed9b544e 4817 list *operations;
4818 int outputlen = 0;
4819 int desc = 0, alpha = 0;
4820 int limit_start = 0, limit_count = -1, start, end;
4821 int j, dontsort = 0, vectorlen;
4822 int getop = 0; /* GET operation counter */
443c6409 4823 robj *sortval, *sortby = NULL, *storekey = NULL;
ed9b544e 4824 redisSortObject *vector; /* Resulting vector to sort */
4825
4826 /* Lookup the key to sort. It must be of the right types */
3305306f 4827 sortval = lookupKeyRead(c->db,c->argv[1]);
4828 if (sortval == NULL) {
c937aa89 4829 addReply(c,shared.nokeyerr);
ed9b544e 4830 return;
4831 }
a5eb649b 4832 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
4833 sortval->type != REDIS_ZSET)
4834 {
c937aa89 4835 addReply(c,shared.wrongtypeerr);
ed9b544e 4836 return;
4837 }
4838
4839 /* Create a list of operations to perform for every sorted element.
4840 * Operations can be GET/DEL/INCR/DECR */
4841 operations = listCreate();
092dac2a 4842 listSetFreeMethod(operations,zfree);
ed9b544e 4843 j = 2;
4844
4845 /* Now we need to protect sortval incrementing its count, in the future
4846 * SORT may have options able to overwrite/delete keys during the sorting
4847 * and the sorted key itself may get destroied */
4848 incrRefCount(sortval);
4849
4850 /* The SORT command has an SQL-alike syntax, parse it */
4851 while(j < c->argc) {
4852 int leftargs = c->argc-j-1;
4853 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
4854 desc = 0;
4855 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
4856 desc = 1;
4857 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
4858 alpha = 1;
4859 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
4860 limit_start = atoi(c->argv[j+1]->ptr);
4861 limit_count = atoi(c->argv[j+2]->ptr);
4862 j+=2;
443c6409 4863 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
4864 storekey = c->argv[j+1];
4865 j++;
ed9b544e 4866 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
4867 sortby = c->argv[j+1];
4868 /* If the BY pattern does not contain '*', i.e. it is constant,
4869 * we don't need to sort nor to lookup the weight keys. */
4870 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
4871 j++;
4872 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4873 listAddNodeTail(operations,createSortOperation(
4874 REDIS_SORT_GET,c->argv[j+1]));
4875 getop++;
4876 j++;
ed9b544e 4877 } else {
4878 decrRefCount(sortval);
4879 listRelease(operations);
c937aa89 4880 addReply(c,shared.syntaxerr);
ed9b544e 4881 return;
4882 }
4883 j++;
4884 }
4885
4886 /* Load the sorting vector with all the objects to sort */
a5eb649b 4887 switch(sortval->type) {
4888 case REDIS_LIST: vectorlen = listLength((list*)sortval->ptr); break;
4889 case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break;
4890 case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
dfc5e96c 4891 default: vectorlen = 0; redisAssert(0); /* Avoid GCC warning */
a5eb649b 4892 }
ed9b544e 4893 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 4894 j = 0;
a5eb649b 4895
ed9b544e 4896 if (sortval->type == REDIS_LIST) {
4897 list *list = sortval->ptr;
6208b3a7 4898 listNode *ln;
4899
4900 listRewind(list);
4901 while((ln = listYield(list))) {
ed9b544e 4902 robj *ele = ln->value;
4903 vector[j].obj = ele;
4904 vector[j].u.score = 0;
4905 vector[j].u.cmpobj = NULL;
ed9b544e 4906 j++;
4907 }
4908 } else {
a5eb649b 4909 dict *set;
ed9b544e 4910 dictIterator *di;
4911 dictEntry *setele;
4912
a5eb649b 4913 if (sortval->type == REDIS_SET) {
4914 set = sortval->ptr;
4915 } else {
4916 zset *zs = sortval->ptr;
4917 set = zs->dict;
4918 }
4919
ed9b544e 4920 di = dictGetIterator(set);
ed9b544e 4921 while((setele = dictNext(di)) != NULL) {
4922 vector[j].obj = dictGetEntryKey(setele);
4923 vector[j].u.score = 0;
4924 vector[j].u.cmpobj = NULL;
4925 j++;
4926 }
4927 dictReleaseIterator(di);
4928 }
dfc5e96c 4929 redisAssert(j == vectorlen);
ed9b544e 4930
4931 /* Now it's time to load the right scores in the sorting vector */
4932 if (dontsort == 0) {
4933 for (j = 0; j < vectorlen; j++) {
4934 if (sortby) {
4935 robj *byval;
4936
3305306f 4937 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
ed9b544e 4938 if (!byval || byval->type != REDIS_STRING) continue;
4939 if (alpha) {
9d65a1bb 4940 vector[j].u.cmpobj = getDecodedObject(byval);
ed9b544e 4941 } else {
942a3961 4942 if (byval->encoding == REDIS_ENCODING_RAW) {
4943 vector[j].u.score = strtod(byval->ptr,NULL);
4944 } else {
9d65a1bb 4945 /* Don't need to decode the object if it's
4946 * integer-encoded (the only encoding supported) so
4947 * far. We can just cast it */
f1017b3f 4948 if (byval->encoding == REDIS_ENCODING_INT) {
942a3961 4949 vector[j].u.score = (long)byval->ptr;
f1017b3f 4950 } else
dfc5e96c 4951 redisAssert(1 != 1);
942a3961 4952 }
ed9b544e 4953 }
4954 } else {
942a3961 4955 if (!alpha) {
4956 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
4957 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
4958 else {
4959 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
4960 vector[j].u.score = (long) vector[j].obj->ptr;
4961 else
dfc5e96c 4962 redisAssert(1 != 1);
942a3961 4963 }
4964 }
ed9b544e 4965 }
4966 }
4967 }
4968
4969 /* We are ready to sort the vector... perform a bit of sanity check
4970 * on the LIMIT option too. We'll use a partial version of quicksort. */
4971 start = (limit_start < 0) ? 0 : limit_start;
4972 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
4973 if (start >= vectorlen) {
4974 start = vectorlen-1;
4975 end = vectorlen-2;
4976 }
4977 if (end >= vectorlen) end = vectorlen-1;
4978
4979 if (dontsort == 0) {
4980 server.sort_desc = desc;
4981 server.sort_alpha = alpha;
4982 server.sort_bypattern = sortby ? 1 : 0;
5f5b9840 4983 if (sortby && (start != 0 || end != vectorlen-1))
4984 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
4985 else
4986 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 4987 }
4988
4989 /* Send command output to the output buffer, performing the specified
4990 * GET/DEL/INCR/DECR operations if any. */
4991 outputlen = getop ? getop*(end-start+1) : end-start+1;
443c6409 4992 if (storekey == NULL) {
4993 /* STORE option not specified, sent the sorting result to client */
4994 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
4995 for (j = start; j <= end; j++) {
4996 listNode *ln;
4997 if (!getop) {
4998 addReplyBulkLen(c,vector[j].obj);
4999 addReply(c,vector[j].obj);
5000 addReply(c,shared.crlf);
5001 }
5002 listRewind(operations);
5003 while((ln = listYield(operations))) {
5004 redisSortOperation *sop = ln->value;
5005 robj *val = lookupKeyByPattern(c->db,sop->pattern,
5006 vector[j].obj);
5007
5008 if (sop->type == REDIS_SORT_GET) {
5009 if (!val || val->type != REDIS_STRING) {
5010 addReply(c,shared.nullbulk);
5011 } else {
5012 addReplyBulkLen(c,val);
5013 addReply(c,val);
5014 addReply(c,shared.crlf);
5015 }
5016 } else {
dfc5e96c 5017 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
443c6409 5018 }
5019 }
ed9b544e 5020 }
443c6409 5021 } else {
5022 robj *listObject = createListObject();
5023 list *listPtr = (list*) listObject->ptr;
5024
5025 /* STORE option specified, set the sorting result as a List object */
5026 for (j = start; j <= end; j++) {
5027 listNode *ln;
5028 if (!getop) {
5029 listAddNodeTail(listPtr,vector[j].obj);
5030 incrRefCount(vector[j].obj);
5031 }
5032 listRewind(operations);
5033 while((ln = listYield(operations))) {
5034 redisSortOperation *sop = ln->value;
5035 robj *val = lookupKeyByPattern(c->db,sop->pattern,
5036 vector[j].obj);
5037
5038 if (sop->type == REDIS_SORT_GET) {
5039 if (!val || val->type != REDIS_STRING) {
5040 listAddNodeTail(listPtr,createStringObject("",0));
5041 } else {
5042 listAddNodeTail(listPtr,val);
5043 incrRefCount(val);
5044 }
ed9b544e 5045 } else {
dfc5e96c 5046 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
ed9b544e 5047 }
ed9b544e 5048 }
ed9b544e 5049 }
121796f7 5050 if (dictReplace(c->db->dict,storekey,listObject)) {
5051 incrRefCount(storekey);
5052 }
443c6409 5053 /* Note: we add 1 because the DB is dirty anyway since even if the
5054 * SORT result is empty a new key is set and maybe the old content
5055 * replaced. */
5056 server.dirty += 1+outputlen;
5057 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
ed9b544e 5058 }
5059
5060 /* Cleanup */
5061 decrRefCount(sortval);
5062 listRelease(operations);
5063 for (j = 0; j < vectorlen; j++) {
5064 if (sortby && alpha && vector[j].u.cmpobj)
5065 decrRefCount(vector[j].u.cmpobj);
5066 }
5067 zfree(vector);
5068}
5069
1c85b79f 5070/* Create the string returned by the INFO command. This is decoupled
5071 * by the INFO command itself as we need to report the same information
5072 * on memory corruption problems. */
5073static sds genRedisInfoString(void) {
ed9b544e 5074 sds info;
5075 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 5076 int j;
ed9b544e 5077
5078 info = sdscatprintf(sdsempty(),
5079 "redis_version:%s\r\n"
f1017b3f 5080 "arch_bits:%s\r\n"
7a932b74 5081 "multiplexing_api:%s\r\n"
682ac724 5082 "uptime_in_seconds:%ld\r\n"
5083 "uptime_in_days:%ld\r\n"
ed9b544e 5084 "connected_clients:%d\r\n"
5085 "connected_slaves:%d\r\n"
5fba9f71 5086 "used_memory:%zu\r\n"
ed9b544e 5087 "changes_since_last_save:%lld\r\n"
be2bb6b0 5088 "bgsave_in_progress:%d\r\n"
682ac724 5089 "last_save_time:%ld\r\n"
b3fad521 5090 "bgrewriteaof_in_progress:%d\r\n"
ed9b544e 5091 "total_connections_received:%lld\r\n"
5092 "total_commands_processed:%lld\r\n"
a0f643ea 5093 "role:%s\r\n"
ed9b544e 5094 ,REDIS_VERSION,
f1017b3f 5095 (sizeof(long) == 8) ? "64" : "32",
7a932b74 5096 aeGetApiName(),
a0f643ea 5097 uptime,
5098 uptime/(3600*24),
ed9b544e 5099 listLength(server.clients)-listLength(server.slaves),
5100 listLength(server.slaves),
5101 server.usedmemory,
5102 server.dirty,
9d65a1bb 5103 server.bgsavechildpid != -1,
ed9b544e 5104 server.lastsave,
b3fad521 5105 server.bgrewritechildpid != -1,
ed9b544e 5106 server.stat_numconnections,
5107 server.stat_numcommands,
a0f643ea 5108 server.masterhost == NULL ? "master" : "slave"
ed9b544e 5109 );
a0f643ea 5110 if (server.masterhost) {
5111 info = sdscatprintf(info,
5112 "master_host:%s\r\n"
5113 "master_port:%d\r\n"
5114 "master_link_status:%s\r\n"
5115 "master_last_io_seconds_ago:%d\r\n"
5116 ,server.masterhost,
5117 server.masterport,
5118 (server.replstate == REDIS_REPL_CONNECTED) ?
5119 "up" : "down",
f72b934d 5120 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 5121 );
5122 }
c3cb078d 5123 for (j = 0; j < server.dbnum; j++) {
5124 long long keys, vkeys;
5125
5126 keys = dictSize(server.db[j].dict);
5127 vkeys = dictSize(server.db[j].expires);
5128 if (keys || vkeys) {
9d65a1bb 5129 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
c3cb078d 5130 j, keys, vkeys);
5131 }
5132 }
1c85b79f 5133 return info;
5134}
5135
5136static void infoCommand(redisClient *c) {
5137 sds info = genRedisInfoString();
83c6a618 5138 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
5139 (unsigned long)sdslen(info)));
ed9b544e 5140 addReplySds(c,info);
70003d28 5141 addReply(c,shared.crlf);
ed9b544e 5142}
5143
3305306f 5144static void monitorCommand(redisClient *c) {
5145 /* ignore MONITOR if aleady slave or in monitor mode */
5146 if (c->flags & REDIS_SLAVE) return;
5147
5148 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
5149 c->slaveseldb = 0;
6b47e12e 5150 listAddNodeTail(server.monitors,c);
3305306f 5151 addReply(c,shared.ok);
5152}
5153
5154/* ================================= Expire ================================= */
5155static int removeExpire(redisDb *db, robj *key) {
5156 if (dictDelete(db->expires,key) == DICT_OK) {
5157 return 1;
5158 } else {
5159 return 0;
5160 }
5161}
5162
5163static int setExpire(redisDb *db, robj *key, time_t when) {
5164 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
5165 return 0;
5166 } else {
5167 incrRefCount(key);
5168 return 1;
5169 }
5170}
5171
bb32ede5 5172/* Return the expire time of the specified key, or -1 if no expire
5173 * is associated with this key (i.e. the key is non volatile) */
5174static time_t getExpire(redisDb *db, robj *key) {
5175 dictEntry *de;
5176
5177 /* No expire? return ASAP */
5178 if (dictSize(db->expires) == 0 ||
5179 (de = dictFind(db->expires,key)) == NULL) return -1;
5180
5181 return (time_t) dictGetEntryVal(de);
5182}
5183
3305306f 5184static int expireIfNeeded(redisDb *db, robj *key) {
5185 time_t when;
5186 dictEntry *de;
5187
5188 /* No expire? return ASAP */
5189 if (dictSize(db->expires) == 0 ||
5190 (de = dictFind(db->expires,key)) == NULL) return 0;
5191
5192 /* Lookup the expire */
5193 when = (time_t) dictGetEntryVal(de);
5194 if (time(NULL) <= when) return 0;
5195
5196 /* Delete the key */
5197 dictDelete(db->expires,key);
5198 return dictDelete(db->dict,key) == DICT_OK;
5199}
5200
5201static int deleteIfVolatile(redisDb *db, robj *key) {
5202 dictEntry *de;
5203
5204 /* No expire? return ASAP */
5205 if (dictSize(db->expires) == 0 ||
5206 (de = dictFind(db->expires,key)) == NULL) return 0;
5207
5208 /* Delete the key */
0c66a471 5209 server.dirty++;
3305306f 5210 dictDelete(db->expires,key);
5211 return dictDelete(db->dict,key) == DICT_OK;
5212}
5213
802e8373 5214static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
3305306f 5215 dictEntry *de;
3305306f 5216
802e8373 5217 de = dictFind(c->db->dict,key);
3305306f 5218 if (de == NULL) {
5219 addReply(c,shared.czero);
5220 return;
5221 }
43e5ccdf 5222 if (seconds < 0) {
5223 if (deleteKey(c->db,key)) server.dirty++;
5224 addReply(c, shared.cone);
3305306f 5225 return;
5226 } else {
5227 time_t when = time(NULL)+seconds;
802e8373 5228 if (setExpire(c->db,key,when)) {
3305306f 5229 addReply(c,shared.cone);
77423026 5230 server.dirty++;
5231 } else {
3305306f 5232 addReply(c,shared.czero);
77423026 5233 }
3305306f 5234 return;
5235 }
5236}
5237
802e8373 5238static void expireCommand(redisClient *c) {
5239 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
5240}
5241
5242static void expireatCommand(redisClient *c) {
5243 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
5244}
5245
fd88489a 5246static void ttlCommand(redisClient *c) {
5247 time_t expire;
5248 int ttl = -1;
5249
5250 expire = getExpire(c->db,c->argv[1]);
5251 if (expire != -1) {
5252 ttl = (int) (expire-time(NULL));
5253 if (ttl < 0) ttl = -1;
5254 }
5255 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
5256}
5257
ed9b544e 5258/* =============================== Replication ============================= */
5259
a4d1ba9a 5260static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5261 ssize_t nwritten, ret = size;
5262 time_t start = time(NULL);
5263
5264 timeout++;
5265 while(size) {
5266 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
5267 nwritten = write(fd,ptr,size);
5268 if (nwritten == -1) return -1;
5269 ptr += nwritten;
5270 size -= nwritten;
5271 }
5272 if ((time(NULL)-start) > timeout) {
5273 errno = ETIMEDOUT;
5274 return -1;
5275 }
5276 }
5277 return ret;
5278}
5279
a4d1ba9a 5280static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5281 ssize_t nread, totread = 0;
5282 time_t start = time(NULL);
5283
5284 timeout++;
5285 while(size) {
5286 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
5287 nread = read(fd,ptr,size);
5288 if (nread == -1) return -1;
5289 ptr += nread;
5290 size -= nread;
5291 totread += nread;
5292 }
5293 if ((time(NULL)-start) > timeout) {
5294 errno = ETIMEDOUT;
5295 return -1;
5296 }
5297 }
5298 return totread;
5299}
5300
/* Read a line from 'fd' one byte at a time, storing it as a null
 * terminated string into 'ptr', a buffer of 'size' bytes.
 *
 * Reading stops at the first '\n', that is not stored. If the newline is
 * preceded by '\r' the carriage return is stripped from the stored string
 * as well (but it is still accounted in the return value). Reading also
 * stops when 'size'-1 bytes were stored, so that the buffer is never
 * overflowed: in this case the truncated line is returned.
 *
 * Every byte is read with syncRead() using 'timeout' as timeout.
 *
 * Return the number of bytes stored before the newline, or -1 if
 * syncRead() fails. Note that when 'size' is <= 1 nothing is read nor
 * written and 0 is returned. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    size--; /* Reserve room for the null term. */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* Account for the byte just stored: without this the loop never
         * stops on lines longer than the buffer, writing past its end. */
        size--;
    }
    return nread;
}
5321
5322static void syncCommand(redisClient *c) {
40d224a9 5323 /* ignore SYNC if aleady slave or in monitor mode */
5324 if (c->flags & REDIS_SLAVE) return;
5325
5326 /* SYNC can't be issued when the server has pending data to send to
5327 * the client about already issued commands. We need a fresh reply
5328 * buffer registering the differences between the BGSAVE and the current
5329 * dataset, so that we can copy to other slaves if needed. */
5330 if (listLength(c->reply) != 0) {
5331 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
5332 return;
5333 }
5334
5335 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
5336 /* Here we need to check if there is a background saving operation
5337 * in progress, or if it is required to start one */
9d65a1bb 5338 if (server.bgsavechildpid != -1) {
40d224a9 5339 /* Ok a background save is in progress. Let's check if it is a good
5340 * one for replication, i.e. if there is another slave that is
5341 * registering differences since the server forked to save */
5342 redisClient *slave;
5343 listNode *ln;
5344
6208b3a7 5345 listRewind(server.slaves);
5346 while((ln = listYield(server.slaves))) {
40d224a9 5347 slave = ln->value;
5348 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 5349 }
5350 if (ln) {
5351 /* Perfect, the server is already registering differences for
5352 * another slave. Set the right state, and copy the buffer. */
5353 listRelease(c->reply);
5354 c->reply = listDup(slave->reply);
40d224a9 5355 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5356 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
5357 } else {
5358 /* No way, we need to wait for the next BGSAVE in order to
5359 * register differences */
5360 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5361 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5362 }
5363 } else {
5364 /* Ok we don't have a BGSAVE in progress, let's start one */
5365 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
5366 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5367 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
5368 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
5369 return;
5370 }
5371 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5372 }
6208b3a7 5373 c->repldbfd = -1;
40d224a9 5374 c->flags |= REDIS_SLAVE;
5375 c->slaveseldb = 0;
6b47e12e 5376 listAddNodeTail(server.slaves,c);
40d224a9 5377 return;
5378}
5379
6208b3a7 5380static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
5381 redisClient *slave = privdata;
5382 REDIS_NOTUSED(el);
5383 REDIS_NOTUSED(mask);
5384 char buf[REDIS_IOBUF_LEN];
5385 ssize_t nwritten, buflen;
5386
5387 if (slave->repldboff == 0) {
5388 /* Write the bulk write count before to transfer the DB. In theory here
5389 * we don't know how much room there is in the output buffer of the
5390 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
5391 * operations) will never be smaller than the few bytes we need. */
5392 sds bulkcount;
5393
5394 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
5395 slave->repldbsize);
5396 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
5397 {
5398 sdsfree(bulkcount);
5399 freeClient(slave);
5400 return;
5401 }
5402 sdsfree(bulkcount);
5403 }
5404 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
5405 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
5406 if (buflen <= 0) {
5407 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
5408 (buflen == 0) ? "premature EOF" : strerror(errno));
5409 freeClient(slave);
5410 return;
5411 }
5412 if ((nwritten = write(fd,buf,buflen)) == -1) {
5413 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
5414 strerror(errno));
5415 freeClient(slave);
5416 return;
5417 }
5418 slave->repldboff += nwritten;
5419 if (slave->repldboff == slave->repldbsize) {
5420 close(slave->repldbfd);
5421 slave->repldbfd = -1;
5422 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5423 slave->replstate = REDIS_REPL_ONLINE;
5424 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
266373b2 5425 sendReplyToClient, slave) == AE_ERR) {
6208b3a7 5426 freeClient(slave);
5427 return;
5428 }
5429 addReplySds(slave,sdsempty());
5430 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
5431 }
5432}
ed9b544e 5433
a3b21203 5434/* This function is called at the end of every backgrond saving.
5435 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
5436 * otherwise REDIS_ERR is passed to the function.
5437 *
5438 * The goal of this function is to handle slaves waiting for a successful
5439 * background saving in order to perform non-blocking synchronization. */
5440static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 5441 listNode *ln;
5442 int startbgsave = 0;
ed9b544e 5443
6208b3a7 5444 listRewind(server.slaves);
5445 while((ln = listYield(server.slaves))) {
5446 redisClient *slave = ln->value;
ed9b544e 5447
6208b3a7 5448 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
5449 startbgsave = 1;
5450 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5451 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 5452 struct redis_stat buf;
6208b3a7 5453
5454 if (bgsaveerr != REDIS_OK) {
5455 freeClient(slave);
5456 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
5457 continue;
5458 }
5459 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 5460 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 5461 freeClient(slave);
5462 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
5463 continue;
5464 }
5465 slave->repldboff = 0;
5466 slave->repldbsize = buf.st_size;
5467 slave->replstate = REDIS_REPL_SEND_BULK;
5468 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
266373b2 5469 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
6208b3a7 5470 freeClient(slave);
5471 continue;
5472 }
5473 }
ed9b544e 5474 }
6208b3a7 5475 if (startbgsave) {
5476 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5477 listRewind(server.slaves);
5478 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
5479 while((ln = listYield(server.slaves))) {
5480 redisClient *slave = ln->value;
ed9b544e 5481
6208b3a7 5482 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
5483 freeClient(slave);
5484 }
5485 }
5486 }
ed9b544e 5487}
5488
5489static int syncWithMaster(void) {
d0ccebcf 5490 char buf[1024], tmpfile[256], authcmd[1024];
ed9b544e 5491 int dumpsize;
5492 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
5493 int dfd;
5494
5495 if (fd == -1) {
5496 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
5497 strerror(errno));
5498 return REDIS_ERR;
5499 }
d0ccebcf 5500
5501 /* AUTH with the master if required. */
5502 if(server.masterauth) {
5503 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
5504 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
5505 close(fd);
5506 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
5507 strerror(errno));
5508 return REDIS_ERR;
5509 }
5510 /* Read the AUTH result. */
5511 if (syncReadLine(fd,buf,1024,3600) == -1) {
5512 close(fd);
5513 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
5514 strerror(errno));
5515 return REDIS_ERR;
5516 }
5517 if (buf[0] != '+') {
5518 close(fd);
5519 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
5520 return REDIS_ERR;
5521 }
5522 }
5523
ed9b544e 5524 /* Issue the SYNC command */
5525 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
5526 close(fd);
5527 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
5528 strerror(errno));
5529 return REDIS_ERR;
5530 }
5531 /* Read the bulk write count */
8c4d91fc 5532 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 5533 close(fd);
5534 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
5535 strerror(errno));
5536 return REDIS_ERR;
5537 }
4aa701c1 5538 if (buf[0] != '$') {
5539 close(fd);
5540 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
5541 return REDIS_ERR;
5542 }
c937aa89 5543 dumpsize = atoi(buf+1);
ed9b544e 5544 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
5545 /* Read the bulk write data on a temp file */
5546 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
5547 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
5548 if (dfd == -1) {
5549 close(fd);
5550 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
5551 return REDIS_ERR;
5552 }
5553 while(dumpsize) {
5554 int nread, nwritten;
5555
5556 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
5557 if (nread == -1) {
5558 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
5559 strerror(errno));
5560 close(fd);
5561 close(dfd);
5562 return REDIS_ERR;
5563 }
5564 nwritten = write(dfd,buf,nread);
5565 if (nwritten == -1) {
5566 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
5567 close(fd);
5568 close(dfd);
5569 return REDIS_ERR;
5570 }
5571 dumpsize -= nread;
5572 }
5573 close(dfd);
5574 if (rename(tmpfile,server.dbfilename) == -1) {
5575 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
5576 unlink(tmpfile);
5577 close(fd);
5578 return REDIS_ERR;
5579 }
5580 emptyDb();
f78fd11b 5581 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 5582 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
5583 close(fd);
5584 return REDIS_ERR;
5585 }
5586 server.master = createClient(fd);
5587 server.master->flags |= REDIS_MASTER;
179b3952 5588 server.master->authenticated = 1;
ed9b544e 5589 server.replstate = REDIS_REPL_CONNECTED;
5590 return REDIS_OK;
5591}
5592
321b0e13 5593static void slaveofCommand(redisClient *c) {
5594 if (!strcasecmp(c->argv[1]->ptr,"no") &&
5595 !strcasecmp(c->argv[2]->ptr,"one")) {
5596 if (server.masterhost) {
5597 sdsfree(server.masterhost);
5598 server.masterhost = NULL;
5599 if (server.master) freeClient(server.master);
5600 server.replstate = REDIS_REPL_NONE;
5601 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
5602 }
5603 } else {
5604 sdsfree(server.masterhost);
5605 server.masterhost = sdsdup(c->argv[1]->ptr);
5606 server.masterport = atoi(c->argv[2]->ptr);
5607 if (server.master) freeClient(server.master);
5608 server.replstate = REDIS_REPL_CONNECT;
5609 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
5610 server.masterhost, server.masterport);
5611 }
5612 addReply(c,shared.ok);
5613}
5614
3fd78bcd 5615/* ============================ Maxmemory directive ======================== */
5616
5617/* This function gets called when 'maxmemory' is set on the config file to limit
5618 * the max memory used by the server, and we are out of memory.
5619 * This function will try to, in order:
5620 *
5621 * - Free objects from the free list
5622 * - Try to remove keys with an EXPIRE set
5623 *
5624 * It is not possible to free enough memory to reach used-memory < maxmemory
5625 * the server will start refusing commands that will enlarge even more the
5626 * memory usage.
5627 */
5628static void freeMemoryIfNeeded(void) {
5629 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
5630 if (listLength(server.objfreelist)) {
5631 robj *o;
5632
5633 listNode *head = listFirst(server.objfreelist);
5634 o = listNodeValue(head);
5635 listDelNode(server.objfreelist,head);
5636 zfree(o);
5637 } else {
5638 int j, k, freed = 0;
5639
5640 for (j = 0; j < server.dbnum; j++) {
5641 int minttl = -1;
5642 robj *minkey = NULL;
5643 struct dictEntry *de;
5644
5645 if (dictSize(server.db[j].expires)) {
5646 freed = 1;
5647 /* From a sample of three keys drop the one nearest to
5648 * the natural expire */
5649 for (k = 0; k < 3; k++) {
5650 time_t t;
5651
5652 de = dictGetRandomKey(server.db[j].expires);
5653 t = (time_t) dictGetEntryVal(de);
5654 if (minttl == -1 || t < minttl) {
5655 minkey = dictGetEntryKey(de);
5656 minttl = t;
5657 }
5658 }
5659 deleteKey(server.db+j,minkey);
5660 }
5661 }
5662 if (!freed) return; /* nothing to free... */
5663 }
5664 }
5665}
5666
f80dff62 5667/* ============================== Append Only file ========================== */
5668
5669static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
5670 sds buf = sdsempty();
5671 int j;
5672 ssize_t nwritten;
5673 time_t now;
5674 robj *tmpargv[3];
5675
5676 /* The DB this command was targetting is not the same as the last command
5677 * we appendend. To issue a SELECT command is needed. */
5678 if (dictid != server.appendseldb) {
5679 char seldb[64];
5680
5681 snprintf(seldb,sizeof(seldb),"%d",dictid);
682ac724 5682 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
83c6a618 5683 (unsigned long)strlen(seldb),seldb);
f80dff62 5684 server.appendseldb = dictid;
5685 }
5686
5687 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
5688 * EXPIREs into EXPIREATs calls */
5689 if (cmd->proc == expireCommand) {
5690 long when;
5691
5692 tmpargv[0] = createStringObject("EXPIREAT",8);
5693 tmpargv[1] = argv[1];
5694 incrRefCount(argv[1]);
5695 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
5696 tmpargv[2] = createObject(REDIS_STRING,
5697 sdscatprintf(sdsempty(),"%ld",when));
5698 argv = tmpargv;
5699 }
5700
5701 /* Append the actual command */
5702 buf = sdscatprintf(buf,"*%d\r\n",argc);
5703 for (j = 0; j < argc; j++) {
5704 robj *o = argv[j];
5705
9d65a1bb 5706 o = getDecodedObject(o);
83c6a618 5707 buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
f80dff62 5708 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
5709 buf = sdscatlen(buf,"\r\n",2);
9d65a1bb 5710 decrRefCount(o);
f80dff62 5711 }
5712
5713 /* Free the objects from the modified argv for EXPIREAT */
5714 if (cmd->proc == expireCommand) {
5715 for (j = 0; j < 3; j++)
5716 decrRefCount(argv[j]);
5717 }
5718
5719 /* We want to perform a single write. This should be guaranteed atomic
5720 * at least if the filesystem we are writing is a real physical one.
5721 * While this will save us against the server being killed I don't think
5722 * there is much to do about the whole server stopping for power problems
5723 * or alike */
5724 nwritten = write(server.appendfd,buf,sdslen(buf));
5725 if (nwritten != (signed)sdslen(buf)) {
5726 /* Ooops, we are in troubles. The best thing to do for now is
5727 * to simply exit instead to give the illusion that everything is
5728 * working as expected. */
5729 if (nwritten == -1) {
5730 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
5731 } else {
5732 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
5733 }
5734 exit(1);
5735 }
85a83172 5736 /* If a background append only file rewriting is in progress we want to
5737 * accumulate the differences between the child DB and the current one
5738 * in a buffer, so that when the child process will do its work we
5739 * can append the differences to the new append only file. */
5740 if (server.bgrewritechildpid != -1)
5741 server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
5742
5743 sdsfree(buf);
f80dff62 5744 now = time(NULL);
5745 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
5746 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
5747 now-server.lastfsync > 1))
5748 {
5749 fsync(server.appendfd); /* Let's try to get this data on the disk */
5750 server.lastfsync = now;
5751 }
5752}
5753
5754/* In Redis commands are always executed in the context of a client, so in
5755 * order to load the append only file we need to create a fake client. */
5756static struct redisClient *createFakeClient(void) {
5757 struct redisClient *c = zmalloc(sizeof(*c));
5758
5759 selectDb(c,0);
5760 c->fd = -1;
5761 c->querybuf = sdsempty();
5762 c->argc = 0;
5763 c->argv = NULL;
5764 c->flags = 0;
9387d17d 5765 /* We set the fake client as a slave waiting for the synchronization
5766 * so that Redis will not try to send replies to this client. */
5767 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
f80dff62 5768 c->reply = listCreate();
5769 listSetFreeMethod(c->reply,decrRefCount);
5770 listSetDupMethod(c->reply,dupClientReplyValue);
5771 return c;
5772}
5773
5774static void freeFakeClient(struct redisClient *c) {
5775 sdsfree(c->querybuf);
5776 listRelease(c->reply);
5777 zfree(c);
5778}
5779
5780/* Replay the append log file. On error REDIS_OK is returned. On non fatal
5781 * error (the append only file is zero-length) REDIS_ERR is returned. On
5782 * fatal error an error message is logged and the program exists. */
5783int loadAppendOnlyFile(char *filename) {
5784 struct redisClient *fakeClient;
5785 FILE *fp = fopen(filename,"r");
5786 struct redis_stat sb;
5787
5788 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
5789 return REDIS_ERR;
5790
5791 if (fp == NULL) {
5792 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
5793 exit(1);
5794 }
5795
5796 fakeClient = createFakeClient();
5797 while(1) {
5798 int argc, j;
5799 unsigned long len;
5800 robj **argv;
5801 char buf[128];
5802 sds argsds;
5803 struct redisCommand *cmd;
5804
5805 if (fgets(buf,sizeof(buf),fp) == NULL) {
5806 if (feof(fp))
5807 break;
5808 else
5809 goto readerr;
5810 }
5811 if (buf[0] != '*') goto fmterr;
5812 argc = atoi(buf+1);
5813 argv = zmalloc(sizeof(robj*)*argc);
5814 for (j = 0; j < argc; j++) {
5815 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
5816 if (buf[0] != '$') goto fmterr;
5817 len = strtol(buf+1,NULL,10);
5818 argsds = sdsnewlen(NULL,len);
0f151ef1 5819 if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
f80dff62 5820 argv[j] = createObject(REDIS_STRING,argsds);
5821 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
5822 }
5823
5824 /* Command lookup */
5825 cmd = lookupCommand(argv[0]->ptr);
5826 if (!cmd) {
5827 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
5828 exit(1);
5829 }
5830 /* Try object sharing and encoding */
5831 if (server.shareobjects) {
5832 int j;
5833 for(j = 1; j < argc; j++)
5834 argv[j] = tryObjectSharing(argv[j]);
5835 }
5836 if (cmd->flags & REDIS_CMD_BULK)
5837 tryObjectEncoding(argv[argc-1]);
5838 /* Run the command in the context of a fake client */
5839 fakeClient->argc = argc;
5840 fakeClient->argv = argv;
5841 cmd->proc(fakeClient);
5842 /* Discard the reply objects list from the fake client */
5843 while(listLength(fakeClient->reply))
5844 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
5845 /* Clean up, ready for the next command */
5846 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
5847 zfree(argv);
5848 }
5849 fclose(fp);
5850 freeFakeClient(fakeClient);
5851 return REDIS_OK;
5852
5853readerr:
5854 if (feof(fp)) {
5855 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
5856 } else {
5857 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
5858 }
5859 exit(1);
5860fmterr:
5861 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
5862 exit(1);
5863}
5864
9d65a1bb 5865/* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
5866static int fwriteBulk(FILE *fp, robj *obj) {
5867 char buf[128];
5868 obj = getDecodedObject(obj);
5869 snprintf(buf,sizeof(buf),"$%ld\r\n",(long)sdslen(obj->ptr));
5870 if (fwrite(buf,strlen(buf),1,fp) == 0) goto err;
e96e4fbf 5871 if (sdslen(obj->ptr) && fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0)
5872 goto err;
9d65a1bb 5873 if (fwrite("\r\n",2,1,fp) == 0) goto err;
5874 decrRefCount(obj);
5875 return 1;
5876err:
5877 decrRefCount(obj);
5878 return 0;
5879}
5880
/* Write the double 'd' to 'fp' in the bulk format $<count>\r\n<payload>\r\n
 * where the payload is 'd' printed with "%.17g".
 * Returns 1 on success, 0 if a write failed. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t payloadlen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the announced length. */
    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    payloadlen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)payloadlen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,payloadlen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
5891
/* Write the long 'l' to 'fp' in the bulk format $<count>\r\n<payload>\r\n
 * where the payload is the decimal representation of 'l'.
 * Returns 1 on success, 0 if a write failed. */
static int fwriteBulkLong(FILE *fp, long l) {
    char payload[128], header[128];
    size_t payloadlen;

    /* The payload buffer already carries the trailing CRLF, which is not
     * part of the announced length. */
    snprintf(payload,sizeof(payload),"%ld\r\n",l);
    payloadlen = strlen(payload);
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)payloadlen-2);

    if (fwrite(header,strlen(header),1,fp) == 0 ||
        fwrite(payload,payloadlen,1,fp) == 0)
    {
        return 0;
    }
    return 1;
}
5902
5903/* Write a sequence of commands able to fully rebuild the dataset into
5904 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
5905static int rewriteAppendOnlyFile(char *filename) {
5906 dictIterator *di = NULL;
5907 dictEntry *de;
5908 FILE *fp;
5909 char tmpfile[256];
5910 int j;
5911 time_t now = time(NULL);
5912
5913 /* Note that we have to use a different temp name here compared to the
5914 * one used by rewriteAppendOnlyFileBackground() function. */
5915 snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
5916 fp = fopen(tmpfile,"w");
5917 if (!fp) {
5918 redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
5919 return REDIS_ERR;
5920 }
5921 for (j = 0; j < server.dbnum; j++) {
5922 char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
5923 redisDb *db = server.db+j;
5924 dict *d = db->dict;
5925 if (dictSize(d) == 0) continue;
5926 di = dictGetIterator(d);
5927 if (!di) {
5928 fclose(fp);
5929 return REDIS_ERR;
5930 }
5931
5932 /* SELECT the new DB */
5933 if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
85a83172 5934 if (fwriteBulkLong(fp,j) == 0) goto werr;
9d65a1bb 5935
5936 /* Iterate this DB writing every entry */
5937 while((de = dictNext(di)) != NULL) {
5938 robj *key = dictGetEntryKey(de);
5939 robj *o = dictGetEntryVal(de);
5940 time_t expiretime = getExpire(db,key);
5941
5942 /* Save the key and associated value */
9d65a1bb 5943 if (o->type == REDIS_STRING) {
5944 /* Emit a SET command */
5945 char cmd[]="*3\r\n$3\r\nSET\r\n";
5946 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
5947 /* Key and value */
5948 if (fwriteBulk(fp,key) == 0) goto werr;
5949 if (fwriteBulk(fp,o) == 0) goto werr;
5950 } else if (o->type == REDIS_LIST) {
5951 /* Emit the RPUSHes needed to rebuild the list */
5952 list *list = o->ptr;
5953 listNode *ln;
5954
5955 listRewind(list);
5956 while((ln = listYield(list))) {
5957 char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
5958 robj *eleobj = listNodeValue(ln);
5959
5960 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
5961 if (fwriteBulk(fp,key) == 0) goto werr;
5962 if (fwriteBulk(fp,eleobj) == 0) goto werr;
5963 }
5964 } else if (o->type == REDIS_SET) {
5965 /* Emit the SADDs needed to rebuild the set */
5966 dict *set = o->ptr;
5967 dictIterator *di = dictGetIterator(set);
5968 dictEntry *de;
5969
5970 while((de = dictNext(di)) != NULL) {
5971 char cmd[]="*3\r\n$4\r\nSADD\r\n";
5972 robj *eleobj = dictGetEntryKey(de);
5973
5974 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
5975 if (fwriteBulk(fp,key) == 0) goto werr;
5976 if (fwriteBulk(fp,eleobj) == 0) goto werr;
5977 }
5978 dictReleaseIterator(di);
5979 } else if (o->type == REDIS_ZSET) {
5980 /* Emit the ZADDs needed to rebuild the sorted set */
5981 zset *zs = o->ptr;
5982 dictIterator *di = dictGetIterator(zs->dict);
5983 dictEntry *de;
5984
5985 while((de = dictNext(di)) != NULL) {
5986 char cmd[]="*4\r\n$4\r\nZADD\r\n";
5987 robj *eleobj = dictGetEntryKey(de);
5988 double *score = dictGetEntryVal(de);
5989
5990 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
5991 if (fwriteBulk(fp,key) == 0) goto werr;
5992 if (fwriteBulkDouble(fp,*score) == 0) goto werr;
5993 if (fwriteBulk(fp,eleobj) == 0) goto werr;
5994 }
5995 dictReleaseIterator(di);
5996 } else {
dfc5e96c 5997 redisAssert(0 != 0);
9d65a1bb 5998 }
5999 /* Save the expire time */
6000 if (expiretime != -1) {
e96e4fbf 6001 char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
9d65a1bb 6002 /* If this key is already expired skip it */
6003 if (expiretime < now) continue;
6004 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6005 if (fwriteBulk(fp,key) == 0) goto werr;
6006 if (fwriteBulkLong(fp,expiretime) == 0) goto werr;
6007 }
6008 }
6009 dictReleaseIterator(di);
6010 }
6011
6012 /* Make sure data will not remain on the OS's output buffers */
6013 fflush(fp);
6014 fsync(fileno(fp));
6015 fclose(fp);
6016
6017 /* Use RENAME to make sure the DB file is changed atomically only
6018 * if the generate DB file is ok. */
6019 if (rename(tmpfile,filename) == -1) {
6020 redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
6021 unlink(tmpfile);
6022 return REDIS_ERR;
6023 }
6024 redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
6025 return REDIS_OK;
6026
6027werr:
6028 fclose(fp);
6029 unlink(tmpfile);
e96e4fbf 6030 redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
9d65a1bb 6031 if (di) dictReleaseIterator(di);
6032 return REDIS_ERR;
6033}
6034
6035/* This is how rewriting of the append only file in background works:
6036 *
6037 * 1) The user calls BGREWRITEAOF
6038 * 2) Redis calls this function, that forks():
6039 * 2a) the child rewrite the append only file in a temp file.
6040 * 2b) the parent accumulates differences in server.bgrewritebuf.
6041 * 3) When the child finished '2a' exists.
6042 * 4) The parent will trap the exit code, if it's OK, will append the
6043 * data accumulated into server.bgrewritebuf into the temp file, and
6044 * finally will rename(2) the temp file in the actual file name.
6045 * The the new file is reopened as the new append only file. Profit!
6046 */
6047static int rewriteAppendOnlyFileBackground(void) {
6048 pid_t childpid;
6049
6050 if (server.bgrewritechildpid != -1) return REDIS_ERR;
6051 if ((childpid = fork()) == 0) {
6052 /* Child */
6053 char tmpfile[256];
6054 close(server.fd);
6055
6056 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
6057 if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
6058 exit(0);
6059 } else {
6060 exit(1);
6061 }
6062 } else {
6063 /* Parent */
6064 if (childpid == -1) {
6065 redisLog(REDIS_WARNING,
6066 "Can't rewrite append only file in background: fork: %s",
6067 strerror(errno));
6068 return REDIS_ERR;
6069 }
6070 redisLog(REDIS_NOTICE,
6071 "Background append only file rewriting started by pid %d",childpid);
6072 server.bgrewritechildpid = childpid;
85a83172 6073 /* We set appendseldb to -1 in order to force the next call to the
6074 * feedAppendOnlyFile() to issue a SELECT command, so the differences
6075 * accumulated by the parent into server.bgrewritebuf will start
6076 * with a SELECT statement and it will be safe to merge. */
6077 server.appendseldb = -1;
9d65a1bb 6078 return REDIS_OK;
6079 }
6080 return REDIS_OK; /* unreached */
6081}
6082
6083static void bgrewriteaofCommand(redisClient *c) {
6084 if (server.bgrewritechildpid != -1) {
6085 addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
6086 return;
6087 }
6088 if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
49b99ab4 6089 char *status = "+Background append only file rewriting started\r\n";
6090 addReplySds(c,sdsnew(status));
9d65a1bb 6091 } else {
6092 addReply(c,shared.err);
6093 }
6094}
6095
/* Remove the temp file "temp-rewriteaof-bg-<childpid>.aof" from the current
 * directory. The result of unlink() is not checked. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
6102
7f957c92 6103/* ================================= Debugging ============================== */
6104
6105static void debugCommand(redisClient *c) {
6106 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
6107 *((char*)-1) = 'x';
210e29f7 6108 } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
6109 if (rdbSave(server.dbfilename) != REDIS_OK) {
6110 addReply(c,shared.err);
6111 return;
6112 }
6113 emptyDb();
6114 if (rdbLoad(server.dbfilename) != REDIS_OK) {
6115 addReply(c,shared.err);
6116 return;
6117 }
6118 redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
6119 addReply(c,shared.ok);
71c2b467 6120 } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
6121 emptyDb();
6122 if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
6123 addReply(c,shared.err);
6124 return;
6125 }
6126 redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
6127 addReply(c,shared.ok);
333298da 6128 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
6129 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
6130 robj *key, *val;
6131
6132 if (!de) {
6133 addReply(c,shared.nokeyerr);
6134 return;
6135 }
6136 key = dictGetEntryKey(de);
6137 val = dictGetEntryVal(de);
6138 addReplySds(c,sdscatprintf(sdsempty(),
942a3961 6139 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
682ac724 6140 (void*)key, key->refcount, (void*)val, val->refcount,
6141 val->encoding));
7f957c92 6142 } else {
333298da 6143 addReplySds(c,sdsnew(
210e29f7 6144 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|RELOAD]\r\n"));
7f957c92 6145 }
6146}
56906eef 6147
dfc5e96c 6148static void _redisAssert(char *estr) {
6149 redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
6150 redisLog(REDIS_WARNING,"==> %s\n",estr);
6151#ifdef HAVE_BACKTRACE
6152 redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
6153 *((char*)-1) = 'x';
6154#endif
6155}
6156
bcfc686d 6157/* =================================== Main! ================================ */
56906eef 6158
bcfc686d 6159#ifdef __linux__
/* Return the integer found on the first line of
 * /proc/sys/vm/overcommit_memory, or -1 if the file can't be opened or
 * read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,64,fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
6173
6174void linuxOvercommitMemoryWarning(void) {
6175 if (linuxOvercommitMemoryValue() == 0) {
6176 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
6177 }
6178}
6179#endif /* __linux__ */
6180
6181static void daemonize(void) {
6182 int fd;
6183 FILE *fp;
6184
6185 if (fork() != 0) exit(0); /* parent exits */
71c54b21 6186 printf("New pid: %d\n", getpid());
bcfc686d 6187 setsid(); /* create a new session */
6188
6189 /* Every output goes to /dev/null. If Redis is daemonized but
6190 * the 'logfile' is set to 'stdout' in the configuration file
6191 * it will not log at all. */
6192 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
6193 dup2(fd, STDIN_FILENO);
6194 dup2(fd, STDOUT_FILENO);
6195 dup2(fd, STDERR_FILENO);
6196 if (fd > STDERR_FILENO) close(fd);
6197 }
6198 /* Try to write the pid file */
6199 fp = fopen(server.pidfile,"w");
6200 if (fp) {
6201 fprintf(fp,"%d\n",getpid());
6202 fclose(fp);
56906eef 6203 }
56906eef 6204}
6205
bcfc686d 6206int main(int argc, char **argv) {
6207 initServerConfig();
6208 if (argc == 2) {
6209 resetServerSaveParams();
6210 loadServerConfig(argv[1]);
6211 } else if (argc > 2) {
6212 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
6213 exit(1);
6214 } else {
6215 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
6216 }
bcfc686d 6217 if (server.daemonize) daemonize();
71c54b21 6218 initServer();
bcfc686d 6219 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
6220#ifdef __linux__
6221 linuxOvercommitMemoryWarning();
6222#endif
6223 if (server.appendonly) {
6224 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
6225 redisLog(REDIS_NOTICE,"DB loaded from append only file");
6226 } else {
6227 if (rdbLoad(server.dbfilename) == REDIS_OK)
6228 redisLog(REDIS_NOTICE,"DB loaded from disk");
6229 }
6230 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
266373b2 6231 acceptHandler, NULL) == AE_ERR) oom("creating file event");
bcfc686d 6232 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
6233 aeMain(server.el);
6234 aeDeleteEventLoop(server.el);
6235 return 0;
6236}
6237
6238/* ============================= Backtrace support ========================= */
6239
6240#ifdef HAVE_BACKTRACE
6241static char *findFuncName(void *pointer, unsigned long *offset);
6242
56906eef 6243static void *getMcontextEip(ucontext_t *uc) {
6244#if defined(__FreeBSD__)
6245 return (void*) uc->uc_mcontext.mc_eip;
6246#elif defined(__dietlibc__)
6247 return (void*) uc->uc_mcontext.eip;
06db1f50 6248#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
da0a1620 6249 #if __x86_64__
6250 return (void*) uc->uc_mcontext->__ss.__rip;
6251 #else
56906eef 6252 return (void*) uc->uc_mcontext->__ss.__eip;
da0a1620 6253 #endif
06db1f50 6254#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
cb7e07cc 6255 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
06db1f50 6256 return (void*) uc->uc_mcontext->__ss.__rip;
cbc59b38 6257 #else
6258 return (void*) uc->uc_mcontext->__ss.__eip;
6259 #endif
b91cf5ef 6260#elif defined(__i386__) || defined(__X86_64__) /* Linux x86 */
56906eef 6261 return (void*) uc->uc_mcontext.gregs[REG_EIP];
b91cf5ef 6262#elif defined(__ia64__) /* Linux IA64 */
6263 return (void*) uc->uc_mcontext.sc_ip;
6264#else
6265 return NULL;
56906eef 6266#endif
6267}
6268
6269static void segvHandler(int sig, siginfo_t *info, void *secret) {
6270 void *trace[100];
6271 char **messages = NULL;
6272 int i, trace_size = 0;
6273 unsigned long offset=0;
56906eef 6274 ucontext_t *uc = (ucontext_t*) secret;
1c85b79f 6275 sds infostring;
56906eef 6276 REDIS_NOTUSED(info);
6277
6278 redisLog(REDIS_WARNING,
6279 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
1c85b79f 6280 infostring = genRedisInfoString();
6281 redisLog(REDIS_WARNING, "%s",infostring);
6282 /* It's not safe to sdsfree() the returned string under memory
6283 * corruption conditions. Let it leak as we are going to abort */
56906eef 6284
6285 trace_size = backtrace(trace, 100);
de96dbfe 6286 /* overwrite sigaction with caller's address */
b91cf5ef 6287 if (getMcontextEip(uc) != NULL) {
6288 trace[1] = getMcontextEip(uc);
6289 }
56906eef 6290 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 6291
d76412d1 6292 for (i=1; i<trace_size; ++i) {
56906eef 6293 char *fn = findFuncName(trace[i], &offset), *p;
6294
6295 p = strchr(messages[i],'+');
6296 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
6297 redisLog(REDIS_WARNING,"%s", messages[i]);
6298 } else {
6299 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
6300 }
6301 }
1c85b79f 6302 // free(messages); Don't call free() with possibly corrupted memory.
56906eef 6303 exit(0);
fe3bbfbe 6304}
56906eef 6305
6306static void setupSigSegvAction(void) {
6307 struct sigaction act;
6308
6309 sigemptyset (&act.sa_mask);
6310 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
6311 * is used. Otherwise, sa_handler is used */
6312 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
6313 act.sa_sigaction = segvHandler;
6314 sigaction (SIGSEGV, &act, NULL);
6315 sigaction (SIGBUS, &act, NULL);
12fea928 6316 sigaction (SIGFPE, &act, NULL);
6317 sigaction (SIGILL, &act, NULL);
6318 sigaction (SIGBUS, &act, NULL);
e65fdc78 6319 return;
56906eef 6320}
e65fdc78 6321
bcfc686d 6322#include "staticsymbols.h"
6323/* This function try to convert a pointer into a function name. It's used in
6324 * oreder to provide a backtrace under segmentation fault that's able to
6325 * display functions declared as static (otherwise the backtrace is useless). */
6326static char *findFuncName(void *pointer, unsigned long *offset){
6327 int i, ret = -1;
6328 unsigned long off, minoff = 0;
ed9b544e 6329
bcfc686d 6330 /* Try to match against the Symbol with the smallest offset */
6331 for (i=0; symsTable[i].pointer; i++) {
6332 unsigned long lp = (unsigned long) pointer;
0bc03378 6333
bcfc686d 6334 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
6335 off=lp-symsTable[i].pointer;
6336 if (ret < 0 || off < minoff) {
6337 minoff=off;
6338 ret=i;
6339 }
6340 }
0bc03378 6341 }
bcfc686d 6342 if (ret == -1) return NULL;
6343 *offset = minoff;
6344 return symsTable[ret].name;
0bc03378 6345}
bcfc686d 6346#else /* HAVE_BACKTRACE */
/* Without backtrace support no crash handler is installed. */
static void setupSigSegvAction(void) {
    /* nothing to do */
}
bcfc686d 6349#endif /* HAVE_BACKTRACE */
0bc03378 6350
ed9b544e 6351
ed9b544e 6352
bcfc686d 6353/* The End */
6354
6355
ed9b544e 6356