]> git.saurik.com Git - redis.git/blame - redis.c
TODO aesthetic changes
[redis.git] / redis.c
CommitLineData
ed9b544e 1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
1812e024 30#define REDIS_VERSION "1.050"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
c9468bcf 40#define __USE_POSIX199309
ed9b544e 41#include <signal.h>
fbf9bcdb 42
43#ifdef HAVE_BACKTRACE
c9468bcf 44#include <execinfo.h>
45#include <ucontext.h>
fbf9bcdb 46#endif /* HAVE_BACKTRACE */
47
ed9b544e 48#include <sys/wait.h>
49#include <errno.h>
50#include <assert.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <inttypes.h>
54#include <arpa/inet.h>
55#include <sys/stat.h>
56#include <fcntl.h>
57#include <sys/time.h>
58#include <sys/resource.h>
2895e862 59#include <sys/uio.h>
f78fd11b 60#include <limits.h>
a7866db6 61#include <math.h>
0bc1b2f6 62
63#if defined(__sun)
5043dff3 64#include "solarisfixes.h"
65#endif
ed9b544e 66
c9468bcf 67#include "redis.h"
ed9b544e 68#include "ae.h" /* Event driven programming library */
69#include "sds.h" /* Dynamic safe strings */
70#include "anet.h" /* Networking the easy way */
71#include "dict.h" /* Hash tables */
72#include "adlist.h" /* Linked lists */
73#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 74#include "lzf.h" /* LZF compression library */
75#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ed9b544e 76
77/* Error codes */
78#define REDIS_OK 0
79#define REDIS_ERR -1
80
81/* Static server configuration */
82#define REDIS_SERVERPORT 6379 /* TCP port */
83#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
6208b3a7 84#define REDIS_IOBUF_LEN 1024
ed9b544e 85#define REDIS_LOADBUF_LEN 1024
93ea3759 86#define REDIS_STATIC_ARGS 4
ed9b544e 87#define REDIS_DEFAULT_DBNUM 16
88#define REDIS_CONFIGLINE_MAX 1024
89#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
90#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94754ccc 91#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
6f376729 92#define REDIS_MAX_WRITE_PER_EVENT (1024*64) /* 64 KB */
2895e862 93#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command (256 MB) */
94
95/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
96#define REDIS_WRITEV_THRESHOLD 3
97/* Max number of iovecs used for each writev call */
98#define REDIS_WRITEV_IOVEC_COUNT 256
ed9b544e 99
100/* Hash table parameters */
101#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
ed9b544e 102
103/* Command flags (bit values, OR-ed together in the command table) */
3fd78bcd 104#define REDIS_CMD_BULK 1 /* Bulk write command */
105#define REDIS_CMD_INLINE 2 /* Inline command */
106/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
107 this flag will return an error when the 'maxmemory' option is set in the
108 config file and the server is using more than maxmemory bytes of memory.
109 In short, these commands are denied on low memory conditions. */
110#define REDIS_CMD_DENYOOM 4
ed9b544e 111
112/* Object types */
113#define REDIS_STRING 0
114#define REDIS_LIST 1
115#define REDIS_SET 2
1812e024 116#define REDIS_ZSET 3
117#define REDIS_HASH 4
f78fd11b 118
942a3961 119/* Object encodings (values of the 'encoding' field of robj) */
120#define REDIS_ENCODING_RAW 0 /* Raw representation */
121#define REDIS_ENCODING_INT 1 /* Encoded as integer */
122
f78fd11b 123/* Object types only used for dumping to disk */
bb32ede5 124#define REDIS_EXPIRETIME 253
ed9b544e 125#define REDIS_SELECTDB 254
126#define REDIS_EOF 255
127
f78fd11b 128/* Defines related to the dump file format. To store 32 bits lengths for short
129 * keys requires a lot of space, so we check the most significant 2 bits of
130 * the first byte to interpret the length:
131 *
132 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
133 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
134 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
a4d1ba9a 135 * 11|000000 this means: specially encoded object will follow. The six bits
136 * number specifies the kind of object that follows.
137 * See the REDIS_RDB_ENC_* defines.
f78fd11b 138 *
10c43610 139 * Lengths up to 63 are stored using a single byte, most DB keys, and many
140 * values, will fit inside. */
f78fd11b 141#define REDIS_RDB_6BITLEN 0
142#define REDIS_RDB_14BITLEN 1
143#define REDIS_RDB_32BITLEN 2
17be1a4a 144#define REDIS_RDB_ENCVAL 3
f78fd11b 145#define REDIS_RDB_LENERR UINT_MAX
146
a4d1ba9a 147/* When a length of a string object stored on disk has the first two bits
148 * set, the remaining six bits specify a special encoding for the object
149 * according to the following defines: */
150#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
151#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
152#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
774e3047 153#define REDIS_RDB_ENC_LZF 3 /* string compressed with LZF (see lzf.h) */
a4d1ba9a 154
ed9b544e 155/* Client flags (bit values, OR-ed into the 'flags' field of redisClient) */
156#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
157#define REDIS_SLAVE 2 /* This client is a slave server */
158#define REDIS_MASTER 4 /* This client is a master server */
87eca727 159#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
ed9b544e 160
40d224a9 161/* Slave replication state - slave side */
ed9b544e 162#define REDIS_REPL_NONE 0 /* No active replication */
163#define REDIS_REPL_CONNECT 1 /* Must connect to master */
164#define REDIS_REPL_CONNECTED 2 /* Connected to master */
165
40d224a9 166/* Slave replication state - from the point of view of master
167 * Note that in SEND_BULK and ONLINE state the slave receives new updates
168 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
169 * to start the next background saving in order to send updates to it. */
170#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
171#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
172#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
173#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
174
ed9b544e 175/* List related stuff */
176#define REDIS_HEAD 0
177#define REDIS_TAIL 1
178
179/* Sort operations */
180#define REDIS_SORT_GET 0
443c6409 181#define REDIS_SORT_ASC 1
182#define REDIS_SORT_DESC 2
ed9b544e 183#define REDIS_SORTKEY_MAX 1024
184
185/* Log levels: redisLog() prints a message only when its level is >= the
 * configured verbosity */
186#define REDIS_DEBUG 0
187#define REDIS_NOTICE 1
188#define REDIS_WARNING 2
189
190/* Anti-warning macro: evaluates V as a void expression so that unused
 * parameters do not trigger compiler warnings */
191#define REDIS_NOTUSED(V) ((void) V)
192
6b47e12e 193#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
194#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
ed9b544e 195
48f0308a 196/* Append only defines */
197#define APPENDFSYNC_NO 0
198#define APPENDFSYNC_ALWAYS 1
199#define APPENDFSYNC_EVERYSEC 2
200
ed9b544e 201/*================================= Data types ============================== */
202
203/* A redis object, that is a type able to hold a string / list / set.
 * The same structure is used for both keys and values stored in the
 * dictionaries defined in this file. */
204typedef struct redisObject {
ed9b544e 205 void *ptr; /* payload; an sds string when encoding is REDIS_ENCODING_RAW */
942a3961 206 unsigned char type; /* presumably one of the "Object types" defines - confirm */
207 unsigned char encoding; /* REDIS_ENCODING_RAW or REDIS_ENCODING_INT */
208 unsigned char notused[2];
ed9b544e 209 int refcount; /* presumably managed via incrRefCount()/decrRefCount() - confirm */
210} robj;
211
/* One database: the main dictionary plus the dictionary of keys that have
 * an expire set (serverCron reports its size as the "volatile" key count). */
3305306f 212typedef struct redisDb {
213 dict *dict;
214 dict *expires;
215 int id;
216} redisDb;
217
ed9b544e 218/* With multiplexing we need to keep per-client state.
219 * Clients are kept in a linked list (see server.clients). */
220typedef struct redisClient {
221 int fd;
3305306f 222 redisDb *db;
ed9b544e 223 int dictid;
224 sds querybuf;
e8a74421 225 robj **argv, **mbargv;
226 int argc, mbargc;
40d224a9 227 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 228 int multibulk; /* multi bulk command format active */
ed9b544e 229 list *reply;
230 int sentlen;
231 time_t lastinteraction; /* time of the last interaction, used for timeout */
40d224a9 232 int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MASTER | REDIS_MONITOR */
233 int slaveseldb; /* slave selected db, if this client is a slave */
234 int authenticated; /* when requirepass is non-NULL */
235 int replstate; /* replication state if this is a slave */
236 int repldbfd; /* replication DB file descriptor */
6208b3a7 237 long repldboff; /* replication DB file offset */
40d224a9 238 off_t repldbsize; /* replication DB file size */
ed9b544e 239} redisClient;
240
/* One entry of server.saveparams.
 * NOTE(review): presumably "save after 'seconds' if at least 'changes'
 * modifications happened" - confirm against the code that reads it. */
241struct saveparam {
242 time_t seconds;
243 int changes;
244};
245
246/* Global server state structure */
247struct redisServer {
248 int port;
249 int fd;
3305306f 250 redisDb *db; /* array indexed from 0 to dbnum-1 */
10c43610 251 dict *sharingpool;
252 unsigned int sharingpoolsize;
ed9b544e 253 long long dirty; /* changes to DB from the last save */
254 list *clients;
87eca727 255 list *slaves, *monitors;
ed9b544e 256 char neterr[ANET_ERR_LEN];
257 aeEventLoop *el;
258 int cronloops; /* number of times the cron function run */
259 list *objfreelist; /* A list of freed objects to avoid malloc() */
260 time_t lastsave; /* Unix time of last successful save */
5fba9f71 261 size_t usedmemory; /* Value of zmalloc_used_memory(), refreshed by serverCron.
 * NOTE(review): this comment used to say megabytes;
 * the value appears to be in bytes - confirm */
ed9b544e 262 /* Fields used only for stats */
263 time_t stat_starttime; /* server start time */
264 long long stat_numcommands; /* number of processed commands */
265 long long stat_numconnections; /* number of connections received */
266 /* Configuration */
267 int verbosity; /* minimum level printed by redisLog() */
268 int glueoutputbuf;
269 int maxidletime; /* idle clients are closed after this time */
270 int dbnum;
271 int daemonize;
44b38ef4 272 int appendonly;
48f0308a 273 int appendfsync;
274 time_t lastfsync;
44b38ef4 275 int appendfd;
276 int appendseldb;
ed329fcf 277 char *pidfile;
ed9b544e 278 int bgsaveinprogress;
9f3c422c 279 pid_t bgsavechildpid;
ed9b544e 280 struct saveparam *saveparams;
281 int saveparamslen;
282 char *logfile; /* NULL means log to stdout (see redisLog) */
283 char *bindaddr;
284 char *dbfilename;
44b38ef4 285 char *appendfilename;
abcb223e 286 char *requirepass;
10c43610 287 int shareobjects;
ed9b544e 288 /* Replication related */
289 int isslave;
d0ccebcf 290 char *masterauth;
ed9b544e 291 char *masterhost;
292 int masterport;
40d224a9 293 redisClient *master; /* client that is master for this slave */
ed9b544e 294 int replstate;
285add55 295 unsigned int maxclients;
d4465900 296 unsigned long maxmemory;
ed9b544e 297 /* Sort parameters - qsort_r() is only available under BSD so we
298 * have to take this state global, in order to pass it to sortCompare() */
299 int sort_desc;
300 int sort_alpha;
301 int sort_bypattern;
302};
303
/* Signature shared by every command implementation. */
304typedef void redisCommandProc(redisClient *c);
/* One entry of the command table (see cmdTable). */
305struct redisCommand {
306 char *name; /* command name as listed in cmdTable */
307 redisCommandProc *proc; /* implementation */
308 int arity; /* NOTE(review): negative values presumably mean "at least N" - confirm */
309 int flags; /* OR of REDIS_CMD_* bits */
310};
311
/* Pairs a function name with an address stored as an integer.
 * NOTE(review): presumably used to symbolize backtraces - confirm. */
de96dbfe 312struct redisFunctionSym {
313 char *name;
56906eef 314 unsigned long pointer;
de96dbfe 315};
316
/* Element being sorted: the object itself plus its sort key, stored either
 * as a numeric score or as an object to compare.
 * NOTE(review): which union member is active presumably depends on the
 * server.sort_* settings - confirm. */
ed9b544e 317typedef struct _redisSortObject {
318 robj *obj;
319 union {
320 double score;
321 robj *cmpobj;
322 } u;
323} redisSortObject;
324
/* A sort operation together with its pattern argument.
 * NOTE(review): 'type' presumably holds one of the REDIS_SORT_* defines -
 * confirm. */
325typedef struct _redisSortOperation {
326 int type;
327 robj *pattern;
328} redisSortOperation;
329
6b47e12e 330/* ZSETs use a specialized version of Skiplists */
331
/* Skiplist node: an array of forward pointers, a single backward pointer,
 * and the (score, object) pair it stores. */
332typedef struct zskiplistNode {
333 struct zskiplistNode **forward;
e3870fab 334 struct zskiplistNode *backward;
6b47e12e 335 double score;
336 robj *obj;
337} zskiplistNode;
338
/* Skiplist container: header and tail nodes, element count and level.
 * NOTE(review): 'level' is presumably the highest level currently in use,
 * bounded by ZSKIPLIST_MAXLEVEL - confirm. */
339typedef struct zskiplist {
e3870fab 340 struct zskiplistNode *header, *tail;
d13f767c 341 unsigned long length;
6b47e12e 342 int level;
343} zskiplist;
344
/* Sorted set: a dictionary paired with a skiplist. */
1812e024 345typedef struct zset {
346 dict *dict;
6b47e12e 347 zskiplist *zsl;
1812e024 348} zset;
349
6b47e12e 350/* Our shared "common" objects: a single global instance ('shared') holding
 * robj pointers.
 * NOTE(review): presumably pre-built replies reused across clients -
 * confirm where they are created. */
351
ed9b544e 352struct sharedObjectsStruct {
c937aa89 353 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
7b45bfb2 354 *colon, *nullbulk, *nullmultibulk,
c937aa89 355 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
356 *outofrangeerr, *plus,
ed9b544e 357 *select0, *select1, *select2, *select3, *select4,
358 *select5, *select6, *select7, *select8, *select9;
359} shared;
360
a7866db6 361/* Global vars that are actually used as constants. The following double
362 * values are used for double on-disk serialization, and are initialized
363 * at runtime to avoid strange compiler optimizations. */
364
365static double R_Zero, R_PosInf, R_NegInf, R_Nan;
366
ed9b544e 367/*================================ Prototypes =============================== */
368
/* Forward declarations of internal helpers used before their definition. */
369static void freeStringObject(robj *o);
370static void freeListObject(robj *o);
371static void freeSetObject(robj *o);
372static void decrRefCount(void *o);
373static robj *createObject(int type, void *ptr);
374static void freeClient(redisClient *c);
f78fd11b 375static int rdbLoad(char *filename);
ed9b544e 376static void addReply(redisClient *c, robj *obj);
377static void addReplySds(redisClient *c, sds s);
378static void incrRefCount(robj *o);
f78fd11b 379static int rdbSaveBackground(char *filename);
ed9b544e 380static robj *createStringObject(char *ptr, size_t len);
87eca727 381static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
44b38ef4 382static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 383static int syncWithMaster(void);
10c43610 384static robj *tryObjectSharing(robj *o);
942a3961 385static int tryObjectEncoding(robj *o);
386static robj *getDecodedObject(const robj *o);
3305306f 387static int removeExpire(redisDb *db, robj *key);
388static int expireIfNeeded(redisDb *db, robj *key);
389static int deleteIfVolatile(redisDb *db, robj *key);
94754ccc 390static int deleteKey(redisDb *db, robj *key);
bb32ede5 391static time_t getExpire(redisDb *db, robj *key);
392static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 393static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 394static void freeMemoryIfNeeded(void);
de96dbfe 395static int processCommand(redisClient *c);
56906eef 396static void setupSigSegvAction(void);
a3b21203 397static void rdbRemoveTempFile(pid_t childpid);
0ea663ea 398static size_t stringObjectLen(robj *o);
638e42ac 399static void processInputBuffer(redisClient *c);
6b47e12e 400static zskiplist *zslCreate(void);
fd8ccf44 401static void zslFree(zskiplist *zsl);
2b59cfdf 402static void zslInsert(zskiplist *zsl, double score, robj *obj);
2895e862 403static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
ed9b544e 404
/* Command implementations, all with the redisCommandProc signature; they are
 * referenced by the command table (cmdTable) below. */
abcb223e 405static void authCommand(redisClient *c);
ed9b544e 406static void pingCommand(redisClient *c);
407static void echoCommand(redisClient *c);
408static void setCommand(redisClient *c);
409static void setnxCommand(redisClient *c);
410static void getCommand(redisClient *c);
411static void delCommand(redisClient *c);
412static void existsCommand(redisClient *c);
413static void incrCommand(redisClient *c);
414static void decrCommand(redisClient *c);
415static void incrbyCommand(redisClient *c);
416static void decrbyCommand(redisClient *c);
417static void selectCommand(redisClient *c);
418static void randomkeyCommand(redisClient *c);
419static void keysCommand(redisClient *c);
420static void dbsizeCommand(redisClient *c);
421static void lastsaveCommand(redisClient *c);
422static void saveCommand(redisClient *c);
423static void bgsaveCommand(redisClient *c);
424static void shutdownCommand(redisClient *c);
425static void moveCommand(redisClient *c);
426static void renameCommand(redisClient *c);
427static void renamenxCommand(redisClient *c);
428static void lpushCommand(redisClient *c);
429static void rpushCommand(redisClient *c);
430static void lpopCommand(redisClient *c);
431static void rpopCommand(redisClient *c);
432static void llenCommand(redisClient *c);
433static void lindexCommand(redisClient *c);
434static void lrangeCommand(redisClient *c);
435static void ltrimCommand(redisClient *c);
436static void typeCommand(redisClient *c);
437static void lsetCommand(redisClient *c);
438static void saddCommand(redisClient *c);
439static void sremCommand(redisClient *c);
a4460ef4 440static void smoveCommand(redisClient *c);
ed9b544e 441static void sismemberCommand(redisClient *c);
442static void scardCommand(redisClient *c);
12fea928 443static void spopCommand(redisClient *c);
2abb95a9 444static void srandmemberCommand(redisClient *c);
ed9b544e 445static void sinterCommand(redisClient *c);
446static void sinterstoreCommand(redisClient *c);
40d224a9 447static void sunionCommand(redisClient *c);
448static void sunionstoreCommand(redisClient *c);
f4f56e1d 449static void sdiffCommand(redisClient *c);
450static void sdiffstoreCommand(redisClient *c);
ed9b544e 451static void syncCommand(redisClient *c);
452static void flushdbCommand(redisClient *c);
453static void flushallCommand(redisClient *c);
454static void sortCommand(redisClient *c);
455static void lremCommand(redisClient *c);
/* NOTE(review): lowercase 'c' in "command" breaks the naming convention used
 * by every other entry; renaming requires touching the definition too. */
0f5f7e9a 456static void rpoplpushcommand(redisClient *c);
ed9b544e 457static void infoCommand(redisClient *c);
70003d28 458static void mgetCommand(redisClient *c);
87eca727 459static void monitorCommand(redisClient *c);
3305306f 460static void expireCommand(redisClient *c);
802e8373 461static void expireatCommand(redisClient *c);
f6b141c5 462static void getsetCommand(redisClient *c);
fd88489a 463static void ttlCommand(redisClient *c);
321b0e13 464static void slaveofCommand(redisClient *c);
7f957c92 465static void debugCommand(redisClient *c);
f6b141c5 466static void msetCommand(redisClient *c);
467static void msetnxCommand(redisClient *c);
fd8ccf44 468static void zaddCommand(redisClient *c);
7db723ad 469static void zincrbyCommand(redisClient *c);
cc812361 470static void zrangeCommand(redisClient *c);
50c55df5 471static void zrangebyscoreCommand(redisClient *c);
e3870fab 472static void zrevrangeCommand(redisClient *c);
3c41331e 473static void zcardCommand(redisClient *c);
1b7106e7 474static void zremCommand(redisClient *c);
6e333bbe 475static void zscoreCommand(redisClient *c);
1807985b 476static void zremrangebyscoreCommand(redisClient *c);
f6b141c5 477
ed9b544e 478/*================================= Globals ================================= */
479
480/* Global vars */
481static struct redisServer server; /* server global state */
/* Command table. Each entry is {name, proc, arity, flags} as declared in
 * struct redisCommand; flags is an OR of REDIS_CMD_* bits. The table is
 * terminated by an all-NULL/zero sentinel entry.
 * NOTE(review): negative arity presumably means "at least |arity|
 * arguments" - confirm against processCommand(). "smembers" is served by
 * sinterCommand. */
482static struct redisCommand cmdTable[] = {
483 {"get",getCommand,2,REDIS_CMD_INLINE},
3fd78bcd 484 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
485 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
5109cdff 486 {"del",delCommand,-2,REDIS_CMD_INLINE},
ed9b544e 487 {"exists",existsCommand,2,REDIS_CMD_INLINE},
3fd78bcd 488 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
489 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
70003d28 490 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
3fd78bcd 491 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
492 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 493 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
494 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
495 {"llen",llenCommand,2,REDIS_CMD_INLINE},
496 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
3fd78bcd 497 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 498 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
499 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
500 {"lrem",lremCommand,4,REDIS_CMD_BULK},
0f5f7e9a 501 {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_BULK},
3fd78bcd 502 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 503 {"srem",sremCommand,3,REDIS_CMD_BULK},
a4460ef4 504 {"smove",smoveCommand,4,REDIS_CMD_BULK},
ed9b544e 505 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
506 {"scard",scardCommand,2,REDIS_CMD_INLINE},
12fea928 507 {"spop",spopCommand,2,REDIS_CMD_INLINE},
2abb95a9 508 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
3fd78bcd 509 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
510 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
511 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
512 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
513 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
514 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 515 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
fd8ccf44 516 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
7db723ad 517 {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
1b7106e7 518 {"zrem",zremCommand,3,REDIS_CMD_BULK},
1807985b 519 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
cc812361 520 {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
50c55df5 521 {"zrangebyscore",zrangebyscoreCommand,4,REDIS_CMD_INLINE},
e3870fab 522 {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
3c41331e 523 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
6e333bbe 524 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 525 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
526 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
f6b141c5 527 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
528 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
529 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 530 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
531 {"select",selectCommand,2,REDIS_CMD_INLINE},
532 {"move",moveCommand,3,REDIS_CMD_INLINE},
533 {"rename",renameCommand,3,REDIS_CMD_INLINE},
534 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
321b0e13 535 {"expire",expireCommand,3,REDIS_CMD_INLINE},
802e8373 536 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
ed9b544e 537 {"keys",keysCommand,2,REDIS_CMD_INLINE},
538 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
abcb223e 539 {"auth",authCommand,2,REDIS_CMD_INLINE},
ed9b544e 540 {"ping",pingCommand,1,REDIS_CMD_INLINE},
541 {"echo",echoCommand,2,REDIS_CMD_BULK},
542 {"save",saveCommand,1,REDIS_CMD_INLINE},
543 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
544 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
545 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
546 {"type",typeCommand,2,REDIS_CMD_INLINE},
547 {"sync",syncCommand,1,REDIS_CMD_INLINE},
548 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
549 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
3fd78bcd 550 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 551 {"info",infoCommand,1,REDIS_CMD_INLINE},
87eca727 552 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
fd88489a 553 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
321b0e13 554 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
7f957c92 555 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
ed9b544e 556 {NULL,NULL,0,0}
557};
bcfc686d 558
ed9b544e 559/*============================ Utility functions ============================ */
560
/* Glob-style pattern matching over length-delimited buffers.
 *
 * Supported syntax:
 *   *       matches any sequence of bytes, including the empty one
 *   ?       matches exactly one byte
 *   [abc]   matches one byte of the set; [^abc] negates the set; [a-z] is
 *           a range; a backslash inside the class escapes the next byte
 *   \x      matches the byte x literally
 *
 * pattern/patternLen and string/stringLen describe the two buffers. Neither
 * buffer is read beyond its stated length, so they do not need to be
 * NUL-terminated. When nocase is non-zero, literal and range comparisons are
 * case-insensitive (escaped bytes inside a class are always compared
 * exactly). Range bounds are compared as unsigned byte values.
 *
 * Returns 1 if the whole string matches the whole pattern, 0 otherwise. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while (patternLen) {
        switch (pattern[0]) {
        case '*':
            /* Collapse runs of '*' without looking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* a trailing '*' matches whatever is left */
            /* Try to match the rest of the pattern at every suffix. */
            while (stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int negate, match;

            /* A class always consumes one byte of the string. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            negate = patternLen && pattern[0] == '^';
            if (negate) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back one byte so that the
                     * common advance after the switch lands exactly on the
                     * end of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (patternLen >= 2 && pattern[0] == '\\') {
                    /* Escaped byte: compared exactly. A backslash that is
                     * the last pattern byte is treated as a literal below. */
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = (unsigned char)pattern[0];
                    int end = (unsigned char)pattern[2];
                    int c = (unsigned char)string[0];

                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (negate)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is exhausted: only trailing '*' may remain. */
            while (patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
683
56906eef 684static void redisLog(int level, const char *fmt, ...) {
ed9b544e 685 va_list ap;
686 FILE *fp;
687
688 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
689 if (!fp) return;
690
691 va_start(ap, fmt);
692 if (level >= server.verbosity) {
693 char *c = ".-*";
1904ecc1 694 char buf[64];
695 time_t now;
696
697 now = time(NULL);
6c9385e0 698 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
1904ecc1 699 fprintf(fp,"%s %c ",buf,c[level]);
ed9b544e 700 vfprintf(fp, fmt, ap);
701 fprintf(fp,"\n");
702 fflush(fp);
703 }
704 va_end(ap);
705
706 if (server.logfile) fclose(fp);
707}
708
709/*====================== Hash table type implementation ==================== */
710
711/* This is a hash table type that uses the SDS dynamic strings library as
712 * keys and redis objects as values (objects can hold SDS strings,
713 * lists, sets). */
714
/* Dictionary destructor callback that releases 'val' with zfree().
 * The privdata argument is not used. */
static void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    zfree(val);
}
720
ed9b544e 721static int sdsDictKeyCompare(void *privdata, const void *key1,
722 const void *key2)
723{
724 int l1,l2;
725 DICT_NOTUSED(privdata);
726
727 l1 = sdslen((sds)key1);
728 l2 = sdslen((sds)key2);
729 if (l1 != l2) return 0;
730 return memcmp(key1, key2, l1) == 0;
731}
732
/* Dictionary destructor callback for robj keys or values: drops one
 * reference through decrRefCount(). privdata is not used. */
static void dictRedisObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    decrRefCount(val);
}
739
942a3961 740static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 741 const void *key2)
742{
743 const robj *o1 = key1, *o2 = key2;
744 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
745}
746
942a3961 747static unsigned int dictObjHash(const void *key) {
ed9b544e 748 const robj *o = key;
749 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
750}
751
942a3961 752static int dictEncObjKeyCompare(void *privdata, const void *key1,
753 const void *key2)
754{
755 const robj *o1 = key1, *o2 = key2;
756
757 if (o1->encoding == REDIS_ENCODING_RAW &&
758 o2->encoding == REDIS_ENCODING_RAW)
759 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
760 else {
761 robj *dec1, *dec2;
762 int cmp;
763
764 dec1 = o1->encoding != REDIS_ENCODING_RAW ?
765 getDecodedObject(o1) : (robj*)o1;
766 dec2 = o2->encoding != REDIS_ENCODING_RAW ?
767 getDecodedObject(o2) : (robj*)o2;
768 cmp = sdsDictKeyCompare(privdata,dec1->ptr,dec2->ptr);
769 if (dec1 != o1) decrRefCount(dec1);
770 if (dec2 != o2) decrRefCount(dec2);
771 return cmp;
772 }
773}
774
775static unsigned int dictEncObjHash(const void *key) {
776 const robj *o = key;
777
778 if (o->encoding == REDIS_ENCODING_RAW)
779 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
780 else {
781 robj *dec = getDecodedObject(o);
782 unsigned int hash = dictGenHashFunction(dec->ptr, sdslen((sds)dec->ptr));
783 decrRefCount(dec);
784 return hash;
785 }
786}
787
/* Dictionary type with robj keys that may be encoded (hashed and compared
 * through the dictEncObj* callbacks). Keys are released with
 * decrRefCount(); there is no value destructor. */
ed9b544e 788static dictType setDictType = {
942a3961 789 dictEncObjHash, /* hash function */
ed9b544e 790 NULL, /* key dup */
791 NULL, /* val dup */
942a3961 792 dictEncObjKeyCompare, /* key compare */
ed9b544e 793 dictRedisObjectDestructor, /* key destructor */
794 NULL /* val destructor */
795};
796
/* Same key handling as setDictType, but values are released with zfree()
 * through dictVanillaFree. */
1812e024 797static dictType zsetDictType = {
798 dictEncObjHash, /* hash function */
799 NULL, /* key dup */
800 NULL, /* val dup */
801 dictEncObjKeyCompare, /* key compare */
802 dictRedisObjectDestructor, /* key destructor */
803 dictVanillaFree /* val destructor */
804};
805
/* Dictionary type with robj keys and robj values. Keys are hashed and
 * compared as sds strings (dictObjHash / dictObjKeyCompare read o->ptr as
 * an sds directly); both keys and values are released with decrRefCount(). */
ed9b544e 806static dictType hashDictType = {
942a3961 807 dictObjHash, /* hash function */
ed9b544e 808 NULL, /* key dup */
809 NULL, /* val dup */
942a3961 810 dictObjKeyCompare, /* key compare */
ed9b544e 811 dictRedisObjectDestructor, /* key destructor */
812 dictRedisObjectDestructor /* val destructor */
813};
814
815/* ========================= Random utility functions ======================= */
816
/* Out of memory handler.
 *
 * Redis generally does not try to recover from out of memory conditions
 * while allocating objects or strings: the networking layer itself relies on
 * heap allocated send buffers, so it is unclear whether the error could even
 * be reported to the client. The policy is therefore to print 'msg' followed
 * by an "Out of memory" notice on stderr, wait one second and abort(). */
static void oom(const char *msg) {
    FILE *out = stderr;

    fprintf(out, "%s: Out of memory\n",msg);
    fflush(out);
    sleep(1);
    abort();
}
828
829/* ====================== Redis server networking stuff ===================== */
56906eef 830static void closeTimedoutClients(void) {
ed9b544e 831 redisClient *c;
ed9b544e 832 listNode *ln;
833 time_t now = time(NULL);
834
6208b3a7 835 listRewind(server.clients);
836 while ((ln = listYield(server.clients)) != NULL) {
ed9b544e 837 c = listNodeValue(ln);
838 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 839 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
ed9b544e 840 (now - c->lastinteraction > server.maxidletime)) {
841 redisLog(REDIS_DEBUG,"Closing idle client");
842 freeClient(c);
843 }
844 }
ed9b544e 845}
846
12fea928 847static int htNeedsResize(dict *dict) {
848 long long size, used;
849
850 size = dictSlots(dict);
851 used = dictSize(dict);
852 return (size && used && size > DICT_HT_INITIAL_SIZE &&
853 (used*100/size < REDIS_HT_MINFILL));
854}
855
0bc03378 856/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
857 * we resize the hash table to save memory */
56906eef 858static void tryResizeHashTables(void) {
0bc03378 859 int j;
860
861 for (j = 0; j < server.dbnum; j++) {
12fea928 862 if (htNeedsResize(server.db[j].dict)) {
863 redisLog(REDIS_DEBUG,"The hash table %d is too sparse, resize it...",j);
0bc03378 864 dictResize(server.db[j].dict);
12fea928 865 redisLog(REDIS_DEBUG,"Hash table %d resized.",j);
0bc03378 866 }
12fea928 867 if (htNeedsResize(server.db[j].expires))
868 dictResize(server.db[j].expires);
0bc03378 869 }
870}
871
56906eef 872static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 873 int j, loops = server.cronloops++;
ed9b544e 874 REDIS_NOTUSED(eventLoop);
875 REDIS_NOTUSED(id);
876 REDIS_NOTUSED(clientData);
877
878 /* Update the global state with the amount of used memory */
879 server.usedmemory = zmalloc_used_memory();
880
0bc03378 881 /* Show some info about non-empty databases */
ed9b544e 882 for (j = 0; j < server.dbnum; j++) {
dec423d9 883 long long size, used, vkeys;
94754ccc 884
3305306f 885 size = dictSlots(server.db[j].dict);
886 used = dictSize(server.db[j].dict);
94754ccc 887 vkeys = dictSize(server.db[j].expires);
c3cb078d 888 if (!(loops % 5) && (used || vkeys)) {
889 redisLog(REDIS_DEBUG,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 890 /* dictPrintStats(server.dict); */
ed9b544e 891 }
ed9b544e 892 }
893
0bc03378 894 /* We don't want to resize the hash tables while a bacground saving
895 * is in progress: the saving child is created using fork() that is
896 * implemented with a copy-on-write semantic in most modern systems, so
897 * if we resize the HT while there is the saving child at work actually
898 * a lot of memory movements in the parent will cause a lot of pages
899 * copied. */
900 if (!server.bgsaveinprogress) tryResizeHashTables();
901
ed9b544e 902 /* Show information about connected clients */
903 if (!(loops % 5)) {
21aecf4b 904 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
ed9b544e 905 listLength(server.clients)-listLength(server.slaves),
906 listLength(server.slaves),
10c43610 907 server.usedmemory,
3305306f 908 dictSize(server.sharingpool));
ed9b544e 909 }
910
911 /* Close connections of timedout clients */
0150db36 912 if (server.maxidletime && !(loops % 10))
ed9b544e 913 closeTimedoutClients();
914
915 /* Check if a background saving in progress terminated */
916 if (server.bgsaveinprogress) {
917 int statloc;
ef3ac64a 918 if (wait3(&statloc,WNOHANG,NULL)) {
ed9b544e 919 int exitcode = WEXITSTATUS(statloc);
9f3c422c 920 int bysignal = WIFSIGNALED(statloc);
921
922 if (!bysignal && exitcode == 0) {
ed9b544e 923 redisLog(REDIS_NOTICE,
924 "Background saving terminated with success");
925 server.dirty = 0;
926 server.lastsave = time(NULL);
9f3c422c 927 } else if (!bysignal && exitcode != 0) {
928 redisLog(REDIS_WARNING, "Background saving error");
ed9b544e 929 } else {
930 redisLog(REDIS_WARNING,
9f3c422c 931 "Background saving terminated by signal");
a3b21203 932 rdbRemoveTempFile(server.bgsavechildpid);
ed9b544e 933 }
934 server.bgsaveinprogress = 0;
9f3c422c 935 server.bgsavechildpid = -1;
a3b21203 936 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
ed9b544e 937 }
938 } else {
939 /* If there is not a background saving in progress check if
940 * we have to save now */
941 time_t now = time(NULL);
942 for (j = 0; j < server.saveparamslen; j++) {
943 struct saveparam *sp = server.saveparams+j;
944
945 if (server.dirty >= sp->changes &&
946 now-server.lastsave > sp->seconds) {
947 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
948 sp->changes, sp->seconds);
f78fd11b 949 rdbSaveBackground(server.dbfilename);
ed9b544e 950 break;
951 }
952 }
953 }
94754ccc 954
f2324293 955 /* Try to expire a few timed out keys. The algorithm used is adaptive and
956 * will use few CPU cycles if there are few expiring keys, otherwise
957 * it will get more aggressive to avoid that too much memory is used by
958 * keys that can be removed from the keyspace. */
94754ccc 959 for (j = 0; j < server.dbnum; j++) {
f2324293 960 int expired;
94754ccc 961 redisDb *db = server.db+j;
94754ccc 962
f2324293 963 /* Continue to expire if at the end of the cycle more than 25%
964 * of the keys were expired. */
965 do {
966 int num = dictSize(db->expires);
94754ccc 967 time_t now = time(NULL);
968
f2324293 969 expired = 0;
94754ccc 970 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
971 num = REDIS_EXPIRELOOKUPS_PER_CRON;
972 while (num--) {
973 dictEntry *de;
974 time_t t;
975
976 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
977 t = (time_t) dictGetEntryVal(de);
978 if (now > t) {
979 deleteKey(db,dictGetEntryKey(de));
f2324293 980 expired++;
94754ccc 981 }
982 }
f2324293 983 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
94754ccc 984 }
985
ed9b544e 986 /* Check if we should connect to a MASTER */
987 if (server.replstate == REDIS_REPL_CONNECT) {
988 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
989 if (syncWithMaster() == REDIS_OK) {
990 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
991 }
992 }
993 return 1000;
994}
995
996static void createSharedObjects(void) {
997 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
998 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
999 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 1000 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1001 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1002 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1003 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1004 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1005 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 1006 /* no such key */
ed9b544e 1007 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
1008 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1009 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 1010 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1011 "-ERR no such key\r\n"));
ed9b544e 1012 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1013 "-ERR syntax error\r\n"));
c937aa89 1014 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1015 "-ERR source and destination objects are the same\r\n"));
1016 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1017 "-ERR index out of range\r\n"));
ed9b544e 1018 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1019 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1020 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1021 shared.select0 = createStringObject("select 0\r\n",10);
1022 shared.select1 = createStringObject("select 1\r\n",10);
1023 shared.select2 = createStringObject("select 2\r\n",10);
1024 shared.select3 = createStringObject("select 3\r\n",10);
1025 shared.select4 = createStringObject("select 4\r\n",10);
1026 shared.select5 = createStringObject("select 5\r\n",10);
1027 shared.select6 = createStringObject("select 6\r\n",10);
1028 shared.select7 = createStringObject("select 7\r\n",10);
1029 shared.select8 = createStringObject("select 8\r\n",10);
1030 shared.select9 = createStringObject("select 9\r\n",10);
1031}
1032
1033static void appendServerSaveParams(time_t seconds, int changes) {
1034 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1035 server.saveparams[server.saveparamslen].seconds = seconds;
1036 server.saveparams[server.saveparamslen].changes = changes;
1037 server.saveparamslen++;
1038}
1039
bcfc686d 1040static void resetServerSaveParams() {
ed9b544e 1041 zfree(server.saveparams);
1042 server.saveparams = NULL;
1043 server.saveparamslen = 0;
1044}
1045
1046static void initServerConfig() {
1047 server.dbnum = REDIS_DEFAULT_DBNUM;
1048 server.port = REDIS_SERVERPORT;
1049 server.verbosity = REDIS_DEBUG;
1050 server.maxidletime = REDIS_MAXIDLETIME;
1051 server.saveparams = NULL;
1052 server.logfile = NULL; /* NULL = log on standard output */
1053 server.bindaddr = NULL;
1054 server.glueoutputbuf = 1;
1055 server.daemonize = 0;
44b38ef4 1056 server.appendonly = 0;
4e141d5a 1057 server.appendfsync = APPENDFSYNC_ALWAYS;
48f0308a 1058 server.lastfsync = time(NULL);
44b38ef4 1059 server.appendfd = -1;
1060 server.appendseldb = -1; /* Make sure the first time will not match */
ed329fcf 1061 server.pidfile = "/var/run/redis.pid";
ed9b544e 1062 server.dbfilename = "dump.rdb";
44b38ef4 1063 server.appendfilename = "appendonly.log";
abcb223e 1064 server.requirepass = NULL;
10c43610 1065 server.shareobjects = 0;
21aecf4b 1066 server.sharingpoolsize = 1024;
285add55 1067 server.maxclients = 0;
3fd78bcd 1068 server.maxmemory = 0;
bcfc686d 1069 resetServerSaveParams();
ed9b544e 1070
1071 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1072 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1073 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1074 /* Replication related */
1075 server.isslave = 0;
d0ccebcf 1076 server.masterauth = NULL;
ed9b544e 1077 server.masterhost = NULL;
1078 server.masterport = 6379;
1079 server.master = NULL;
1080 server.replstate = REDIS_REPL_NONE;
a7866db6 1081
1082 /* Double constants initialization */
1083 R_Zero = 0.0;
1084 R_PosInf = 1.0/R_Zero;
1085 R_NegInf = -1.0/R_Zero;
1086 R_Nan = R_Zero/R_Zero;
ed9b544e 1087}
1088
1089static void initServer() {
1090 int j;
1091
1092 signal(SIGHUP, SIG_IGN);
1093 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1094 setupSigSegvAction();
ed9b544e 1095
1096 server.clients = listCreate();
1097 server.slaves = listCreate();
87eca727 1098 server.monitors = listCreate();
ed9b544e 1099 server.objfreelist = listCreate();
1100 createSharedObjects();
1101 server.el = aeCreateEventLoop();
3305306f 1102 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
10c43610 1103 server.sharingpool = dictCreate(&setDictType,NULL);
ed9b544e 1104 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1105 if (server.fd == -1) {
1106 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1107 exit(1);
1108 }
3305306f 1109 for (j = 0; j < server.dbnum; j++) {
1110 server.db[j].dict = dictCreate(&hashDictType,NULL);
1111 server.db[j].expires = dictCreate(&setDictType,NULL);
1112 server.db[j].id = j;
1113 }
ed9b544e 1114 server.cronloops = 0;
1115 server.bgsaveinprogress = 0;
9f3c422c 1116 server.bgsavechildpid = -1;
ed9b544e 1117 server.lastsave = time(NULL);
1118 server.dirty = 0;
1119 server.usedmemory = 0;
1120 server.stat_numcommands = 0;
1121 server.stat_numconnections = 0;
1122 server.stat_starttime = time(NULL);
d8f8b666 1123 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
44b38ef4 1124
1125 if (server.appendonly) {
71eba477 1126 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
44b38ef4 1127 if (server.appendfd == -1) {
1128 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1129 strerror(errno));
1130 exit(1);
1131 }
1132 }
ed9b544e 1133}
1134
1135/* Empty the whole database */
ca37e9cd 1136static long long emptyDb() {
ed9b544e 1137 int j;
ca37e9cd 1138 long long removed = 0;
ed9b544e 1139
3305306f 1140 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1141 removed += dictSize(server.db[j].dict);
3305306f 1142 dictEmpty(server.db[j].dict);
1143 dictEmpty(server.db[j].expires);
1144 }
ca37e9cd 1145 return removed;
ed9b544e 1146}
1147
/* Convert the string "yes" or "no" (compared ignoring the case) into 1 or 0
 * respectively. Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1153
ed9b544e 1154/* I agree, this is a very rudimental way to load a configuration...
1155 will improve later if the config gets more complex */
1156static void loadServerConfig(char *filename) {
c9a111ac 1157 FILE *fp;
ed9b544e 1158 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1159 int linenum = 0;
1160 sds line = NULL;
c9a111ac 1161
1162 if (filename[0] == '-' && filename[1] == '\0')
1163 fp = stdin;
1164 else {
1165 if ((fp = fopen(filename,"r")) == NULL) {
1166 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1167 exit(1);
1168 }
ed9b544e 1169 }
c9a111ac 1170
ed9b544e 1171 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1172 sds *argv;
1173 int argc, j;
1174
1175 linenum++;
1176 line = sdsnew(buf);
1177 line = sdstrim(line," \t\r\n");
1178
1179 /* Skip comments and blank lines*/
1180 if (line[0] == '#' || line[0] == '\0') {
1181 sdsfree(line);
1182 continue;
1183 }
1184
1185 /* Split into arguments */
1186 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1187 sdstolower(argv[0]);
1188
1189 /* Execute config directives */
bb0b03a3 1190 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1191 server.maxidletime = atoi(argv[1]);
0150db36 1192 if (server.maxidletime < 0) {
ed9b544e 1193 err = "Invalid timeout value"; goto loaderr;
1194 }
bb0b03a3 1195 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1196 server.port = atoi(argv[1]);
1197 if (server.port < 1 || server.port > 65535) {
1198 err = "Invalid port"; goto loaderr;
1199 }
bb0b03a3 1200 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1201 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1202 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1203 int seconds = atoi(argv[1]);
1204 int changes = atoi(argv[2]);
1205 if (seconds < 1 || changes < 0) {
1206 err = "Invalid save parameters"; goto loaderr;
1207 }
1208 appendServerSaveParams(seconds,changes);
bb0b03a3 1209 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1210 if (chdir(argv[1]) == -1) {
1211 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1212 argv[1], strerror(errno));
1213 exit(1);
1214 }
bb0b03a3 1215 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1216 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
1217 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1218 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1219 else {
1220 err = "Invalid log level. Must be one of debug, notice, warning";
1221 goto loaderr;
1222 }
bb0b03a3 1223 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1224 FILE *logfp;
ed9b544e 1225
1226 server.logfile = zstrdup(argv[1]);
bb0b03a3 1227 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1228 zfree(server.logfile);
1229 server.logfile = NULL;
1230 }
1231 if (server.logfile) {
1232 /* Test if we are able to open the file. The server will not
1233 * be able to abort just for this problem later... */
c9a111ac 1234 logfp = fopen(server.logfile,"a");
1235 if (logfp == NULL) {
ed9b544e 1236 err = sdscatprintf(sdsempty(),
1237 "Can't open the log file: %s", strerror(errno));
1238 goto loaderr;
1239 }
c9a111ac 1240 fclose(logfp);
ed9b544e 1241 }
bb0b03a3 1242 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1243 server.dbnum = atoi(argv[1]);
1244 if (server.dbnum < 1) {
1245 err = "Invalid number of databases"; goto loaderr;
1246 }
285add55 1247 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1248 server.maxclients = atoi(argv[1]);
3fd78bcd 1249 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
d4465900 1250 server.maxmemory = strtoll(argv[1], NULL, 10);
bb0b03a3 1251 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1252 server.masterhost = sdsnew(argv[1]);
1253 server.masterport = atoi(argv[2]);
1254 server.replstate = REDIS_REPL_CONNECT;
d0ccebcf 1255 } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
1256 server.masterauth = zstrdup(argv[1]);
bb0b03a3 1257 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1258 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1259 err = "argument must be 'yes' or 'no'"; goto loaderr;
1260 }
bb0b03a3 1261 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
85dd2f3a 1262 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
10c43610 1263 err = "argument must be 'yes' or 'no'"; goto loaderr;
1264 }
e52c65b9 1265 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1266 server.sharingpoolsize = atoi(argv[1]);
1267 if (server.sharingpoolsize < 1) {
1268 err = "invalid object sharing pool size"; goto loaderr;
1269 }
bb0b03a3 1270 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1271 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1272 err = "argument must be 'yes' or 'no'"; goto loaderr;
1273 }
44b38ef4 1274 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1275 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1276 err = "argument must be 'yes' or 'no'"; goto loaderr;
1277 }
48f0308a 1278 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1766c6da 1279 if (!strcasecmp(argv[1],"no")) {
48f0308a 1280 server.appendfsync = APPENDFSYNC_NO;
1766c6da 1281 } else if (!strcasecmp(argv[1],"always")) {
48f0308a 1282 server.appendfsync = APPENDFSYNC_ALWAYS;
1766c6da 1283 } else if (!strcasecmp(argv[1],"everysec")) {
48f0308a 1284 server.appendfsync = APPENDFSYNC_EVERYSEC;
1285 } else {
1286 err = "argument must be 'no', 'always' or 'everysec'";
1287 goto loaderr;
1288 }
bb0b03a3 1289 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
abcb223e 1290 server.requirepass = zstrdup(argv[1]);
bb0b03a3 1291 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
ed329fcf 1292 server.pidfile = zstrdup(argv[1]);
bb0b03a3 1293 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
b8b553c8 1294 server.dbfilename = zstrdup(argv[1]);
ed9b544e 1295 } else {
1296 err = "Bad directive or wrong number of arguments"; goto loaderr;
1297 }
1298 for (j = 0; j < argc; j++)
1299 sdsfree(argv[j]);
1300 zfree(argv);
1301 sdsfree(line);
1302 }
c9a111ac 1303 if (fp != stdin) fclose(fp);
ed9b544e 1304 return;
1305
1306loaderr:
1307 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1308 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1309 fprintf(stderr, ">>> '%s'\n", line);
1310 fprintf(stderr, "%s\n", err);
1311 exit(1);
1312}
1313
1314static void freeClientArgv(redisClient *c) {
1315 int j;
1316
1317 for (j = 0; j < c->argc; j++)
1318 decrRefCount(c->argv[j]);
e8a74421 1319 for (j = 0; j < c->mbargc; j++)
1320 decrRefCount(c->mbargv[j]);
ed9b544e 1321 c->argc = 0;
e8a74421 1322 c->mbargc = 0;
ed9b544e 1323}
1324
1325static void freeClient(redisClient *c) {
1326 listNode *ln;
1327
1328 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1329 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1330 sdsfree(c->querybuf);
1331 listRelease(c->reply);
1332 freeClientArgv(c);
1333 close(c->fd);
1334 ln = listSearchKey(server.clients,c);
1335 assert(ln != NULL);
1336 listDelNode(server.clients,ln);
1337 if (c->flags & REDIS_SLAVE) {
6208b3a7 1338 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1339 close(c->repldbfd);
87eca727 1340 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1341 ln = listSearchKey(l,c);
ed9b544e 1342 assert(ln != NULL);
87eca727 1343 listDelNode(l,ln);
ed9b544e 1344 }
1345 if (c->flags & REDIS_MASTER) {
1346 server.master = NULL;
1347 server.replstate = REDIS_REPL_CONNECT;
1348 }
93ea3759 1349 zfree(c->argv);
e8a74421 1350 zfree(c->mbargv);
ed9b544e 1351 zfree(c);
1352}
1353
cc30e368 1354#define GLUEREPLY_UP_TO (1024)
ed9b544e 1355static void glueReplyBuffersIfNeeded(redisClient *c) {
c28b42ac 1356 int copylen = 0;
1357 char buf[GLUEREPLY_UP_TO];
6208b3a7 1358 listNode *ln;
ed9b544e 1359 robj *o;
1360
6208b3a7 1361 listRewind(c->reply);
1362 while((ln = listYield(c->reply))) {
c28b42ac 1363 int objlen;
1364
ed9b544e 1365 o = ln->value;
c28b42ac 1366 objlen = sdslen(o->ptr);
1367 if (copylen + objlen <= GLUEREPLY_UP_TO) {
1368 memcpy(buf+copylen,o->ptr,objlen);
1369 copylen += objlen;
ed9b544e 1370 listDelNode(c->reply,ln);
c28b42ac 1371 } else {
1372 if (copylen == 0) return;
1373 break;
ed9b544e 1374 }
ed9b544e 1375 }
c28b42ac 1376 /* Now the output buffer is empty, add the new single element */
1377 o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
1378 listAddNodeHead(c->reply,o);
ed9b544e 1379}
1380
1381static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1382 redisClient *c = privdata;
1383 int nwritten = 0, totwritten = 0, objlen;
1384 robj *o;
1385 REDIS_NOTUSED(el);
1386 REDIS_NOTUSED(mask);
1387
2895e862 1388 /* Use writev() if we have enough buffers to send */
7ea870c0 1389 if (!server.glueoutputbuf &&
1390 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
1391 !(c->flags & REDIS_MASTER))
2895e862 1392 {
1393 sendReplyToClientWritev(el, fd, privdata, mask);
1394 return;
1395 }
2895e862 1396
ed9b544e 1397 while(listLength(c->reply)) {
c28b42ac 1398 if (server.glueoutputbuf && listLength(c->reply) > 1)
1399 glueReplyBuffersIfNeeded(c);
1400
ed9b544e 1401 o = listNodeValue(listFirst(c->reply));
1402 objlen = sdslen(o->ptr);
1403
1404 if (objlen == 0) {
1405 listDelNode(c->reply,listFirst(c->reply));
1406 continue;
1407 }
1408
1409 if (c->flags & REDIS_MASTER) {
6f376729 1410 /* Don't reply to a master */
ed9b544e 1411 nwritten = objlen - c->sentlen;
1412 } else {
a4d1ba9a 1413 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 1414 if (nwritten <= 0) break;
1415 }
1416 c->sentlen += nwritten;
1417 totwritten += nwritten;
1418 /* If we fully sent the object on head go to the next one */
1419 if (c->sentlen == objlen) {
1420 listDelNode(c->reply,listFirst(c->reply));
1421 c->sentlen = 0;
1422 }
6f376729 1423 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
12f9d551 1424 * bytes, in a single threaded server it's a good idea to serve
6f376729 1425 * other clients as well, even if a very large request comes from
1426 * super fast link that is always able to accept data (in real world
12f9d551 1427 * scenario think about 'KEYS *' against the loopback interfae) */
6f376729 1428 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 1429 }
1430 if (nwritten == -1) {
1431 if (errno == EAGAIN) {
1432 nwritten = 0;
1433 } else {
1434 redisLog(REDIS_DEBUG,
1435 "Error writing to client: %s", strerror(errno));
1436 freeClient(c);
1437 return;
1438 }
1439 }
1440 if (totwritten > 0) c->lastinteraction = time(NULL);
1441 if (listLength(c->reply) == 0) {
1442 c->sentlen = 0;
1443 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1444 }
1445}
1446
2895e862 1447static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
1448{
1449 redisClient *c = privdata;
1450 int nwritten = 0, totwritten = 0, objlen, willwrite;
1451 robj *o;
1452 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
1453 int offset, ion = 0;
1454 REDIS_NOTUSED(el);
1455 REDIS_NOTUSED(mask);
1456
1457 listNode *node;
1458 while (listLength(c->reply)) {
1459 offset = c->sentlen;
1460 ion = 0;
1461 willwrite = 0;
1462
1463 /* fill-in the iov[] array */
1464 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
1465 o = listNodeValue(node);
1466 objlen = sdslen(o->ptr);
1467
1468 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
1469 break;
1470
1471 if(ion == REDIS_WRITEV_IOVEC_COUNT)
1472 break; /* no more iovecs */
1473
1474 iov[ion].iov_base = ((char*)o->ptr) + offset;
1475 iov[ion].iov_len = objlen - offset;
1476 willwrite += objlen - offset;
1477 offset = 0; /* just for the first item */
1478 ion++;
1479 }
1480
1481 if(willwrite == 0)
1482 break;
1483
1484 /* write all collected blocks at once */
1485 if((nwritten = writev(fd, iov, ion)) < 0) {
1486 if (errno != EAGAIN) {
1487 redisLog(REDIS_DEBUG,
1488 "Error writing to client: %s", strerror(errno));
1489 freeClient(c);
1490 return;
1491 }
1492 break;
1493 }
1494
1495 totwritten += nwritten;
1496 offset = c->sentlen;
1497
1498 /* remove written robjs from c->reply */
1499 while (nwritten && listLength(c->reply)) {
1500 o = listNodeValue(listFirst(c->reply));
1501 objlen = sdslen(o->ptr);
1502
1503 if(nwritten >= objlen - offset) {
1504 listDelNode(c->reply, listFirst(c->reply));
1505 nwritten -= objlen - offset;
1506 c->sentlen = 0;
1507 } else {
1508 /* partial write */
1509 c->sentlen += nwritten;
1510 break;
1511 }
1512 offset = 0;
1513 }
1514 }
1515
1516 if (totwritten > 0)
1517 c->lastinteraction = time(NULL);
1518
1519 if (listLength(c->reply) == 0) {
1520 c->sentlen = 0;
1521 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1522 }
1523}
1524
ed9b544e 1525static struct redisCommand *lookupCommand(char *name) {
1526 int j = 0;
1527 while(cmdTable[j].name != NULL) {
bb0b03a3 1528 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
ed9b544e 1529 j++;
1530 }
1531 return NULL;
1532}
1533
1534/* resetClient prepare the client to process the next command */
1535static void resetClient(redisClient *c) {
1536 freeClientArgv(c);
1537 c->bulklen = -1;
e8a74421 1538 c->multibulk = 0;
ed9b544e 1539}
1540
1541/* If this function gets called we already read a whole
1542 * command, argments are in the client argv/argc fields.
1543 * processCommand() execute the command or prepare the
1544 * server for a bulk read from the client.
1545 *
1546 * If 1 is returned the client is still alive and valid and
1547 * and other operations can be performed by the caller. Otherwise
1548 * if 0 is returned the client was destroied (i.e. after QUIT). */
1549static int processCommand(redisClient *c) {
1550 struct redisCommand *cmd;
1551 long long dirty;
1552
3fd78bcd 1553 /* Free some memory if needed (maxmemory setting) */
1554 if (server.maxmemory) freeMemoryIfNeeded();
1555
e8a74421 1556 /* Handle the multi bulk command type. This is an alternative protocol
1557 * supported by Redis in order to receive commands that are composed of
1558 * multiple binary-safe "bulk" arguments. The latency of processing is
1559 * a bit higher but this allows things like multi-sets, so if this
1560 * protocol is used only for MSET and similar commands this is a big win. */
1561 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1562 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1563 if (c->multibulk <= 0) {
1564 resetClient(c);
1565 return 1;
1566 } else {
1567 decrRefCount(c->argv[c->argc-1]);
1568 c->argc--;
1569 return 1;
1570 }
1571 } else if (c->multibulk) {
1572 if (c->bulklen == -1) {
1573 if (((char*)c->argv[0]->ptr)[0] != '$') {
1574 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1575 resetClient(c);
1576 return 1;
1577 } else {
1578 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1579 decrRefCount(c->argv[0]);
1580 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1581 c->argc--;
1582 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1583 resetClient(c);
1584 return 1;
1585 }
1586 c->argc--;
1587 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1588 return 1;
1589 }
1590 } else {
1591 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1592 c->mbargv[c->mbargc] = c->argv[0];
1593 c->mbargc++;
1594 c->argc--;
1595 c->multibulk--;
1596 if (c->multibulk == 0) {
1597 robj **auxargv;
1598 int auxargc;
1599
1600 /* Here we need to swap the multi-bulk argc/argv with the
1601 * normal argc/argv of the client structure. */
1602 auxargv = c->argv;
1603 c->argv = c->mbargv;
1604 c->mbargv = auxargv;
1605
1606 auxargc = c->argc;
1607 c->argc = c->mbargc;
1608 c->mbargc = auxargc;
1609
1610 /* We need to set bulklen to something different than -1
1611 * in order for the code below to process the command without
1612 * to try to read the last argument of a bulk command as
1613 * a special argument. */
1614 c->bulklen = 0;
1615 /* continue below and process the command */
1616 } else {
1617 c->bulklen = -1;
1618 return 1;
1619 }
1620 }
1621 }
1622 /* -- end of multi bulk commands processing -- */
1623
ed9b544e 1624 /* The QUIT command is handled as a special case. Normal command
1625 * procs are unable to close the client connection safely */
bb0b03a3 1626 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 1627 freeClient(c);
1628 return 0;
1629 }
1630 cmd = lookupCommand(c->argv[0]->ptr);
1631 if (!cmd) {
1632 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1633 resetClient(c);
1634 return 1;
1635 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1636 (c->argc < -cmd->arity)) {
1637 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
1638 resetClient(c);
1639 return 1;
3fd78bcd 1640 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1641 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1642 resetClient(c);
1643 return 1;
ed9b544e 1644 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1645 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1646
1647 decrRefCount(c->argv[c->argc-1]);
1648 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1649 c->argc--;
1650 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1651 resetClient(c);
1652 return 1;
1653 }
1654 c->argc--;
1655 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1656 /* It is possible that the bulk read is already in the
8d0490e7 1657 * buffer. Check this condition and handle it accordingly.
1658 * This is just a fast path, alternative to call processInputBuffer().
1659 * It's a good idea since the code is small and this condition
1660 * happens most of the times. */
ed9b544e 1661 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1662 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1663 c->argc++;
1664 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1665 } else {
1666 return 1;
1667 }
1668 }
10c43610 1669 /* Let's try to share objects on the command arguments vector */
1670 if (server.shareobjects) {
1671 int j;
1672 for(j = 1; j < c->argc; j++)
1673 c->argv[j] = tryObjectSharing(c->argv[j]);
1674 }
942a3961 1675 /* Let's try to encode the bulk object to save space. */
1676 if (cmd->flags & REDIS_CMD_BULK)
1677 tryObjectEncoding(c->argv[c->argc-1]);
1678
e63943a4 1679 /* Check if the user is authenticated */
1680 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1681 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1682 resetClient(c);
1683 return 1;
1684 }
1685
ed9b544e 1686 /* Exec the command */
1687 dirty = server.dirty;
1688 cmd->proc(c);
33ed1a42 1689 if (server.appendonly && server.dirty-dirty)
44b38ef4 1690 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
33ed1a42 1691 if (server.dirty-dirty && listLength(server.slaves))
3305306f 1692 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
87eca727 1693 if (listLength(server.monitors))
3305306f 1694 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
ed9b544e 1695 server.stat_numcommands++;
1696
1697 /* Prepare the client for the next command */
1698 if (c->flags & REDIS_CLOSE) {
1699 freeClient(c);
1700 return 0;
1701 }
1702 resetClient(c);
1703 return 1;
1704}
1705
87eca727 1706static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6208b3a7 1707 listNode *ln;
ed9b544e 1708 int outc = 0, j;
93ea3759 1709 robj **outv;
1710 /* (args*2)+1 is enough room for args, spaces, newlines */
1711 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1712
1713 if (argc <= REDIS_STATIC_ARGS) {
1714 outv = static_outv;
1715 } else {
1716 outv = zmalloc(sizeof(robj*)*(argc*2+1));
93ea3759 1717 }
ed9b544e 1718
1719 for (j = 0; j < argc; j++) {
1720 if (j != 0) outv[outc++] = shared.space;
1721 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1722 robj *lenobj;
1723
1724 lenobj = createObject(REDIS_STRING,
0ea663ea 1725 sdscatprintf(sdsempty(),"%d\r\n",
1726 stringObjectLen(argv[j])));
ed9b544e 1727 lenobj->refcount = 0;
1728 outv[outc++] = lenobj;
1729 }
1730 outv[outc++] = argv[j];
1731 }
1732 outv[outc++] = shared.crlf;
1733
40d224a9 1734 /* Increment all the refcounts at start and decrement at end in order to
1735 * be sure to free objects if there is no slave in a replication state
1736 * able to be feed with commands */
1737 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
6208b3a7 1738 listRewind(slaves);
1739 while((ln = listYield(slaves))) {
ed9b544e 1740 redisClient *slave = ln->value;
40d224a9 1741
1742 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 1743 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 1744
1745 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 1746 if (slave->slaveseldb != dictid) {
1747 robj *selectcmd;
1748
1749 switch(dictid) {
1750 case 0: selectcmd = shared.select0; break;
1751 case 1: selectcmd = shared.select1; break;
1752 case 2: selectcmd = shared.select2; break;
1753 case 3: selectcmd = shared.select3; break;
1754 case 4: selectcmd = shared.select4; break;
1755 case 5: selectcmd = shared.select5; break;
1756 case 6: selectcmd = shared.select6; break;
1757 case 7: selectcmd = shared.select7; break;
1758 case 8: selectcmd = shared.select8; break;
1759 case 9: selectcmd = shared.select9; break;
1760 default:
1761 selectcmd = createObject(REDIS_STRING,
1762 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1763 selectcmd->refcount = 0;
1764 break;
1765 }
1766 addReply(slave,selectcmd);
1767 slave->slaveseldb = dictid;
1768 }
1769 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 1770 }
40d224a9 1771 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 1772 if (outv != static_outv) zfree(outv);
ed9b544e 1773}
1774
638e42ac 1775static void processInputBuffer(redisClient *c) {
ed9b544e 1776again:
1777 if (c->bulklen == -1) {
1778 /* Read the first line of the query */
1779 char *p = strchr(c->querybuf,'\n');
1780 size_t querylen;
644fafa3 1781
ed9b544e 1782 if (p) {
1783 sds query, *argv;
1784 int argc, j;
1785
1786 query = c->querybuf;
1787 c->querybuf = sdsempty();
1788 querylen = 1+(p-(query));
1789 if (sdslen(query) > querylen) {
1790 /* leave data after the first line of the query in the buffer */
1791 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1792 }
1793 *p = '\0'; /* remove "\n" */
1794 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1795 sdsupdatelen(query);
1796
1797 /* Now we can split the query in arguments */
1798 if (sdslen(query) == 0) {
1799 /* Ignore empty query */
1800 sdsfree(query);
1801 return;
1802 }
1803 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 1804 sdsfree(query);
1805
1806 if (c->argv) zfree(c->argv);
1807 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 1808
1809 for (j = 0; j < argc; j++) {
ed9b544e 1810 if (sdslen(argv[j])) {
1811 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1812 c->argc++;
1813 } else {
1814 sdsfree(argv[j]);
1815 }
1816 }
1817 zfree(argv);
1818 /* Execute the command. If the client is still valid
1819 * after processCommand() return and there is something
1820 * on the query buffer try to process the next command. */
af807d87 1821 if (c->argc && processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1822 return;
644fafa3 1823 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
ed9b544e 1824 redisLog(REDIS_DEBUG, "Client protocol error");
1825 freeClient(c);
1826 return;
1827 }
1828 } else {
1829 /* Bulk read handling. Note that if we are at this point
1830 the client already sent a command terminated with a newline,
1831 we are reading the bulk data that is actually the last
1832 argument of the command. */
1833 int qbl = sdslen(c->querybuf);
1834
1835 if (c->bulklen <= qbl) {
1836 /* Copy everything but the final CRLF as final argument */
1837 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1838 c->argc++;
1839 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 1840 /* Process the command. If the client is still valid after
1841 * the processing and there is more data in the buffer
1842 * try to parse it. */
1843 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1844 return;
1845 }
1846 }
1847}
1848
638e42ac 1849static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1850 redisClient *c = (redisClient*) privdata;
1851 char buf[REDIS_IOBUF_LEN];
1852 int nread;
1853 REDIS_NOTUSED(el);
1854 REDIS_NOTUSED(mask);
1855
1856 nread = read(fd, buf, REDIS_IOBUF_LEN);
1857 if (nread == -1) {
1858 if (errno == EAGAIN) {
1859 nread = 0;
1860 } else {
1861 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1862 freeClient(c);
1863 return;
1864 }
1865 } else if (nread == 0) {
1866 redisLog(REDIS_DEBUG, "Client closed connection");
1867 freeClient(c);
1868 return;
1869 }
1870 if (nread) {
1871 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1872 c->lastinteraction = time(NULL);
1873 } else {
1874 return;
1875 }
1876 processInputBuffer(c);
1877}
1878
ed9b544e 1879static int selectDb(redisClient *c, int id) {
1880 if (id < 0 || id >= server.dbnum)
1881 return REDIS_ERR;
3305306f 1882 c->db = &server.db[id];
ed9b544e 1883 return REDIS_OK;
1884}
1885
40d224a9 1886static void *dupClientReplyValue(void *o) {
1887 incrRefCount((robj*)o);
1888 return 0;
1889}
1890
ed9b544e 1891static redisClient *createClient(int fd) {
1892 redisClient *c = zmalloc(sizeof(*c));
1893
1894 anetNonBlock(NULL,fd);
1895 anetTcpNoDelay(NULL,fd);
1896 if (!c) return NULL;
1897 selectDb(c,0);
1898 c->fd = fd;
1899 c->querybuf = sdsempty();
1900 c->argc = 0;
93ea3759 1901 c->argv = NULL;
ed9b544e 1902 c->bulklen = -1;
e8a74421 1903 c->multibulk = 0;
1904 c->mbargc = 0;
1905 c->mbargv = NULL;
ed9b544e 1906 c->sentlen = 0;
1907 c->flags = 0;
1908 c->lastinteraction = time(NULL);
abcb223e 1909 c->authenticated = 0;
40d224a9 1910 c->replstate = REDIS_REPL_NONE;
6b47e12e 1911 c->reply = listCreate();
ed9b544e 1912 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 1913 listSetDupMethod(c->reply,dupClientReplyValue);
ed9b544e 1914 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1915 readQueryFromClient, c, NULL) == AE_ERR) {
1916 freeClient(c);
1917 return NULL;
1918 }
6b47e12e 1919 listAddNodeTail(server.clients,c);
ed9b544e 1920 return c;
1921}
1922
1923static void addReply(redisClient *c, robj *obj) {
1924 if (listLength(c->reply) == 0 &&
6208b3a7 1925 (c->replstate == REDIS_REPL_NONE ||
1926 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 1927 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
1928 sendReplyToClient, c, NULL) == AE_ERR) return;
942a3961 1929 if (obj->encoding != REDIS_ENCODING_RAW) {
1930 obj = getDecodedObject(obj);
1931 } else {
1932 incrRefCount(obj);
1933 }
6b47e12e 1934 listAddNodeTail(c->reply,obj);
ed9b544e 1935}
1936
1937static void addReplySds(redisClient *c, sds s) {
1938 robj *o = createObject(REDIS_STRING,s);
1939 addReply(c,o);
1940 decrRefCount(o);
1941}
1942
e2665397 1943static void addReplyDouble(redisClient *c, double d) {
1944 char buf[128];
1945
1946 snprintf(buf,sizeof(buf),"%.17g",d);
1947 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
1948 strlen(buf),buf));
1949}
1950
942a3961 1951static void addReplyBulkLen(redisClient *c, robj *obj) {
1952 size_t len;
1953
1954 if (obj->encoding == REDIS_ENCODING_RAW) {
1955 len = sdslen(obj->ptr);
1956 } else {
1957 long n = (long)obj->ptr;
1958
1959 len = 1;
1960 if (n < 0) {
1961 len++;
1962 n = -n;
1963 }
1964 while((n = n/10) != 0) {
1965 len++;
1966 }
1967 }
1968 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",len));
1969}
1970
ed9b544e 1971static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
1972 int cport, cfd;
1973 char cip[128];
285add55 1974 redisClient *c;
ed9b544e 1975 REDIS_NOTUSED(el);
1976 REDIS_NOTUSED(mask);
1977 REDIS_NOTUSED(privdata);
1978
1979 cfd = anetAccept(server.neterr, fd, cip, &cport);
1980 if (cfd == AE_ERR) {
1981 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
1982 return;
1983 }
1984 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
285add55 1985 if ((c = createClient(cfd)) == NULL) {
ed9b544e 1986 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
1987 close(cfd); /* May be already closed, just ingore errors */
1988 return;
1989 }
285add55 1990 /* If maxclient directive is set and this is one client more... close the
1991 * connection. Note that we create the client instead to check before
1992 * for this condition, since now the socket is already set in nonblocking
1993 * mode and we can send an error for free using the Kernel I/O */
1994 if (server.maxclients && listLength(server.clients) > server.maxclients) {
1995 char *err = "-ERR max number of clients reached\r\n";
1996
1997 /* That's a best effort error message, don't check write errors */
fee803ba 1998 if (write(c->fd,err,strlen(err)) == -1) {
1999 /* Nothing to do, Just to avoid the warning... */
2000 }
285add55 2001 freeClient(c);
2002 return;
2003 }
ed9b544e 2004 server.stat_numconnections++;
2005}
2006
2007/* ======================= Redis objects implementation ===================== */
2008
2009static robj *createObject(int type, void *ptr) {
2010 robj *o;
2011
2012 if (listLength(server.objfreelist)) {
2013 listNode *head = listFirst(server.objfreelist);
2014 o = listNodeValue(head);
2015 listDelNode(server.objfreelist,head);
2016 } else {
2017 o = zmalloc(sizeof(*o));
2018 }
ed9b544e 2019 o->type = type;
942a3961 2020 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 2021 o->ptr = ptr;
2022 o->refcount = 1;
2023 return o;
2024}
2025
2026static robj *createStringObject(char *ptr, size_t len) {
2027 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
2028}
2029
2030static robj *createListObject(void) {
2031 list *l = listCreate();
2032
ed9b544e 2033 listSetFreeMethod(l,decrRefCount);
2034 return createObject(REDIS_LIST,l);
2035}
2036
2037static robj *createSetObject(void) {
2038 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 2039 return createObject(REDIS_SET,d);
2040}
2041
1812e024 2042static robj *createZsetObject(void) {
6b47e12e 2043 zset *zs = zmalloc(sizeof(*zs));
2044
2045 zs->dict = dictCreate(&zsetDictType,NULL);
2046 zs->zsl = zslCreate();
2047 return createObject(REDIS_ZSET,zs);
1812e024 2048}
2049
ed9b544e 2050static void freeStringObject(robj *o) {
942a3961 2051 if (o->encoding == REDIS_ENCODING_RAW) {
2052 sdsfree(o->ptr);
2053 }
ed9b544e 2054}
2055
2056static void freeListObject(robj *o) {
2057 listRelease((list*) o->ptr);
2058}
2059
2060static void freeSetObject(robj *o) {
2061 dictRelease((dict*) o->ptr);
2062}
2063
fd8ccf44 2064static void freeZsetObject(robj *o) {
2065 zset *zs = o->ptr;
2066
2067 dictRelease(zs->dict);
2068 zslFree(zs->zsl);
2069 zfree(zs);
2070}
2071
ed9b544e 2072static void freeHashObject(robj *o) {
2073 dictRelease((dict*) o->ptr);
2074}
2075
2076static void incrRefCount(robj *o) {
2077 o->refcount++;
94754ccc 2078#ifdef DEBUG_REFCOUNT
2079 if (o->type == REDIS_STRING)
2080 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
2081#endif
ed9b544e 2082}
2083
2084static void decrRefCount(void *obj) {
2085 robj *o = obj;
94754ccc 2086
2087#ifdef DEBUG_REFCOUNT
2088 if (o->type == REDIS_STRING)
2089 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
2090#endif
ed9b544e 2091 if (--(o->refcount) == 0) {
2092 switch(o->type) {
2093 case REDIS_STRING: freeStringObject(o); break;
2094 case REDIS_LIST: freeListObject(o); break;
2095 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 2096 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 2097 case REDIS_HASH: freeHashObject(o); break;
2098 default: assert(0 != 0); break;
2099 }
2100 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2101 !listAddNodeHead(server.objfreelist,o))
2102 zfree(o);
2103 }
2104}
2105
942a3961 2106static robj *lookupKey(redisDb *db, robj *key) {
2107 dictEntry *de = dictFind(db->dict,key);
2108 return de ? dictGetEntryVal(de) : NULL;
2109}
2110
2111static robj *lookupKeyRead(redisDb *db, robj *key) {
2112 expireIfNeeded(db,key);
2113 return lookupKey(db,key);
2114}
2115
2116static robj *lookupKeyWrite(redisDb *db, robj *key) {
2117 deleteIfVolatile(db,key);
2118 return lookupKey(db,key);
2119}
2120
2121static int deleteKey(redisDb *db, robj *key) {
2122 int retval;
2123
2124 /* We need to protect key from destruction: after the first dictDelete()
2125 * it may happen that 'key' is no longer valid if we don't increment
2126 * it's count. This may happen when we get the object reference directly
2127 * from the hash table with dictRandomKey() or dict iterators */
2128 incrRefCount(key);
2129 if (dictSize(db->expires)) dictDelete(db->expires,key);
2130 retval = dictDelete(db->dict,key);
2131 decrRefCount(key);
2132
2133 return retval == DICT_OK;
2134}
2135
10c43610 2136/* Try to share an object against the shared objects pool */
2137static robj *tryObjectSharing(robj *o) {
2138 struct dictEntry *de;
2139 unsigned long c;
2140
3305306f 2141 if (o == NULL || server.shareobjects == 0) return o;
10c43610 2142
2143 assert(o->type == REDIS_STRING);
2144 de = dictFind(server.sharingpool,o);
2145 if (de) {
2146 robj *shared = dictGetEntryKey(de);
2147
2148 c = ((unsigned long) dictGetEntryVal(de))+1;
2149 dictGetEntryVal(de) = (void*) c;
2150 incrRefCount(shared);
2151 decrRefCount(o);
2152 return shared;
2153 } else {
2154 /* Here we are using a stream algorihtm: Every time an object is
2155 * shared we increment its count, everytime there is a miss we
2156 * recrement the counter of a random object. If this object reaches
2157 * zero we remove the object and put the current object instead. */
3305306f 2158 if (dictSize(server.sharingpool) >=
10c43610 2159 server.sharingpoolsize) {
2160 de = dictGetRandomKey(server.sharingpool);
2161 assert(de != NULL);
2162 c = ((unsigned long) dictGetEntryVal(de))-1;
2163 dictGetEntryVal(de) = (void*) c;
2164 if (c == 0) {
2165 dictDelete(server.sharingpool,de->key);
2166 }
2167 } else {
2168 c = 0; /* If the pool is empty we want to add this object */
2169 }
2170 if (c == 0) {
2171 int retval;
2172
2173 retval = dictAdd(server.sharingpool,o,(void*)1);
2174 assert(retval == DICT_OK);
2175 incrRefCount(o);
2176 }
2177 return o;
2178 }
2179}
2180
724a51b1 2181/* Check if the nul-terminated string 's' can be represented by a long
2182 * (that is, is a number that fits into long without any other space or
2183 * character before or after the digits).
2184 *
2185 * If so, the function returns REDIS_OK and *longval is set to the value
2186 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 2187static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 2188 char buf[32], *endptr;
2189 long value;
2190 int slen;
2191
2192 value = strtol(s, &endptr, 10);
2193 if (endptr[0] != '\0') return REDIS_ERR;
2194 slen = snprintf(buf,32,"%ld",value);
2195
2196 /* If the number converted back into a string is not identical
2197 * then it's not possible to encode the string as integer */
f69f2cba 2198 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 2199 if (longval) *longval = value;
2200 return REDIS_OK;
2201}
2202
942a3961 2203/* Try to encode a string object in order to save space */
2204static int tryObjectEncoding(robj *o) {
2205 long value;
942a3961 2206 sds s = o->ptr;
3305306f 2207
942a3961 2208 if (o->encoding != REDIS_ENCODING_RAW)
2209 return REDIS_ERR; /* Already encoded */
3305306f 2210
942a3961 2211 /* It's not save to encode shared objects: shared objects can be shared
2212 * everywhere in the "object space" of Redis. Encoded objects can only
2213 * appear as "values" (and not, for instance, as keys) */
2214 if (o->refcount > 1) return REDIS_ERR;
3305306f 2215
942a3961 2216 /* Currently we try to encode only strings */
2217 assert(o->type == REDIS_STRING);
94754ccc 2218
724a51b1 2219 /* Check if we can represent this string as a long integer */
2220 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
942a3961 2221
2222 /* Ok, this object can be encoded */
2223 o->encoding = REDIS_ENCODING_INT;
2224 sdsfree(o->ptr);
2225 o->ptr = (void*) value;
2226 return REDIS_OK;
2227}
2228
2229/* Get a decoded version of an encoded object (returned as a new object) */
2230static robj *getDecodedObject(const robj *o) {
2231 robj *dec;
2232
2233 assert(o->encoding != REDIS_ENCODING_RAW);
2234 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2235 char buf[32];
2236
2237 snprintf(buf,32,"%ld",(long)o->ptr);
2238 dec = createStringObject(buf,strlen(buf));
2239 return dec;
2240 } else {
2241 assert(1 != 1);
2242 }
3305306f 2243}
2244
d7f43c08 2245/* Compare two string objects via strcmp() or alike.
2246 * Note that the objects may be integer-encoded. In such a case we
2247 * use snprintf() to get a string representation of the numbers on the stack
2248 * and compare the strings, it's much faster than calling getDecodedObject(). */
724a51b1 2249static int compareStringObjects(robj *a, robj *b) {
2250 assert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 2251 char bufa[128], bufb[128], *astr, *bstr;
2252 int bothsds = 1;
724a51b1 2253
e197b441 2254 if (a == b) return 0;
d7f43c08 2255 if (a->encoding != REDIS_ENCODING_RAW) {
2256 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2257 astr = bufa;
2258 bothsds = 0;
724a51b1 2259 } else {
d7f43c08 2260 astr = a->ptr;
724a51b1 2261 }
d7f43c08 2262 if (b->encoding != REDIS_ENCODING_RAW) {
2263 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2264 bstr = bufb;
2265 bothsds = 0;
2266 } else {
2267 bstr = b->ptr;
2268 }
2269 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 2270}
2271
0ea663ea 2272static size_t stringObjectLen(robj *o) {
2273 assert(o->type == REDIS_STRING);
2274 if (o->encoding == REDIS_ENCODING_RAW) {
2275 return sdslen(o->ptr);
2276 } else {
2277 char buf[32];
2278
2279 return snprintf(buf,32,"%ld",(long)o->ptr);
2280 }
2281}
2282
ed9b544e 2283/*============================ DB saving/loading ============================ */
2284
/* Write the one byte 'type' to the file. Returns 0 on success, -1 on
 * write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
2289
/* Write a time to the file as a 32 bit signed integer, using the byte
 * order of the host. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t t32 = (int32_t) t;

    return (fwrite(&t32,4,1,fp) == 0) ? -1 : 0;
}
2295
e3566d4b 2296/* check rdbLoadLen() comments for more info */
f78fd11b 2297static int rdbSaveLen(FILE *fp, uint32_t len) {
2298 unsigned char buf[2];
2299
2300 if (len < (1<<6)) {
2301 /* Save a 6 bit len */
10c43610 2302 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 2303 if (fwrite(buf,1,1,fp) == 0) return -1;
2304 } else if (len < (1<<14)) {
2305 /* Save a 14 bit len */
10c43610 2306 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 2307 buf[1] = len&0xFF;
17be1a4a 2308 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 2309 } else {
2310 /* Save a 32 bit len */
10c43610 2311 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 2312 if (fwrite(buf,1,1,fp) == 0) return -1;
2313 len = htonl(len);
2314 if (fwrite(&len,4,1,fp) == 0) return -1;
2315 }
2316 return 0;
2317}
2318
e3566d4b 2319/* String objects in the form "2391" "-100" without any space and with a
2320 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2321 * encoded as integers to save space */
56906eef 2322static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
e3566d4b 2323 long long value;
2324 char *endptr, buf[32];
2325
2326 /* Check if it's possible to encode this value as a number */
2327 value = strtoll(s, &endptr, 10);
2328 if (endptr[0] != '\0') return 0;
2329 snprintf(buf,32,"%lld",value);
2330
2331 /* If the number converted back into a string is not identical
2332 * then it's not possible to encode the string as integer */
2333 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2334
2335 /* Finally check if it fits in our ranges */
2336 if (value >= -(1<<7) && value <= (1<<7)-1) {
2337 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2338 enc[1] = value&0xFF;
2339 return 2;
2340 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2341 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2342 enc[1] = value&0xFF;
2343 enc[2] = (value>>8)&0xFF;
2344 return 3;
2345 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2346 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2347 enc[1] = value&0xFF;
2348 enc[2] = (value>>8)&0xFF;
2349 enc[3] = (value>>16)&0xFF;
2350 enc[4] = (value>>24)&0xFF;
2351 return 5;
2352 } else {
2353 return 0;
2354 }
2355}
2356
774e3047 2357static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2358 unsigned int comprlen, outlen;
2359 unsigned char byte;
2360 void *out;
2361
2362 /* We require at least four bytes compression for this to be worth it */
2363 outlen = sdslen(obj->ptr)-4;
2364 if (outlen <= 0) return 0;
3a2694c4 2365 if ((out = zmalloc(outlen+1)) == NULL) return 0;
774e3047 2366 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2367 if (comprlen == 0) {
88e85998 2368 zfree(out);
774e3047 2369 return 0;
2370 }
2371 /* Data compressed! Let's save it on disk */
2372 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2373 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2374 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2375 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2376 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 2377 zfree(out);
774e3047 2378 return comprlen;
2379
2380writeerr:
88e85998 2381 zfree(out);
774e3047 2382 return -1;
2383}
2384
e3566d4b 2385/* Save a string objet as [len][data] on disk. If the object is a string
2386 * representation of an integer value we try to safe it in a special form */
942a3961 2387static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2388 size_t len;
e3566d4b 2389 int enclen;
10c43610 2390
942a3961 2391 len = sdslen(obj->ptr);
2392
774e3047 2393 /* Try integer encoding */
e3566d4b 2394 if (len <= 11) {
2395 unsigned char buf[5];
2396 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2397 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2398 return 0;
2399 }
2400 }
774e3047 2401
2402 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 2403 * aaaaaaaaaaaaaaaaaa so skip it */
942a3961 2404 if (len > 20) {
774e3047 2405 int retval;
2406
2407 retval = rdbSaveLzfStringObject(fp,obj);
2408 if (retval == -1) return -1;
2409 if (retval > 0) return 0;
2410 /* retval == 0 means data can't be compressed, save the old way */
2411 }
2412
2413 /* Store verbatim */
10c43610 2414 if (rdbSaveLen(fp,len) == -1) return -1;
2415 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2416 return 0;
2417}
2418
942a3961 2419/* Like rdbSaveStringObjectRaw() but handle encoded objects */
2420static int rdbSaveStringObject(FILE *fp, robj *obj) {
2421 int retval;
2422 robj *dec;
2423
2424 if (obj->encoding != REDIS_ENCODING_RAW) {
2425 dec = getDecodedObject(obj);
2426 retval = rdbSaveStringObjectRaw(fp,dec);
2427 decrRefCount(dec);
2428 return retval;
2429 } else {
2430 return rdbSaveStringObjectRaw(fp,obj);
2431 }
2432}
2433
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the length of the representation.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * Finite values are printed with "%.17g". Returns 0 on success, -1 on write
 * error.
 */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        snprintf((char*)buf+1,sizeof(buf)-1,"%.17g",val);
        /* The digits start at buf+1: buf[0] is the length byte being
         * computed and must not be part of the measured string. */
        buf[0] = strlen((char*)buf+1);
        len = buf[0]+1;
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
2460
ed9b544e 2461/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 2462static int rdbSave(char *filename) {
ed9b544e 2463 dictIterator *di = NULL;
2464 dictEntry *de;
ed9b544e 2465 FILE *fp;
2466 char tmpfile[256];
2467 int j;
bb32ede5 2468 time_t now = time(NULL);
ed9b544e 2469
a3b21203 2470 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 2471 fp = fopen(tmpfile,"w");
2472 if (!fp) {
2473 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2474 return REDIS_ERR;
2475 }
f78fd11b 2476 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 2477 for (j = 0; j < server.dbnum; j++) {
bb32ede5 2478 redisDb *db = server.db+j;
2479 dict *d = db->dict;
3305306f 2480 if (dictSize(d) == 0) continue;
ed9b544e 2481 di = dictGetIterator(d);
2482 if (!di) {
2483 fclose(fp);
2484 return REDIS_ERR;
2485 }
2486
2487 /* Write the SELECT DB opcode */
f78fd11b 2488 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2489 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 2490
2491 /* Iterate this DB writing every entry */
2492 while((de = dictNext(di)) != NULL) {
2493 robj *key = dictGetEntryKey(de);
2494 robj *o = dictGetEntryVal(de);
bb32ede5 2495 time_t expiretime = getExpire(db,key);
2496
2497 /* Save the expire time */
2498 if (expiretime != -1) {
2499 /* If this key is already expired skip it */
2500 if (expiretime < now) continue;
2501 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2502 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2503 }
2504 /* Save the key and associated value */
f78fd11b 2505 if (rdbSaveType(fp,o->type) == -1) goto werr;
10c43610 2506 if (rdbSaveStringObject(fp,key) == -1) goto werr;
f78fd11b 2507 if (o->type == REDIS_STRING) {
ed9b544e 2508 /* Save a string value */
10c43610 2509 if (rdbSaveStringObject(fp,o) == -1) goto werr;
f78fd11b 2510 } else if (o->type == REDIS_LIST) {
ed9b544e 2511 /* Save a list value */
2512 list *list = o->ptr;
6208b3a7 2513 listNode *ln;
ed9b544e 2514
6208b3a7 2515 listRewind(list);
f78fd11b 2516 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
6208b3a7 2517 while((ln = listYield(list))) {
ed9b544e 2518 robj *eleobj = listNodeValue(ln);
f78fd11b 2519
10c43610 2520 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2521 }
f78fd11b 2522 } else if (o->type == REDIS_SET) {
ed9b544e 2523 /* Save a set value */
2524 dict *set = o->ptr;
2525 dictIterator *di = dictGetIterator(set);
2526 dictEntry *de;
2527
3305306f 2528 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
ed9b544e 2529 while((de = dictNext(di)) != NULL) {
10c43610 2530 robj *eleobj = dictGetEntryKey(de);
ed9b544e 2531
10c43610 2532 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2533 }
2534 dictReleaseIterator(di);
2b59cfdf 2535 } else if (o->type == REDIS_ZSET) {
2536 /* Save a set value */
2537 zset *zs = o->ptr;
2538 dictIterator *di = dictGetIterator(zs->dict);
2539 dictEntry *de;
2540
2541 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
2542 while((de = dictNext(di)) != NULL) {
2543 robj *eleobj = dictGetEntryKey(de);
2544 double *score = dictGetEntryVal(de);
2545
2546 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2547 if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
2548 }
2549 dictReleaseIterator(di);
ed9b544e 2550 } else {
2551 assert(0 != 0);
2552 }
2553 }
2554 dictReleaseIterator(di);
2555 }
2556 /* EOF opcode */
f78fd11b 2557 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2558
2559 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 2560 fflush(fp);
2561 fsync(fileno(fp));
2562 fclose(fp);
2563
2564 /* Use RENAME to make sure the DB file is changed atomically only
2565 * if the generate DB file is ok. */
2566 if (rename(tmpfile,filename) == -1) {
325d1eb4 2567 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 2568 unlink(tmpfile);
2569 return REDIS_ERR;
2570 }
2571 redisLog(REDIS_NOTICE,"DB saved on disk");
2572 server.dirty = 0;
2573 server.lastsave = time(NULL);
2574 return REDIS_OK;
2575
2576werr:
2577 fclose(fp);
2578 unlink(tmpfile);
2579 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2580 if (di) dictReleaseIterator(di);
2581 return REDIS_ERR;
2582}
2583
f78fd11b 2584static int rdbSaveBackground(char *filename) {
ed9b544e 2585 pid_t childpid;
2586
2587 if (server.bgsaveinprogress) return REDIS_ERR;
2588 if ((childpid = fork()) == 0) {
2589 /* Child */
2590 close(server.fd);
f78fd11b 2591 if (rdbSave(filename) == REDIS_OK) {
ed9b544e 2592 exit(0);
2593 } else {
2594 exit(1);
2595 }
2596 } else {
2597 /* Parent */
5a7c647e 2598 if (childpid == -1) {
2599 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2600 strerror(errno));
2601 return REDIS_ERR;
2602 }
ed9b544e 2603 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
2604 server.bgsaveinprogress = 1;
9f3c422c 2605 server.bgsavechildpid = childpid;
ed9b544e 2606 return REDIS_OK;
2607 }
2608 return REDIS_OK; /* unreached */
2609}
2610
/* Remove the temporary dump file "temp-<pid>.rdb" of the process with the
 * given pid. Errors are ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
2617
/* Read a one byte type from the file. Returns the type (0-255), or -1 if
 * the byte can't be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    return (fread(&type,1,1,fp) == 0) ? -1 : type;
}
2623
/* Read a time saved by rdbSaveTime(): a 32 bit signed integer in the byte
 * order of the host. Returns -1 if the value can't be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t t32;

    return (fread(&t32,4,1,fp) == 0) ? -1 : (time_t) t32;
}
2629
e3566d4b 2630/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
2631 * of this file for a description of how this are stored on disk.
2632 *
2633 * isencoded is set to 1 if the readed length is not actually a length but
2634 * an "encoding type", check the above comments for more info */
2635static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
f78fd11b 2636 unsigned char buf[2];
2637 uint32_t len;
2638
e3566d4b 2639 if (isencoded) *isencoded = 0;
f78fd11b 2640 if (rdbver == 0) {
2641 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2642 return ntohl(len);
2643 } else {
17be1a4a 2644 int type;
2645
f78fd11b 2646 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
17be1a4a 2647 type = (buf[0]&0xC0)>>6;
2648 if (type == REDIS_RDB_6BITLEN) {
f78fd11b 2649 /* Read a 6 bit len */
e3566d4b 2650 return buf[0]&0x3F;
2651 } else if (type == REDIS_RDB_ENCVAL) {
2652 /* Read a 6 bit len encoding type */
2653 if (isencoded) *isencoded = 1;
2654 return buf[0]&0x3F;
17be1a4a 2655 } else if (type == REDIS_RDB_14BITLEN) {
f78fd11b 2656 /* Read a 14 bit len */
2657 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
2658 return ((buf[0]&0x3F)<<8)|buf[1];
2659 } else {
2660 /* Read a 32 bit len */
2661 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2662 return ntohl(len);
2663 }
2664 }
f78fd11b 2665}
2666
e3566d4b 2667static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
2668 unsigned char enc[4];
2669 long long val;
2670
2671 if (enctype == REDIS_RDB_ENC_INT8) {
2672 if (fread(enc,1,1,fp) == 0) return NULL;
2673 val = (signed char)enc[0];
2674 } else if (enctype == REDIS_RDB_ENC_INT16) {
2675 uint16_t v;
2676 if (fread(enc,2,1,fp) == 0) return NULL;
2677 v = enc[0]|(enc[1]<<8);
2678 val = (int16_t)v;
2679 } else if (enctype == REDIS_RDB_ENC_INT32) {
2680 uint32_t v;
2681 if (fread(enc,4,1,fp) == 0) return NULL;
2682 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2683 val = (int32_t)v;
2684 } else {
2685 val = 0; /* anti-warning */
2686 assert(0!=0);
2687 }
2688 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2689}
2690
88e85998 2691static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2692 unsigned int len, clen;
2693 unsigned char *c = NULL;
2694 sds val = NULL;
2695
2696 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2697 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2698 if ((c = zmalloc(clen)) == NULL) goto err;
2699 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2700 if (fread(c,clen,1,fp) == 0) goto err;
2701 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 2702 zfree(c);
88e85998 2703 return createObject(REDIS_STRING,val);
2704err:
2705 zfree(c);
2706 sdsfree(val);
2707 return NULL;
2708}
2709
e3566d4b 2710static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2711 int isencoded;
2712 uint32_t len;
f78fd11b 2713 sds val;
2714
e3566d4b 2715 len = rdbLoadLen(fp,rdbver,&isencoded);
2716 if (isencoded) {
2717 switch(len) {
2718 case REDIS_RDB_ENC_INT8:
2719 case REDIS_RDB_ENC_INT16:
2720 case REDIS_RDB_ENC_INT32:
3305306f 2721 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
88e85998 2722 case REDIS_RDB_ENC_LZF:
2723 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
e3566d4b 2724 default:
2725 assert(0!=0);
2726 }
2727 }
2728
f78fd11b 2729 if (len == REDIS_RDB_LENERR) return NULL;
2730 val = sdsnewlen(NULL,len);
2731 if (len && fread(val,len,1,fp) == 0) {
2732 sdsfree(val);
2733 return NULL;
2734 }
10c43610 2735 return tryObjectSharing(createObject(REDIS_STRING,val));
f78fd11b 2736}
2737
a7866db6 2738/* For information about double serialization check rdbSaveDoubleValue() */
2739static int rdbLoadDoubleValue(FILE *fp, double *val) {
2740 char buf[128];
2741 unsigned char len;
2742
2743 if (fread(&len,1,1,fp) == 0) return -1;
2744 switch(len) {
2745 case 255: *val = R_NegInf; return 0;
2746 case 254: *val = R_PosInf; return 0;
2747 case 253: *val = R_Nan; return 0;
2748 default:
2749 if (fread(buf,len,1,fp) == 0) return -1;
2750 sscanf(buf, "%lg", val);
2751 return 0;
2752 }
2753}
2754
f78fd11b 2755static int rdbLoad(char *filename) {
ed9b544e 2756 FILE *fp;
f78fd11b 2757 robj *keyobj = NULL;
2758 uint32_t dbid;
bb32ede5 2759 int type, retval, rdbver;
3305306f 2760 dict *d = server.db[0].dict;
bb32ede5 2761 redisDb *db = server.db+0;
f78fd11b 2762 char buf[1024];
bb32ede5 2763 time_t expiretime = -1, now = time(NULL);
2764
ed9b544e 2765 fp = fopen(filename,"r");
2766 if (!fp) return REDIS_ERR;
2767 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 2768 buf[9] = '\0';
2769 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 2770 fclose(fp);
2771 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
2772 return REDIS_ERR;
2773 }
f78fd11b 2774 rdbver = atoi(buf+5);
2775 if (rdbver > 1) {
2776 fclose(fp);
2777 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
2778 return REDIS_ERR;
2779 }
ed9b544e 2780 while(1) {
2781 robj *o;
2782
2783 /* Read type. */
f78fd11b 2784 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 2785 if (type == REDIS_EXPIRETIME) {
2786 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
2787 /* We read the time so we need to read the object type again */
2788 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2789 }
ed9b544e 2790 if (type == REDIS_EOF) break;
2791 /* Handle SELECT DB opcode as a special case */
2792 if (type == REDIS_SELECTDB) {
e3566d4b 2793 if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2794 goto eoferr;
ed9b544e 2795 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 2796 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 2797 exit(1);
2798 }
bb32ede5 2799 db = server.db+dbid;
2800 d = db->dict;
ed9b544e 2801 continue;
2802 }
2803 /* Read key */
f78fd11b 2804 if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
ed9b544e 2805
2806 if (type == REDIS_STRING) {
2807 /* Read string value */
f78fd11b 2808 if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2809 tryObjectEncoding(o);
ed9b544e 2810 } else if (type == REDIS_LIST || type == REDIS_SET) {
2811 /* Read list/set value */
2812 uint32_t listlen;
f78fd11b 2813
e3566d4b 2814 if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
f78fd11b 2815 goto eoferr;
ed9b544e 2816 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
2817 /* Load every single element of the list/set */
2818 while(listlen--) {
2819 robj *ele;
2820
f78fd11b 2821 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2822 tryObjectEncoding(ele);
ed9b544e 2823 if (type == REDIS_LIST) {
6b47e12e 2824 listAddNodeTail((list*)o->ptr,ele);
ed9b544e 2825 } else {
6b47e12e 2826 dictAdd((dict*)o->ptr,ele,NULL);
ed9b544e 2827 }
ed9b544e 2828 }
2b59cfdf 2829 } else if (type == REDIS_ZSET) {
2830 /* Read list/set value */
2831 uint32_t zsetlen;
2832 zset *zs;
2833
2834 if ((zsetlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2835 goto eoferr;
2836 o = createZsetObject();
2837 zs = o->ptr;
2838 /* Load every single element of the list/set */
2839 while(zsetlen--) {
2840 robj *ele;
2841 double *score = zmalloc(sizeof(double));
2842
2843 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2844 tryObjectEncoding(ele);
2845 if (rdbLoadDoubleValue(fp,score) == -1) goto eoferr;
2846 dictAdd(zs->dict,ele,score);
2847 zslInsert(zs->zsl,*score,ele);
2848 incrRefCount(ele); /* added to skiplist */
2849 }
ed9b544e 2850 } else {
2851 assert(0 != 0);
2852 }
2853 /* Add the new object in the hash table */
f78fd11b 2854 retval = dictAdd(d,keyobj,o);
ed9b544e 2855 if (retval == DICT_ERR) {
f78fd11b 2856 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
ed9b544e 2857 exit(1);
2858 }
bb32ede5 2859 /* Set the expire time if needed */
2860 if (expiretime != -1) {
2861 setExpire(db,keyobj,expiretime);
2862 /* Delete this key if already expired */
2863 if (expiretime < now) deleteKey(db,keyobj);
2864 expiretime = -1;
2865 }
f78fd11b 2866 keyobj = o = NULL;
ed9b544e 2867 }
2868 fclose(fp);
2869 return REDIS_OK;
2870
2871eoferr: /* unexpected end of file is handled here with a fatal exit */
e3566d4b 2872 if (keyobj) decrRefCount(keyobj);
f80dff62 2873 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
ed9b544e 2874 exit(1);
2875 return REDIS_ERR; /* Just to avoid warning */
2876}
2877
2878/*================================== Commands =============================== */
2879
abcb223e 2880static void authCommand(redisClient *c) {
2e77c2ee 2881 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
2882 c->authenticated = 1;
2883 addReply(c,shared.ok);
2884 } else {
2885 c->authenticated = 0;
fa4c0aba 2886 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
2887 }
2888}
2889
ed9b544e 2890static void pingCommand(redisClient *c) {
2891 addReply(c,shared.pong);
2892}
2893
2894static void echoCommand(redisClient *c) {
942a3961 2895 addReplyBulkLen(c,c->argv[1]);
ed9b544e 2896 addReply(c,c->argv[1]);
2897 addReply(c,shared.crlf);
2898}
2899
2900/*=================================== Strings =============================== */
2901
2902static void setGenericCommand(redisClient *c, int nx) {
2903 int retval;
2904
3305306f 2905 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2906 if (retval == DICT_ERR) {
2907 if (!nx) {
3305306f 2908 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2909 incrRefCount(c->argv[2]);
2910 } else {
c937aa89 2911 addReply(c,shared.czero);
ed9b544e 2912 return;
2913 }
2914 } else {
2915 incrRefCount(c->argv[1]);
2916 incrRefCount(c->argv[2]);
2917 }
2918 server.dirty++;
3305306f 2919 removeExpire(c->db,c->argv[1]);
c937aa89 2920 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 2921}
2922
2923static void setCommand(redisClient *c) {
a4d1ba9a 2924 setGenericCommand(c,0);
ed9b544e 2925}
2926
2927static void setnxCommand(redisClient *c) {
a4d1ba9a 2928 setGenericCommand(c,1);
ed9b544e 2929}
2930
2931static void getCommand(redisClient *c) {
3305306f 2932 robj *o = lookupKeyRead(c->db,c->argv[1]);
2933
2934 if (o == NULL) {
c937aa89 2935 addReply(c,shared.nullbulk);
ed9b544e 2936 } else {
ed9b544e 2937 if (o->type != REDIS_STRING) {
c937aa89 2938 addReply(c,shared.wrongtypeerr);
ed9b544e 2939 } else {
942a3961 2940 addReplyBulkLen(c,o);
ed9b544e 2941 addReply(c,o);
2942 addReply(c,shared.crlf);
2943 }
2944 }
2945}
2946
f6b141c5 2947static void getsetCommand(redisClient *c) {
a431eb74 2948 getCommand(c);
2949 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
2950 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2951 } else {
2952 incrRefCount(c->argv[1]);
2953 }
2954 incrRefCount(c->argv[2]);
2955 server.dirty++;
2956 removeExpire(c->db,c->argv[1]);
2957}
2958
70003d28 2959static void mgetCommand(redisClient *c) {
70003d28 2960 int j;
2961
c937aa89 2962 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 2963 for (j = 1; j < c->argc; j++) {
3305306f 2964 robj *o = lookupKeyRead(c->db,c->argv[j]);
2965 if (o == NULL) {
c937aa89 2966 addReply(c,shared.nullbulk);
70003d28 2967 } else {
70003d28 2968 if (o->type != REDIS_STRING) {
c937aa89 2969 addReply(c,shared.nullbulk);
70003d28 2970 } else {
942a3961 2971 addReplyBulkLen(c,o);
70003d28 2972 addReply(c,o);
2973 addReply(c,shared.crlf);
2974 }
2975 }
2976 }
2977}
2978
d68ed120 2979static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 2980 long long value;
2981 int retval;
2982 robj *o;
2983
3305306f 2984 o = lookupKeyWrite(c->db,c->argv[1]);
2985 if (o == NULL) {
ed9b544e 2986 value = 0;
2987 } else {
ed9b544e 2988 if (o->type != REDIS_STRING) {
2989 value = 0;
2990 } else {
2991 char *eptr;
2992
942a3961 2993 if (o->encoding == REDIS_ENCODING_RAW)
2994 value = strtoll(o->ptr, &eptr, 10);
2995 else if (o->encoding == REDIS_ENCODING_INT)
2996 value = (long)o->ptr;
2997 else
2998 assert(1 != 1);
ed9b544e 2999 }
3000 }
3001
3002 value += incr;
3003 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
942a3961 3004 tryObjectEncoding(o);
3305306f 3005 retval = dictAdd(c->db->dict,c->argv[1],o);
ed9b544e 3006 if (retval == DICT_ERR) {
3305306f 3007 dictReplace(c->db->dict,c->argv[1],o);
3008 removeExpire(c->db,c->argv[1]);
ed9b544e 3009 } else {
3010 incrRefCount(c->argv[1]);
3011 }
3012 server.dirty++;
c937aa89 3013 addReply(c,shared.colon);
ed9b544e 3014 addReply(c,o);
3015 addReply(c,shared.crlf);
3016}
3017
3018static void incrCommand(redisClient *c) {
a4d1ba9a 3019 incrDecrCommand(c,1);
ed9b544e 3020}
3021
3022static void decrCommand(redisClient *c) {
a4d1ba9a 3023 incrDecrCommand(c,-1);
ed9b544e 3024}
3025
3026static void incrbyCommand(redisClient *c) {
d68ed120 3027 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3028 incrDecrCommand(c,incr);
ed9b544e 3029}
3030
3031static void decrbyCommand(redisClient *c) {
d68ed120 3032 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3033 incrDecrCommand(c,-incr);
ed9b544e 3034}
3035
3036/* ========================= Type agnostic commands ========================= */
3037
3038static void delCommand(redisClient *c) {
5109cdff 3039 int deleted = 0, j;
3040
3041 for (j = 1; j < c->argc; j++) {
3042 if (deleteKey(c->db,c->argv[j])) {
3043 server.dirty++;
3044 deleted++;
3045 }
3046 }
3047 switch(deleted) {
3048 case 0:
c937aa89 3049 addReply(c,shared.czero);
5109cdff 3050 break;
3051 case 1:
3052 addReply(c,shared.cone);
3053 break;
3054 default:
3055 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
3056 break;
ed9b544e 3057 }
3058}
3059
3060static void existsCommand(redisClient *c) {
3305306f 3061 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
ed9b544e 3062}
3063
3064static void selectCommand(redisClient *c) {
3065 int id = atoi(c->argv[1]->ptr);
3066
3067 if (selectDb(c,id) == REDIS_ERR) {
774e3047 3068 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 3069 } else {
3070 addReply(c,shared.ok);
3071 }
3072}
3073
3074static void randomkeyCommand(redisClient *c) {
3075 dictEntry *de;
3305306f 3076
3077 while(1) {
3078 de = dictGetRandomKey(c->db->dict);
ce7bef07 3079 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3305306f 3080 }
ed9b544e 3081 if (de == NULL) {
ce7bef07 3082 addReply(c,shared.plus);
ed9b544e 3083 addReply(c,shared.crlf);
3084 } else {
c937aa89 3085 addReply(c,shared.plus);
ed9b544e 3086 addReply(c,dictGetEntryKey(de));
3087 addReply(c,shared.crlf);
3088 }
3089}
3090
3091static void keysCommand(redisClient *c) {
3092 dictIterator *di;
3093 dictEntry *de;
3094 sds pattern = c->argv[1]->ptr;
3095 int plen = sdslen(pattern);
3096 int numkeys = 0, keyslen = 0;
3097 robj *lenobj = createObject(REDIS_STRING,NULL);
3098
3305306f 3099 di = dictGetIterator(c->db->dict);
ed9b544e 3100 addReply(c,lenobj);
3101 decrRefCount(lenobj);
3102 while((de = dictNext(di)) != NULL) {
3103 robj *keyobj = dictGetEntryKey(de);
3305306f 3104
ed9b544e 3105 sds key = keyobj->ptr;
3106 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3107 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3305306f 3108 if (expireIfNeeded(c->db,keyobj) == 0) {
3109 if (numkeys != 0)
3110 addReply(c,shared.space);
3111 addReply(c,keyobj);
3112 numkeys++;
3113 keyslen += sdslen(key);
3114 }
ed9b544e 3115 }
3116 }
3117 dictReleaseIterator(di);
c937aa89 3118 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
ed9b544e 3119 addReply(c,shared.crlf);
3120}
3121
3122static void dbsizeCommand(redisClient *c) {
3123 addReplySds(c,
3305306f 3124 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 3125}
3126
3127static void lastsaveCommand(redisClient *c) {
3128 addReplySds(c,
c937aa89 3129 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 3130}
3131
3132static void typeCommand(redisClient *c) {
3305306f 3133 robj *o;
ed9b544e 3134 char *type;
3305306f 3135
3136 o = lookupKeyRead(c->db,c->argv[1]);
3137 if (o == NULL) {
c937aa89 3138 type = "+none";
ed9b544e 3139 } else {
ed9b544e 3140 switch(o->type) {
c937aa89 3141 case REDIS_STRING: type = "+string"; break;
3142 case REDIS_LIST: type = "+list"; break;
3143 case REDIS_SET: type = "+set"; break;
412a8bce 3144 case REDIS_ZSET: type = "+zset"; break;
ed9b544e 3145 default: type = "unknown"; break;
3146 }
3147 }
3148 addReplySds(c,sdsnew(type));
3149 addReply(c,shared.crlf);
3150}
3151
3152static void saveCommand(redisClient *c) {
05557f6d 3153 if (server.bgsaveinprogress) {
3154 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3155 return;
3156 }
f78fd11b 3157 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 3158 addReply(c,shared.ok);
3159 } else {
3160 addReply(c,shared.err);
3161 }
3162}
3163
3164static void bgsaveCommand(redisClient *c) {
3165 if (server.bgsaveinprogress) {
3166 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3167 return;
3168 }
f78fd11b 3169 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
ed9b544e 3170 addReply(c,shared.ok);
3171 } else {
3172 addReply(c,shared.err);
3173 }
3174}
3175
3176static void shutdownCommand(redisClient *c) {
3177 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
a3b21203 3178 /* Kill the saving child if there is a background saving in progress.
3179 We want to avoid race conditions, for instance our saving child may
3180 overwrite the synchronous saving did by SHUTDOWN. */
9f3c422c 3181 if (server.bgsaveinprogress) {
3182 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3183 kill(server.bgsavechildpid,SIGKILL);
a3b21203 3184 rdbRemoveTempFile(server.bgsavechildpid);
9f3c422c 3185 }
a3b21203 3186 /* SYNC SAVE */
f78fd11b 3187 if (rdbSave(server.dbfilename) == REDIS_OK) {
9f3c422c 3188 if (server.daemonize)
b284af55 3189 unlink(server.pidfile);
b284af55 3190 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
ed9b544e 3191 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3192 exit(1);
3193 } else {
a3b21203 3194 /* Ooops.. error saving! The best we can do is to continue operating.
3195 * Note that if there was a background saving process, in the next
3196 * cron() Redis will be notified that the background saving aborted,
3197 * handling special stuff like slaves pending for synchronization... */
ed9b544e 3198 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3199 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3200 }
3201}
3202
3203static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 3204 robj *o;
3205
3206 /* To use the same key as src and dst is probably an error */
3207 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 3208 addReply(c,shared.sameobjecterr);
ed9b544e 3209 return;
3210 }
3211
3305306f 3212 o = lookupKeyWrite(c->db,c->argv[1]);
3213 if (o == NULL) {
c937aa89 3214 addReply(c,shared.nokeyerr);
ed9b544e 3215 return;
3216 }
ed9b544e 3217 incrRefCount(o);
3305306f 3218 deleteIfVolatile(c->db,c->argv[2]);
3219 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
ed9b544e 3220 if (nx) {
3221 decrRefCount(o);
c937aa89 3222 addReply(c,shared.czero);
ed9b544e 3223 return;
3224 }
3305306f 3225 dictReplace(c->db->dict,c->argv[2],o);
ed9b544e 3226 } else {
3227 incrRefCount(c->argv[2]);
3228 }
3305306f 3229 deleteKey(c->db,c->argv[1]);
ed9b544e 3230 server.dirty++;
c937aa89 3231 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 3232}
3233
3234static void renameCommand(redisClient *c) {
3235 renameGenericCommand(c,0);
3236}
3237
3238static void renamenxCommand(redisClient *c) {
3239 renameGenericCommand(c,1);
3240}
3241
3242static void moveCommand(redisClient *c) {
3305306f 3243 robj *o;
3244 redisDb *src, *dst;
ed9b544e 3245 int srcid;
3246
3247 /* Obtain source and target DB pointers */
3305306f 3248 src = c->db;
3249 srcid = c->db->id;
ed9b544e 3250 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 3251 addReply(c,shared.outofrangeerr);
ed9b544e 3252 return;
3253 }
3305306f 3254 dst = c->db;
3255 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 3256
3257 /* If the user is moving using as target the same
3258 * DB as the source DB it is probably an error. */
3259 if (src == dst) {
c937aa89 3260 addReply(c,shared.sameobjecterr);
ed9b544e 3261 return;
3262 }
3263
3264 /* Check if the element exists and get a reference */
3305306f 3265 o = lookupKeyWrite(c->db,c->argv[1]);
3266 if (!o) {
c937aa89 3267 addReply(c,shared.czero);
ed9b544e 3268 return;
3269 }
3270
3271 /* Try to add the element to the target DB */
3305306f 3272 deleteIfVolatile(dst,c->argv[1]);
3273 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
c937aa89 3274 addReply(c,shared.czero);
ed9b544e 3275 return;
3276 }
3305306f 3277 incrRefCount(c->argv[1]);
ed9b544e 3278 incrRefCount(o);
3279
3280 /* OK! key moved, free the entry in the source DB */
3305306f 3281 deleteKey(src,c->argv[1]);
ed9b544e 3282 server.dirty++;
c937aa89 3283 addReply(c,shared.cone);
ed9b544e 3284}
3285
3286/* =================================== Lists ================================ */
3287static void pushGenericCommand(redisClient *c, int where) {
3288 robj *lobj;
ed9b544e 3289 list *list;
3305306f 3290
3291 lobj = lookupKeyWrite(c->db,c->argv[1]);
3292 if (lobj == NULL) {
ed9b544e 3293 lobj = createListObject();
3294 list = lobj->ptr;
3295 if (where == REDIS_HEAD) {
6b47e12e 3296 listAddNodeHead(list,c->argv[2]);
ed9b544e 3297 } else {
6b47e12e 3298 listAddNodeTail(list,c->argv[2]);
ed9b544e 3299 }
3305306f 3300 dictAdd(c->db->dict,c->argv[1],lobj);
ed9b544e 3301 incrRefCount(c->argv[1]);
3302 incrRefCount(c->argv[2]);
3303 } else {
ed9b544e 3304 if (lobj->type != REDIS_LIST) {
3305 addReply(c,shared.wrongtypeerr);
3306 return;
3307 }
3308 list = lobj->ptr;
3309 if (where == REDIS_HEAD) {
6b47e12e 3310 listAddNodeHead(list,c->argv[2]);
ed9b544e 3311 } else {
6b47e12e 3312 listAddNodeTail(list,c->argv[2]);
ed9b544e 3313 }
3314 incrRefCount(c->argv[2]);
3315 }
3316 server.dirty++;
3317 addReply(c,shared.ok);
3318}
3319
3320static void lpushCommand(redisClient *c) {
3321 pushGenericCommand(c,REDIS_HEAD);
3322}
3323
3324static void rpushCommand(redisClient *c) {
3325 pushGenericCommand(c,REDIS_TAIL);
3326}
3327
3328static void llenCommand(redisClient *c) {
3305306f 3329 robj *o;
ed9b544e 3330 list *l;
3331
3305306f 3332 o = lookupKeyRead(c->db,c->argv[1]);
3333 if (o == NULL) {
c937aa89 3334 addReply(c,shared.czero);
ed9b544e 3335 return;
3336 } else {
ed9b544e 3337 if (o->type != REDIS_LIST) {
c937aa89 3338 addReply(c,shared.wrongtypeerr);
ed9b544e 3339 } else {
3340 l = o->ptr;
c937aa89 3341 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
ed9b544e 3342 }
3343 }
3344}
3345
3346static void lindexCommand(redisClient *c) {
3305306f 3347 robj *o;
ed9b544e 3348 int index = atoi(c->argv[2]->ptr);
3349
3305306f 3350 o = lookupKeyRead(c->db,c->argv[1]);
3351 if (o == NULL) {
c937aa89 3352 addReply(c,shared.nullbulk);
ed9b544e 3353 } else {
ed9b544e 3354 if (o->type != REDIS_LIST) {
c937aa89 3355 addReply(c,shared.wrongtypeerr);
ed9b544e 3356 } else {
3357 list *list = o->ptr;
3358 listNode *ln;
3359
3360 ln = listIndex(list, index);
3361 if (ln == NULL) {
c937aa89 3362 addReply(c,shared.nullbulk);
ed9b544e 3363 } else {
3364 robj *ele = listNodeValue(ln);
942a3961 3365 addReplyBulkLen(c,ele);
ed9b544e 3366 addReply(c,ele);
3367 addReply(c,shared.crlf);
3368 }
3369 }
3370 }
3371}
3372
3373static void lsetCommand(redisClient *c) {
3305306f 3374 robj *o;
ed9b544e 3375 int index = atoi(c->argv[2]->ptr);
3376
3305306f 3377 o = lookupKeyWrite(c->db,c->argv[1]);
3378 if (o == NULL) {
ed9b544e 3379 addReply(c,shared.nokeyerr);
3380 } else {
ed9b544e 3381 if (o->type != REDIS_LIST) {
3382 addReply(c,shared.wrongtypeerr);
3383 } else {
3384 list *list = o->ptr;
3385 listNode *ln;
3386
3387 ln = listIndex(list, index);
3388 if (ln == NULL) {
c937aa89 3389 addReply(c,shared.outofrangeerr);
ed9b544e 3390 } else {
3391 robj *ele = listNodeValue(ln);
3392
3393 decrRefCount(ele);
3394 listNodeValue(ln) = c->argv[3];
3395 incrRefCount(c->argv[3]);
3396 addReply(c,shared.ok);
3397 server.dirty++;
3398 }
3399 }
3400 }
3401}
3402
3403static void popGenericCommand(redisClient *c, int where) {
3305306f 3404 robj *o;
3405
3406 o = lookupKeyWrite(c->db,c->argv[1]);
3407 if (o == NULL) {
c937aa89 3408 addReply(c,shared.nullbulk);
ed9b544e 3409 } else {
ed9b544e 3410 if (o->type != REDIS_LIST) {
c937aa89 3411 addReply(c,shared.wrongtypeerr);
ed9b544e 3412 } else {
3413 list *list = o->ptr;
3414 listNode *ln;
3415
3416 if (where == REDIS_HEAD)
3417 ln = listFirst(list);
3418 else
3419 ln = listLast(list);
3420
3421 if (ln == NULL) {
c937aa89 3422 addReply(c,shared.nullbulk);
ed9b544e 3423 } else {
3424 robj *ele = listNodeValue(ln);
942a3961 3425 addReplyBulkLen(c,ele);
ed9b544e 3426 addReply(c,ele);
3427 addReply(c,shared.crlf);
3428 listDelNode(list,ln);
3429 server.dirty++;
3430 }
3431 }
3432 }
3433}
3434
3435static void lpopCommand(redisClient *c) {
3436 popGenericCommand(c,REDIS_HEAD);
3437}
3438
3439static void rpopCommand(redisClient *c) {
3440 popGenericCommand(c,REDIS_TAIL);
3441}
3442
3443static void lrangeCommand(redisClient *c) {
3305306f 3444 robj *o;
ed9b544e 3445 int start = atoi(c->argv[2]->ptr);
3446 int end = atoi(c->argv[3]->ptr);
3305306f 3447
3448 o = lookupKeyRead(c->db,c->argv[1]);
3449 if (o == NULL) {
c937aa89 3450 addReply(c,shared.nullmultibulk);
ed9b544e 3451 } else {
ed9b544e 3452 if (o->type != REDIS_LIST) {
c937aa89 3453 addReply(c,shared.wrongtypeerr);
ed9b544e 3454 } else {
3455 list *list = o->ptr;
3456 listNode *ln;
3457 int llen = listLength(list);
3458 int rangelen, j;
3459 robj *ele;
3460
3461 /* convert negative indexes */
3462 if (start < 0) start = llen+start;
3463 if (end < 0) end = llen+end;
3464 if (start < 0) start = 0;
3465 if (end < 0) end = 0;
3466
3467 /* indexes sanity checks */
3468 if (start > end || start >= llen) {
3469 /* Out of range start or start > end result in empty list */
c937aa89 3470 addReply(c,shared.emptymultibulk);
ed9b544e 3471 return;
3472 }
3473 if (end >= llen) end = llen-1;
3474 rangelen = (end-start)+1;
3475
3476 /* Return the result in form of a multi-bulk reply */
3477 ln = listIndex(list, start);
c937aa89 3478 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
ed9b544e 3479 for (j = 0; j < rangelen; j++) {
3480 ele = listNodeValue(ln);
942a3961 3481 addReplyBulkLen(c,ele);
ed9b544e 3482 addReply(c,ele);
3483 addReply(c,shared.crlf);
3484 ln = ln->next;
3485 }
3486 }
3487 }
3488}
3489
3490static void ltrimCommand(redisClient *c) {
3305306f 3491 robj *o;
ed9b544e 3492 int start = atoi(c->argv[2]->ptr);
3493 int end = atoi(c->argv[3]->ptr);
3494
3305306f 3495 o = lookupKeyWrite(c->db,c->argv[1]);
3496 if (o == NULL) {
ed9b544e 3497 addReply(c,shared.nokeyerr);
3498 } else {
ed9b544e 3499 if (o->type != REDIS_LIST) {
3500 addReply(c,shared.wrongtypeerr);
3501 } else {
3502 list *list = o->ptr;
3503 listNode *ln;
3504 int llen = listLength(list);
3505 int j, ltrim, rtrim;
3506
3507 /* convert negative indexes */
3508 if (start < 0) start = llen+start;
3509 if (end < 0) end = llen+end;
3510 if (start < 0) start = 0;
3511 if (end < 0) end = 0;
3512
3513 /* indexes sanity checks */
3514 if (start > end || start >= llen) {
3515 /* Out of range start or start > end result in empty list */
3516 ltrim = llen;
3517 rtrim = 0;
3518 } else {
3519 if (end >= llen) end = llen-1;
3520 ltrim = start;
3521 rtrim = llen-end-1;
3522 }
3523
3524 /* Remove list elements to perform the trim */
3525 for (j = 0; j < ltrim; j++) {
3526 ln = listFirst(list);
3527 listDelNode(list,ln);
3528 }
3529 for (j = 0; j < rtrim; j++) {
3530 ln = listLast(list);
3531 listDelNode(list,ln);
3532 }
ed9b544e 3533 server.dirty++;
e59229a2 3534 addReply(c,shared.ok);
ed9b544e 3535 }
3536 }
3537}
3538
3539static void lremCommand(redisClient *c) {
3305306f 3540 robj *o;
ed9b544e 3541
3305306f 3542 o = lookupKeyWrite(c->db,c->argv[1]);
3543 if (o == NULL) {
33c08b39 3544 addReply(c,shared.czero);
ed9b544e 3545 } else {
ed9b544e 3546 if (o->type != REDIS_LIST) {
c937aa89 3547 addReply(c,shared.wrongtypeerr);
ed9b544e 3548 } else {
3549 list *list = o->ptr;
3550 listNode *ln, *next;
3551 int toremove = atoi(c->argv[2]->ptr);
3552 int removed = 0;
3553 int fromtail = 0;
3554
3555 if (toremove < 0) {
3556 toremove = -toremove;
3557 fromtail = 1;
3558 }
3559 ln = fromtail ? list->tail : list->head;
3560 while (ln) {
ed9b544e 3561 robj *ele = listNodeValue(ln);
a4d1ba9a 3562
3563 next = fromtail ? ln->prev : ln->next;
724a51b1 3564 if (compareStringObjects(ele,c->argv[3]) == 0) {
ed9b544e 3565 listDelNode(list,ln);
3566 server.dirty++;
3567 removed++;
3568 if (toremove && removed == toremove) break;
3569 }
3570 ln = next;
3571 }
c937aa89 3572 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 3573 }
3574 }
3575}
3576
12f9d551 3577/* This is the semantic of this command:
0f5f7e9a 3578 * RPOPLPUSH srclist dstlist:
12f9d551 3579 * IF LLEN(srclist) > 0
3580 * element = RPOP srclist
3581 * LPUSH dstlist element
3582 * RETURN element
3583 * ELSE
3584 * RETURN nil
3585 * END
3586 * END
3587 *
3588 * The idea is to be able to get an element from a list in a reliable way
3589 * since the element is not just returned but pushed against another list
3590 * as well. This command was originally proposed by Ezra Zygmuntowicz.
3591 */
0f5f7e9a 3592static void rpoplpushcommand(redisClient *c) {
12f9d551 3593 robj *sobj;
3594
3595 sobj = lookupKeyWrite(c->db,c->argv[1]);
3596 if (sobj == NULL) {
3597 addReply(c,shared.nullbulk);
3598 } else {
3599 if (sobj->type != REDIS_LIST) {
3600 addReply(c,shared.wrongtypeerr);
3601 } else {
3602 list *srclist = sobj->ptr;
3603 listNode *ln = listLast(srclist);
3604
3605 if (ln == NULL) {
3606 addReply(c,shared.nullbulk);
3607 } else {
3608 robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
3609 robj *ele = listNodeValue(ln);
3610 list *dstlist;
3611
3612 if (dobj == NULL) {
3613
3614 /* Create the list if the key does not exist */
3615 dobj = createListObject();
3616 dictAdd(c->db->dict,c->argv[2],dobj);
3617 incrRefCount(c->argv[2]);
3618 } else if (dobj->type != REDIS_LIST) {
3619 addReply(c,shared.wrongtypeerr);
3620 return;
3621 }
3622 /* Add the element to the target list */
3623 dstlist = dobj->ptr;
3624 listAddNodeHead(dstlist,ele);
3625 incrRefCount(ele);
3626
3627 /* Send the element to the client as reply as well */
3628 addReplyBulkLen(c,ele);
3629 addReply(c,ele);
3630 addReply(c,shared.crlf);
3631
3632 /* Finally remove the element from the source list */
3633 listDelNode(srclist,ln);
3634 server.dirty++;
3635 }
3636 }
3637 }
3638}
3639
3640
ed9b544e 3641/* ==================================== Sets ================================ */
3642
3643static void saddCommand(redisClient *c) {
ed9b544e 3644 robj *set;
3645
3305306f 3646 set = lookupKeyWrite(c->db,c->argv[1]);
3647 if (set == NULL) {
ed9b544e 3648 set = createSetObject();
3305306f 3649 dictAdd(c->db->dict,c->argv[1],set);
ed9b544e 3650 incrRefCount(c->argv[1]);
3651 } else {
ed9b544e 3652 if (set->type != REDIS_SET) {
c937aa89 3653 addReply(c,shared.wrongtypeerr);
ed9b544e 3654 return;
3655 }
3656 }
3657 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
3658 incrRefCount(c->argv[2]);
3659 server.dirty++;
c937aa89 3660 addReply(c,shared.cone);
ed9b544e 3661 } else {
c937aa89 3662 addReply(c,shared.czero);
ed9b544e 3663 }
3664}
3665
3666static void sremCommand(redisClient *c) {
3305306f 3667 robj *set;
ed9b544e 3668
3305306f 3669 set = lookupKeyWrite(c->db,c->argv[1]);
3670 if (set == NULL) {
c937aa89 3671 addReply(c,shared.czero);
ed9b544e 3672 } else {
ed9b544e 3673 if (set->type != REDIS_SET) {
c937aa89 3674 addReply(c,shared.wrongtypeerr);
ed9b544e 3675 return;
3676 }
3677 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
3678 server.dirty++;
12fea928 3679 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
c937aa89 3680 addReply(c,shared.cone);
ed9b544e 3681 } else {
c937aa89 3682 addReply(c,shared.czero);
ed9b544e 3683 }
3684 }
3685}
3686
a4460ef4 3687static void smoveCommand(redisClient *c) {
3688 robj *srcset, *dstset;
3689
3690 srcset = lookupKeyWrite(c->db,c->argv[1]);
3691 dstset = lookupKeyWrite(c->db,c->argv[2]);
3692
3693 /* If the source key does not exist return 0, if it's of the wrong type
3694 * raise an error */
3695 if (srcset == NULL || srcset->type != REDIS_SET) {
3696 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
3697 return;
3698 }
3699 /* Error if the destination key is not a set as well */
3700 if (dstset && dstset->type != REDIS_SET) {
3701 addReply(c,shared.wrongtypeerr);
3702 return;
3703 }
3704 /* Remove the element from the source set */
3705 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
3706 /* Key not found in the src set! return zero */
3707 addReply(c,shared.czero);
3708 return;
3709 }
3710 server.dirty++;
3711 /* Add the element to the destination set */
3712 if (!dstset) {
3713 dstset = createSetObject();
3714 dictAdd(c->db->dict,c->argv[2],dstset);
3715 incrRefCount(c->argv[2]);
3716 }
3717 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
3718 incrRefCount(c->argv[3]);
3719 addReply(c,shared.cone);
3720}
3721
ed9b544e 3722static void sismemberCommand(redisClient *c) {
3305306f 3723 robj *set;
ed9b544e 3724
3305306f 3725 set = lookupKeyRead(c->db,c->argv[1]);
3726 if (set == NULL) {
c937aa89 3727 addReply(c,shared.czero);
ed9b544e 3728 } else {
ed9b544e 3729 if (set->type != REDIS_SET) {
c937aa89 3730 addReply(c,shared.wrongtypeerr);
ed9b544e 3731 return;
3732 }
3733 if (dictFind(set->ptr,c->argv[2]))
c937aa89 3734 addReply(c,shared.cone);
ed9b544e 3735 else
c937aa89 3736 addReply(c,shared.czero);
ed9b544e 3737 }
3738}
3739
3740static void scardCommand(redisClient *c) {
3305306f 3741 robj *o;
ed9b544e 3742 dict *s;
3743
3305306f 3744 o = lookupKeyRead(c->db,c->argv[1]);
3745 if (o == NULL) {
c937aa89 3746 addReply(c,shared.czero);
ed9b544e 3747 return;
3748 } else {
ed9b544e 3749 if (o->type != REDIS_SET) {
c937aa89 3750 addReply(c,shared.wrongtypeerr);
ed9b544e 3751 } else {
3752 s = o->ptr;
c937aa89 3753 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3305306f 3754 dictSize(s)));
ed9b544e 3755 }
3756 }
3757}
3758
12fea928 3759static void spopCommand(redisClient *c) {
3760 robj *set;
3761 dictEntry *de;
3762
3763 set = lookupKeyWrite(c->db,c->argv[1]);
3764 if (set == NULL) {
3765 addReply(c,shared.nullbulk);
3766 } else {
3767 if (set->type != REDIS_SET) {
3768 addReply(c,shared.wrongtypeerr);
3769 return;
3770 }
3771 de = dictGetRandomKey(set->ptr);
3772 if (de == NULL) {
3773 addReply(c,shared.nullbulk);
3774 } else {
3775 robj *ele = dictGetEntryKey(de);
3776
942a3961 3777 addReplyBulkLen(c,ele);
12fea928 3778 addReply(c,ele);
3779 addReply(c,shared.crlf);
3780 dictDelete(set->ptr,ele);
3781 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3782 server.dirty++;
3783 }
3784 }
3785}
3786
2abb95a9 3787static void srandmemberCommand(redisClient *c) {
3788 robj *set;
3789 dictEntry *de;
3790
3791 set = lookupKeyRead(c->db,c->argv[1]);
3792 if (set == NULL) {
3793 addReply(c,shared.nullbulk);
3794 } else {
3795 if (set->type != REDIS_SET) {
3796 addReply(c,shared.wrongtypeerr);
3797 return;
3798 }
3799 de = dictGetRandomKey(set->ptr);
3800 if (de == NULL) {
3801 addReply(c,shared.nullbulk);
3802 } else {
3803 robj *ele = dictGetEntryKey(de);
3804
3805 addReplyBulkLen(c,ele);
3806 addReply(c,ele);
3807 addReply(c,shared.crlf);
3808 }
3809 }
3810}
3811
ed9b544e 3812static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
3813 dict **d1 = (void*) s1, **d2 = (void*) s2;
3814
3305306f 3815 return dictSize(*d1)-dictSize(*d2);
ed9b544e 3816}
3817
3818static void sinterGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey) {
3819 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3820 dictIterator *di;
3821 dictEntry *de;
3822 robj *lenobj = NULL, *dstset = NULL;
3823 int j, cardinality = 0;
3824
ed9b544e 3825 for (j = 0; j < setsnum; j++) {
3826 robj *setobj;
3305306f 3827
3828 setobj = dstkey ?
3829 lookupKeyWrite(c->db,setskeys[j]) :
3830 lookupKeyRead(c->db,setskeys[j]);
3831 if (!setobj) {
ed9b544e 3832 zfree(dv);
5faa6025 3833 if (dstkey) {
3834 deleteKey(c->db,dstkey);
3835 addReply(c,shared.ok);
3836 } else {
3837 addReply(c,shared.nullmultibulk);
3838 }
ed9b544e 3839 return;
3840 }
ed9b544e 3841 if (setobj->type != REDIS_SET) {
3842 zfree(dv);
c937aa89 3843 addReply(c,shared.wrongtypeerr);
ed9b544e 3844 return;
3845 }
3846 dv[j] = setobj->ptr;
3847 }
3848 /* Sort sets from the smallest to largest, this will improve our
3849 * algorithm's performace */
3850 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3851
3852 /* The first thing we should output is the total number of elements...
3853 * since this is a multi-bulk write, but at this stage we don't know
3854 * the intersection set size, so we use a trick, append an empty object
3855 * to the output list and save the pointer to later modify it with the
3856 * right length */
3857 if (!dstkey) {
3858 lenobj = createObject(REDIS_STRING,NULL);
3859 addReply(c,lenobj);
3860 decrRefCount(lenobj);
3861 } else {
3862 /* If we have a target key where to store the resulting set
3863 * create this key with an empty set inside */
3864 dstset = createSetObject();
ed9b544e 3865 }
3866
3867 /* Iterate all the elements of the first (smallest) set, and test
3868 * the element against all the other sets, if at least one set does
3869 * not include the element it is discarded */
3870 di = dictGetIterator(dv[0]);
ed9b544e 3871
3872 while((de = dictNext(di)) != NULL) {
3873 robj *ele;
3874
3875 for (j = 1; j < setsnum; j++)
3876 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
3877 if (j != setsnum)
3878 continue; /* at least one set does not contain the member */
3879 ele = dictGetEntryKey(de);
3880 if (!dstkey) {
942a3961 3881 addReplyBulkLen(c,ele);
ed9b544e 3882 addReply(c,ele);
3883 addReply(c,shared.crlf);
3884 cardinality++;
3885 } else {
3886 dictAdd(dstset->ptr,ele,NULL);
3887 incrRefCount(ele);
3888 }
3889 }
3890 dictReleaseIterator(di);
3891
83cdfe18
AG
3892 if (dstkey) {
3893 /* Store the resulting set into the target */
3894 deleteKey(c->db,dstkey);
3895 dictAdd(c->db->dict,dstkey,dstset);
3896 incrRefCount(dstkey);
3897 }
3898
40d224a9 3899 if (!dstkey) {
c937aa89 3900 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",cardinality);
40d224a9 3901 } else {
03fd01c7 3902 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3903 dictSize((dict*)dstset->ptr)));
40d224a9 3904 server.dirty++;
3905 }
ed9b544e 3906 zfree(dv);
3907}
3908
3909static void sinterCommand(redisClient *c) {
3910 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
3911}
3912
3913static void sinterstoreCommand(redisClient *c) {
3914 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
3915}
3916
f4f56e1d 3917#define REDIS_OP_UNION 0
3918#define REDIS_OP_DIFF 1
3919
3920static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 3921 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3922 dictIterator *di;
3923 dictEntry *de;
f4f56e1d 3924 robj *dstset = NULL;
40d224a9 3925 int j, cardinality = 0;
3926
40d224a9 3927 for (j = 0; j < setsnum; j++) {
3928 robj *setobj;
3929
3930 setobj = dstkey ?
3931 lookupKeyWrite(c->db,setskeys[j]) :
3932 lookupKeyRead(c->db,setskeys[j]);
3933 if (!setobj) {
3934 dv[j] = NULL;
3935 continue;
3936 }
3937 if (setobj->type != REDIS_SET) {
3938 zfree(dv);
3939 addReply(c,shared.wrongtypeerr);
3940 return;
3941 }
3942 dv[j] = setobj->ptr;
3943 }
3944
3945 /* We need a temp set object to store our union. If the dstkey
3946 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
3947 * this set object will be the resulting object to set into the target key*/
3948 dstset = createSetObject();
3949
40d224a9 3950 /* Iterate all the elements of all the sets, add every element a single
3951 * time to the result set */
3952 for (j = 0; j < setsnum; j++) {
51829ed3 3953 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 3954 if (!dv[j]) continue; /* non existing keys are like empty sets */
3955
3956 di = dictGetIterator(dv[j]);
40d224a9 3957
3958 while((de = dictNext(di)) != NULL) {
3959 robj *ele;
3960
3961 /* dictAdd will not add the same element multiple times */
3962 ele = dictGetEntryKey(de);
f4f56e1d 3963 if (op == REDIS_OP_UNION || j == 0) {
3964 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
3965 incrRefCount(ele);
40d224a9 3966 cardinality++;
3967 }
f4f56e1d 3968 } else if (op == REDIS_OP_DIFF) {
3969 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
3970 cardinality--;
3971 }
40d224a9 3972 }
3973 }
3974 dictReleaseIterator(di);
51829ed3
AG
3975
3976 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
40d224a9 3977 }
3978
f4f56e1d 3979 /* Output the content of the resulting set, if not in STORE mode */
3980 if (!dstkey) {
3981 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
3982 di = dictGetIterator(dstset->ptr);
f4f56e1d 3983 while((de = dictNext(di)) != NULL) {
3984 robj *ele;
3985
3986 ele = dictGetEntryKey(de);
942a3961 3987 addReplyBulkLen(c,ele);
f4f56e1d 3988 addReply(c,ele);
3989 addReply(c,shared.crlf);
3990 }
3991 dictReleaseIterator(di);
83cdfe18
AG
3992 } else {
3993 /* If we have a target key where to store the resulting set
3994 * create this key with the result set inside */
3995 deleteKey(c->db,dstkey);
3996 dictAdd(c->db->dict,dstkey,dstset);
3997 incrRefCount(dstkey);
f4f56e1d 3998 }
3999
4000 /* Cleanup */
40d224a9 4001 if (!dstkey) {
40d224a9 4002 decrRefCount(dstset);
4003 } else {
03fd01c7 4004 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
4005 dictSize((dict*)dstset->ptr)));
40d224a9 4006 server.dirty++;
4007 }
4008 zfree(dv);
4009}
4010
4011static void sunionCommand(redisClient *c) {
f4f56e1d 4012 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 4013}
4014
4015static void sunionstoreCommand(redisClient *c) {
f4f56e1d 4016 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
4017}
4018
4019static void sdiffCommand(redisClient *c) {
4020 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
4021}
4022
4023static void sdiffstoreCommand(redisClient *c) {
4024 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 4025}
4026
6b47e12e 4027/* ==================================== ZSets =============================== */
4028
/* ZSETs are ordered sets using two data structures to hold the same elements
 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
 * data structure.
 *
 * The elements are added to a hash table mapping Redis objects to scores.
 * At the same time the elements are added to a skip list mapping scores
 * to Redis objects (so objects are sorted by scores in this "view"). */
4036
4037/* This skiplist implementation is almost a C translation of the original
4038 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4039 * Alternative to Balanced Trees", modified in three ways:
4040 * a) this implementation allows for repeated values.
4041 * b) the comparison is not just by key (our 'score') but by satellite data.
4042 * c) there is a back pointer, so it's a doubly linked list with the back
4043 * pointers being only at "level 1". This allows to traverse the list
4044 * from tail to head, useful for ZREVRANGE. */
4045
4046static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
4047 zskiplistNode *zn = zmalloc(sizeof(*zn));
4048
4049 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
4050 zn->score = score;
4051 zn->obj = obj;
4052 return zn;
4053}
4054
4055static zskiplist *zslCreate(void) {
4056 int j;
4057 zskiplist *zsl;
4058
4059 zsl = zmalloc(sizeof(*zsl));
4060 zsl->level = 1;
cc812361 4061 zsl->length = 0;
6b47e12e 4062 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
4063 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
4064 zsl->header->forward[j] = NULL;
e3870fab 4065 zsl->header->backward = NULL;
4066 zsl->tail = NULL;
6b47e12e 4067 return zsl;
4068}
4069
fd8ccf44 4070static void zslFreeNode(zskiplistNode *node) {
4071 decrRefCount(node->obj);
ad807e6f 4072 zfree(node->forward);
fd8ccf44 4073 zfree(node);
4074}
4075
4076static void zslFree(zskiplist *zsl) {
ad807e6f 4077 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 4078
ad807e6f 4079 zfree(zsl->header->forward);
4080 zfree(zsl->header);
fd8ccf44 4081 while(node) {
599379dd 4082 next = node->forward[0];
fd8ccf44 4083 zslFreeNode(node);
4084 node = next;
4085 }
ad807e6f 4086 zfree(zsl);
fd8ccf44 4087}
4088
6b47e12e 4089static int zslRandomLevel(void) {
4090 int level = 1;
4091 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
4092 level += 1;
4093 return level;
4094}
4095
4096static void zslInsert(zskiplist *zsl, double score, robj *obj) {
4097 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4098 int i, level;
4099
4100 x = zsl->header;
4101 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4102 while (x->forward[i] &&
4103 (x->forward[i]->score < score ||
4104 (x->forward[i]->score == score &&
4105 compareStringObjects(x->forward[i]->obj,obj) < 0)))
6b47e12e 4106 x = x->forward[i];
4107 update[i] = x;
4108 }
6b47e12e 4109 /* we assume the key is not already inside, since we allow duplicated
4110 * scores, and the re-insertion of score and redis object should never
4111 * happpen since the caller of zslInsert() should test in the hash table
4112 * if the element is already inside or not. */
4113 level = zslRandomLevel();
4114 if (level > zsl->level) {
4115 for (i = zsl->level; i < level; i++)
4116 update[i] = zsl->header;
4117 zsl->level = level;
4118 }
4119 x = zslCreateNode(level,score,obj);
4120 for (i = 0; i < level; i++) {
4121 x->forward[i] = update[i]->forward[i];
4122 update[i]->forward[i] = x;
4123 }
bb975144 4124 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 4125 if (x->forward[0])
4126 x->forward[0]->backward = x;
4127 else
4128 zsl->tail = x;
cc812361 4129 zsl->length++;
6b47e12e 4130}
4131
50c55df5 4132/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 4133static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 4134 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4135 int i;
4136
4137 x = zsl->header;
4138 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4139 while (x->forward[i] &&
4140 (x->forward[i]->score < score ||
4141 (x->forward[i]->score == score &&
4142 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 4143 x = x->forward[i];
4144 update[i] = x;
4145 }
4146 /* We may have multiple elements with the same score, what we need
4147 * is to find the element with both the right score and object. */
4148 x = x->forward[0];
50c55df5 4149 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
9d60e6e4 4150 for (i = 0; i < zsl->level; i++) {
4151 if (update[i]->forward[i] != x) break;
4152 update[i]->forward[i] = x->forward[i];
4153 }
4154 if (x->forward[0]) {
4155 x->forward[0]->backward = (x->backward == zsl->header) ?
4156 NULL : x->backward;
e197b441 4157 } else {
9d60e6e4 4158 zsl->tail = x->backward;
e197b441 4159 }
9d60e6e4 4160 zslFreeNode(x);
4161 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4162 zsl->level--;
4163 zsl->length--;
4164 return 1;
4165 } else {
4166 return 0; /* not found */
e197b441 4167 }
4168 return 0; /* not found */
fd8ccf44 4169}
4170
1807985b 4171/* Delete all the elements with score between min and max from the skiplist.
4172 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4173 * Note that this function takes the reference to the hash table view of the
4174 * sorted set, in order to remove the elements from the hash table too. */
4175static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4176 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4177 unsigned long removed = 0;
4178 int i;
4179
4180 x = zsl->header;
4181 for (i = zsl->level-1; i >= 0; i--) {
4182 while (x->forward[i] && x->forward[i]->score < min)
4183 x = x->forward[i];
4184 update[i] = x;
4185 }
4186 /* We may have multiple elements with the same score, what we need
4187 * is to find the element with both the right score and object. */
4188 x = x->forward[0];
4189 while (x && x->score <= max) {
4190 zskiplistNode *next;
4191
4192 for (i = 0; i < zsl->level; i++) {
4193 if (update[i]->forward[i] != x) break;
4194 update[i]->forward[i] = x->forward[i];
4195 }
4196 if (x->forward[0]) {
4197 x->forward[0]->backward = (x->backward == zsl->header) ?
4198 NULL : x->backward;
4199 } else {
4200 zsl->tail = x->backward;
4201 }
4202 next = x->forward[0];
4203 dictDelete(dict,x->obj);
4204 zslFreeNode(x);
4205 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4206 zsl->level--;
4207 zsl->length--;
4208 removed++;
4209 x = next;
4210 }
4211 return removed; /* not found */
4212}
4213
50c55df5 4214/* Find the first node having a score equal or greater than the specified one.
4215 * Returns NULL if there is no match. */
4216static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4217 zskiplistNode *x;
4218 int i;
4219
4220 x = zsl->header;
4221 for (i = zsl->level-1; i >= 0; i--) {
4222 while (x->forward[i] && x->forward[i]->score < score)
4223 x = x->forward[i];
4224 }
4225 /* We may have multiple elements with the same score, what we need
4226 * is to find the element with both the right score and object. */
4227 return x->forward[0];
4228}
4229
fd8ccf44 4230/* The actual Z-commands implementations */
4231
7db723ad 4232/* This generic command implements both ZADD and ZINCRBY.
e2665397 4233 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
7db723ad 4234 * the increment if the operation is a ZINCRBY (doincrement == 1). */
e2665397 4235static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
fd8ccf44 4236 robj *zsetobj;
4237 zset *zs;
4238 double *score;
4239
e2665397 4240 zsetobj = lookupKeyWrite(c->db,key);
fd8ccf44 4241 if (zsetobj == NULL) {
4242 zsetobj = createZsetObject();
e2665397 4243 dictAdd(c->db->dict,key,zsetobj);
4244 incrRefCount(key);
fd8ccf44 4245 } else {
4246 if (zsetobj->type != REDIS_ZSET) {
4247 addReply(c,shared.wrongtypeerr);
4248 return;
4249 }
4250 }
fd8ccf44 4251 zs = zsetobj->ptr;
e2665397 4252
7db723ad 4253 /* Ok now since we implement both ZADD and ZINCRBY here the code
e2665397 4254 * needs to handle the two different conditions. It's all about setting
4255 * '*score', that is, the new score to set, to the right value. */
4256 score = zmalloc(sizeof(double));
4257 if (doincrement) {
4258 dictEntry *de;
4259
4260 /* Read the old score. If the element was not present starts from 0 */
4261 de = dictFind(zs->dict,ele);
4262 if (de) {
4263 double *oldscore = dictGetEntryVal(de);
4264 *score = *oldscore + scoreval;
4265 } else {
4266 *score = scoreval;
4267 }
4268 } else {
4269 *score = scoreval;
4270 }
4271
4272 /* What follows is a simple remove and re-insert operation that is common
7db723ad 4273 * to both ZADD and ZINCRBY... */
e2665397 4274 if (dictAdd(zs->dict,ele,score) == DICT_OK) {
fd8ccf44 4275 /* case 1: New element */
e2665397 4276 incrRefCount(ele); /* added to hash */
4277 zslInsert(zs->zsl,*score,ele);
4278 incrRefCount(ele); /* added to skiplist */
fd8ccf44 4279 server.dirty++;
e2665397 4280 if (doincrement)
e2665397 4281 addReplyDouble(c,*score);
91d71bfc 4282 else
4283 addReply(c,shared.cone);
fd8ccf44 4284 } else {
4285 dictEntry *de;
4286 double *oldscore;
4287
4288 /* case 2: Score update operation */
e2665397 4289 de = dictFind(zs->dict,ele);
fd8ccf44 4290 assert(de != NULL);
4291 oldscore = dictGetEntryVal(de);
4292 if (*score != *oldscore) {
4293 int deleted;
4294
e2665397 4295 /* Remove and insert the element in the skip list with new score */
4296 deleted = zslDelete(zs->zsl,*oldscore,ele);
fd8ccf44 4297 assert(deleted != 0);
e2665397 4298 zslInsert(zs->zsl,*score,ele);
4299 incrRefCount(ele);
4300 /* Update the score in the hash table */
4301 dictReplace(zs->dict,ele,score);
fd8ccf44 4302 server.dirty++;
2161a965 4303 } else {
4304 zfree(score);
fd8ccf44 4305 }
e2665397 4306 if (doincrement)
4307 addReplyDouble(c,*score);
4308 else
4309 addReply(c,shared.czero);
fd8ccf44 4310 }
4311}
4312
e2665397 4313static void zaddCommand(redisClient *c) {
4314 double scoreval;
4315
4316 scoreval = strtod(c->argv[2]->ptr,NULL);
4317 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
4318}
4319
7db723ad 4320static void zincrbyCommand(redisClient *c) {
e2665397 4321 double scoreval;
4322
4323 scoreval = strtod(c->argv[2]->ptr,NULL);
4324 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
4325}
4326
1b7106e7 4327static void zremCommand(redisClient *c) {
4328 robj *zsetobj;
4329 zset *zs;
4330
4331 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4332 if (zsetobj == NULL) {
4333 addReply(c,shared.czero);
4334 } else {
4335 dictEntry *de;
4336 double *oldscore;
4337 int deleted;
4338
4339 if (zsetobj->type != REDIS_ZSET) {
4340 addReply(c,shared.wrongtypeerr);
4341 return;
4342 }
4343 zs = zsetobj->ptr;
4344 de = dictFind(zs->dict,c->argv[2]);
4345 if (de == NULL) {
4346 addReply(c,shared.czero);
4347 return;
4348 }
4349 /* Delete from the skiplist */
4350 oldscore = dictGetEntryVal(de);
4351 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
4352 assert(deleted != 0);
4353
4354 /* Delete from the hash table */
4355 dictDelete(zs->dict,c->argv[2]);
4356 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4357 server.dirty++;
4358 addReply(c,shared.cone);
4359 }
4360}
4361
1807985b 4362static void zremrangebyscoreCommand(redisClient *c) {
4363 double min = strtod(c->argv[2]->ptr,NULL);
4364 double max = strtod(c->argv[3]->ptr,NULL);
4365 robj *zsetobj;
4366 zset *zs;
4367
4368 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4369 if (zsetobj == NULL) {
4370 addReply(c,shared.czero);
4371 } else {
4372 long deleted;
4373
4374 if (zsetobj->type != REDIS_ZSET) {
4375 addReply(c,shared.wrongtypeerr);
4376 return;
4377 }
4378 zs = zsetobj->ptr;
4379 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4380 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4381 server.dirty += deleted;
4382 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4383 }
4384}
4385
e3870fab 4386static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 4387 robj *o;
4388 int start = atoi(c->argv[2]->ptr);
4389 int end = atoi(c->argv[3]->ptr);
4390
4391 o = lookupKeyRead(c->db,c->argv[1]);
4392 if (o == NULL) {
4393 addReply(c,shared.nullmultibulk);
4394 } else {
4395 if (o->type != REDIS_ZSET) {
4396 addReply(c,shared.wrongtypeerr);
4397 } else {
4398 zset *zsetobj = o->ptr;
4399 zskiplist *zsl = zsetobj->zsl;
4400 zskiplistNode *ln;
4401
4402 int llen = zsl->length;
4403 int rangelen, j;
4404 robj *ele;
4405
4406 /* convert negative indexes */
4407 if (start < 0) start = llen+start;
4408 if (end < 0) end = llen+end;
4409 if (start < 0) start = 0;
4410 if (end < 0) end = 0;
4411
4412 /* indexes sanity checks */
4413 if (start > end || start >= llen) {
4414 /* Out of range start or start > end result in empty list */
4415 addReply(c,shared.emptymultibulk);
4416 return;
4417 }
4418 if (end >= llen) end = llen-1;
4419 rangelen = (end-start)+1;
4420
4421 /* Return the result in form of a multi-bulk reply */
e3870fab 4422 if (reverse) {
4423 ln = zsl->tail;
4424 while (start--)
4425 ln = ln->backward;
4426 } else {
4427 ln = zsl->header->forward[0];
4428 while (start--)
4429 ln = ln->forward[0];
4430 }
cc812361 4431
4432 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
4433 for (j = 0; j < rangelen; j++) {
0aad7a19 4434 ele = ln->obj;
cc812361 4435 addReplyBulkLen(c,ele);
4436 addReply(c,ele);
4437 addReply(c,shared.crlf);
e3870fab 4438 ln = reverse ? ln->backward : ln->forward[0];
cc812361 4439 }
4440 }
4441 }
4442}
4443
e3870fab 4444static void zrangeCommand(redisClient *c) {
4445 zrangeGenericCommand(c,0);
4446}
4447
4448static void zrevrangeCommand(redisClient *c) {
4449 zrangeGenericCommand(c,1);
4450}
4451
50c55df5 4452static void zrangebyscoreCommand(redisClient *c) {
4453 robj *o;
4454 double min = strtod(c->argv[2]->ptr,NULL);
4455 double max = strtod(c->argv[3]->ptr,NULL);
4456
4457 o = lookupKeyRead(c->db,c->argv[1]);
4458 if (o == NULL) {
4459 addReply(c,shared.nullmultibulk);
4460 } else {
4461 if (o->type != REDIS_ZSET) {
4462 addReply(c,shared.wrongtypeerr);
4463 } else {
4464 zset *zsetobj = o->ptr;
4465 zskiplist *zsl = zsetobj->zsl;
4466 zskiplistNode *ln;
4467 robj *ele, *lenobj;
4468 unsigned int rangelen = 0;
4469
4470 /* Get the first node with the score >= min */
4471 ln = zslFirstWithScore(zsl,min);
4472 if (ln == NULL) {
4473 /* No element matching the speciifed interval */
4474 addReply(c,shared.emptymultibulk);
4475 return;
4476 }
4477
4478 /* We don't know in advance how many matching elements there
4479 * are in the list, so we push this object that will represent
4480 * the multi-bulk length in the output buffer, and will "fix"
4481 * it later */
4482 lenobj = createObject(REDIS_STRING,NULL);
4483 addReply(c,lenobj);
4484
dbbc7285 4485 while(ln && ln->score <= max) {
50c55df5 4486 ele = ln->obj;
4487 addReplyBulkLen(c,ele);
4488 addReply(c,ele);
4489 addReply(c,shared.crlf);
4490 ln = ln->forward[0];
4491 rangelen++;
4492 }
4493 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4494 }
4495 }
4496}
4497
3c41331e 4498static void zcardCommand(redisClient *c) {
e197b441 4499 robj *o;
4500 zset *zs;
4501
4502 o = lookupKeyRead(c->db,c->argv[1]);
4503 if (o == NULL) {
4504 addReply(c,shared.czero);
4505 return;
4506 } else {
4507 if (o->type != REDIS_ZSET) {
4508 addReply(c,shared.wrongtypeerr);
4509 } else {
4510 zs = o->ptr;
4511 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",zs->zsl->length));
4512 }
4513 }
4514}
4515
6e333bbe 4516static void zscoreCommand(redisClient *c) {
4517 robj *o;
4518 zset *zs;
4519
4520 o = lookupKeyRead(c->db,c->argv[1]);
4521 if (o == NULL) {
96d8b4ee 4522 addReply(c,shared.nullbulk);
6e333bbe 4523 return;
4524 } else {
4525 if (o->type != REDIS_ZSET) {
4526 addReply(c,shared.wrongtypeerr);
4527 } else {
4528 dictEntry *de;
4529
4530 zs = o->ptr;
4531 de = dictFind(zs->dict,c->argv[2]);
4532 if (!de) {
4533 addReply(c,shared.nullbulk);
4534 } else {
6e333bbe 4535 double *score = dictGetEntryVal(de);
4536
e2665397 4537 addReplyDouble(c,*score);
6e333bbe 4538 }
4539 }
4540 }
4541}
4542
6b47e12e 4543/* ========================= Non type-specific commands ==================== */
4544
ed9b544e 4545static void flushdbCommand(redisClient *c) {
ca37e9cd 4546 server.dirty += dictSize(c->db->dict);
3305306f 4547 dictEmpty(c->db->dict);
4548 dictEmpty(c->db->expires);
ed9b544e 4549 addReply(c,shared.ok);
ed9b544e 4550}
4551
4552static void flushallCommand(redisClient *c) {
ca37e9cd 4553 server.dirty += emptyDb();
ed9b544e 4554 addReply(c,shared.ok);
f78fd11b 4555 rdbSave(server.dbfilename);
ca37e9cd 4556 server.dirty++;
ed9b544e 4557}
4558
56906eef 4559static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 4560 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 4561 so->type = type;
4562 so->pattern = pattern;
4563 return so;
4564}
4565
4566/* Return the value associated to the key with a name obtained
4567 * substituting the first occurence of '*' in 'pattern' with 'subst' */
56906eef 4568static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
ed9b544e 4569 char *p;
4570 sds spat, ssub;
4571 robj keyobj;
4572 int prefixlen, sublen, postfixlen;
ed9b544e 4573 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
4574 struct {
f1017b3f 4575 long len;
4576 long free;
ed9b544e 4577 char buf[REDIS_SORTKEY_MAX+1];
4578 } keyname;
4579
28173a49 4580 /* If the pattern is "#" return the substitution object itself in order
4581 * to implement the "SORT ... GET #" feature. */
4582 spat = pattern->ptr;
4583 if (spat[0] == '#' && spat[1] == '\0') {
4584 return subst;
4585 }
4586
4587 /* The substitution object may be specially encoded. If so we create
4588 * a decoded object on the fly. */
942a3961 4589 if (subst->encoding == REDIS_ENCODING_RAW)
28173a49 4590 /* If we don't need to get a decoded object increment the refcount
4591 * so that the final decrRefCount() call will restore the original
4592 * count */
942a3961 4593 incrRefCount(subst);
4594 else {
4595 subst = getDecodedObject(subst);
4596 }
4597
ed9b544e 4598 ssub = subst->ptr;
4599 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
4600 p = strchr(spat,'*');
ed5a857a 4601 if (!p) {
4602 decrRefCount(subst);
4603 return NULL;
4604 }
ed9b544e 4605
4606 prefixlen = p-spat;
4607 sublen = sdslen(ssub);
4608 postfixlen = sdslen(spat)-(prefixlen+1);
4609 memcpy(keyname.buf,spat,prefixlen);
4610 memcpy(keyname.buf+prefixlen,ssub,sublen);
4611 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
4612 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
4613 keyname.len = prefixlen+sublen+postfixlen;
4614
4615 keyobj.refcount = 1;
4616 keyobj.type = REDIS_STRING;
4617 keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
4618
942a3961 4619 decrRefCount(subst);
4620
a4d1ba9a 4621 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3305306f 4622 return lookupKeyRead(db,&keyobj);
ed9b544e 4623}
4624
4625/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
4626 * the additional parameter is not standard but a BSD-specific we have to
4627 * pass sorting parameters via the global 'server' structure */
4628static int sortCompare(const void *s1, const void *s2) {
4629 const redisSortObject *so1 = s1, *so2 = s2;
4630 int cmp;
4631
4632 if (!server.sort_alpha) {
4633 /* Numeric sorting. Here it's trivial as we precomputed scores */
4634 if (so1->u.score > so2->u.score) {
4635 cmp = 1;
4636 } else if (so1->u.score < so2->u.score) {
4637 cmp = -1;
4638 } else {
4639 cmp = 0;
4640 }
4641 } else {
4642 /* Alphanumeric sorting */
4643 if (server.sort_bypattern) {
4644 if (!so1->u.cmpobj || !so2->u.cmpobj) {
4645 /* At least one compare object is NULL */
4646 if (so1->u.cmpobj == so2->u.cmpobj)
4647 cmp = 0;
4648 else if (so1->u.cmpobj == NULL)
4649 cmp = -1;
4650 else
4651 cmp = 1;
4652 } else {
4653 /* We have both the objects, use strcoll */
4654 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
4655 }
4656 } else {
4657 /* Compare elements directly */
942a3961 4658 if (so1->obj->encoding == REDIS_ENCODING_RAW &&
4659 so2->obj->encoding == REDIS_ENCODING_RAW) {
4660 cmp = strcoll(so1->obj->ptr,so2->obj->ptr);
4661 } else {
4662 robj *dec1, *dec2;
4663
4664 dec1 = so1->obj->encoding == REDIS_ENCODING_RAW ?
4665 so1->obj : getDecodedObject(so1->obj);
4666 dec2 = so2->obj->encoding == REDIS_ENCODING_RAW ?
4667 so2->obj : getDecodedObject(so2->obj);
4668 cmp = strcoll(dec1->ptr,dec2->ptr);
4669 if (dec1 != so1->obj) decrRefCount(dec1);
4670 if (dec2 != so2->obj) decrRefCount(dec2);
4671 }
ed9b544e 4672 }
4673 }
4674 return server.sort_desc ? -cmp : cmp;
4675}
4676
4677/* The SORT command is the most complex command in Redis. Warning: this code
4678 * is optimized for speed and a bit less for readability */
4679static void sortCommand(redisClient *c) {
ed9b544e 4680 list *operations;
4681 int outputlen = 0;
4682 int desc = 0, alpha = 0;
4683 int limit_start = 0, limit_count = -1, start, end;
4684 int j, dontsort = 0, vectorlen;
4685 int getop = 0; /* GET operation counter */
443c6409 4686 robj *sortval, *sortby = NULL, *storekey = NULL;
ed9b544e 4687 redisSortObject *vector; /* Resulting vector to sort */
4688
4689 /* Lookup the key to sort. It must be of the right types */
3305306f 4690 sortval = lookupKeyRead(c->db,c->argv[1]);
4691 if (sortval == NULL) {
c937aa89 4692 addReply(c,shared.nokeyerr);
ed9b544e 4693 return;
4694 }
ed9b544e 4695 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
c937aa89 4696 addReply(c,shared.wrongtypeerr);
ed9b544e 4697 return;
4698 }
4699
4700 /* Create a list of operations to perform for every sorted element.
4701 * Operations can be GET/DEL/INCR/DECR */
4702 operations = listCreate();
092dac2a 4703 listSetFreeMethod(operations,zfree);
ed9b544e 4704 j = 2;
4705
4706 /* Now we need to protect sortval incrementing its count, in the future
4707 * SORT may have options able to overwrite/delete keys during the sorting
4708 * and the sorted key itself may get destroied */
4709 incrRefCount(sortval);
4710
4711 /* The SORT command has an SQL-alike syntax, parse it */
4712 while(j < c->argc) {
4713 int leftargs = c->argc-j-1;
4714 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
4715 desc = 0;
4716 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
4717 desc = 1;
4718 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
4719 alpha = 1;
4720 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
4721 limit_start = atoi(c->argv[j+1]->ptr);
4722 limit_count = atoi(c->argv[j+2]->ptr);
4723 j+=2;
443c6409 4724 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
4725 storekey = c->argv[j+1];
4726 j++;
ed9b544e 4727 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
4728 sortby = c->argv[j+1];
4729 /* If the BY pattern does not contain '*', i.e. it is constant,
4730 * we don't need to sort nor to lookup the weight keys. */
4731 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
4732 j++;
4733 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4734 listAddNodeTail(operations,createSortOperation(
4735 REDIS_SORT_GET,c->argv[j+1]));
4736 getop++;
4737 j++;
ed9b544e 4738 } else {
4739 decrRefCount(sortval);
4740 listRelease(operations);
c937aa89 4741 addReply(c,shared.syntaxerr);
ed9b544e 4742 return;
4743 }
4744 j++;
4745 }
4746
4747 /* Load the sorting vector with all the objects to sort */
4748 vectorlen = (sortval->type == REDIS_LIST) ?
4749 listLength((list*)sortval->ptr) :
3305306f 4750 dictSize((dict*)sortval->ptr);
ed9b544e 4751 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 4752 j = 0;
4753 if (sortval->type == REDIS_LIST) {
4754 list *list = sortval->ptr;
6208b3a7 4755 listNode *ln;
4756
4757 listRewind(list);
4758 while((ln = listYield(list))) {
ed9b544e 4759 robj *ele = ln->value;
4760 vector[j].obj = ele;
4761 vector[j].u.score = 0;
4762 vector[j].u.cmpobj = NULL;
ed9b544e 4763 j++;
4764 }
4765 } else {
4766 dict *set = sortval->ptr;
4767 dictIterator *di;
4768 dictEntry *setele;
4769
4770 di = dictGetIterator(set);
ed9b544e 4771 while((setele = dictNext(di)) != NULL) {
4772 vector[j].obj = dictGetEntryKey(setele);
4773 vector[j].u.score = 0;
4774 vector[j].u.cmpobj = NULL;
4775 j++;
4776 }
4777 dictReleaseIterator(di);
4778 }
4779 assert(j == vectorlen);
4780
4781 /* Now it's time to load the right scores in the sorting vector */
4782 if (dontsort == 0) {
4783 for (j = 0; j < vectorlen; j++) {
4784 if (sortby) {
4785 robj *byval;
4786
3305306f 4787 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
ed9b544e 4788 if (!byval || byval->type != REDIS_STRING) continue;
4789 if (alpha) {
942a3961 4790 if (byval->encoding == REDIS_ENCODING_RAW) {
4791 vector[j].u.cmpobj = byval;
4792 incrRefCount(byval);
4793 } else {
4794 vector[j].u.cmpobj = getDecodedObject(byval);
4795 }
ed9b544e 4796 } else {
942a3961 4797 if (byval->encoding == REDIS_ENCODING_RAW) {
4798 vector[j].u.score = strtod(byval->ptr,NULL);
4799 } else {
f1017b3f 4800 if (byval->encoding == REDIS_ENCODING_INT) {
942a3961 4801 vector[j].u.score = (long)byval->ptr;
f1017b3f 4802 } else
942a3961 4803 assert(1 != 1);
4804 }
ed9b544e 4805 }
4806 } else {
942a3961 4807 if (!alpha) {
4808 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
4809 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
4810 else {
4811 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
4812 vector[j].u.score = (long) vector[j].obj->ptr;
4813 else
4814 assert(1 != 1);
4815 }
4816 }
ed9b544e 4817 }
4818 }
4819 }
4820
4821 /* We are ready to sort the vector... perform a bit of sanity check
4822 * on the LIMIT option too. We'll use a partial version of quicksort. */
4823 start = (limit_start < 0) ? 0 : limit_start;
4824 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
4825 if (start >= vectorlen) {
4826 start = vectorlen-1;
4827 end = vectorlen-2;
4828 }
4829 if (end >= vectorlen) end = vectorlen-1;
4830
4831 if (dontsort == 0) {
4832 server.sort_desc = desc;
4833 server.sort_alpha = alpha;
4834 server.sort_bypattern = sortby ? 1 : 0;
5f5b9840 4835 if (sortby && (start != 0 || end != vectorlen-1))
4836 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
4837 else
4838 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 4839 }
4840
4841 /* Send command output to the output buffer, performing the specified
4842 * GET/DEL/INCR/DECR operations if any. */
4843 outputlen = getop ? getop*(end-start+1) : end-start+1;
443c6409 4844 if (storekey == NULL) {
4845 /* STORE option not specified, sent the sorting result to client */
4846 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
4847 for (j = start; j <= end; j++) {
4848 listNode *ln;
4849 if (!getop) {
4850 addReplyBulkLen(c,vector[j].obj);
4851 addReply(c,vector[j].obj);
4852 addReply(c,shared.crlf);
4853 }
4854 listRewind(operations);
4855 while((ln = listYield(operations))) {
4856 redisSortOperation *sop = ln->value;
4857 robj *val = lookupKeyByPattern(c->db,sop->pattern,
4858 vector[j].obj);
4859
4860 if (sop->type == REDIS_SORT_GET) {
4861 if (!val || val->type != REDIS_STRING) {
4862 addReply(c,shared.nullbulk);
4863 } else {
4864 addReplyBulkLen(c,val);
4865 addReply(c,val);
4866 addReply(c,shared.crlf);
4867 }
4868 } else {
4869 assert(sop->type == REDIS_SORT_GET); /* always fails */
4870 }
4871 }
ed9b544e 4872 }
443c6409 4873 } else {
4874 robj *listObject = createListObject();
4875 list *listPtr = (list*) listObject->ptr;
4876
4877 /* STORE option specified, set the sorting result as a List object */
4878 for (j = start; j <= end; j++) {
4879 listNode *ln;
4880 if (!getop) {
4881 listAddNodeTail(listPtr,vector[j].obj);
4882 incrRefCount(vector[j].obj);
4883 }
4884 listRewind(operations);
4885 while((ln = listYield(operations))) {
4886 redisSortOperation *sop = ln->value;
4887 robj *val = lookupKeyByPattern(c->db,sop->pattern,
4888 vector[j].obj);
4889
4890 if (sop->type == REDIS_SORT_GET) {
4891 if (!val || val->type != REDIS_STRING) {
4892 listAddNodeTail(listPtr,createStringObject("",0));
4893 } else {
4894 listAddNodeTail(listPtr,val);
4895 incrRefCount(val);
4896 }
ed9b544e 4897 } else {
443c6409 4898 assert(sop->type == REDIS_SORT_GET); /* always fails */
ed9b544e 4899 }
ed9b544e 4900 }
ed9b544e 4901 }
121796f7 4902 if (dictReplace(c->db->dict,storekey,listObject)) {
4903 incrRefCount(storekey);
4904 }
443c6409 4905 /* Note: we add 1 because the DB is dirty anyway since even if the
4906 * SORT result is empty a new key is set and maybe the old content
4907 * replaced. */
4908 server.dirty += 1+outputlen;
4909 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
ed9b544e 4910 }
4911
4912 /* Cleanup */
4913 decrRefCount(sortval);
4914 listRelease(operations);
4915 for (j = 0; j < vectorlen; j++) {
4916 if (sortby && alpha && vector[j].u.cmpobj)
4917 decrRefCount(vector[j].u.cmpobj);
4918 }
4919 zfree(vector);
4920}
4921
4922static void infoCommand(redisClient *c) {
4923 sds info;
4924 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 4925 int j;
ed9b544e 4926
4927 info = sdscatprintf(sdsempty(),
4928 "redis_version:%s\r\n"
f1017b3f 4929 "arch_bits:%s\r\n"
a0f643ea 4930 "uptime_in_seconds:%d\r\n"
4931 "uptime_in_days:%d\r\n"
ed9b544e 4932 "connected_clients:%d\r\n"
4933 "connected_slaves:%d\r\n"
5fba9f71 4934 "used_memory:%zu\r\n"
ed9b544e 4935 "changes_since_last_save:%lld\r\n"
be2bb6b0 4936 "bgsave_in_progress:%d\r\n"
ed9b544e 4937 "last_save_time:%d\r\n"
4938 "total_connections_received:%lld\r\n"
4939 "total_commands_processed:%lld\r\n"
a0f643ea 4940 "role:%s\r\n"
ed9b544e 4941 ,REDIS_VERSION,
f1017b3f 4942 (sizeof(long) == 8) ? "64" : "32",
a0f643ea 4943 uptime,
4944 uptime/(3600*24),
ed9b544e 4945 listLength(server.clients)-listLength(server.slaves),
4946 listLength(server.slaves),
4947 server.usedmemory,
4948 server.dirty,
be2bb6b0 4949 server.bgsaveinprogress,
ed9b544e 4950 server.lastsave,
4951 server.stat_numconnections,
4952 server.stat_numcommands,
a0f643ea 4953 server.masterhost == NULL ? "master" : "slave"
ed9b544e 4954 );
a0f643ea 4955 if (server.masterhost) {
4956 info = sdscatprintf(info,
4957 "master_host:%s\r\n"
4958 "master_port:%d\r\n"
4959 "master_link_status:%s\r\n"
4960 "master_last_io_seconds_ago:%d\r\n"
4961 ,server.masterhost,
4962 server.masterport,
4963 (server.replstate == REDIS_REPL_CONNECTED) ?
4964 "up" : "down",
f72b934d 4965 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 4966 );
4967 }
c3cb078d 4968 for (j = 0; j < server.dbnum; j++) {
4969 long long keys, vkeys;
4970
4971 keys = dictSize(server.db[j].dict);
4972 vkeys = dictSize(server.db[j].expires);
4973 if (keys || vkeys) {
4974 info = sdscatprintf(info, "db%d: keys=%lld,expires=%lld\r\n",
4975 j, keys, vkeys);
4976 }
4977 }
c937aa89 4978 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
ed9b544e 4979 addReplySds(c,info);
70003d28 4980 addReply(c,shared.crlf);
ed9b544e 4981}
4982
3305306f 4983static void monitorCommand(redisClient *c) {
4984 /* ignore MONITOR if aleady slave or in monitor mode */
4985 if (c->flags & REDIS_SLAVE) return;
4986
4987 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
4988 c->slaveseldb = 0;
6b47e12e 4989 listAddNodeTail(server.monitors,c);
3305306f 4990 addReply(c,shared.ok);
4991}
4992
4993/* ================================= Expire ================================= */
4994static int removeExpire(redisDb *db, robj *key) {
4995 if (dictDelete(db->expires,key) == DICT_OK) {
4996 return 1;
4997 } else {
4998 return 0;
4999 }
5000}
5001
5002static int setExpire(redisDb *db, robj *key, time_t when) {
5003 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
5004 return 0;
5005 } else {
5006 incrRefCount(key);
5007 return 1;
5008 }
5009}
5010
bb32ede5 5011/* Return the expire time of the specified key, or -1 if no expire
5012 * is associated with this key (i.e. the key is non volatile) */
5013static time_t getExpire(redisDb *db, robj *key) {
5014 dictEntry *de;
5015
5016 /* No expire? return ASAP */
5017 if (dictSize(db->expires) == 0 ||
5018 (de = dictFind(db->expires,key)) == NULL) return -1;
5019
5020 return (time_t) dictGetEntryVal(de);
5021}
5022
3305306f 5023static int expireIfNeeded(redisDb *db, robj *key) {
5024 time_t when;
5025 dictEntry *de;
5026
5027 /* No expire? return ASAP */
5028 if (dictSize(db->expires) == 0 ||
5029 (de = dictFind(db->expires,key)) == NULL) return 0;
5030
5031 /* Lookup the expire */
5032 when = (time_t) dictGetEntryVal(de);
5033 if (time(NULL) <= when) return 0;
5034
5035 /* Delete the key */
5036 dictDelete(db->expires,key);
5037 return dictDelete(db->dict,key) == DICT_OK;
5038}
5039
5040static int deleteIfVolatile(redisDb *db, robj *key) {
5041 dictEntry *de;
5042
5043 /* No expire? return ASAP */
5044 if (dictSize(db->expires) == 0 ||
5045 (de = dictFind(db->expires,key)) == NULL) return 0;
5046
5047 /* Delete the key */
0c66a471 5048 server.dirty++;
3305306f 5049 dictDelete(db->expires,key);
5050 return dictDelete(db->dict,key) == DICT_OK;
5051}
5052
802e8373 5053static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
3305306f 5054 dictEntry *de;
3305306f 5055
802e8373 5056 de = dictFind(c->db->dict,key);
3305306f 5057 if (de == NULL) {
5058 addReply(c,shared.czero);
5059 return;
5060 }
43e5ccdf 5061 if (seconds < 0) {
5062 if (deleteKey(c->db,key)) server.dirty++;
5063 addReply(c, shared.cone);
3305306f 5064 return;
5065 } else {
5066 time_t when = time(NULL)+seconds;
802e8373 5067 if (setExpire(c->db,key,when)) {
3305306f 5068 addReply(c,shared.cone);
77423026 5069 server.dirty++;
5070 } else {
3305306f 5071 addReply(c,shared.czero);
77423026 5072 }
3305306f 5073 return;
5074 }
5075}
5076
802e8373 5077static void expireCommand(redisClient *c) {
5078 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
5079}
5080
5081static void expireatCommand(redisClient *c) {
5082 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
5083}
5084
fd88489a 5085static void ttlCommand(redisClient *c) {
5086 time_t expire;
5087 int ttl = -1;
5088
5089 expire = getExpire(c->db,c->argv[1]);
5090 if (expire != -1) {
5091 ttl = (int) (expire-time(NULL));
5092 if (ttl < 0) ttl = -1;
5093 }
5094 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
5095}
5096
f6b141c5 5097static void msetGenericCommand(redisClient *c, int nx) {
5098 int j;
5099
5100 if ((c->argc % 2) == 0) {
5101 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
5102 return;
5103 }
5104 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
5105 * set nothing at all if at least one already key exists. */
5106 if (nx) {
5107 for (j = 1; j < c->argc; j += 2) {
5108 if (dictFind(c->db->dict,c->argv[j]) != NULL) {
5109 addReply(c, shared.czero);
5110 return;
5111 }
5112 }
5113 }
5114
5115 for (j = 1; j < c->argc; j += 2) {
2ed22c8b 5116 int retval;
5117
5118 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
5119 if (retval == DICT_ERR) {
5120 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
5121 incrRefCount(c->argv[j+1]);
5122 } else {
5123 incrRefCount(c->argv[j]);
5124 incrRefCount(c->argv[j+1]);
5125 }
f6b141c5 5126 removeExpire(c->db,c->argv[j]);
5127 }
5128 server.dirty += (c->argc-1)/2;
5129 addReply(c, nx ? shared.cone : shared.ok);
5130}
5131
5132static void msetCommand(redisClient *c) {
5133 msetGenericCommand(c,0);
5134}
5135
5136static void msetnxCommand(redisClient *c) {
5137 msetGenericCommand(c,1);
5138}
5139
ed9b544e 5140/* =============================== Replication ============================= */
5141
a4d1ba9a 5142static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5143 ssize_t nwritten, ret = size;
5144 time_t start = time(NULL);
5145
5146 timeout++;
5147 while(size) {
5148 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
5149 nwritten = write(fd,ptr,size);
5150 if (nwritten == -1) return -1;
5151 ptr += nwritten;
5152 size -= nwritten;
5153 }
5154 if ((time(NULL)-start) > timeout) {
5155 errno = ETIMEDOUT;
5156 return -1;
5157 }
5158 }
5159 return ret;
5160}
5161
a4d1ba9a 5162static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5163 ssize_t nread, totread = 0;
5164 time_t start = time(NULL);
5165
5166 timeout++;
5167 while(size) {
5168 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
5169 nread = read(fd,ptr,size);
5170 if (nread == -1) return -1;
5171 ptr += nread;
5172 size -= nread;
5173 totread += nread;
5174 }
5175 if ((time(NULL)-start) > timeout) {
5176 errno = ETIMEDOUT;
5177 return -1;
5178 }
5179 }
5180 return totread;
5181}
5182
/* Read a line from 'fd' one byte at a time into the buffer 'ptr' of
 * 'size' bytes. The terminating "\n" is not stored and a "\r" right before
 * it is overwritten with a nul byte; the buffer is always nul terminated.
 *
 * Returns the number of bytes stored before the newline was seen (a
 * stripped "\r" is still counted), the number of bytes stored so far if
 * the buffer fills up before a newline arrives, or -1 if syncRead() fails.
 * A non-positive 'size' stores nothing and returns 0. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    *ptr = '\0';
    size--; /* reserve room for the nul terminator */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
        }
        /* One slot of the buffer was consumed: without this decrement the
         * loop never ends on long lines and writes past the buffer. */
        size--;
    }
    return nread;
}
5203
5204static void syncCommand(redisClient *c) {
40d224a9 5205 /* ignore SYNC if aleady slave or in monitor mode */
5206 if (c->flags & REDIS_SLAVE) return;
5207
5208 /* SYNC can't be issued when the server has pending data to send to
5209 * the client about already issued commands. We need a fresh reply
5210 * buffer registering the differences between the BGSAVE and the current
5211 * dataset, so that we can copy to other slaves if needed. */
5212 if (listLength(c->reply) != 0) {
5213 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
5214 return;
5215 }
5216
5217 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
5218 /* Here we need to check if there is a background saving operation
5219 * in progress, or if it is required to start one */
5220 if (server.bgsaveinprogress) {
5221 /* Ok a background save is in progress. Let's check if it is a good
5222 * one for replication, i.e. if there is another slave that is
5223 * registering differences since the server forked to save */
5224 redisClient *slave;
5225 listNode *ln;
5226
6208b3a7 5227 listRewind(server.slaves);
5228 while((ln = listYield(server.slaves))) {
40d224a9 5229 slave = ln->value;
5230 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 5231 }
5232 if (ln) {
5233 /* Perfect, the server is already registering differences for
5234 * another slave. Set the right state, and copy the buffer. */
5235 listRelease(c->reply);
5236 c->reply = listDup(slave->reply);
40d224a9 5237 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5238 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
5239 } else {
5240 /* No way, we need to wait for the next BGSAVE in order to
5241 * register differences */
5242 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5243 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5244 }
5245 } else {
5246 /* Ok we don't have a BGSAVE in progress, let's start one */
5247 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
5248 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5249 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
5250 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
5251 return;
5252 }
5253 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5254 }
6208b3a7 5255 c->repldbfd = -1;
40d224a9 5256 c->flags |= REDIS_SLAVE;
5257 c->slaveseldb = 0;
6b47e12e 5258 listAddNodeTail(server.slaves,c);
40d224a9 5259 return;
5260}
5261
6208b3a7 5262static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
5263 redisClient *slave = privdata;
5264 REDIS_NOTUSED(el);
5265 REDIS_NOTUSED(mask);
5266 char buf[REDIS_IOBUF_LEN];
5267 ssize_t nwritten, buflen;
5268
5269 if (slave->repldboff == 0) {
5270 /* Write the bulk write count before to transfer the DB. In theory here
5271 * we don't know how much room there is in the output buffer of the
5272 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
5273 * operations) will never be smaller than the few bytes we need. */
5274 sds bulkcount;
5275
5276 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
5277 slave->repldbsize);
5278 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
5279 {
5280 sdsfree(bulkcount);
5281 freeClient(slave);
5282 return;
5283 }
5284 sdsfree(bulkcount);
5285 }
5286 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
5287 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
5288 if (buflen <= 0) {
5289 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
5290 (buflen == 0) ? "premature EOF" : strerror(errno));
5291 freeClient(slave);
5292 return;
5293 }
5294 if ((nwritten = write(fd,buf,buflen)) == -1) {
5295 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
5296 strerror(errno));
5297 freeClient(slave);
5298 return;
5299 }
5300 slave->repldboff += nwritten;
5301 if (slave->repldboff == slave->repldbsize) {
5302 close(slave->repldbfd);
5303 slave->repldbfd = -1;
5304 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5305 slave->replstate = REDIS_REPL_ONLINE;
5306 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
5307 sendReplyToClient, slave, NULL) == AE_ERR) {
5308 freeClient(slave);
5309 return;
5310 }
5311 addReplySds(slave,sdsempty());
5312 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
5313 }
5314}
ed9b544e 5315
a3b21203 5316/* This function is called at the end of every backgrond saving.
5317 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
5318 * otherwise REDIS_ERR is passed to the function.
5319 *
5320 * The goal of this function is to handle slaves waiting for a successful
5321 * background saving in order to perform non-blocking synchronization. */
5322static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 5323 listNode *ln;
5324 int startbgsave = 0;
ed9b544e 5325
6208b3a7 5326 listRewind(server.slaves);
5327 while((ln = listYield(server.slaves))) {
5328 redisClient *slave = ln->value;
ed9b544e 5329
6208b3a7 5330 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
5331 startbgsave = 1;
5332 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5333 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 5334 struct redis_stat buf;
6208b3a7 5335
5336 if (bgsaveerr != REDIS_OK) {
5337 freeClient(slave);
5338 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
5339 continue;
5340 }
5341 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 5342 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 5343 freeClient(slave);
5344 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
5345 continue;
5346 }
5347 slave->repldboff = 0;
5348 slave->repldbsize = buf.st_size;
5349 slave->replstate = REDIS_REPL_SEND_BULK;
5350 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5351 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave, NULL) == AE_ERR) {
5352 freeClient(slave);
5353 continue;
5354 }
5355 }
ed9b544e 5356 }
6208b3a7 5357 if (startbgsave) {
5358 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5359 listRewind(server.slaves);
5360 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
5361 while((ln = listYield(server.slaves))) {
5362 redisClient *slave = ln->value;
ed9b544e 5363
6208b3a7 5364 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
5365 freeClient(slave);
5366 }
5367 }
5368 }
ed9b544e 5369}
5370
5371static int syncWithMaster(void) {
d0ccebcf 5372 char buf[1024], tmpfile[256], authcmd[1024];
ed9b544e 5373 int dumpsize;
5374 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
5375 int dfd;
5376
5377 if (fd == -1) {
5378 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
5379 strerror(errno));
5380 return REDIS_ERR;
5381 }
d0ccebcf 5382
5383 /* AUTH with the master if required. */
5384 if(server.masterauth) {
5385 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
5386 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
5387 close(fd);
5388 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
5389 strerror(errno));
5390 return REDIS_ERR;
5391 }
5392 /* Read the AUTH result. */
5393 if (syncReadLine(fd,buf,1024,3600) == -1) {
5394 close(fd);
5395 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
5396 strerror(errno));
5397 return REDIS_ERR;
5398 }
5399 if (buf[0] != '+') {
5400 close(fd);
5401 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
5402 return REDIS_ERR;
5403 }
5404 }
5405
ed9b544e 5406 /* Issue the SYNC command */
5407 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
5408 close(fd);
5409 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
5410 strerror(errno));
5411 return REDIS_ERR;
5412 }
5413 /* Read the bulk write count */
8c4d91fc 5414 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 5415 close(fd);
5416 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
5417 strerror(errno));
5418 return REDIS_ERR;
5419 }
4aa701c1 5420 if (buf[0] != '$') {
5421 close(fd);
5422 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
5423 return REDIS_ERR;
5424 }
c937aa89 5425 dumpsize = atoi(buf+1);
ed9b544e 5426 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
5427 /* Read the bulk write data on a temp file */
5428 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
5429 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
5430 if (dfd == -1) {
5431 close(fd);
5432 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
5433 return REDIS_ERR;
5434 }
5435 while(dumpsize) {
5436 int nread, nwritten;
5437
5438 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
5439 if (nread == -1) {
5440 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
5441 strerror(errno));
5442 close(fd);
5443 close(dfd);
5444 return REDIS_ERR;
5445 }
5446 nwritten = write(dfd,buf,nread);
5447 if (nwritten == -1) {
5448 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
5449 close(fd);
5450 close(dfd);
5451 return REDIS_ERR;
5452 }
5453 dumpsize -= nread;
5454 }
5455 close(dfd);
5456 if (rename(tmpfile,server.dbfilename) == -1) {
5457 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
5458 unlink(tmpfile);
5459 close(fd);
5460 return REDIS_ERR;
5461 }
5462 emptyDb();
f78fd11b 5463 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 5464 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
5465 close(fd);
5466 return REDIS_ERR;
5467 }
5468 server.master = createClient(fd);
5469 server.master->flags |= REDIS_MASTER;
5470 server.replstate = REDIS_REPL_CONNECTED;
5471 return REDIS_OK;
5472}
5473
321b0e13 5474static void slaveofCommand(redisClient *c) {
5475 if (!strcasecmp(c->argv[1]->ptr,"no") &&
5476 !strcasecmp(c->argv[2]->ptr,"one")) {
5477 if (server.masterhost) {
5478 sdsfree(server.masterhost);
5479 server.masterhost = NULL;
5480 if (server.master) freeClient(server.master);
5481 server.replstate = REDIS_REPL_NONE;
5482 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
5483 }
5484 } else {
5485 sdsfree(server.masterhost);
5486 server.masterhost = sdsdup(c->argv[1]->ptr);
5487 server.masterport = atoi(c->argv[2]->ptr);
5488 if (server.master) freeClient(server.master);
5489 server.replstate = REDIS_REPL_CONNECT;
5490 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
5491 server.masterhost, server.masterport);
5492 }
5493 addReply(c,shared.ok);
5494}
5495
3fd78bcd 5496/* ============================ Maxmemory directive ======================== */
5497
5498/* This function gets called when 'maxmemory' is set on the config file to limit
5499 * the max memory used by the server, and we are out of memory.
5500 * This function will try to, in order:
5501 *
5502 * - Free objects from the free list
5503 * - Try to remove keys with an EXPIRE set
5504 *
5505 * It is not possible to free enough memory to reach used-memory < maxmemory
5506 * the server will start refusing commands that will enlarge even more the
5507 * memory usage.
5508 */
5509static void freeMemoryIfNeeded(void) {
5510 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
5511 if (listLength(server.objfreelist)) {
5512 robj *o;
5513
5514 listNode *head = listFirst(server.objfreelist);
5515 o = listNodeValue(head);
5516 listDelNode(server.objfreelist,head);
5517 zfree(o);
5518 } else {
5519 int j, k, freed = 0;
5520
5521 for (j = 0; j < server.dbnum; j++) {
5522 int minttl = -1;
5523 robj *minkey = NULL;
5524 struct dictEntry *de;
5525
5526 if (dictSize(server.db[j].expires)) {
5527 freed = 1;
5528 /* From a sample of three keys drop the one nearest to
5529 * the natural expire */
5530 for (k = 0; k < 3; k++) {
5531 time_t t;
5532
5533 de = dictGetRandomKey(server.db[j].expires);
5534 t = (time_t) dictGetEntryVal(de);
5535 if (minttl == -1 || t < minttl) {
5536 minkey = dictGetEntryKey(de);
5537 minttl = t;
5538 }
5539 }
5540 deleteKey(server.db+j,minkey);
5541 }
5542 }
5543 if (!freed) return; /* nothing to free... */
5544 }
5545 }
5546}
5547
f80dff62 5548/* ============================== Append Only file ========================== */
5549
5550static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
5551 sds buf = sdsempty();
5552 int j;
5553 ssize_t nwritten;
5554 time_t now;
5555 robj *tmpargv[3];
5556
5557 /* The DB this command was targetting is not the same as the last command
5558 * we appendend. To issue a SELECT command is needed. */
5559 if (dictid != server.appendseldb) {
5560 char seldb[64];
5561
5562 snprintf(seldb,sizeof(seldb),"%d",dictid);
5563 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
5564 strlen(seldb),seldb);
5565 server.appendseldb = dictid;
5566 }
5567
5568 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
5569 * EXPIREs into EXPIREATs calls */
5570 if (cmd->proc == expireCommand) {
5571 long when;
5572
5573 tmpargv[0] = createStringObject("EXPIREAT",8);
5574 tmpargv[1] = argv[1];
5575 incrRefCount(argv[1]);
5576 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
5577 tmpargv[2] = createObject(REDIS_STRING,
5578 sdscatprintf(sdsempty(),"%ld",when));
5579 argv = tmpargv;
5580 }
5581
5582 /* Append the actual command */
5583 buf = sdscatprintf(buf,"*%d\r\n",argc);
5584 for (j = 0; j < argc; j++) {
5585 robj *o = argv[j];
5586
5587 if (o->encoding != REDIS_ENCODING_RAW)
5588 o = getDecodedObject(o);
5589 buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr));
5590 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
5591 buf = sdscatlen(buf,"\r\n",2);
5592 if (o != argv[j])
5593 decrRefCount(o);
5594 }
5595
5596 /* Free the objects from the modified argv for EXPIREAT */
5597 if (cmd->proc == expireCommand) {
5598 for (j = 0; j < 3; j++)
5599 decrRefCount(argv[j]);
5600 }
5601
5602 /* We want to perform a single write. This should be guaranteed atomic
5603 * at least if the filesystem we are writing is a real physical one.
5604 * While this will save us against the server being killed I don't think
5605 * there is much to do about the whole server stopping for power problems
5606 * or alike */
5607 nwritten = write(server.appendfd,buf,sdslen(buf));
5608 if (nwritten != (signed)sdslen(buf)) {
5609 /* Ooops, we are in troubles. The best thing to do for now is
5610 * to simply exit instead to give the illusion that everything is
5611 * working as expected. */
5612 if (nwritten == -1) {
5613 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
5614 } else {
5615 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
5616 }
5617 exit(1);
5618 }
5619 now = time(NULL);
5620 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
5621 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
5622 now-server.lastfsync > 1))
5623 {
5624 fsync(server.appendfd); /* Let's try to get this data on the disk */
5625 server.lastfsync = now;
5626 }
5627}
5628
5629/* In Redis commands are always executed in the context of a client, so in
5630 * order to load the append only file we need to create a fake client. */
5631static struct redisClient *createFakeClient(void) {
5632 struct redisClient *c = zmalloc(sizeof(*c));
5633
5634 selectDb(c,0);
5635 c->fd = -1;
5636 c->querybuf = sdsempty();
5637 c->argc = 0;
5638 c->argv = NULL;
5639 c->flags = 0;
9387d17d 5640 /* We set the fake client as a slave waiting for the synchronization
5641 * so that Redis will not try to send replies to this client. */
5642 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
f80dff62 5643 c->reply = listCreate();
5644 listSetFreeMethod(c->reply,decrRefCount);
5645 listSetDupMethod(c->reply,dupClientReplyValue);
5646 return c;
5647}
5648
5649static void freeFakeClient(struct redisClient *c) {
5650 sdsfree(c->querybuf);
5651 listRelease(c->reply);
5652 zfree(c);
5653}
5654
5655/* Replay the append log file. On error REDIS_OK is returned. On non fatal
5656 * error (the append only file is zero-length) REDIS_ERR is returned. On
5657 * fatal error an error message is logged and the program exists. */
5658int loadAppendOnlyFile(char *filename) {
5659 struct redisClient *fakeClient;
5660 FILE *fp = fopen(filename,"r");
5661 struct redis_stat sb;
5662
5663 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
5664 return REDIS_ERR;
5665
5666 if (fp == NULL) {
5667 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
5668 exit(1);
5669 }
5670
5671 fakeClient = createFakeClient();
5672 while(1) {
5673 int argc, j;
5674 unsigned long len;
5675 robj **argv;
5676 char buf[128];
5677 sds argsds;
5678 struct redisCommand *cmd;
5679
5680 if (fgets(buf,sizeof(buf),fp) == NULL) {
5681 if (feof(fp))
5682 break;
5683 else
5684 goto readerr;
5685 }
5686 if (buf[0] != '*') goto fmterr;
5687 argc = atoi(buf+1);
5688 argv = zmalloc(sizeof(robj*)*argc);
5689 for (j = 0; j < argc; j++) {
5690 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
5691 if (buf[0] != '$') goto fmterr;
5692 len = strtol(buf+1,NULL,10);
5693 argsds = sdsnewlen(NULL,len);
5694 if (fread(argsds,len,1,fp) == 0) goto fmterr;
5695 argv[j] = createObject(REDIS_STRING,argsds);
5696 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
5697 }
5698
5699 /* Command lookup */
5700 cmd = lookupCommand(argv[0]->ptr);
5701 if (!cmd) {
5702 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
5703 exit(1);
5704 }
5705 /* Try object sharing and encoding */
5706 if (server.shareobjects) {
5707 int j;
5708 for(j = 1; j < argc; j++)
5709 argv[j] = tryObjectSharing(argv[j]);
5710 }
5711 if (cmd->flags & REDIS_CMD_BULK)
5712 tryObjectEncoding(argv[argc-1]);
5713 /* Run the command in the context of a fake client */
5714 fakeClient->argc = argc;
5715 fakeClient->argv = argv;
5716 cmd->proc(fakeClient);
5717 /* Discard the reply objects list from the fake client */
5718 while(listLength(fakeClient->reply))
5719 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
5720 /* Clean up, ready for the next command */
5721 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
5722 zfree(argv);
5723 }
5724 fclose(fp);
5725 freeFakeClient(fakeClient);
5726 return REDIS_OK;
5727
5728readerr:
5729 if (feof(fp)) {
5730 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
5731 } else {
5732 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
5733 }
5734 exit(1);
5735fmterr:
5736 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
5737 exit(1);
5738}
5739
7f957c92 5740/* ================================= Debugging ============================== */
5741
5742static void debugCommand(redisClient *c) {
5743 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
5744 *((char*)-1) = 'x';
333298da 5745 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
5746 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
5747 robj *key, *val;
5748
5749 if (!de) {
5750 addReply(c,shared.nokeyerr);
5751 return;
5752 }
5753 key = dictGetEntryKey(de);
5754 val = dictGetEntryVal(de);
5755 addReplySds(c,sdscatprintf(sdsempty(),
942a3961 5756 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
5757 key, key->refcount, val, val->refcount, val->encoding));
7f957c92 5758 } else {
333298da 5759 addReplySds(c,sdsnew(
5760 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>]\r\n"));
7f957c92 5761 }
5762}
56906eef 5763
/* =================================== Main! ================================ */
56906eef 5765
bcfc686d 5766#ifdef __linux__
/* Read the kernel overcommit policy from /proc/sys/vm/overcommit_memory.
 *
 * Returns the first line of that file converted with atoi(), or -1 when the
 * file cannot be opened or no line can be read from it. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,sizeof(line),procfile) != NULL) value = atoi(line);
    fclose(procfile);
    return value;
}
5780
5781void linuxOvercommitMemoryWarning(void) {
5782 if (linuxOvercommitMemoryValue() == 0) {
5783 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
5784 }
5785}
5786#endif /* __linux__ */
5787
5788static void daemonize(void) {
5789 int fd;
5790 FILE *fp;
5791
5792 if (fork() != 0) exit(0); /* parent exits */
5793 setsid(); /* create a new session */
5794
5795 /* Every output goes to /dev/null. If Redis is daemonized but
5796 * the 'logfile' is set to 'stdout' in the configuration file
5797 * it will not log at all. */
5798 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
5799 dup2(fd, STDIN_FILENO);
5800 dup2(fd, STDOUT_FILENO);
5801 dup2(fd, STDERR_FILENO);
5802 if (fd > STDERR_FILENO) close(fd);
5803 }
5804 /* Try to write the pid file */
5805 fp = fopen(server.pidfile,"w");
5806 if (fp) {
5807 fprintf(fp,"%d\n",getpid());
5808 fclose(fp);
56906eef 5809 }
56906eef 5810}
5811
bcfc686d 5812int main(int argc, char **argv) {
5813 initServerConfig();
5814 if (argc == 2) {
5815 resetServerSaveParams();
5816 loadServerConfig(argv[1]);
5817 } else if (argc > 2) {
5818 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
5819 exit(1);
5820 } else {
5821 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
5822 }
5823 initServer();
5824 if (server.daemonize) daemonize();
5825 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
5826#ifdef __linux__
5827 linuxOvercommitMemoryWarning();
5828#endif
5829 if (server.appendonly) {
5830 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
5831 redisLog(REDIS_NOTICE,"DB loaded from append only file");
5832 } else {
5833 if (rdbLoad(server.dbfilename) == REDIS_OK)
5834 redisLog(REDIS_NOTICE,"DB loaded from disk");
5835 }
5836 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
5837 acceptHandler, NULL, NULL) == AE_ERR) oom("creating file event");
5838 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
5839 aeMain(server.el);
5840 aeDeleteEventLoop(server.el);
5841 return 0;
5842}
5843
/* ============================= Backtrace support ========================= */
5845
5846#ifdef HAVE_BACKTRACE
5847static char *findFuncName(void *pointer, unsigned long *offset);
5848
/* Return the instruction pointer saved in the signal context 'uc', i.e. the
 * address that was executing when the signal was delivered, or NULL on
 * platforms where we don't know how to extract it.
 *
 * The layout of the machine context is platform specific, hence the
 * preprocessor ladder below. */
static void *getMcontextEip(ucontext_t *uc) {
#if defined(__FreeBSD__)
    return (void*) uc->uc_mcontext.mc_eip;
#elif defined(__dietlibc__)
    return (void*) uc->uc_mcontext.eip;
#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
    return (void*) uc->uc_mcontext->__ss.__eip;
#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
  #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
    return (void*) uc->uc_mcontext->__ss.__rip;
  #else
    return (void*) uc->uc_mcontext->__ss.__eip;
  #endif
#elif defined(__i386__) /* Linux x86, 32 bit */
    return (void*) uc->uc_mcontext.gregs[REG_EIP];
#elif defined(__linux__) && (defined(__X86_64__) || defined(__x86_64__))
    /* Linux x86, 64 bit. Compilers define __x86_64__ (lowercase), so the
     * previous test on __X86_64__ alone never matched and NULL was returned.
     * The 64 bit register set has no EIP slot: the instruction pointer is
     * RIP, index 16 in gregs (REG_RIP). The numeric index is used because
     * the REG_* names are only exposed by glibc with _GNU_SOURCE. */
    return (void*) uc->uc_mcontext.gregs[16];
#elif defined(__ia64__) /* Linux IA64 */
    return (void*) uc->uc_mcontext.sc_ip;
#else
    return NULL;
#endif
}
5870
5871static void segvHandler(int sig, siginfo_t *info, void *secret) {
5872 void *trace[100];
5873 char **messages = NULL;
5874 int i, trace_size = 0;
5875 unsigned long offset=0;
5876 time_t uptime = time(NULL)-server.stat_starttime;
5877 ucontext_t *uc = (ucontext_t*) secret;
5878 REDIS_NOTUSED(info);
5879
5880 redisLog(REDIS_WARNING,
5881 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
5882 redisLog(REDIS_WARNING, "%s", sdscatprintf(sdsempty(),
433cc893 5883 "redis_version:%s; "
56906eef 5884 "uptime_in_seconds:%d; "
433cc893 5885 "connected_clients:%d; "
5886 "connected_slaves:%d; "
5887 "used_memory:%zu; "
5888 "changes_since_last_save:%lld; "
5889 "bgsave_in_progress:%d; "
5890 "last_save_time:%d; "
5891 "total_connections_received:%lld; "
5892 "total_commands_processed:%lld; "
5893 "role:%s;"
5894 ,REDIS_VERSION,
5895 uptime,
5896 listLength(server.clients)-listLength(server.slaves),
5897 listLength(server.slaves),
5898 server.usedmemory,
5899 server.dirty,
5900 server.bgsaveinprogress,
5901 server.lastsave,
5902 server.stat_numconnections,
5903 server.stat_numcommands,
5904 server.masterhost == NULL ? "master" : "slave"
5905 ));
56906eef 5906
5907 trace_size = backtrace(trace, 100);
de96dbfe 5908 /* overwrite sigaction with caller's address */
b91cf5ef 5909 if (getMcontextEip(uc) != NULL) {
5910 trace[1] = getMcontextEip(uc);
5911 }
56906eef 5912 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 5913
d76412d1 5914 for (i=1; i<trace_size; ++i) {
56906eef 5915 char *fn = findFuncName(trace[i], &offset), *p;
5916
5917 p = strchr(messages[i],'+');
5918 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
5919 redisLog(REDIS_WARNING,"%s", messages[i]);
5920 } else {
5921 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
5922 }
5923 }
5924 free(messages);
5925 exit(0);
fe3bbfbe 5926}
56906eef 5927
5928static void setupSigSegvAction(void) {
5929 struct sigaction act;
5930
5931 sigemptyset (&act.sa_mask);
5932 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
5933 * is used. Otherwise, sa_handler is used */
5934 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
5935 act.sa_sigaction = segvHandler;
5936 sigaction (SIGSEGV, &act, NULL);
5937 sigaction (SIGBUS, &act, NULL);
12fea928 5938 sigaction (SIGFPE, &act, NULL);
5939 sigaction (SIGILL, &act, NULL);
5940 sigaction (SIGBUS, &act, NULL);
e65fdc78 5941 return;
56906eef 5942}
e65fdc78 5943
bcfc686d 5944#include "staticsymbols.h"
5945/* This function try to convert a pointer into a function name. It's used in
5946 * oreder to provide a backtrace under segmentation fault that's able to
5947 * display functions declared as static (otherwise the backtrace is useless). */
5948static char *findFuncName(void *pointer, unsigned long *offset){
5949 int i, ret = -1;
5950 unsigned long off, minoff = 0;
ed9b544e 5951
bcfc686d 5952 /* Try to match against the Symbol with the smallest offset */
5953 for (i=0; symsTable[i].pointer; i++) {
5954 unsigned long lp = (unsigned long) pointer;
0bc03378 5955
bcfc686d 5956 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
5957 off=lp-symsTable[i].pointer;
5958 if (ret < 0 || off < minoff) {
5959 minoff=off;
5960 ret=i;
5961 }
5962 }
0bc03378 5963 }
bcfc686d 5964 if (ret == -1) return NULL;
5965 *offset = minoff;
5966 return symsTable[ret].name;
0bc03378 5967}
bcfc686d 5968#else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined: no crash handler is
 * installed, so this is a no-op that only keeps callers compiling. */
static void setupSigSegvAction(void) {
    /* Intentionally empty. */
}
bcfc686d 5971#endif /* HAVE_BACKTRACE */
0bc03378 5972
ed9b544e 5973
ed9b544e 5974
/* The End */
5976
5977
ed9b544e 5978