]> git.saurik.com Git - redis.git/blame - redis.c
support for appendonly mode no, always, everysec
[redis.git] / redis.c
CommitLineData
ed9b544e 1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
1812e024 30#define REDIS_VERSION "1.050"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
c9468bcf 40#define __USE_POSIX199309
ed9b544e 41#include <signal.h>
fbf9bcdb 42
43#ifdef HAVE_BACKTRACE
c9468bcf 44#include <execinfo.h>
45#include <ucontext.h>
fbf9bcdb 46#endif /* HAVE_BACKTRACE */
47
ed9b544e 48#include <sys/wait.h>
49#include <errno.h>
50#include <assert.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <inttypes.h>
54#include <arpa/inet.h>
55#include <sys/stat.h>
56#include <fcntl.h>
57#include <sys/time.h>
58#include <sys/resource.h>
f78fd11b 59#include <limits.h>
a7866db6 60#include <math.h>
0bc1b2f6 61
62#if defined(__sun)
5043dff3 63#include "solarisfixes.h"
64#endif
ed9b544e 65
c9468bcf 66#include "redis.h"
ed9b544e 67#include "ae.h" /* Event driven programming library */
68#include "sds.h" /* Dynamic safe strings */
69#include "anet.h" /* Networking the easy way */
70#include "dict.h" /* Hash tables */
71#include "adlist.h" /* Linked lists */
72#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 73#include "lzf.h" /* LZF compression library */
74#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ed9b544e 75
76/* Error codes */
77#define REDIS_OK 0
78#define REDIS_ERR -1
79
80/* Static server configuration */
81#define REDIS_SERVERPORT 6379 /* TCP port */
82#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
6208b3a7 83#define REDIS_IOBUF_LEN 1024
ed9b544e 84#define REDIS_LOADBUF_LEN 1024
93ea3759 85#define REDIS_STATIC_ARGS 4
ed9b544e 86#define REDIS_DEFAULT_DBNUM 16
87#define REDIS_CONFIGLINE_MAX 1024
88#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
89#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94754ccc 90#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
6f376729 91#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
cd194638 92#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
ed9b544e 93
94/* Hash table parameters */
95#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
ed9b544e 96
97/* Command flags */
3fd78bcd 98#define REDIS_CMD_BULK 1 /* Bulk write command */
99#define REDIS_CMD_INLINE 2 /* Inline command */
100/* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with
101 this flags will return an error when the 'maxmemory' option is set in the
102 config file and the server is using more than maxmemory bytes of memory.
103 In short this commands are denied on low memory conditions. */
104#define REDIS_CMD_DENYOOM 4
ed9b544e 105
106/* Object types */
107#define REDIS_STRING 0
108#define REDIS_LIST 1
109#define REDIS_SET 2
1812e024 110#define REDIS_ZSET 3
111#define REDIS_HASH 4
f78fd11b 112
942a3961 113/* Objects encoding */
114#define REDIS_ENCODING_RAW 0 /* Raw representation */
115#define REDIS_ENCODING_INT 1 /* Encoded as integer */
116
f78fd11b 117/* Object types only used for dumping to disk */
bb32ede5 118#define REDIS_EXPIRETIME 253
ed9b544e 119#define REDIS_SELECTDB 254
120#define REDIS_EOF 255
121
f78fd11b 122/* Defines related to the dump file format. To store 32 bits lengths for short
123 * keys requires a lot of space, so we check the most significant 2 bits of
124 * the first byte to interpreter the length:
125 *
126 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
127 * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
128 * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
a4d1ba9a 129 * 11|000000 this means: specially encoded object will follow. The six bits
130 * number specify the kind of object that follows.
131 * See the REDIS_RDB_ENC_* defines.
f78fd11b 132 *
10c43610 133 * Lenghts up to 63 are stored using a single byte, most DB keys, and may
134 * values, will fit inside. */
f78fd11b 135#define REDIS_RDB_6BITLEN 0
136#define REDIS_RDB_14BITLEN 1
137#define REDIS_RDB_32BITLEN 2
17be1a4a 138#define REDIS_RDB_ENCVAL 3
f78fd11b 139#define REDIS_RDB_LENERR UINT_MAX
140
a4d1ba9a 141/* When a length of a string object stored on disk has the first two bits
142 * set, the remaining two bits specify a special encoding for the object
143 * accordingly to the following defines: */
144#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
145#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
146#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
774e3047 147#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
a4d1ba9a 148
ed9b544e 149/* Client flags */
150#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
151#define REDIS_SLAVE 2 /* This client is a slave server */
152#define REDIS_MASTER 4 /* This client is a master server */
87eca727 153#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
ed9b544e 154
40d224a9 155/* Slave replication state - slave side */
ed9b544e 156#define REDIS_REPL_NONE 0 /* No active replication */
157#define REDIS_REPL_CONNECT 1 /* Must connect to master */
158#define REDIS_REPL_CONNECTED 2 /* Connected to master */
159
40d224a9 160/* Slave replication state - from the point of view of master
161 * Note that in SEND_BULK and ONLINE state the slave receives new updates
162 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
163 * to start the next background saving in order to send updates to it. */
164#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
165#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
166#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
167#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
168
ed9b544e 169/* List related stuff */
170#define REDIS_HEAD 0
171#define REDIS_TAIL 1
172
173/* Sort operations */
174#define REDIS_SORT_GET 0
175#define REDIS_SORT_DEL 1
176#define REDIS_SORT_INCR 2
177#define REDIS_SORT_DECR 3
178#define REDIS_SORT_ASC 4
179#define REDIS_SORT_DESC 5
180#define REDIS_SORTKEY_MAX 1024
181
182/* Log levels */
183#define REDIS_DEBUG 0
184#define REDIS_NOTICE 1
185#define REDIS_WARNING 2
186
187/* Anti-warning macro... */
188#define REDIS_NOTUSED(V) ((void) V)
189
6b47e12e 190#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
191#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
ed9b544e 192
48f0308a 193/* Append only defines */
194#define APPENDFSYNC_NO 0
195#define APPENDFSYNC_ALWAYS 1
196#define APPENDFSYNC_EVERYSEC 2
197
ed9b544e 198/*================================= Data types ============================== */
199
200/* A redis object, that is a type able to hold a string / list / set */
201typedef struct redisObject {
ed9b544e 202 void *ptr;
942a3961 203 unsigned char type;
204 unsigned char encoding;
205 unsigned char notused[2];
ed9b544e 206 int refcount;
207} robj;
208
3305306f 209typedef struct redisDb {
210 dict *dict;
211 dict *expires;
212 int id;
213} redisDb;
214
ed9b544e 215/* With multiplexing we need to take per-clinet state.
216 * Clients are taken in a liked list. */
217typedef struct redisClient {
218 int fd;
3305306f 219 redisDb *db;
ed9b544e 220 int dictid;
221 sds querybuf;
e8a74421 222 robj **argv, **mbargv;
223 int argc, mbargc;
40d224a9 224 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 225 int multibulk; /* multi bulk command format active */
ed9b544e 226 list *reply;
227 int sentlen;
228 time_t lastinteraction; /* time of the last interaction, used for timeout */
40d224a9 229 int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */
230 int slaveseldb; /* slave selected db, if this client is a slave */
231 int authenticated; /* when requirepass is non-NULL */
232 int replstate; /* replication state if this is a slave */
233 int repldbfd; /* replication DB file descriptor */
6208b3a7 234 long repldboff; /* replication DB file offset */
40d224a9 235 off_t repldbsize; /* replication DB file size */
ed9b544e 236} redisClient;
237
238struct saveparam {
239 time_t seconds;
240 int changes;
241};
242
243/* Global server state structure */
244struct redisServer {
245 int port;
246 int fd;
3305306f 247 redisDb *db;
10c43610 248 dict *sharingpool;
249 unsigned int sharingpoolsize;
ed9b544e 250 long long dirty; /* changes to DB from the last save */
251 list *clients;
87eca727 252 list *slaves, *monitors;
ed9b544e 253 char neterr[ANET_ERR_LEN];
254 aeEventLoop *el;
255 int cronloops; /* number of times the cron function run */
256 list *objfreelist; /* A list of freed objects to avoid malloc() */
257 time_t lastsave; /* Unix time of last save succeeede */
5fba9f71 258 size_t usedmemory; /* Used memory in megabytes */
ed9b544e 259 /* Fields used only for stats */
260 time_t stat_starttime; /* server start time */
261 long long stat_numcommands; /* number of processed commands */
262 long long stat_numconnections; /* number of connections received */
263 /* Configuration */
264 int verbosity;
265 int glueoutputbuf;
266 int maxidletime;
267 int dbnum;
268 int daemonize;
44b38ef4 269 int appendonly;
48f0308a 270 int appendfsync;
271 time_t lastfsync;
44b38ef4 272 int appendfd;
273 int appendseldb;
ed329fcf 274 char *pidfile;
ed9b544e 275 int bgsaveinprogress;
9f3c422c 276 pid_t bgsavechildpid;
ed9b544e 277 struct saveparam *saveparams;
278 int saveparamslen;
279 char *logfile;
280 char *bindaddr;
281 char *dbfilename;
44b38ef4 282 char *appendfilename;
abcb223e 283 char *requirepass;
10c43610 284 int shareobjects;
ed9b544e 285 /* Replication related */
286 int isslave;
287 char *masterhost;
288 int masterport;
40d224a9 289 redisClient *master; /* client that is master for this slave */
ed9b544e 290 int replstate;
285add55 291 unsigned int maxclients;
d4465900 292 unsigned long maxmemory;
ed9b544e 293 /* Sort parameters - qsort_r() is only available under BSD so we
294 * have to take this state global, in order to pass it to sortCompare() */
295 int sort_desc;
296 int sort_alpha;
297 int sort_bypattern;
298};
299
300typedef void redisCommandProc(redisClient *c);
301struct redisCommand {
302 char *name;
303 redisCommandProc *proc;
304 int arity;
305 int flags;
306};
307
de96dbfe 308struct redisFunctionSym {
309 char *name;
56906eef 310 unsigned long pointer;
de96dbfe 311};
312
ed9b544e 313typedef struct _redisSortObject {
314 robj *obj;
315 union {
316 double score;
317 robj *cmpobj;
318 } u;
319} redisSortObject;
320
321typedef struct _redisSortOperation {
322 int type;
323 robj *pattern;
324} redisSortOperation;
325
6b47e12e 326/* ZSETs use a specialized version of Skiplists */
327
328typedef struct zskiplistNode {
329 struct zskiplistNode **forward;
e3870fab 330 struct zskiplistNode *backward;
6b47e12e 331 double score;
332 robj *obj;
333} zskiplistNode;
334
335typedef struct zskiplist {
e3870fab 336 struct zskiplistNode *header, *tail;
d13f767c 337 unsigned long length;
6b47e12e 338 int level;
339} zskiplist;
340
1812e024 341typedef struct zset {
342 dict *dict;
6b47e12e 343 zskiplist *zsl;
1812e024 344} zset;
345
6b47e12e 346/* Our shared "common" objects */
347
ed9b544e 348struct sharedObjectsStruct {
c937aa89 349 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
7b45bfb2 350 *colon, *nullbulk, *nullmultibulk,
c937aa89 351 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
352 *outofrangeerr, *plus,
ed9b544e 353 *select0, *select1, *select2, *select3, *select4,
354 *select5, *select6, *select7, *select8, *select9;
355} shared;
356
a7866db6 357/* Global vars that are actally used as constants. The following double
358 * values are used for double on-disk serialization, and are initialized
359 * at runtime to avoid strange compiler optimizations. */
360
361static double R_Zero, R_PosInf, R_NegInf, R_Nan;
362
ed9b544e 363/*================================ Prototypes =============================== */
364
365static void freeStringObject(robj *o);
366static void freeListObject(robj *o);
367static void freeSetObject(robj *o);
368static void decrRefCount(void *o);
369static robj *createObject(int type, void *ptr);
370static void freeClient(redisClient *c);
f78fd11b 371static int rdbLoad(char *filename);
ed9b544e 372static void addReply(redisClient *c, robj *obj);
373static void addReplySds(redisClient *c, sds s);
374static void incrRefCount(robj *o);
f78fd11b 375static int rdbSaveBackground(char *filename);
ed9b544e 376static robj *createStringObject(char *ptr, size_t len);
87eca727 377static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
44b38ef4 378static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 379static int syncWithMaster(void);
10c43610 380static robj *tryObjectSharing(robj *o);
942a3961 381static int tryObjectEncoding(robj *o);
382static robj *getDecodedObject(const robj *o);
3305306f 383static int removeExpire(redisDb *db, robj *key);
384static int expireIfNeeded(redisDb *db, robj *key);
385static int deleteIfVolatile(redisDb *db, robj *key);
94754ccc 386static int deleteKey(redisDb *db, robj *key);
bb32ede5 387static time_t getExpire(redisDb *db, robj *key);
388static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 389static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 390static void freeMemoryIfNeeded(void);
de96dbfe 391static int processCommand(redisClient *c);
56906eef 392static void setupSigSegvAction(void);
a3b21203 393static void rdbRemoveTempFile(pid_t childpid);
0ea663ea 394static size_t stringObjectLen(robj *o);
638e42ac 395static void processInputBuffer(redisClient *c);
6b47e12e 396static zskiplist *zslCreate(void);
fd8ccf44 397static void zslFree(zskiplist *zsl);
2b59cfdf 398static void zslInsert(zskiplist *zsl, double score, robj *obj);
ed9b544e 399
abcb223e 400static void authCommand(redisClient *c);
ed9b544e 401static void pingCommand(redisClient *c);
402static void echoCommand(redisClient *c);
403static void setCommand(redisClient *c);
404static void setnxCommand(redisClient *c);
405static void getCommand(redisClient *c);
406static void delCommand(redisClient *c);
407static void existsCommand(redisClient *c);
408static void incrCommand(redisClient *c);
409static void decrCommand(redisClient *c);
410static void incrbyCommand(redisClient *c);
411static void decrbyCommand(redisClient *c);
412static void selectCommand(redisClient *c);
413static void randomkeyCommand(redisClient *c);
414static void keysCommand(redisClient *c);
415static void dbsizeCommand(redisClient *c);
416static void lastsaveCommand(redisClient *c);
417static void saveCommand(redisClient *c);
418static void bgsaveCommand(redisClient *c);
419static void shutdownCommand(redisClient *c);
420static void moveCommand(redisClient *c);
421static void renameCommand(redisClient *c);
422static void renamenxCommand(redisClient *c);
423static void lpushCommand(redisClient *c);
424static void rpushCommand(redisClient *c);
425static void lpopCommand(redisClient *c);
426static void rpopCommand(redisClient *c);
427static void llenCommand(redisClient *c);
428static void lindexCommand(redisClient *c);
429static void lrangeCommand(redisClient *c);
430static void ltrimCommand(redisClient *c);
431static void typeCommand(redisClient *c);
432static void lsetCommand(redisClient *c);
433static void saddCommand(redisClient *c);
434static void sremCommand(redisClient *c);
a4460ef4 435static void smoveCommand(redisClient *c);
ed9b544e 436static void sismemberCommand(redisClient *c);
437static void scardCommand(redisClient *c);
12fea928 438static void spopCommand(redisClient *c);
2abb95a9 439static void srandmemberCommand(redisClient *c);
ed9b544e 440static void sinterCommand(redisClient *c);
441static void sinterstoreCommand(redisClient *c);
40d224a9 442static void sunionCommand(redisClient *c);
443static void sunionstoreCommand(redisClient *c);
f4f56e1d 444static void sdiffCommand(redisClient *c);
445static void sdiffstoreCommand(redisClient *c);
ed9b544e 446static void syncCommand(redisClient *c);
447static void flushdbCommand(redisClient *c);
448static void flushallCommand(redisClient *c);
449static void sortCommand(redisClient *c);
450static void lremCommand(redisClient *c);
451static void infoCommand(redisClient *c);
70003d28 452static void mgetCommand(redisClient *c);
87eca727 453static void monitorCommand(redisClient *c);
3305306f 454static void expireCommand(redisClient *c);
802e8373 455static void expireatCommand(redisClient *c);
f6b141c5 456static void getsetCommand(redisClient *c);
fd88489a 457static void ttlCommand(redisClient *c);
321b0e13 458static void slaveofCommand(redisClient *c);
7f957c92 459static void debugCommand(redisClient *c);
f6b141c5 460static void msetCommand(redisClient *c);
461static void msetnxCommand(redisClient *c);
fd8ccf44 462static void zaddCommand(redisClient *c);
cc812361 463static void zrangeCommand(redisClient *c);
50c55df5 464static void zrangebyscoreCommand(redisClient *c);
e3870fab 465static void zrevrangeCommand(redisClient *c);
3c41331e 466static void zcardCommand(redisClient *c);
1b7106e7 467static void zremCommand(redisClient *c);
6e333bbe 468static void zscoreCommand(redisClient *c);
1807985b 469static void zremrangebyscoreCommand(redisClient *c);
f6b141c5 470
ed9b544e 471/*================================= Globals ================================= */
472
473/* Global vars */
474static struct redisServer server; /* server global state */
475static struct redisCommand cmdTable[] = {
476 {"get",getCommand,2,REDIS_CMD_INLINE},
3fd78bcd 477 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
478 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
5109cdff 479 {"del",delCommand,-2,REDIS_CMD_INLINE},
ed9b544e 480 {"exists",existsCommand,2,REDIS_CMD_INLINE},
3fd78bcd 481 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
482 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
70003d28 483 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
3fd78bcd 484 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
485 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 486 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
487 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
488 {"llen",llenCommand,2,REDIS_CMD_INLINE},
489 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
3fd78bcd 490 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 491 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
492 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
493 {"lrem",lremCommand,4,REDIS_CMD_BULK},
3fd78bcd 494 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 495 {"srem",sremCommand,3,REDIS_CMD_BULK},
a4460ef4 496 {"smove",smoveCommand,4,REDIS_CMD_BULK},
ed9b544e 497 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
498 {"scard",scardCommand,2,REDIS_CMD_INLINE},
12fea928 499 {"spop",spopCommand,2,REDIS_CMD_INLINE},
2abb95a9 500 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
3fd78bcd 501 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
502 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
503 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
504 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
505 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
506 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 507 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
fd8ccf44 508 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
1b7106e7 509 {"zrem",zremCommand,3,REDIS_CMD_BULK},
1807985b 510 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
cc812361 511 {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
50c55df5 512 {"zrangebyscore",zrangebyscoreCommand,4,REDIS_CMD_INLINE},
e3870fab 513 {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
3c41331e 514 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
6e333bbe 515 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 516 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
517 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
f6b141c5 518 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
519 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
520 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 521 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
522 {"select",selectCommand,2,REDIS_CMD_INLINE},
523 {"move",moveCommand,3,REDIS_CMD_INLINE},
524 {"rename",renameCommand,3,REDIS_CMD_INLINE},
525 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
321b0e13 526 {"expire",expireCommand,3,REDIS_CMD_INLINE},
802e8373 527 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
ed9b544e 528 {"keys",keysCommand,2,REDIS_CMD_INLINE},
529 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
abcb223e 530 {"auth",authCommand,2,REDIS_CMD_INLINE},
ed9b544e 531 {"ping",pingCommand,1,REDIS_CMD_INLINE},
532 {"echo",echoCommand,2,REDIS_CMD_BULK},
533 {"save",saveCommand,1,REDIS_CMD_INLINE},
534 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
535 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
536 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
537 {"type",typeCommand,2,REDIS_CMD_INLINE},
538 {"sync",syncCommand,1,REDIS_CMD_INLINE},
539 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
540 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
3fd78bcd 541 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 542 {"info",infoCommand,1,REDIS_CMD_INLINE},
87eca727 543 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
fd88489a 544 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
321b0e13 545 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
7f957c92 546 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
ed9b544e 547 {NULL,NULL,0,0}
548};
ed9b544e 549/*============================ Utility functions ============================ */
550
551/* Glob-style pattern matching. */
552int stringmatchlen(const char *pattern, int patternLen,
553 const char *string, int stringLen, int nocase)
554{
555 while(patternLen) {
556 switch(pattern[0]) {
557 case '*':
558 while (pattern[1] == '*') {
559 pattern++;
560 patternLen--;
561 }
562 if (patternLen == 1)
563 return 1; /* match */
564 while(stringLen) {
565 if (stringmatchlen(pattern+1, patternLen-1,
566 string, stringLen, nocase))
567 return 1; /* match */
568 string++;
569 stringLen--;
570 }
571 return 0; /* no match */
572 break;
573 case '?':
574 if (stringLen == 0)
575 return 0; /* no match */
576 string++;
577 stringLen--;
578 break;
579 case '[':
580 {
581 int not, match;
582
583 pattern++;
584 patternLen--;
585 not = pattern[0] == '^';
586 if (not) {
587 pattern++;
588 patternLen--;
589 }
590 match = 0;
591 while(1) {
592 if (pattern[0] == '\\') {
593 pattern++;
594 patternLen--;
595 if (pattern[0] == string[0])
596 match = 1;
597 } else if (pattern[0] == ']') {
598 break;
599 } else if (patternLen == 0) {
600 pattern--;
601 patternLen++;
602 break;
603 } else if (pattern[1] == '-' && patternLen >= 3) {
604 int start = pattern[0];
605 int end = pattern[2];
606 int c = string[0];
607 if (start > end) {
608 int t = start;
609 start = end;
610 end = t;
611 }
612 if (nocase) {
613 start = tolower(start);
614 end = tolower(end);
615 c = tolower(c);
616 }
617 pattern += 2;
618 patternLen -= 2;
619 if (c >= start && c <= end)
620 match = 1;
621 } else {
622 if (!nocase) {
623 if (pattern[0] == string[0])
624 match = 1;
625 } else {
626 if (tolower((int)pattern[0]) == tolower((int)string[0]))
627 match = 1;
628 }
629 }
630 pattern++;
631 patternLen--;
632 }
633 if (not)
634 match = !match;
635 if (!match)
636 return 0; /* no match */
637 string++;
638 stringLen--;
639 break;
640 }
641 case '\\':
642 if (patternLen >= 2) {
643 pattern++;
644 patternLen--;
645 }
646 /* fall through */
647 default:
648 if (!nocase) {
649 if (pattern[0] != string[0])
650 return 0; /* no match */
651 } else {
652 if (tolower((int)pattern[0]) != tolower((int)string[0]))
653 return 0; /* no match */
654 }
655 string++;
656 stringLen--;
657 break;
658 }
659 pattern++;
660 patternLen--;
661 if (stringLen == 0) {
662 while(*pattern == '*') {
663 pattern++;
664 patternLen--;
665 }
666 break;
667 }
668 }
669 if (patternLen == 0 && stringLen == 0)
670 return 1;
671 return 0;
672}
673
56906eef 674static void redisLog(int level, const char *fmt, ...) {
ed9b544e 675 va_list ap;
676 FILE *fp;
677
678 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
679 if (!fp) return;
680
681 va_start(ap, fmt);
682 if (level >= server.verbosity) {
683 char *c = ".-*";
1904ecc1 684 char buf[64];
685 time_t now;
686
687 now = time(NULL);
6c9385e0 688 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
1904ecc1 689 fprintf(fp,"%s %c ",buf,c[level]);
ed9b544e 690 vfprintf(fp, fmt, ap);
691 fprintf(fp,"\n");
692 fflush(fp);
693 }
694 va_end(ap);
695
696 if (server.logfile) fclose(fp);
697}
698
699/*====================== Hash table type implementation ==================== */
700
701/* This is an hash table type that uses the SDS dynamic strings libary as
702 * keys and radis objects as values (objects can hold SDS strings,
703 * lists, sets). */
704
1812e024 705static void dictVanillaFree(void *privdata, void *val)
706{
707 DICT_NOTUSED(privdata);
708 zfree(val);
709}
710
ed9b544e 711static int sdsDictKeyCompare(void *privdata, const void *key1,
712 const void *key2)
713{
714 int l1,l2;
715 DICT_NOTUSED(privdata);
716
717 l1 = sdslen((sds)key1);
718 l2 = sdslen((sds)key2);
719 if (l1 != l2) return 0;
720 return memcmp(key1, key2, l1) == 0;
721}
722
723static void dictRedisObjectDestructor(void *privdata, void *val)
724{
725 DICT_NOTUSED(privdata);
726
727 decrRefCount(val);
728}
729
942a3961 730static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 731 const void *key2)
732{
733 const robj *o1 = key1, *o2 = key2;
734 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
735}
736
942a3961 737static unsigned int dictObjHash(const void *key) {
ed9b544e 738 const robj *o = key;
739 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
740}
741
942a3961 742static int dictEncObjKeyCompare(void *privdata, const void *key1,
743 const void *key2)
744{
745 const robj *o1 = key1, *o2 = key2;
746
747 if (o1->encoding == REDIS_ENCODING_RAW &&
748 o2->encoding == REDIS_ENCODING_RAW)
749 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
750 else {
751 robj *dec1, *dec2;
752 int cmp;
753
754 dec1 = o1->encoding != REDIS_ENCODING_RAW ?
755 getDecodedObject(o1) : (robj*)o1;
756 dec2 = o2->encoding != REDIS_ENCODING_RAW ?
757 getDecodedObject(o2) : (robj*)o2;
758 cmp = sdsDictKeyCompare(privdata,dec1->ptr,dec2->ptr);
759 if (dec1 != o1) decrRefCount(dec1);
760 if (dec2 != o2) decrRefCount(dec2);
761 return cmp;
762 }
763}
764
765static unsigned int dictEncObjHash(const void *key) {
766 const robj *o = key;
767
768 if (o->encoding == REDIS_ENCODING_RAW)
769 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
770 else {
771 robj *dec = getDecodedObject(o);
772 unsigned int hash = dictGenHashFunction(dec->ptr, sdslen((sds)dec->ptr));
773 decrRefCount(dec);
774 return hash;
775 }
776}
777
ed9b544e 778static dictType setDictType = {
942a3961 779 dictEncObjHash, /* hash function */
ed9b544e 780 NULL, /* key dup */
781 NULL, /* val dup */
942a3961 782 dictEncObjKeyCompare, /* key compare */
ed9b544e 783 dictRedisObjectDestructor, /* key destructor */
784 NULL /* val destructor */
785};
786
1812e024 787static dictType zsetDictType = {
788 dictEncObjHash, /* hash function */
789 NULL, /* key dup */
790 NULL, /* val dup */
791 dictEncObjKeyCompare, /* key compare */
792 dictRedisObjectDestructor, /* key destructor */
793 dictVanillaFree /* val destructor */
794};
795
ed9b544e 796static dictType hashDictType = {
942a3961 797 dictObjHash, /* hash function */
ed9b544e 798 NULL, /* key dup */
799 NULL, /* val dup */
942a3961 800 dictObjKeyCompare, /* key compare */
ed9b544e 801 dictRedisObjectDestructor, /* key destructor */
802 dictRedisObjectDestructor /* val destructor */
803};
804
805/* ========================= Random utility functions ======================= */
806
807/* Redis generally does not try to recover from out of memory conditions
808 * when allocating objects or strings, it is not clear if it will be possible
809 * to report this condition to the client since the networking layer itself
810 * is based on heap allocation for send buffers, so we simply abort.
811 * At least the code will be simpler to read... */
812static void oom(const char *msg) {
813 fprintf(stderr, "%s: Out of memory\n",msg);
814 fflush(stderr);
815 sleep(1);
816 abort();
817}
818
819/* ====================== Redis server networking stuff ===================== */
56906eef 820static void closeTimedoutClients(void) {
ed9b544e 821 redisClient *c;
ed9b544e 822 listNode *ln;
823 time_t now = time(NULL);
824
6208b3a7 825 listRewind(server.clients);
826 while ((ln = listYield(server.clients)) != NULL) {
ed9b544e 827 c = listNodeValue(ln);
828 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 829 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
ed9b544e 830 (now - c->lastinteraction > server.maxidletime)) {
831 redisLog(REDIS_DEBUG,"Closing idle client");
832 freeClient(c);
833 }
834 }
ed9b544e 835}
836
12fea928 837static int htNeedsResize(dict *dict) {
838 long long size, used;
839
840 size = dictSlots(dict);
841 used = dictSize(dict);
842 return (size && used && size > DICT_HT_INITIAL_SIZE &&
843 (used*100/size < REDIS_HT_MINFILL));
844}
845
0bc03378 846/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
847 * we resize the hash table to save memory */
56906eef 848static void tryResizeHashTables(void) {
0bc03378 849 int j;
850
851 for (j = 0; j < server.dbnum; j++) {
12fea928 852 if (htNeedsResize(server.db[j].dict)) {
853 redisLog(REDIS_DEBUG,"The hash table %d is too sparse, resize it...",j);
0bc03378 854 dictResize(server.db[j].dict);
12fea928 855 redisLog(REDIS_DEBUG,"Hash table %d resized.",j);
0bc03378 856 }
12fea928 857 if (htNeedsResize(server.db[j].expires))
858 dictResize(server.db[j].expires);
0bc03378 859 }
860}
861
56906eef 862static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 863 int j, loops = server.cronloops++;
ed9b544e 864 REDIS_NOTUSED(eventLoop);
865 REDIS_NOTUSED(id);
866 REDIS_NOTUSED(clientData);
867
868 /* Update the global state with the amount of used memory */
869 server.usedmemory = zmalloc_used_memory();
870
0bc03378 871 /* Show some info about non-empty databases */
ed9b544e 872 for (j = 0; j < server.dbnum; j++) {
dec423d9 873 long long size, used, vkeys;
94754ccc 874
3305306f 875 size = dictSlots(server.db[j].dict);
876 used = dictSize(server.db[j].dict);
94754ccc 877 vkeys = dictSize(server.db[j].expires);
c3cb078d 878 if (!(loops % 5) && (used || vkeys)) {
879 redisLog(REDIS_DEBUG,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 880 /* dictPrintStats(server.dict); */
ed9b544e 881 }
ed9b544e 882 }
883
0bc03378 884 /* We don't want to resize the hash tables while a bacground saving
885 * is in progress: the saving child is created using fork() that is
886 * implemented with a copy-on-write semantic in most modern systems, so
887 * if we resize the HT while there is the saving child at work actually
888 * a lot of memory movements in the parent will cause a lot of pages
889 * copied. */
890 if (!server.bgsaveinprogress) tryResizeHashTables();
891
ed9b544e 892 /* Show information about connected clients */
893 if (!(loops % 5)) {
21aecf4b 894 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
ed9b544e 895 listLength(server.clients)-listLength(server.slaves),
896 listLength(server.slaves),
10c43610 897 server.usedmemory,
3305306f 898 dictSize(server.sharingpool));
ed9b544e 899 }
900
901 /* Close connections of timedout clients */
0150db36 902 if (server.maxidletime && !(loops % 10))
ed9b544e 903 closeTimedoutClients();
904
905 /* Check if a background saving in progress terminated */
906 if (server.bgsaveinprogress) {
907 int statloc;
908 if (wait4(-1,&statloc,WNOHANG,NULL)) {
909 int exitcode = WEXITSTATUS(statloc);
9f3c422c 910 int bysignal = WIFSIGNALED(statloc);
911
912 if (!bysignal && exitcode == 0) {
ed9b544e 913 redisLog(REDIS_NOTICE,
914 "Background saving terminated with success");
915 server.dirty = 0;
916 server.lastsave = time(NULL);
9f3c422c 917 } else if (!bysignal && exitcode != 0) {
918 redisLog(REDIS_WARNING, "Background saving error");
ed9b544e 919 } else {
920 redisLog(REDIS_WARNING,
9f3c422c 921 "Background saving terminated by signal");
a3b21203 922 rdbRemoveTempFile(server.bgsavechildpid);
ed9b544e 923 }
924 server.bgsaveinprogress = 0;
9f3c422c 925 server.bgsavechildpid = -1;
a3b21203 926 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
ed9b544e 927 }
928 } else {
929 /* If there is not a background saving in progress check if
930 * we have to save now */
931 time_t now = time(NULL);
932 for (j = 0; j < server.saveparamslen; j++) {
933 struct saveparam *sp = server.saveparams+j;
934
935 if (server.dirty >= sp->changes &&
936 now-server.lastsave > sp->seconds) {
937 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
938 sp->changes, sp->seconds);
f78fd11b 939 rdbSaveBackground(server.dbfilename);
ed9b544e 940 break;
941 }
942 }
943 }
94754ccc 944
945 /* Try to expire a few timed out keys */
946 for (j = 0; j < server.dbnum; j++) {
947 redisDb *db = server.db+j;
948 int num = dictSize(db->expires);
949
950 if (num) {
951 time_t now = time(NULL);
952
953 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
954 num = REDIS_EXPIRELOOKUPS_PER_CRON;
955 while (num--) {
956 dictEntry *de;
957 time_t t;
958
959 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
960 t = (time_t) dictGetEntryVal(de);
961 if (now > t) {
962 deleteKey(db,dictGetEntryKey(de));
963 }
964 }
965 }
966 }
967
ed9b544e 968 /* Check if we should connect to a MASTER */
969 if (server.replstate == REDIS_REPL_CONNECT) {
970 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
971 if (syncWithMaster() == REDIS_OK) {
972 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
973 }
974 }
975 return 1000;
976}
977
978static void createSharedObjects(void) {
979 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
980 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
981 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 982 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
983 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
984 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
985 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
986 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
987 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 988 /* no such key */
ed9b544e 989 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
990 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
991 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 992 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
993 "-ERR no such key\r\n"));
ed9b544e 994 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
995 "-ERR syntax error\r\n"));
c937aa89 996 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
997 "-ERR source and destination objects are the same\r\n"));
998 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
999 "-ERR index out of range\r\n"));
ed9b544e 1000 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1001 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1002 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1003 shared.select0 = createStringObject("select 0\r\n",10);
1004 shared.select1 = createStringObject("select 1\r\n",10);
1005 shared.select2 = createStringObject("select 2\r\n",10);
1006 shared.select3 = createStringObject("select 3\r\n",10);
1007 shared.select4 = createStringObject("select 4\r\n",10);
1008 shared.select5 = createStringObject("select 5\r\n",10);
1009 shared.select6 = createStringObject("select 6\r\n",10);
1010 shared.select7 = createStringObject("select 7\r\n",10);
1011 shared.select8 = createStringObject("select 8\r\n",10);
1012 shared.select9 = createStringObject("select 9\r\n",10);
1013}
1014
1015static void appendServerSaveParams(time_t seconds, int changes) {
1016 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1017 server.saveparams[server.saveparamslen].seconds = seconds;
1018 server.saveparams[server.saveparamslen].changes = changes;
1019 server.saveparamslen++;
1020}
1021
1022static void ResetServerSaveParams() {
1023 zfree(server.saveparams);
1024 server.saveparams = NULL;
1025 server.saveparamslen = 0;
1026}
1027
1028static void initServerConfig() {
1029 server.dbnum = REDIS_DEFAULT_DBNUM;
1030 server.port = REDIS_SERVERPORT;
1031 server.verbosity = REDIS_DEBUG;
1032 server.maxidletime = REDIS_MAXIDLETIME;
1033 server.saveparams = NULL;
1034 server.logfile = NULL; /* NULL = log on standard output */
1035 server.bindaddr = NULL;
1036 server.glueoutputbuf = 1;
1037 server.daemonize = 0;
44b38ef4 1038 server.appendonly = 0;
48f0308a 1039 server.appendfsync = APPENDFSYNC_EVERYSEC;
1040 server.lastfsync = time(NULL);
44b38ef4 1041 server.appendfd = -1;
1042 server.appendseldb = -1; /* Make sure the first time will not match */
ed329fcf 1043 server.pidfile = "/var/run/redis.pid";
ed9b544e 1044 server.dbfilename = "dump.rdb";
44b38ef4 1045 server.appendfilename = "appendonly.log";
abcb223e 1046 server.requirepass = NULL;
10c43610 1047 server.shareobjects = 0;
21aecf4b 1048 server.sharingpoolsize = 1024;
285add55 1049 server.maxclients = 0;
3fd78bcd 1050 server.maxmemory = 0;
ed9b544e 1051 ResetServerSaveParams();
1052
1053 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1054 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1055 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1056 /* Replication related */
1057 server.isslave = 0;
1058 server.masterhost = NULL;
1059 server.masterport = 6379;
1060 server.master = NULL;
1061 server.replstate = REDIS_REPL_NONE;
a7866db6 1062
1063 /* Double constants initialization */
1064 R_Zero = 0.0;
1065 R_PosInf = 1.0/R_Zero;
1066 R_NegInf = -1.0/R_Zero;
1067 R_Nan = R_Zero/R_Zero;
ed9b544e 1068}
1069
1070static void initServer() {
1071 int j;
1072
1073 signal(SIGHUP, SIG_IGN);
1074 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1075 setupSigSegvAction();
ed9b544e 1076
1077 server.clients = listCreate();
1078 server.slaves = listCreate();
87eca727 1079 server.monitors = listCreate();
ed9b544e 1080 server.objfreelist = listCreate();
1081 createSharedObjects();
1082 server.el = aeCreateEventLoop();
3305306f 1083 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
10c43610 1084 server.sharingpool = dictCreate(&setDictType,NULL);
ed9b544e 1085 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1086 if (server.fd == -1) {
1087 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1088 exit(1);
1089 }
3305306f 1090 for (j = 0; j < server.dbnum; j++) {
1091 server.db[j].dict = dictCreate(&hashDictType,NULL);
1092 server.db[j].expires = dictCreate(&setDictType,NULL);
1093 server.db[j].id = j;
1094 }
ed9b544e 1095 server.cronloops = 0;
1096 server.bgsaveinprogress = 0;
9f3c422c 1097 server.bgsavechildpid = -1;
ed9b544e 1098 server.lastsave = time(NULL);
1099 server.dirty = 0;
1100 server.usedmemory = 0;
1101 server.stat_numcommands = 0;
1102 server.stat_numconnections = 0;
1103 server.stat_starttime = time(NULL);
1104 aeCreateTimeEvent(server.el, 1000, serverCron, NULL, NULL);
44b38ef4 1105
1106 if (server.appendonly) {
1107 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT);
1108 if (server.appendfd == -1) {
1109 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1110 strerror(errno));
1111 exit(1);
1112 }
1113 }
ed9b544e 1114}
1115
1116/* Empty the whole database */
ca37e9cd 1117static long long emptyDb() {
ed9b544e 1118 int j;
ca37e9cd 1119 long long removed = 0;
ed9b544e 1120
3305306f 1121 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1122 removed += dictSize(server.db[j].dict);
3305306f 1123 dictEmpty(server.db[j].dict);
1124 dictEmpty(server.db[j].expires);
1125 }
ca37e9cd 1126 return removed;
ed9b544e 1127}
1128
85dd2f3a 1129static int yesnotoi(char *s) {
1130 if (!strcasecmp(s,"yes")) return 1;
1131 else if (!strcasecmp(s,"no")) return 0;
1132 else return -1;
1133}
1134
ed9b544e 1135/* I agree, this is a very rudimental way to load a configuration...
1136 will improve later if the config gets more complex */
1137static void loadServerConfig(char *filename) {
c9a111ac 1138 FILE *fp;
ed9b544e 1139 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1140 int linenum = 0;
1141 sds line = NULL;
c9a111ac 1142
1143 if (filename[0] == '-' && filename[1] == '\0')
1144 fp = stdin;
1145 else {
1146 if ((fp = fopen(filename,"r")) == NULL) {
1147 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1148 exit(1);
1149 }
ed9b544e 1150 }
c9a111ac 1151
ed9b544e 1152 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1153 sds *argv;
1154 int argc, j;
1155
1156 linenum++;
1157 line = sdsnew(buf);
1158 line = sdstrim(line," \t\r\n");
1159
1160 /* Skip comments and blank lines*/
1161 if (line[0] == '#' || line[0] == '\0') {
1162 sdsfree(line);
1163 continue;
1164 }
1165
1166 /* Split into arguments */
1167 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1168 sdstolower(argv[0]);
1169
1170 /* Execute config directives */
bb0b03a3 1171 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1172 server.maxidletime = atoi(argv[1]);
0150db36 1173 if (server.maxidletime < 0) {
ed9b544e 1174 err = "Invalid timeout value"; goto loaderr;
1175 }
bb0b03a3 1176 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1177 server.port = atoi(argv[1]);
1178 if (server.port < 1 || server.port > 65535) {
1179 err = "Invalid port"; goto loaderr;
1180 }
bb0b03a3 1181 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1182 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1183 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1184 int seconds = atoi(argv[1]);
1185 int changes = atoi(argv[2]);
1186 if (seconds < 1 || changes < 0) {
1187 err = "Invalid save parameters"; goto loaderr;
1188 }
1189 appendServerSaveParams(seconds,changes);
bb0b03a3 1190 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1191 if (chdir(argv[1]) == -1) {
1192 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1193 argv[1], strerror(errno));
1194 exit(1);
1195 }
bb0b03a3 1196 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1197 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
1198 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1199 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1200 else {
1201 err = "Invalid log level. Must be one of debug, notice, warning";
1202 goto loaderr;
1203 }
bb0b03a3 1204 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1205 FILE *logfp;
ed9b544e 1206
1207 server.logfile = zstrdup(argv[1]);
bb0b03a3 1208 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1209 zfree(server.logfile);
1210 server.logfile = NULL;
1211 }
1212 if (server.logfile) {
1213 /* Test if we are able to open the file. The server will not
1214 * be able to abort just for this problem later... */
c9a111ac 1215 logfp = fopen(server.logfile,"a");
1216 if (logfp == NULL) {
ed9b544e 1217 err = sdscatprintf(sdsempty(),
1218 "Can't open the log file: %s", strerror(errno));
1219 goto loaderr;
1220 }
c9a111ac 1221 fclose(logfp);
ed9b544e 1222 }
bb0b03a3 1223 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1224 server.dbnum = atoi(argv[1]);
1225 if (server.dbnum < 1) {
1226 err = "Invalid number of databases"; goto loaderr;
1227 }
285add55 1228 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1229 server.maxclients = atoi(argv[1]);
3fd78bcd 1230 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
d4465900 1231 server.maxmemory = strtoll(argv[1], NULL, 10);
bb0b03a3 1232 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1233 server.masterhost = sdsnew(argv[1]);
1234 server.masterport = atoi(argv[2]);
1235 server.replstate = REDIS_REPL_CONNECT;
bb0b03a3 1236 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1237 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1238 err = "argument must be 'yes' or 'no'"; goto loaderr;
1239 }
bb0b03a3 1240 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
85dd2f3a 1241 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
10c43610 1242 err = "argument must be 'yes' or 'no'"; goto loaderr;
1243 }
e52c65b9 1244 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1245 server.sharingpoolsize = atoi(argv[1]);
1246 if (server.sharingpoolsize < 1) {
1247 err = "invalid object sharing pool size"; goto loaderr;
1248 }
bb0b03a3 1249 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1250 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1251 err = "argument must be 'yes' or 'no'"; goto loaderr;
1252 }
44b38ef4 1253 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1254 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1255 err = "argument must be 'yes' or 'no'"; goto loaderr;
1256 }
48f0308a 1257 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1258 if (strcasecmp(argv[1],"no")) {
1259 server.appendfsync = APPENDFSYNC_NO;
1260 } else if (strcasecmp(argv[1],"always")) {
1261 server.appendfsync = APPENDFSYNC_ALWAYS;
1262 } else if (strcasecmp(argv[1],"everysec")) {
1263 server.appendfsync = APPENDFSYNC_EVERYSEC;
1264 } else {
1265 err = "argument must be 'no', 'always' or 'everysec'";
1266 goto loaderr;
1267 }
bb0b03a3 1268 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
abcb223e 1269 server.requirepass = zstrdup(argv[1]);
bb0b03a3 1270 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
ed329fcf 1271 server.pidfile = zstrdup(argv[1]);
bb0b03a3 1272 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
b8b553c8 1273 server.dbfilename = zstrdup(argv[1]);
ed9b544e 1274 } else {
1275 err = "Bad directive or wrong number of arguments"; goto loaderr;
1276 }
1277 for (j = 0; j < argc; j++)
1278 sdsfree(argv[j]);
1279 zfree(argv);
1280 sdsfree(line);
1281 }
c9a111ac 1282 if (fp != stdin) fclose(fp);
ed9b544e 1283 return;
1284
1285loaderr:
1286 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1287 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1288 fprintf(stderr, ">>> '%s'\n", line);
1289 fprintf(stderr, "%s\n", err);
1290 exit(1);
1291}
1292
1293static void freeClientArgv(redisClient *c) {
1294 int j;
1295
1296 for (j = 0; j < c->argc; j++)
1297 decrRefCount(c->argv[j]);
e8a74421 1298 for (j = 0; j < c->mbargc; j++)
1299 decrRefCount(c->mbargv[j]);
ed9b544e 1300 c->argc = 0;
e8a74421 1301 c->mbargc = 0;
ed9b544e 1302}
1303
1304static void freeClient(redisClient *c) {
1305 listNode *ln;
1306
1307 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1308 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1309 sdsfree(c->querybuf);
1310 listRelease(c->reply);
1311 freeClientArgv(c);
1312 close(c->fd);
1313 ln = listSearchKey(server.clients,c);
1314 assert(ln != NULL);
1315 listDelNode(server.clients,ln);
1316 if (c->flags & REDIS_SLAVE) {
6208b3a7 1317 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1318 close(c->repldbfd);
87eca727 1319 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1320 ln = listSearchKey(l,c);
ed9b544e 1321 assert(ln != NULL);
87eca727 1322 listDelNode(l,ln);
ed9b544e 1323 }
1324 if (c->flags & REDIS_MASTER) {
1325 server.master = NULL;
1326 server.replstate = REDIS_REPL_CONNECT;
1327 }
93ea3759 1328 zfree(c->argv);
e8a74421 1329 zfree(c->mbargv);
ed9b544e 1330 zfree(c);
1331}
1332
1333static void glueReplyBuffersIfNeeded(redisClient *c) {
1334 int totlen = 0;
6208b3a7 1335 listNode *ln;
ed9b544e 1336 robj *o;
1337
6208b3a7 1338 listRewind(c->reply);
1339 while((ln = listYield(c->reply))) {
ed9b544e 1340 o = ln->value;
1341 totlen += sdslen(o->ptr);
ed9b544e 1342 /* This optimization makes more sense if we don't have to copy
1343 * too much data */
1344 if (totlen > 1024) return;
1345 }
1346 if (totlen > 0) {
1347 char buf[1024];
1348 int copylen = 0;
1349
6208b3a7 1350 listRewind(c->reply);
1351 while((ln = listYield(c->reply))) {
ed9b544e 1352 o = ln->value;
1353 memcpy(buf+copylen,o->ptr,sdslen(o->ptr));
1354 copylen += sdslen(o->ptr);
1355 listDelNode(c->reply,ln);
ed9b544e 1356 }
1357 /* Now the output buffer is empty, add the new single element */
6fdc78ac 1358 o = createObject(REDIS_STRING,sdsnewlen(buf,totlen));
6b47e12e 1359 listAddNodeTail(c->reply,o);
ed9b544e 1360 }
1361}
1362
1363static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1364 redisClient *c = privdata;
1365 int nwritten = 0, totwritten = 0, objlen;
1366 robj *o;
1367 REDIS_NOTUSED(el);
1368 REDIS_NOTUSED(mask);
1369
1370 if (server.glueoutputbuf && listLength(c->reply) > 1)
1371 glueReplyBuffersIfNeeded(c);
1372 while(listLength(c->reply)) {
1373 o = listNodeValue(listFirst(c->reply));
1374 objlen = sdslen(o->ptr);
1375
1376 if (objlen == 0) {
1377 listDelNode(c->reply,listFirst(c->reply));
1378 continue;
1379 }
1380
1381 if (c->flags & REDIS_MASTER) {
6f376729 1382 /* Don't reply to a master */
ed9b544e 1383 nwritten = objlen - c->sentlen;
1384 } else {
a4d1ba9a 1385 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 1386 if (nwritten <= 0) break;
1387 }
1388 c->sentlen += nwritten;
1389 totwritten += nwritten;
1390 /* If we fully sent the object on head go to the next one */
1391 if (c->sentlen == objlen) {
1392 listDelNode(c->reply,listFirst(c->reply));
1393 c->sentlen = 0;
1394 }
6f376729 1395 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
1396 * bytes, in a single threaded server it's a good idea to server
1397 * other clients as well, even if a very large request comes from
1398 * super fast link that is always able to accept data (in real world
1399 * terms think to 'KEYS *' against the loopback interfae) */
1400 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 1401 }
1402 if (nwritten == -1) {
1403 if (errno == EAGAIN) {
1404 nwritten = 0;
1405 } else {
1406 redisLog(REDIS_DEBUG,
1407 "Error writing to client: %s", strerror(errno));
1408 freeClient(c);
1409 return;
1410 }
1411 }
1412 if (totwritten > 0) c->lastinteraction = time(NULL);
1413 if (listLength(c->reply) == 0) {
1414 c->sentlen = 0;
1415 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1416 }
1417}
1418
1419static struct redisCommand *lookupCommand(char *name) {
1420 int j = 0;
1421 while(cmdTable[j].name != NULL) {
bb0b03a3 1422 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
ed9b544e 1423 j++;
1424 }
1425 return NULL;
1426}
1427
1428/* resetClient prepare the client to process the next command */
1429static void resetClient(redisClient *c) {
1430 freeClientArgv(c);
1431 c->bulklen = -1;
e8a74421 1432 c->multibulk = 0;
ed9b544e 1433}
1434
1435/* If this function gets called we already read a whole
1436 * command, argments are in the client argv/argc fields.
1437 * processCommand() execute the command or prepare the
1438 * server for a bulk read from the client.
1439 *
1440 * If 1 is returned the client is still alive and valid and
1441 * and other operations can be performed by the caller. Otherwise
1442 * if 0 is returned the client was destroied (i.e. after QUIT). */
1443static int processCommand(redisClient *c) {
1444 struct redisCommand *cmd;
1445 long long dirty;
1446
3fd78bcd 1447 /* Free some memory if needed (maxmemory setting) */
1448 if (server.maxmemory) freeMemoryIfNeeded();
1449
e8a74421 1450 /* Handle the multi bulk command type. This is an alternative protocol
1451 * supported by Redis in order to receive commands that are composed of
1452 * multiple binary-safe "bulk" arguments. The latency of processing is
1453 * a bit higher but this allows things like multi-sets, so if this
1454 * protocol is used only for MSET and similar commands this is a big win. */
1455 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1456 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1457 if (c->multibulk <= 0) {
1458 resetClient(c);
1459 return 1;
1460 } else {
1461 decrRefCount(c->argv[c->argc-1]);
1462 c->argc--;
1463 return 1;
1464 }
1465 } else if (c->multibulk) {
1466 if (c->bulklen == -1) {
1467 if (((char*)c->argv[0]->ptr)[0] != '$') {
1468 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1469 resetClient(c);
1470 return 1;
1471 } else {
1472 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1473 decrRefCount(c->argv[0]);
1474 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1475 c->argc--;
1476 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1477 resetClient(c);
1478 return 1;
1479 }
1480 c->argc--;
1481 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1482 return 1;
1483 }
1484 } else {
1485 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1486 c->mbargv[c->mbargc] = c->argv[0];
1487 c->mbargc++;
1488 c->argc--;
1489 c->multibulk--;
1490 if (c->multibulk == 0) {
1491 robj **auxargv;
1492 int auxargc;
1493
1494 /* Here we need to swap the multi-bulk argc/argv with the
1495 * normal argc/argv of the client structure. */
1496 auxargv = c->argv;
1497 c->argv = c->mbargv;
1498 c->mbargv = auxargv;
1499
1500 auxargc = c->argc;
1501 c->argc = c->mbargc;
1502 c->mbargc = auxargc;
1503
1504 /* We need to set bulklen to something different than -1
1505 * in order for the code below to process the command without
1506 * to try to read the last argument of a bulk command as
1507 * a special argument. */
1508 c->bulklen = 0;
1509 /* continue below and process the command */
1510 } else {
1511 c->bulklen = -1;
1512 return 1;
1513 }
1514 }
1515 }
1516 /* -- end of multi bulk commands processing -- */
1517
ed9b544e 1518 /* The QUIT command is handled as a special case. Normal command
1519 * procs are unable to close the client connection safely */
bb0b03a3 1520 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 1521 freeClient(c);
1522 return 0;
1523 }
1524 cmd = lookupCommand(c->argv[0]->ptr);
1525 if (!cmd) {
1526 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1527 resetClient(c);
1528 return 1;
1529 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1530 (c->argc < -cmd->arity)) {
1531 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
1532 resetClient(c);
1533 return 1;
3fd78bcd 1534 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1535 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1536 resetClient(c);
1537 return 1;
ed9b544e 1538 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1539 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1540
1541 decrRefCount(c->argv[c->argc-1]);
1542 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1543 c->argc--;
1544 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1545 resetClient(c);
1546 return 1;
1547 }
1548 c->argc--;
1549 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1550 /* It is possible that the bulk read is already in the
8d0490e7 1551 * buffer. Check this condition and handle it accordingly.
1552 * This is just a fast path, alternative to call processInputBuffer().
1553 * It's a good idea since the code is small and this condition
1554 * happens most of the times. */
ed9b544e 1555 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1556 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1557 c->argc++;
1558 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1559 } else {
1560 return 1;
1561 }
1562 }
10c43610 1563 /* Let's try to share objects on the command arguments vector */
1564 if (server.shareobjects) {
1565 int j;
1566 for(j = 1; j < c->argc; j++)
1567 c->argv[j] = tryObjectSharing(c->argv[j]);
1568 }
942a3961 1569 /* Let's try to encode the bulk object to save space. */
1570 if (cmd->flags & REDIS_CMD_BULK)
1571 tryObjectEncoding(c->argv[c->argc-1]);
1572
e63943a4 1573 /* Check if the user is authenticated */
1574 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1575 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1576 resetClient(c);
1577 return 1;
1578 }
1579
ed9b544e 1580 /* Exec the command */
1581 dirty = server.dirty;
1582 cmd->proc(c);
44b38ef4 1583 if (server.appendonly != 0)
1584 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
ed9b544e 1585 if (server.dirty-dirty != 0 && listLength(server.slaves))
3305306f 1586 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
87eca727 1587 if (listLength(server.monitors))
3305306f 1588 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
ed9b544e 1589 server.stat_numcommands++;
1590
1591 /* Prepare the client for the next command */
1592 if (c->flags & REDIS_CLOSE) {
1593 freeClient(c);
1594 return 0;
1595 }
1596 resetClient(c);
1597 return 1;
1598}
1599
87eca727 1600static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6208b3a7 1601 listNode *ln;
ed9b544e 1602 int outc = 0, j;
93ea3759 1603 robj **outv;
1604 /* (args*2)+1 is enough room for args, spaces, newlines */
1605 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1606
1607 if (argc <= REDIS_STATIC_ARGS) {
1608 outv = static_outv;
1609 } else {
1610 outv = zmalloc(sizeof(robj*)*(argc*2+1));
93ea3759 1611 }
ed9b544e 1612
1613 for (j = 0; j < argc; j++) {
1614 if (j != 0) outv[outc++] = shared.space;
1615 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1616 robj *lenobj;
1617
1618 lenobj = createObject(REDIS_STRING,
0ea663ea 1619 sdscatprintf(sdsempty(),"%d\r\n",
1620 stringObjectLen(argv[j])));
ed9b544e 1621 lenobj->refcount = 0;
1622 outv[outc++] = lenobj;
1623 }
1624 outv[outc++] = argv[j];
1625 }
1626 outv[outc++] = shared.crlf;
1627
40d224a9 1628 /* Increment all the refcounts at start and decrement at end in order to
1629 * be sure to free objects if there is no slave in a replication state
1630 * able to be feed with commands */
1631 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
6208b3a7 1632 listRewind(slaves);
1633 while((ln = listYield(slaves))) {
ed9b544e 1634 redisClient *slave = ln->value;
40d224a9 1635
1636 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 1637 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 1638
1639 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 1640 if (slave->slaveseldb != dictid) {
1641 robj *selectcmd;
1642
1643 switch(dictid) {
1644 case 0: selectcmd = shared.select0; break;
1645 case 1: selectcmd = shared.select1; break;
1646 case 2: selectcmd = shared.select2; break;
1647 case 3: selectcmd = shared.select3; break;
1648 case 4: selectcmd = shared.select4; break;
1649 case 5: selectcmd = shared.select5; break;
1650 case 6: selectcmd = shared.select6; break;
1651 case 7: selectcmd = shared.select7; break;
1652 case 8: selectcmd = shared.select8; break;
1653 case 9: selectcmd = shared.select9; break;
1654 default:
1655 selectcmd = createObject(REDIS_STRING,
1656 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1657 selectcmd->refcount = 0;
1658 break;
1659 }
1660 addReply(slave,selectcmd);
1661 slave->slaveseldb = dictid;
1662 }
1663 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 1664 }
40d224a9 1665 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 1666 if (outv != static_outv) zfree(outv);
ed9b544e 1667}
1668
44b38ef4 1669/* TODO: translate EXPIREs into EXPIRETOs */
1670static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
1671 sds buf = sdsempty();
1672 int j;
1673 ssize_t nwritten;
48f0308a 1674 time_t now;
44b38ef4 1675
1676 /* The DB this command was targetting is not the same as the last command
1677 * we appendend. To issue a SELECT command is needed. */
1678 if (dictid != server.appendseldb) {
1679 char seldb[64];
1680
1681 snprintf(seldb,sizeof(seldb),"%d",dictid);
1682 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
1683 strlen(seldb),seldb);
16f92547 1684 server.appendseldb = dictid;
44b38ef4 1685 }
1686 /* Append the actual command */
1687 buf = sdscatprintf(buf,"*%d\r\n",argc);
1688 for (j = 0; j < argc; j++) {
1689 robj *o = argv[j];
1690
1691 if (o->encoding != REDIS_ENCODING_RAW)
1692 o = getDecodedObject(o);
1693 buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr));
1694 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
1695 buf = sdscatlen(buf,"\r\n",2);
1696 if (o != argv[j])
1697 decrRefCount(o);
1698 }
1699 /* We want to perform a single write. This should be guaranteed atomic
1700 * at least if the filesystem we are writing is a real physical one.
1701 * While this will save us against the server being killed I don't think
1702 * there is much to do about the whole server stopping for power problems
1703 * or alike */
1704 nwritten = write(server.appendfd,buf,sdslen(buf));
1705 if (nwritten != (unsigned)sdslen(buf)) {
1706 /* Ooops, we are in troubles. The best thing to do for now is
1707 * to simply exit instead to give the illusion that everything is
1708 * working as expected. */
1709 if (nwritten == -1) {
1710 redisLog(REDIS_WARNING,"Aborting on error writing to the append-only file: %s",strerror(errno));
1711 } else {
1712 redisLog(REDIS_WARNING,"Aborting on short write while writing to the append-only file: %s",strerror(errno));
1713 }
1714 abort();
48f0308a 1715 }
1716 now = time(NULL);
1717 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
1718 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
1719 now-server.lastfsync > 1))
1720 {
1721 fsync(server.appendfd); /* Let's try to get this data on the disk */
1722 server.lastfsync = now;
1723 }
44b38ef4 1724}
1725
638e42ac 1726static void processInputBuffer(redisClient *c) {
ed9b544e 1727again:
1728 if (c->bulklen == -1) {
1729 /* Read the first line of the query */
1730 char *p = strchr(c->querybuf,'\n');
1731 size_t querylen;
644fafa3 1732
ed9b544e 1733 if (p) {
1734 sds query, *argv;
1735 int argc, j;
1736
1737 query = c->querybuf;
1738 c->querybuf = sdsempty();
1739 querylen = 1+(p-(query));
1740 if (sdslen(query) > querylen) {
1741 /* leave data after the first line of the query in the buffer */
1742 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1743 }
1744 *p = '\0'; /* remove "\n" */
1745 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1746 sdsupdatelen(query);
1747
1748 /* Now we can split the query in arguments */
1749 if (sdslen(query) == 0) {
1750 /* Ignore empty query */
1751 sdsfree(query);
1752 return;
1753 }
1754 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 1755 sdsfree(query);
1756
1757 if (c->argv) zfree(c->argv);
1758 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 1759
1760 for (j = 0; j < argc; j++) {
ed9b544e 1761 if (sdslen(argv[j])) {
1762 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1763 c->argc++;
1764 } else {
1765 sdsfree(argv[j]);
1766 }
1767 }
1768 zfree(argv);
1769 /* Execute the command. If the client is still valid
1770 * after processCommand() return and there is something
1771 * on the query buffer try to process the next command. */
af807d87 1772 if (c->argc && processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1773 return;
644fafa3 1774 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
ed9b544e 1775 redisLog(REDIS_DEBUG, "Client protocol error");
1776 freeClient(c);
1777 return;
1778 }
1779 } else {
1780 /* Bulk read handling. Note that if we are at this point
1781 the client already sent a command terminated with a newline,
1782 we are reading the bulk data that is actually the last
1783 argument of the command. */
1784 int qbl = sdslen(c->querybuf);
1785
1786 if (c->bulklen <= qbl) {
1787 /* Copy everything but the final CRLF as final argument */
1788 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1789 c->argc++;
1790 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 1791 /* Process the command. If the client is still valid after
1792 * the processing and there is more data in the buffer
1793 * try to parse it. */
1794 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1795 return;
1796 }
1797 }
1798}
1799
638e42ac 1800static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1801 redisClient *c = (redisClient*) privdata;
1802 char buf[REDIS_IOBUF_LEN];
1803 int nread;
1804 REDIS_NOTUSED(el);
1805 REDIS_NOTUSED(mask);
1806
1807 nread = read(fd, buf, REDIS_IOBUF_LEN);
1808 if (nread == -1) {
1809 if (errno == EAGAIN) {
1810 nread = 0;
1811 } else {
1812 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1813 freeClient(c);
1814 return;
1815 }
1816 } else if (nread == 0) {
1817 redisLog(REDIS_DEBUG, "Client closed connection");
1818 freeClient(c);
1819 return;
1820 }
1821 if (nread) {
1822 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1823 c->lastinteraction = time(NULL);
1824 } else {
1825 return;
1826 }
1827 processInputBuffer(c);
1828}
1829
ed9b544e 1830static int selectDb(redisClient *c, int id) {
1831 if (id < 0 || id >= server.dbnum)
1832 return REDIS_ERR;
3305306f 1833 c->db = &server.db[id];
ed9b544e 1834 return REDIS_OK;
1835}
1836
40d224a9 1837static void *dupClientReplyValue(void *o) {
1838 incrRefCount((robj*)o);
1839 return 0;
1840}
1841
ed9b544e 1842static redisClient *createClient(int fd) {
1843 redisClient *c = zmalloc(sizeof(*c));
1844
1845 anetNonBlock(NULL,fd);
1846 anetTcpNoDelay(NULL,fd);
1847 if (!c) return NULL;
1848 selectDb(c,0);
1849 c->fd = fd;
1850 c->querybuf = sdsempty();
1851 c->argc = 0;
93ea3759 1852 c->argv = NULL;
ed9b544e 1853 c->bulklen = -1;
e8a74421 1854 c->multibulk = 0;
1855 c->mbargc = 0;
1856 c->mbargv = NULL;
ed9b544e 1857 c->sentlen = 0;
1858 c->flags = 0;
1859 c->lastinteraction = time(NULL);
abcb223e 1860 c->authenticated = 0;
40d224a9 1861 c->replstate = REDIS_REPL_NONE;
6b47e12e 1862 c->reply = listCreate();
ed9b544e 1863 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 1864 listSetDupMethod(c->reply,dupClientReplyValue);
ed9b544e 1865 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1866 readQueryFromClient, c, NULL) == AE_ERR) {
1867 freeClient(c);
1868 return NULL;
1869 }
6b47e12e 1870 listAddNodeTail(server.clients,c);
ed9b544e 1871 return c;
1872}
1873
1874static void addReply(redisClient *c, robj *obj) {
1875 if (listLength(c->reply) == 0 &&
6208b3a7 1876 (c->replstate == REDIS_REPL_NONE ||
1877 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 1878 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
1879 sendReplyToClient, c, NULL) == AE_ERR) return;
942a3961 1880 if (obj->encoding != REDIS_ENCODING_RAW) {
1881 obj = getDecodedObject(obj);
1882 } else {
1883 incrRefCount(obj);
1884 }
6b47e12e 1885 listAddNodeTail(c->reply,obj);
ed9b544e 1886}
1887
1888static void addReplySds(redisClient *c, sds s) {
1889 robj *o = createObject(REDIS_STRING,s);
1890 addReply(c,o);
1891 decrRefCount(o);
1892}
1893
942a3961 1894static void addReplyBulkLen(redisClient *c, robj *obj) {
1895 size_t len;
1896
1897 if (obj->encoding == REDIS_ENCODING_RAW) {
1898 len = sdslen(obj->ptr);
1899 } else {
1900 long n = (long)obj->ptr;
1901
1902 len = 1;
1903 if (n < 0) {
1904 len++;
1905 n = -n;
1906 }
1907 while((n = n/10) != 0) {
1908 len++;
1909 }
1910 }
1911 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",len));
1912}
1913
ed9b544e 1914static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
1915 int cport, cfd;
1916 char cip[128];
285add55 1917 redisClient *c;
ed9b544e 1918 REDIS_NOTUSED(el);
1919 REDIS_NOTUSED(mask);
1920 REDIS_NOTUSED(privdata);
1921
1922 cfd = anetAccept(server.neterr, fd, cip, &cport);
1923 if (cfd == AE_ERR) {
1924 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
1925 return;
1926 }
1927 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
285add55 1928 if ((c = createClient(cfd)) == NULL) {
ed9b544e 1929 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
1930 close(cfd); /* May be already closed, just ingore errors */
1931 return;
1932 }
285add55 1933 /* If maxclient directive is set and this is one client more... close the
1934 * connection. Note that we create the client instead to check before
1935 * for this condition, since now the socket is already set in nonblocking
1936 * mode and we can send an error for free using the Kernel I/O */
1937 if (server.maxclients && listLength(server.clients) > server.maxclients) {
1938 char *err = "-ERR max number of clients reached\r\n";
1939
1940 /* That's a best effort error message, don't check write errors */
d7fc9edb 1941 (void) write(c->fd,err,strlen(err));
285add55 1942 freeClient(c);
1943 return;
1944 }
ed9b544e 1945 server.stat_numconnections++;
1946}
1947
1948/* ======================= Redis objects implementation ===================== */
1949
1950static robj *createObject(int type, void *ptr) {
1951 robj *o;
1952
1953 if (listLength(server.objfreelist)) {
1954 listNode *head = listFirst(server.objfreelist);
1955 o = listNodeValue(head);
1956 listDelNode(server.objfreelist,head);
1957 } else {
1958 o = zmalloc(sizeof(*o));
1959 }
ed9b544e 1960 o->type = type;
942a3961 1961 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 1962 o->ptr = ptr;
1963 o->refcount = 1;
1964 return o;
1965}
1966
1967static robj *createStringObject(char *ptr, size_t len) {
1968 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
1969}
1970
1971static robj *createListObject(void) {
1972 list *l = listCreate();
1973
ed9b544e 1974 listSetFreeMethod(l,decrRefCount);
1975 return createObject(REDIS_LIST,l);
1976}
1977
1978static robj *createSetObject(void) {
1979 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 1980 return createObject(REDIS_SET,d);
1981}
1982
1812e024 1983static robj *createZsetObject(void) {
6b47e12e 1984 zset *zs = zmalloc(sizeof(*zs));
1985
1986 zs->dict = dictCreate(&zsetDictType,NULL);
1987 zs->zsl = zslCreate();
1988 return createObject(REDIS_ZSET,zs);
1812e024 1989}
1990
ed9b544e 1991static void freeStringObject(robj *o) {
942a3961 1992 if (o->encoding == REDIS_ENCODING_RAW) {
1993 sdsfree(o->ptr);
1994 }
ed9b544e 1995}
1996
1997static void freeListObject(robj *o) {
1998 listRelease((list*) o->ptr);
1999}
2000
2001static void freeSetObject(robj *o) {
2002 dictRelease((dict*) o->ptr);
2003}
2004
fd8ccf44 2005static void freeZsetObject(robj *o) {
2006 zset *zs = o->ptr;
2007
2008 dictRelease(zs->dict);
2009 zslFree(zs->zsl);
2010 zfree(zs);
2011}
2012
ed9b544e 2013static void freeHashObject(robj *o) {
2014 dictRelease((dict*) o->ptr);
2015}
2016
2017static void incrRefCount(robj *o) {
2018 o->refcount++;
94754ccc 2019#ifdef DEBUG_REFCOUNT
2020 if (o->type == REDIS_STRING)
2021 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
2022#endif
ed9b544e 2023}
2024
2025static void decrRefCount(void *obj) {
2026 robj *o = obj;
94754ccc 2027
2028#ifdef DEBUG_REFCOUNT
2029 if (o->type == REDIS_STRING)
2030 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
2031#endif
ed9b544e 2032 if (--(o->refcount) == 0) {
2033 switch(o->type) {
2034 case REDIS_STRING: freeStringObject(o); break;
2035 case REDIS_LIST: freeListObject(o); break;
2036 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 2037 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 2038 case REDIS_HASH: freeHashObject(o); break;
2039 default: assert(0 != 0); break;
2040 }
2041 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2042 !listAddNodeHead(server.objfreelist,o))
2043 zfree(o);
2044 }
2045}
2046
942a3961 2047static robj *lookupKey(redisDb *db, robj *key) {
2048 dictEntry *de = dictFind(db->dict,key);
2049 return de ? dictGetEntryVal(de) : NULL;
2050}
2051
2052static robj *lookupKeyRead(redisDb *db, robj *key) {
2053 expireIfNeeded(db,key);
2054 return lookupKey(db,key);
2055}
2056
2057static robj *lookupKeyWrite(redisDb *db, robj *key) {
2058 deleteIfVolatile(db,key);
2059 return lookupKey(db,key);
2060}
2061
2062static int deleteKey(redisDb *db, robj *key) {
2063 int retval;
2064
2065 /* We need to protect key from destruction: after the first dictDelete()
2066 * it may happen that 'key' is no longer valid if we don't increment
2067 * it's count. This may happen when we get the object reference directly
2068 * from the hash table with dictRandomKey() or dict iterators */
2069 incrRefCount(key);
2070 if (dictSize(db->expires)) dictDelete(db->expires,key);
2071 retval = dictDelete(db->dict,key);
2072 decrRefCount(key);
2073
2074 return retval == DICT_OK;
2075}
2076
10c43610 2077/* Try to share an object against the shared objects pool */
2078static robj *tryObjectSharing(robj *o) {
2079 struct dictEntry *de;
2080 unsigned long c;
2081
3305306f 2082 if (o == NULL || server.shareobjects == 0) return o;
10c43610 2083
2084 assert(o->type == REDIS_STRING);
2085 de = dictFind(server.sharingpool,o);
2086 if (de) {
2087 robj *shared = dictGetEntryKey(de);
2088
2089 c = ((unsigned long) dictGetEntryVal(de))+1;
2090 dictGetEntryVal(de) = (void*) c;
2091 incrRefCount(shared);
2092 decrRefCount(o);
2093 return shared;
2094 } else {
2095 /* Here we are using a stream algorihtm: Every time an object is
2096 * shared we increment its count, everytime there is a miss we
2097 * recrement the counter of a random object. If this object reaches
2098 * zero we remove the object and put the current object instead. */
3305306f 2099 if (dictSize(server.sharingpool) >=
10c43610 2100 server.sharingpoolsize) {
2101 de = dictGetRandomKey(server.sharingpool);
2102 assert(de != NULL);
2103 c = ((unsigned long) dictGetEntryVal(de))-1;
2104 dictGetEntryVal(de) = (void*) c;
2105 if (c == 0) {
2106 dictDelete(server.sharingpool,de->key);
2107 }
2108 } else {
2109 c = 0; /* If the pool is empty we want to add this object */
2110 }
2111 if (c == 0) {
2112 int retval;
2113
2114 retval = dictAdd(server.sharingpool,o,(void*)1);
2115 assert(retval == DICT_OK);
2116 incrRefCount(o);
2117 }
2118 return o;
2119 }
2120}
2121
724a51b1 2122/* Check if the nul-terminated string 's' can be represented by a long
2123 * (that is, is a number that fits into long without any other space or
2124 * character before or after the digits).
2125 *
2126 * If so, the function returns REDIS_OK and *longval is set to the value
2127 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 2128static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 2129 char buf[32], *endptr;
2130 long value;
2131 int slen;
2132
2133 value = strtol(s, &endptr, 10);
2134 if (endptr[0] != '\0') return REDIS_ERR;
2135 slen = snprintf(buf,32,"%ld",value);
2136
2137 /* If the number converted back into a string is not identical
2138 * then it's not possible to encode the string as integer */
f69f2cba 2139 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 2140 if (longval) *longval = value;
2141 return REDIS_OK;
2142}
2143
942a3961 2144/* Try to encode a string object in order to save space */
2145static int tryObjectEncoding(robj *o) {
2146 long value;
942a3961 2147 sds s = o->ptr;
3305306f 2148
942a3961 2149 if (o->encoding != REDIS_ENCODING_RAW)
2150 return REDIS_ERR; /* Already encoded */
3305306f 2151
942a3961 2152 /* It's not save to encode shared objects: shared objects can be shared
2153 * everywhere in the "object space" of Redis. Encoded objects can only
2154 * appear as "values" (and not, for instance, as keys) */
2155 if (o->refcount > 1) return REDIS_ERR;
3305306f 2156
942a3961 2157 /* Currently we try to encode only strings */
2158 assert(o->type == REDIS_STRING);
94754ccc 2159
724a51b1 2160 /* Check if we can represent this string as a long integer */
2161 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
942a3961 2162
2163 /* Ok, this object can be encoded */
2164 o->encoding = REDIS_ENCODING_INT;
2165 sdsfree(o->ptr);
2166 o->ptr = (void*) value;
2167 return REDIS_OK;
2168}
2169
2170/* Get a decoded version of an encoded object (returned as a new object) */
2171static robj *getDecodedObject(const robj *o) {
2172 robj *dec;
2173
2174 assert(o->encoding != REDIS_ENCODING_RAW);
2175 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2176 char buf[32];
2177
2178 snprintf(buf,32,"%ld",(long)o->ptr);
2179 dec = createStringObject(buf,strlen(buf));
2180 return dec;
2181 } else {
2182 assert(1 != 1);
2183 }
3305306f 2184}
2185
d7f43c08 2186/* Compare two string objects via strcmp() or alike.
2187 * Note that the objects may be integer-encoded. In such a case we
2188 * use snprintf() to get a string representation of the numbers on the stack
2189 * and compare the strings, it's much faster than calling getDecodedObject(). */
724a51b1 2190static int compareStringObjects(robj *a, robj *b) {
2191 assert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 2192 char bufa[128], bufb[128], *astr, *bstr;
2193 int bothsds = 1;
724a51b1 2194
e197b441 2195 if (a == b) return 0;
d7f43c08 2196 if (a->encoding != REDIS_ENCODING_RAW) {
2197 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2198 astr = bufa;
2199 bothsds = 0;
724a51b1 2200 } else {
d7f43c08 2201 astr = a->ptr;
724a51b1 2202 }
d7f43c08 2203 if (b->encoding != REDIS_ENCODING_RAW) {
2204 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2205 bstr = bufb;
2206 bothsds = 0;
2207 } else {
2208 bstr = b->ptr;
2209 }
2210 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 2211}
2212
0ea663ea 2213static size_t stringObjectLen(robj *o) {
2214 assert(o->type == REDIS_STRING);
2215 if (o->encoding == REDIS_ENCODING_RAW) {
2216 return sdslen(o->ptr);
2217 } else {
2218 char buf[32];
2219
2220 return snprintf(buf,32,"%ld",(long)o->ptr);
2221 }
2222}
2223
ed9b544e 2224/*============================ DB saving/loading ============================ */
2225
f78fd11b 2226static int rdbSaveType(FILE *fp, unsigned char type) {
2227 if (fwrite(&type,1,1,fp) == 0) return -1;
2228 return 0;
2229}
2230
bb32ede5 2231static int rdbSaveTime(FILE *fp, time_t t) {
2232 int32_t t32 = (int32_t) t;
2233 if (fwrite(&t32,4,1,fp) == 0) return -1;
2234 return 0;
2235}
2236
e3566d4b 2237/* check rdbLoadLen() comments for more info */
f78fd11b 2238static int rdbSaveLen(FILE *fp, uint32_t len) {
2239 unsigned char buf[2];
2240
2241 if (len < (1<<6)) {
2242 /* Save a 6 bit len */
10c43610 2243 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 2244 if (fwrite(buf,1,1,fp) == 0) return -1;
2245 } else if (len < (1<<14)) {
2246 /* Save a 14 bit len */
10c43610 2247 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 2248 buf[1] = len&0xFF;
17be1a4a 2249 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 2250 } else {
2251 /* Save a 32 bit len */
10c43610 2252 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 2253 if (fwrite(buf,1,1,fp) == 0) return -1;
2254 len = htonl(len);
2255 if (fwrite(&len,4,1,fp) == 0) return -1;
2256 }
2257 return 0;
2258}
2259
e3566d4b 2260/* String objects in the form "2391" "-100" without any space and with a
2261 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2262 * encoded as integers to save space */
56906eef 2263static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
e3566d4b 2264 long long value;
2265 char *endptr, buf[32];
2266
2267 /* Check if it's possible to encode this value as a number */
2268 value = strtoll(s, &endptr, 10);
2269 if (endptr[0] != '\0') return 0;
2270 snprintf(buf,32,"%lld",value);
2271
2272 /* If the number converted back into a string is not identical
2273 * then it's not possible to encode the string as integer */
2274 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2275
2276 /* Finally check if it fits in our ranges */
2277 if (value >= -(1<<7) && value <= (1<<7)-1) {
2278 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2279 enc[1] = value&0xFF;
2280 return 2;
2281 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2282 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2283 enc[1] = value&0xFF;
2284 enc[2] = (value>>8)&0xFF;
2285 return 3;
2286 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2287 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2288 enc[1] = value&0xFF;
2289 enc[2] = (value>>8)&0xFF;
2290 enc[3] = (value>>16)&0xFF;
2291 enc[4] = (value>>24)&0xFF;
2292 return 5;
2293 } else {
2294 return 0;
2295 }
2296}
2297
774e3047 2298static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2299 unsigned int comprlen, outlen;
2300 unsigned char byte;
2301 void *out;
2302
2303 /* We require at least four bytes compression for this to be worth it */
2304 outlen = sdslen(obj->ptr)-4;
2305 if (outlen <= 0) return 0;
3a2694c4 2306 if ((out = zmalloc(outlen+1)) == NULL) return 0;
774e3047 2307 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2308 if (comprlen == 0) {
88e85998 2309 zfree(out);
774e3047 2310 return 0;
2311 }
2312 /* Data compressed! Let's save it on disk */
2313 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2314 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2315 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2316 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2317 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 2318 zfree(out);
774e3047 2319 return comprlen;
2320
2321writeerr:
88e85998 2322 zfree(out);
774e3047 2323 return -1;
2324}
2325
e3566d4b 2326/* Save a string objet as [len][data] on disk. If the object is a string
2327 * representation of an integer value we try to safe it in a special form */
942a3961 2328static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2329 size_t len;
e3566d4b 2330 int enclen;
10c43610 2331
942a3961 2332 len = sdslen(obj->ptr);
2333
774e3047 2334 /* Try integer encoding */
e3566d4b 2335 if (len <= 11) {
2336 unsigned char buf[5];
2337 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2338 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2339 return 0;
2340 }
2341 }
774e3047 2342
2343 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 2344 * aaaaaaaaaaaaaaaaaa so skip it */
942a3961 2345 if (len > 20) {
774e3047 2346 int retval;
2347
2348 retval = rdbSaveLzfStringObject(fp,obj);
2349 if (retval == -1) return -1;
2350 if (retval > 0) return 0;
2351 /* retval == 0 means data can't be compressed, save the old way */
2352 }
2353
2354 /* Store verbatim */
10c43610 2355 if (rdbSaveLen(fp,len) == -1) return -1;
2356 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2357 return 0;
2358}
2359
942a3961 2360/* Like rdbSaveStringObjectRaw() but handle encoded objects */
2361static int rdbSaveStringObject(FILE *fp, robj *obj) {
2362 int retval;
2363 robj *dec;
2364
2365 if (obj->encoding != REDIS_ENCODING_RAW) {
2366 dec = getDecodedObject(obj);
2367 retval = rdbSaveStringObjectRaw(fp,dec);
2368 decrRefCount(dec);
2369 return retval;
2370 } else {
2371 return rdbSaveStringObjectRaw(fp,obj);
2372 }
2373}
2374
a7866db6 2375/* Save a double value. Doubles are saved as strings prefixed by an unsigned
2376 * 8 bit integer specifing the length of the representation.
2377 * This 8 bit integer has special values in order to specify the following
2378 * conditions:
2379 * 253: not a number
2380 * 254: + inf
2381 * 255: - inf
2382 */
2383static int rdbSaveDoubleValue(FILE *fp, double val) {
2384 unsigned char buf[128];
2385 int len;
2386
2387 if (isnan(val)) {
2388 buf[0] = 253;
2389 len = 1;
2390 } else if (!isfinite(val)) {
2391 len = 1;
2392 buf[0] = (val < 0) ? 255 : 254;
2393 } else {
2394 snprintf((char*)buf+1,sizeof(buf)-1,"%.16g",val);
2395 buf[0] = strlen((char*)buf);
2396 len = buf[0]+1;
2397 }
2398 if (fwrite(buf,len,1,fp) == 0) return -1;
2399 return 0;
2400}
2401
ed9b544e 2402/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 2403static int rdbSave(char *filename) {
ed9b544e 2404 dictIterator *di = NULL;
2405 dictEntry *de;
ed9b544e 2406 FILE *fp;
2407 char tmpfile[256];
2408 int j;
bb32ede5 2409 time_t now = time(NULL);
ed9b544e 2410
a3b21203 2411 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 2412 fp = fopen(tmpfile,"w");
2413 if (!fp) {
2414 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2415 return REDIS_ERR;
2416 }
f78fd11b 2417 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 2418 for (j = 0; j < server.dbnum; j++) {
bb32ede5 2419 redisDb *db = server.db+j;
2420 dict *d = db->dict;
3305306f 2421 if (dictSize(d) == 0) continue;
ed9b544e 2422 di = dictGetIterator(d);
2423 if (!di) {
2424 fclose(fp);
2425 return REDIS_ERR;
2426 }
2427
2428 /* Write the SELECT DB opcode */
f78fd11b 2429 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2430 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 2431
2432 /* Iterate this DB writing every entry */
2433 while((de = dictNext(di)) != NULL) {
2434 robj *key = dictGetEntryKey(de);
2435 robj *o = dictGetEntryVal(de);
bb32ede5 2436 time_t expiretime = getExpire(db,key);
2437
2438 /* Save the expire time */
2439 if (expiretime != -1) {
2440 /* If this key is already expired skip it */
2441 if (expiretime < now) continue;
2442 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2443 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2444 }
2445 /* Save the key and associated value */
f78fd11b 2446 if (rdbSaveType(fp,o->type) == -1) goto werr;
10c43610 2447 if (rdbSaveStringObject(fp,key) == -1) goto werr;
f78fd11b 2448 if (o->type == REDIS_STRING) {
ed9b544e 2449 /* Save a string value */
10c43610 2450 if (rdbSaveStringObject(fp,o) == -1) goto werr;
f78fd11b 2451 } else if (o->type == REDIS_LIST) {
ed9b544e 2452 /* Save a list value */
2453 list *list = o->ptr;
6208b3a7 2454 listNode *ln;
ed9b544e 2455
6208b3a7 2456 listRewind(list);
f78fd11b 2457 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
6208b3a7 2458 while((ln = listYield(list))) {
ed9b544e 2459 robj *eleobj = listNodeValue(ln);
f78fd11b 2460
10c43610 2461 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2462 }
f78fd11b 2463 } else if (o->type == REDIS_SET) {
ed9b544e 2464 /* Save a set value */
2465 dict *set = o->ptr;
2466 dictIterator *di = dictGetIterator(set);
2467 dictEntry *de;
2468
3305306f 2469 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
ed9b544e 2470 while((de = dictNext(di)) != NULL) {
10c43610 2471 robj *eleobj = dictGetEntryKey(de);
ed9b544e 2472
10c43610 2473 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2474 }
2475 dictReleaseIterator(di);
2b59cfdf 2476 } else if (o->type == REDIS_ZSET) {
2477 /* Save a set value */
2478 zset *zs = o->ptr;
2479 dictIterator *di = dictGetIterator(zs->dict);
2480 dictEntry *de;
2481
2482 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
2483 while((de = dictNext(di)) != NULL) {
2484 robj *eleobj = dictGetEntryKey(de);
2485 double *score = dictGetEntryVal(de);
2486
2487 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2488 if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
2489 }
2490 dictReleaseIterator(di);
ed9b544e 2491 } else {
2492 assert(0 != 0);
2493 }
2494 }
2495 dictReleaseIterator(di);
2496 }
2497 /* EOF opcode */
f78fd11b 2498 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2499
2500 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 2501 fflush(fp);
2502 fsync(fileno(fp));
2503 fclose(fp);
2504
2505 /* Use RENAME to make sure the DB file is changed atomically only
2506 * if the generate DB file is ok. */
2507 if (rename(tmpfile,filename) == -1) {
325d1eb4 2508 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 2509 unlink(tmpfile);
2510 return REDIS_ERR;
2511 }
2512 redisLog(REDIS_NOTICE,"DB saved on disk");
2513 server.dirty = 0;
2514 server.lastsave = time(NULL);
2515 return REDIS_OK;
2516
2517werr:
2518 fclose(fp);
2519 unlink(tmpfile);
2520 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2521 if (di) dictReleaseIterator(di);
2522 return REDIS_ERR;
2523}
2524
f78fd11b 2525static int rdbSaveBackground(char *filename) {
ed9b544e 2526 pid_t childpid;
2527
2528 if (server.bgsaveinprogress) return REDIS_ERR;
2529 if ((childpid = fork()) == 0) {
2530 /* Child */
2531 close(server.fd);
f78fd11b 2532 if (rdbSave(filename) == REDIS_OK) {
ed9b544e 2533 exit(0);
2534 } else {
2535 exit(1);
2536 }
2537 } else {
2538 /* Parent */
5a7c647e 2539 if (childpid == -1) {
2540 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2541 strerror(errno));
2542 return REDIS_ERR;
2543 }
ed9b544e 2544 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
2545 server.bgsaveinprogress = 1;
9f3c422c 2546 server.bgsavechildpid = childpid;
ed9b544e 2547 return REDIS_OK;
2548 }
2549 return REDIS_OK; /* unreached */
2550}
2551
a3b21203 2552static void rdbRemoveTempFile(pid_t childpid) {
2553 char tmpfile[256];
2554
2555 snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
2556 unlink(tmpfile);
2557}
2558
f78fd11b 2559static int rdbLoadType(FILE *fp) {
2560 unsigned char type;
7b45bfb2 2561 if (fread(&type,1,1,fp) == 0) return -1;
2562 return type;
2563}
2564
bb32ede5 2565static time_t rdbLoadTime(FILE *fp) {
2566 int32_t t32;
2567 if (fread(&t32,4,1,fp) == 0) return -1;
2568 return (time_t) t32;
2569}
2570
e3566d4b 2571/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
2572 * of this file for a description of how this are stored on disk.
2573 *
2574 * isencoded is set to 1 if the readed length is not actually a length but
2575 * an "encoding type", check the above comments for more info */
2576static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
f78fd11b 2577 unsigned char buf[2];
2578 uint32_t len;
2579
e3566d4b 2580 if (isencoded) *isencoded = 0;
f78fd11b 2581 if (rdbver == 0) {
2582 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2583 return ntohl(len);
2584 } else {
17be1a4a 2585 int type;
2586
f78fd11b 2587 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
17be1a4a 2588 type = (buf[0]&0xC0)>>6;
2589 if (type == REDIS_RDB_6BITLEN) {
f78fd11b 2590 /* Read a 6 bit len */
e3566d4b 2591 return buf[0]&0x3F;
2592 } else if (type == REDIS_RDB_ENCVAL) {
2593 /* Read a 6 bit len encoding type */
2594 if (isencoded) *isencoded = 1;
2595 return buf[0]&0x3F;
17be1a4a 2596 } else if (type == REDIS_RDB_14BITLEN) {
f78fd11b 2597 /* Read a 14 bit len */
2598 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
2599 return ((buf[0]&0x3F)<<8)|buf[1];
2600 } else {
2601 /* Read a 32 bit len */
2602 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2603 return ntohl(len);
2604 }
2605 }
f78fd11b 2606}
2607
e3566d4b 2608static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
2609 unsigned char enc[4];
2610 long long val;
2611
2612 if (enctype == REDIS_RDB_ENC_INT8) {
2613 if (fread(enc,1,1,fp) == 0) return NULL;
2614 val = (signed char)enc[0];
2615 } else if (enctype == REDIS_RDB_ENC_INT16) {
2616 uint16_t v;
2617 if (fread(enc,2,1,fp) == 0) return NULL;
2618 v = enc[0]|(enc[1]<<8);
2619 val = (int16_t)v;
2620 } else if (enctype == REDIS_RDB_ENC_INT32) {
2621 uint32_t v;
2622 if (fread(enc,4,1,fp) == 0) return NULL;
2623 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2624 val = (int32_t)v;
2625 } else {
2626 val = 0; /* anti-warning */
2627 assert(0!=0);
2628 }
2629 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2630}
2631
88e85998 2632static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2633 unsigned int len, clen;
2634 unsigned char *c = NULL;
2635 sds val = NULL;
2636
2637 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2638 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2639 if ((c = zmalloc(clen)) == NULL) goto err;
2640 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2641 if (fread(c,clen,1,fp) == 0) goto err;
2642 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 2643 zfree(c);
88e85998 2644 return createObject(REDIS_STRING,val);
2645err:
2646 zfree(c);
2647 sdsfree(val);
2648 return NULL;
2649}
2650
e3566d4b 2651static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2652 int isencoded;
2653 uint32_t len;
f78fd11b 2654 sds val;
2655
e3566d4b 2656 len = rdbLoadLen(fp,rdbver,&isencoded);
2657 if (isencoded) {
2658 switch(len) {
2659 case REDIS_RDB_ENC_INT8:
2660 case REDIS_RDB_ENC_INT16:
2661 case REDIS_RDB_ENC_INT32:
3305306f 2662 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
88e85998 2663 case REDIS_RDB_ENC_LZF:
2664 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
e3566d4b 2665 default:
2666 assert(0!=0);
2667 }
2668 }
2669
f78fd11b 2670 if (len == REDIS_RDB_LENERR) return NULL;
2671 val = sdsnewlen(NULL,len);
2672 if (len && fread(val,len,1,fp) == 0) {
2673 sdsfree(val);
2674 return NULL;
2675 }
10c43610 2676 return tryObjectSharing(createObject(REDIS_STRING,val));
f78fd11b 2677}
2678
a7866db6 2679/* For information about double serialization check rdbSaveDoubleValue() */
2680static int rdbLoadDoubleValue(FILE *fp, double *val) {
2681 char buf[128];
2682 unsigned char len;
2683
2684 if (fread(&len,1,1,fp) == 0) return -1;
2685 switch(len) {
2686 case 255: *val = R_NegInf; return 0;
2687 case 254: *val = R_PosInf; return 0;
2688 case 253: *val = R_Nan; return 0;
2689 default:
2690 if (fread(buf,len,1,fp) == 0) return -1;
2691 sscanf(buf, "%lg", val);
2692 return 0;
2693 }
2694}
2695
f78fd11b 2696static int rdbLoad(char *filename) {
ed9b544e 2697 FILE *fp;
f78fd11b 2698 robj *keyobj = NULL;
2699 uint32_t dbid;
bb32ede5 2700 int type, retval, rdbver;
3305306f 2701 dict *d = server.db[0].dict;
bb32ede5 2702 redisDb *db = server.db+0;
f78fd11b 2703 char buf[1024];
bb32ede5 2704 time_t expiretime = -1, now = time(NULL);
2705
ed9b544e 2706 fp = fopen(filename,"r");
2707 if (!fp) return REDIS_ERR;
2708 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 2709 buf[9] = '\0';
2710 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 2711 fclose(fp);
2712 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
2713 return REDIS_ERR;
2714 }
f78fd11b 2715 rdbver = atoi(buf+5);
2716 if (rdbver > 1) {
2717 fclose(fp);
2718 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
2719 return REDIS_ERR;
2720 }
ed9b544e 2721 while(1) {
2722 robj *o;
2723
2724 /* Read type. */
f78fd11b 2725 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 2726 if (type == REDIS_EXPIRETIME) {
2727 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
2728 /* We read the time so we need to read the object type again */
2729 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2730 }
ed9b544e 2731 if (type == REDIS_EOF) break;
2732 /* Handle SELECT DB opcode as a special case */
2733 if (type == REDIS_SELECTDB) {
e3566d4b 2734 if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2735 goto eoferr;
ed9b544e 2736 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 2737 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 2738 exit(1);
2739 }
bb32ede5 2740 db = server.db+dbid;
2741 d = db->dict;
ed9b544e 2742 continue;
2743 }
2744 /* Read key */
f78fd11b 2745 if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
ed9b544e 2746
2747 if (type == REDIS_STRING) {
2748 /* Read string value */
f78fd11b 2749 if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2750 tryObjectEncoding(o);
ed9b544e 2751 } else if (type == REDIS_LIST || type == REDIS_SET) {
2752 /* Read list/set value */
2753 uint32_t listlen;
f78fd11b 2754
e3566d4b 2755 if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
f78fd11b 2756 goto eoferr;
ed9b544e 2757 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
2758 /* Load every single element of the list/set */
2759 while(listlen--) {
2760 robj *ele;
2761
f78fd11b 2762 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2763 tryObjectEncoding(ele);
ed9b544e 2764 if (type == REDIS_LIST) {
6b47e12e 2765 listAddNodeTail((list*)o->ptr,ele);
ed9b544e 2766 } else {
6b47e12e 2767 dictAdd((dict*)o->ptr,ele,NULL);
ed9b544e 2768 }
ed9b544e 2769 }
2b59cfdf 2770 } else if (type == REDIS_ZSET) {
2771 /* Read list/set value */
2772 uint32_t zsetlen;
2773 zset *zs;
2774
2775 if ((zsetlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2776 goto eoferr;
2777 o = createZsetObject();
2778 zs = o->ptr;
2779 /* Load every single element of the list/set */
2780 while(zsetlen--) {
2781 robj *ele;
2782 double *score = zmalloc(sizeof(double));
2783
2784 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2785 tryObjectEncoding(ele);
2786 if (rdbLoadDoubleValue(fp,score) == -1) goto eoferr;
2787 dictAdd(zs->dict,ele,score);
2788 zslInsert(zs->zsl,*score,ele);
2789 incrRefCount(ele); /* added to skiplist */
2790 }
ed9b544e 2791 } else {
2792 assert(0 != 0);
2793 }
2794 /* Add the new object in the hash table */
f78fd11b 2795 retval = dictAdd(d,keyobj,o);
ed9b544e 2796 if (retval == DICT_ERR) {
f78fd11b 2797 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
ed9b544e 2798 exit(1);
2799 }
bb32ede5 2800 /* Set the expire time if needed */
2801 if (expiretime != -1) {
2802 setExpire(db,keyobj,expiretime);
2803 /* Delete this key if already expired */
2804 if (expiretime < now) deleteKey(db,keyobj);
2805 expiretime = -1;
2806 }
f78fd11b 2807 keyobj = o = NULL;
ed9b544e 2808 }
2809 fclose(fp);
2810 return REDIS_OK;
2811
2812eoferr: /* unexpected end of file is handled here with a fatal exit */
e3566d4b 2813 if (keyobj) decrRefCount(keyobj);
2814 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, exiting now.");
ed9b544e 2815 exit(1);
2816 return REDIS_ERR; /* Just to avoid warning */
2817}
2818
2819/*================================== Commands =============================== */
2820
abcb223e 2821static void authCommand(redisClient *c) {
2e77c2ee 2822 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
2823 c->authenticated = 1;
2824 addReply(c,shared.ok);
2825 } else {
2826 c->authenticated = 0;
fa4c0aba 2827 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
2828 }
2829}
2830
ed9b544e 2831static void pingCommand(redisClient *c) {
2832 addReply(c,shared.pong);
2833}
2834
2835static void echoCommand(redisClient *c) {
942a3961 2836 addReplyBulkLen(c,c->argv[1]);
ed9b544e 2837 addReply(c,c->argv[1]);
2838 addReply(c,shared.crlf);
2839}
2840
2841/*=================================== Strings =============================== */
2842
2843static void setGenericCommand(redisClient *c, int nx) {
2844 int retval;
2845
3305306f 2846 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2847 if (retval == DICT_ERR) {
2848 if (!nx) {
3305306f 2849 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2850 incrRefCount(c->argv[2]);
2851 } else {
c937aa89 2852 addReply(c,shared.czero);
ed9b544e 2853 return;
2854 }
2855 } else {
2856 incrRefCount(c->argv[1]);
2857 incrRefCount(c->argv[2]);
2858 }
2859 server.dirty++;
3305306f 2860 removeExpire(c->db,c->argv[1]);
c937aa89 2861 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 2862}
2863
2864static void setCommand(redisClient *c) {
a4d1ba9a 2865 setGenericCommand(c,0);
ed9b544e 2866}
2867
2868static void setnxCommand(redisClient *c) {
a4d1ba9a 2869 setGenericCommand(c,1);
ed9b544e 2870}
2871
2872static void getCommand(redisClient *c) {
3305306f 2873 robj *o = lookupKeyRead(c->db,c->argv[1]);
2874
2875 if (o == NULL) {
c937aa89 2876 addReply(c,shared.nullbulk);
ed9b544e 2877 } else {
ed9b544e 2878 if (o->type != REDIS_STRING) {
c937aa89 2879 addReply(c,shared.wrongtypeerr);
ed9b544e 2880 } else {
942a3961 2881 addReplyBulkLen(c,o);
ed9b544e 2882 addReply(c,o);
2883 addReply(c,shared.crlf);
2884 }
2885 }
2886}
2887
f6b141c5 2888static void getsetCommand(redisClient *c) {
a431eb74 2889 getCommand(c);
2890 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
2891 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2892 } else {
2893 incrRefCount(c->argv[1]);
2894 }
2895 incrRefCount(c->argv[2]);
2896 server.dirty++;
2897 removeExpire(c->db,c->argv[1]);
2898}
2899
70003d28 2900static void mgetCommand(redisClient *c) {
70003d28 2901 int j;
2902
c937aa89 2903 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 2904 for (j = 1; j < c->argc; j++) {
3305306f 2905 robj *o = lookupKeyRead(c->db,c->argv[j]);
2906 if (o == NULL) {
c937aa89 2907 addReply(c,shared.nullbulk);
70003d28 2908 } else {
70003d28 2909 if (o->type != REDIS_STRING) {
c937aa89 2910 addReply(c,shared.nullbulk);
70003d28 2911 } else {
942a3961 2912 addReplyBulkLen(c,o);
70003d28 2913 addReply(c,o);
2914 addReply(c,shared.crlf);
2915 }
2916 }
2917 }
2918}
2919
d68ed120 2920static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 2921 long long value;
2922 int retval;
2923 robj *o;
2924
3305306f 2925 o = lookupKeyWrite(c->db,c->argv[1]);
2926 if (o == NULL) {
ed9b544e 2927 value = 0;
2928 } else {
ed9b544e 2929 if (o->type != REDIS_STRING) {
2930 value = 0;
2931 } else {
2932 char *eptr;
2933
942a3961 2934 if (o->encoding == REDIS_ENCODING_RAW)
2935 value = strtoll(o->ptr, &eptr, 10);
2936 else if (o->encoding == REDIS_ENCODING_INT)
2937 value = (long)o->ptr;
2938 else
2939 assert(1 != 1);
ed9b544e 2940 }
2941 }
2942
2943 value += incr;
2944 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
942a3961 2945 tryObjectEncoding(o);
3305306f 2946 retval = dictAdd(c->db->dict,c->argv[1],o);
ed9b544e 2947 if (retval == DICT_ERR) {
3305306f 2948 dictReplace(c->db->dict,c->argv[1],o);
2949 removeExpire(c->db,c->argv[1]);
ed9b544e 2950 } else {
2951 incrRefCount(c->argv[1]);
2952 }
2953 server.dirty++;
c937aa89 2954 addReply(c,shared.colon);
ed9b544e 2955 addReply(c,o);
2956 addReply(c,shared.crlf);
2957}
2958
2959static void incrCommand(redisClient *c) {
a4d1ba9a 2960 incrDecrCommand(c,1);
ed9b544e 2961}
2962
2963static void decrCommand(redisClient *c) {
a4d1ba9a 2964 incrDecrCommand(c,-1);
ed9b544e 2965}
2966
2967static void incrbyCommand(redisClient *c) {
d68ed120 2968 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 2969 incrDecrCommand(c,incr);
ed9b544e 2970}
2971
2972static void decrbyCommand(redisClient *c) {
d68ed120 2973 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 2974 incrDecrCommand(c,-incr);
ed9b544e 2975}
2976
2977/* ========================= Type agnostic commands ========================= */
2978
2979static void delCommand(redisClient *c) {
5109cdff 2980 int deleted = 0, j;
2981
2982 for (j = 1; j < c->argc; j++) {
2983 if (deleteKey(c->db,c->argv[j])) {
2984 server.dirty++;
2985 deleted++;
2986 }
2987 }
2988 switch(deleted) {
2989 case 0:
c937aa89 2990 addReply(c,shared.czero);
5109cdff 2991 break;
2992 case 1:
2993 addReply(c,shared.cone);
2994 break;
2995 default:
2996 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
2997 break;
ed9b544e 2998 }
2999}
3000
3001static void existsCommand(redisClient *c) {
3305306f 3002 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
ed9b544e 3003}
3004
3005static void selectCommand(redisClient *c) {
3006 int id = atoi(c->argv[1]->ptr);
3007
3008 if (selectDb(c,id) == REDIS_ERR) {
774e3047 3009 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 3010 } else {
3011 addReply(c,shared.ok);
3012 }
3013}
3014
3015static void randomkeyCommand(redisClient *c) {
3016 dictEntry *de;
3305306f 3017
3018 while(1) {
3019 de = dictGetRandomKey(c->db->dict);
ce7bef07 3020 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3305306f 3021 }
ed9b544e 3022 if (de == NULL) {
ce7bef07 3023 addReply(c,shared.plus);
ed9b544e 3024 addReply(c,shared.crlf);
3025 } else {
c937aa89 3026 addReply(c,shared.plus);
ed9b544e 3027 addReply(c,dictGetEntryKey(de));
3028 addReply(c,shared.crlf);
3029 }
3030}
3031
3032static void keysCommand(redisClient *c) {
3033 dictIterator *di;
3034 dictEntry *de;
3035 sds pattern = c->argv[1]->ptr;
3036 int plen = sdslen(pattern);
3037 int numkeys = 0, keyslen = 0;
3038 robj *lenobj = createObject(REDIS_STRING,NULL);
3039
3305306f 3040 di = dictGetIterator(c->db->dict);
ed9b544e 3041 addReply(c,lenobj);
3042 decrRefCount(lenobj);
3043 while((de = dictNext(di)) != NULL) {
3044 robj *keyobj = dictGetEntryKey(de);
3305306f 3045
ed9b544e 3046 sds key = keyobj->ptr;
3047 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3048 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3305306f 3049 if (expireIfNeeded(c->db,keyobj) == 0) {
3050 if (numkeys != 0)
3051 addReply(c,shared.space);
3052 addReply(c,keyobj);
3053 numkeys++;
3054 keyslen += sdslen(key);
3055 }
ed9b544e 3056 }
3057 }
3058 dictReleaseIterator(di);
c937aa89 3059 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
ed9b544e 3060 addReply(c,shared.crlf);
3061}
3062
3063static void dbsizeCommand(redisClient *c) {
3064 addReplySds(c,
3305306f 3065 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 3066}
3067
3068static void lastsaveCommand(redisClient *c) {
3069 addReplySds(c,
c937aa89 3070 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 3071}
3072
3073static void typeCommand(redisClient *c) {
3305306f 3074 robj *o;
ed9b544e 3075 char *type;
3305306f 3076
3077 o = lookupKeyRead(c->db,c->argv[1]);
3078 if (o == NULL) {
c937aa89 3079 type = "+none";
ed9b544e 3080 } else {
ed9b544e 3081 switch(o->type) {
c937aa89 3082 case REDIS_STRING: type = "+string"; break;
3083 case REDIS_LIST: type = "+list"; break;
3084 case REDIS_SET: type = "+set"; break;
ed9b544e 3085 default: type = "unknown"; break;
3086 }
3087 }
3088 addReplySds(c,sdsnew(type));
3089 addReply(c,shared.crlf);
3090}
3091
3092static void saveCommand(redisClient *c) {
05557f6d 3093 if (server.bgsaveinprogress) {
3094 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3095 return;
3096 }
f78fd11b 3097 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 3098 addReply(c,shared.ok);
3099 } else {
3100 addReply(c,shared.err);
3101 }
3102}
3103
3104static void bgsaveCommand(redisClient *c) {
3105 if (server.bgsaveinprogress) {
3106 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3107 return;
3108 }
f78fd11b 3109 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
ed9b544e 3110 addReply(c,shared.ok);
3111 } else {
3112 addReply(c,shared.err);
3113 }
3114}
3115
3116static void shutdownCommand(redisClient *c) {
3117 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
a3b21203 3118 /* Kill the saving child if there is a background saving in progress.
3119 We want to avoid race conditions, for instance our saving child may
3120 overwrite the synchronous saving did by SHUTDOWN. */
9f3c422c 3121 if (server.bgsaveinprogress) {
3122 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3123 kill(server.bgsavechildpid,SIGKILL);
a3b21203 3124 rdbRemoveTempFile(server.bgsavechildpid);
9f3c422c 3125 }
a3b21203 3126 /* SYNC SAVE */
f78fd11b 3127 if (rdbSave(server.dbfilename) == REDIS_OK) {
9f3c422c 3128 if (server.daemonize)
b284af55 3129 unlink(server.pidfile);
b284af55 3130 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
ed9b544e 3131 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3132 exit(1);
3133 } else {
a3b21203 3134 /* Ooops.. error saving! The best we can do is to continue operating.
3135 * Note that if there was a background saving process, in the next
3136 * cron() Redis will be notified that the background saving aborted,
3137 * handling special stuff like slaves pending for synchronization... */
ed9b544e 3138 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3139 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3140 }
3141}
3142
3143static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 3144 robj *o;
3145
3146 /* To use the same key as src and dst is probably an error */
3147 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 3148 addReply(c,shared.sameobjecterr);
ed9b544e 3149 return;
3150 }
3151
3305306f 3152 o = lookupKeyWrite(c->db,c->argv[1]);
3153 if (o == NULL) {
c937aa89 3154 addReply(c,shared.nokeyerr);
ed9b544e 3155 return;
3156 }
ed9b544e 3157 incrRefCount(o);
3305306f 3158 deleteIfVolatile(c->db,c->argv[2]);
3159 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
ed9b544e 3160 if (nx) {
3161 decrRefCount(o);
c937aa89 3162 addReply(c,shared.czero);
ed9b544e 3163 return;
3164 }
3305306f 3165 dictReplace(c->db->dict,c->argv[2],o);
ed9b544e 3166 } else {
3167 incrRefCount(c->argv[2]);
3168 }
3305306f 3169 deleteKey(c->db,c->argv[1]);
ed9b544e 3170 server.dirty++;
c937aa89 3171 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 3172}
3173
3174static void renameCommand(redisClient *c) {
3175 renameGenericCommand(c,0);
3176}
3177
3178static void renamenxCommand(redisClient *c) {
3179 renameGenericCommand(c,1);
3180}
3181
3182static void moveCommand(redisClient *c) {
3305306f 3183 robj *o;
3184 redisDb *src, *dst;
ed9b544e 3185 int srcid;
3186
3187 /* Obtain source and target DB pointers */
3305306f 3188 src = c->db;
3189 srcid = c->db->id;
ed9b544e 3190 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 3191 addReply(c,shared.outofrangeerr);
ed9b544e 3192 return;
3193 }
3305306f 3194 dst = c->db;
3195 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 3196
3197 /* If the user is moving using as target the same
3198 * DB as the source DB it is probably an error. */
3199 if (src == dst) {
c937aa89 3200 addReply(c,shared.sameobjecterr);
ed9b544e 3201 return;
3202 }
3203
3204 /* Check if the element exists and get a reference */
3305306f 3205 o = lookupKeyWrite(c->db,c->argv[1]);
3206 if (!o) {
c937aa89 3207 addReply(c,shared.czero);
ed9b544e 3208 return;
3209 }
3210
3211 /* Try to add the element to the target DB */
3305306f 3212 deleteIfVolatile(dst,c->argv[1]);
3213 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
c937aa89 3214 addReply(c,shared.czero);
ed9b544e 3215 return;
3216 }
3305306f 3217 incrRefCount(c->argv[1]);
ed9b544e 3218 incrRefCount(o);
3219
3220 /* OK! key moved, free the entry in the source DB */
3305306f 3221 deleteKey(src,c->argv[1]);
ed9b544e 3222 server.dirty++;
c937aa89 3223 addReply(c,shared.cone);
ed9b544e 3224}
3225
3226/* =================================== Lists ================================ */
3227static void pushGenericCommand(redisClient *c, int where) {
3228 robj *lobj;
ed9b544e 3229 list *list;
3305306f 3230
3231 lobj = lookupKeyWrite(c->db,c->argv[1]);
3232 if (lobj == NULL) {
ed9b544e 3233 lobj = createListObject();
3234 list = lobj->ptr;
3235 if (where == REDIS_HEAD) {
6b47e12e 3236 listAddNodeHead(list,c->argv[2]);
ed9b544e 3237 } else {
6b47e12e 3238 listAddNodeTail(list,c->argv[2]);
ed9b544e 3239 }
3305306f 3240 dictAdd(c->db->dict,c->argv[1],lobj);
ed9b544e 3241 incrRefCount(c->argv[1]);
3242 incrRefCount(c->argv[2]);
3243 } else {
ed9b544e 3244 if (lobj->type != REDIS_LIST) {
3245 addReply(c,shared.wrongtypeerr);
3246 return;
3247 }
3248 list = lobj->ptr;
3249 if (where == REDIS_HEAD) {
6b47e12e 3250 listAddNodeHead(list,c->argv[2]);
ed9b544e 3251 } else {
6b47e12e 3252 listAddNodeTail(list,c->argv[2]);
ed9b544e 3253 }
3254 incrRefCount(c->argv[2]);
3255 }
3256 server.dirty++;
3257 addReply(c,shared.ok);
3258}
3259
3260static void lpushCommand(redisClient *c) {
3261 pushGenericCommand(c,REDIS_HEAD);
3262}
3263
3264static void rpushCommand(redisClient *c) {
3265 pushGenericCommand(c,REDIS_TAIL);
3266}
3267
3268static void llenCommand(redisClient *c) {
3305306f 3269 robj *o;
ed9b544e 3270 list *l;
3271
3305306f 3272 o = lookupKeyRead(c->db,c->argv[1]);
3273 if (o == NULL) {
c937aa89 3274 addReply(c,shared.czero);
ed9b544e 3275 return;
3276 } else {
ed9b544e 3277 if (o->type != REDIS_LIST) {
c937aa89 3278 addReply(c,shared.wrongtypeerr);
ed9b544e 3279 } else {
3280 l = o->ptr;
c937aa89 3281 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
ed9b544e 3282 }
3283 }
3284}
3285
3286static void lindexCommand(redisClient *c) {
3305306f 3287 robj *o;
ed9b544e 3288 int index = atoi(c->argv[2]->ptr);
3289
3305306f 3290 o = lookupKeyRead(c->db,c->argv[1]);
3291 if (o == NULL) {
c937aa89 3292 addReply(c,shared.nullbulk);
ed9b544e 3293 } else {
ed9b544e 3294 if (o->type != REDIS_LIST) {
c937aa89 3295 addReply(c,shared.wrongtypeerr);
ed9b544e 3296 } else {
3297 list *list = o->ptr;
3298 listNode *ln;
3299
3300 ln = listIndex(list, index);
3301 if (ln == NULL) {
c937aa89 3302 addReply(c,shared.nullbulk);
ed9b544e 3303 } else {
3304 robj *ele = listNodeValue(ln);
942a3961 3305 addReplyBulkLen(c,ele);
ed9b544e 3306 addReply(c,ele);
3307 addReply(c,shared.crlf);
3308 }
3309 }
3310 }
3311}
3312
3313static void lsetCommand(redisClient *c) {
3305306f 3314 robj *o;
ed9b544e 3315 int index = atoi(c->argv[2]->ptr);
3316
3305306f 3317 o = lookupKeyWrite(c->db,c->argv[1]);
3318 if (o == NULL) {
ed9b544e 3319 addReply(c,shared.nokeyerr);
3320 } else {
ed9b544e 3321 if (o->type != REDIS_LIST) {
3322 addReply(c,shared.wrongtypeerr);
3323 } else {
3324 list *list = o->ptr;
3325 listNode *ln;
3326
3327 ln = listIndex(list, index);
3328 if (ln == NULL) {
c937aa89 3329 addReply(c,shared.outofrangeerr);
ed9b544e 3330 } else {
3331 robj *ele = listNodeValue(ln);
3332
3333 decrRefCount(ele);
3334 listNodeValue(ln) = c->argv[3];
3335 incrRefCount(c->argv[3]);
3336 addReply(c,shared.ok);
3337 server.dirty++;
3338 }
3339 }
3340 }
3341}
3342
3343static void popGenericCommand(redisClient *c, int where) {
3305306f 3344 robj *o;
3345
3346 o = lookupKeyWrite(c->db,c->argv[1]);
3347 if (o == NULL) {
c937aa89 3348 addReply(c,shared.nullbulk);
ed9b544e 3349 } else {
ed9b544e 3350 if (o->type != REDIS_LIST) {
c937aa89 3351 addReply(c,shared.wrongtypeerr);
ed9b544e 3352 } else {
3353 list *list = o->ptr;
3354 listNode *ln;
3355
3356 if (where == REDIS_HEAD)
3357 ln = listFirst(list);
3358 else
3359 ln = listLast(list);
3360
3361 if (ln == NULL) {
c937aa89 3362 addReply(c,shared.nullbulk);
ed9b544e 3363 } else {
3364 robj *ele = listNodeValue(ln);
942a3961 3365 addReplyBulkLen(c,ele);
ed9b544e 3366 addReply(c,ele);
3367 addReply(c,shared.crlf);
3368 listDelNode(list,ln);
3369 server.dirty++;
3370 }
3371 }
3372 }
3373}
3374
3375static void lpopCommand(redisClient *c) {
3376 popGenericCommand(c,REDIS_HEAD);
3377}
3378
3379static void rpopCommand(redisClient *c) {
3380 popGenericCommand(c,REDIS_TAIL);
3381}
3382
3383static void lrangeCommand(redisClient *c) {
3305306f 3384 robj *o;
ed9b544e 3385 int start = atoi(c->argv[2]->ptr);
3386 int end = atoi(c->argv[3]->ptr);
3305306f 3387
3388 o = lookupKeyRead(c->db,c->argv[1]);
3389 if (o == NULL) {
c937aa89 3390 addReply(c,shared.nullmultibulk);
ed9b544e 3391 } else {
ed9b544e 3392 if (o->type != REDIS_LIST) {
c937aa89 3393 addReply(c,shared.wrongtypeerr);
ed9b544e 3394 } else {
3395 list *list = o->ptr;
3396 listNode *ln;
3397 int llen = listLength(list);
3398 int rangelen, j;
3399 robj *ele;
3400
3401 /* convert negative indexes */
3402 if (start < 0) start = llen+start;
3403 if (end < 0) end = llen+end;
3404 if (start < 0) start = 0;
3405 if (end < 0) end = 0;
3406
3407 /* indexes sanity checks */
3408 if (start > end || start >= llen) {
3409 /* Out of range start or start > end result in empty list */
c937aa89 3410 addReply(c,shared.emptymultibulk);
ed9b544e 3411 return;
3412 }
3413 if (end >= llen) end = llen-1;
3414 rangelen = (end-start)+1;
3415
3416 /* Return the result in form of a multi-bulk reply */
3417 ln = listIndex(list, start);
c937aa89 3418 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
ed9b544e 3419 for (j = 0; j < rangelen; j++) {
3420 ele = listNodeValue(ln);
942a3961 3421 addReplyBulkLen(c,ele);
ed9b544e 3422 addReply(c,ele);
3423 addReply(c,shared.crlf);
3424 ln = ln->next;
3425 }
3426 }
3427 }
3428}
3429
3430static void ltrimCommand(redisClient *c) {
3305306f 3431 robj *o;
ed9b544e 3432 int start = atoi(c->argv[2]->ptr);
3433 int end = atoi(c->argv[3]->ptr);
3434
3305306f 3435 o = lookupKeyWrite(c->db,c->argv[1]);
3436 if (o == NULL) {
ed9b544e 3437 addReply(c,shared.nokeyerr);
3438 } else {
ed9b544e 3439 if (o->type != REDIS_LIST) {
3440 addReply(c,shared.wrongtypeerr);
3441 } else {
3442 list *list = o->ptr;
3443 listNode *ln;
3444 int llen = listLength(list);
3445 int j, ltrim, rtrim;
3446
3447 /* convert negative indexes */
3448 if (start < 0) start = llen+start;
3449 if (end < 0) end = llen+end;
3450 if (start < 0) start = 0;
3451 if (end < 0) end = 0;
3452
3453 /* indexes sanity checks */
3454 if (start > end || start >= llen) {
3455 /* Out of range start or start > end result in empty list */
3456 ltrim = llen;
3457 rtrim = 0;
3458 } else {
3459 if (end >= llen) end = llen-1;
3460 ltrim = start;
3461 rtrim = llen-end-1;
3462 }
3463
3464 /* Remove list elements to perform the trim */
3465 for (j = 0; j < ltrim; j++) {
3466 ln = listFirst(list);
3467 listDelNode(list,ln);
3468 }
3469 for (j = 0; j < rtrim; j++) {
3470 ln = listLast(list);
3471 listDelNode(list,ln);
3472 }
ed9b544e 3473 server.dirty++;
e59229a2 3474 addReply(c,shared.ok);
ed9b544e 3475 }
3476 }
3477}
3478
3479static void lremCommand(redisClient *c) {
3305306f 3480 robj *o;
ed9b544e 3481
3305306f 3482 o = lookupKeyWrite(c->db,c->argv[1]);
3483 if (o == NULL) {
33c08b39 3484 addReply(c,shared.czero);
ed9b544e 3485 } else {
ed9b544e 3486 if (o->type != REDIS_LIST) {
c937aa89 3487 addReply(c,shared.wrongtypeerr);
ed9b544e 3488 } else {
3489 list *list = o->ptr;
3490 listNode *ln, *next;
3491 int toremove = atoi(c->argv[2]->ptr);
3492 int removed = 0;
3493 int fromtail = 0;
3494
3495 if (toremove < 0) {
3496 toremove = -toremove;
3497 fromtail = 1;
3498 }
3499 ln = fromtail ? list->tail : list->head;
3500 while (ln) {
ed9b544e 3501 robj *ele = listNodeValue(ln);
a4d1ba9a 3502
3503 next = fromtail ? ln->prev : ln->next;
724a51b1 3504 if (compareStringObjects(ele,c->argv[3]) == 0) {
ed9b544e 3505 listDelNode(list,ln);
3506 server.dirty++;
3507 removed++;
3508 if (toremove && removed == toremove) break;
3509 }
3510 ln = next;
3511 }
c937aa89 3512 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 3513 }
3514 }
3515}
3516
3517/* ==================================== Sets ================================ */
3518
3519static void saddCommand(redisClient *c) {
ed9b544e 3520 robj *set;
3521
3305306f 3522 set = lookupKeyWrite(c->db,c->argv[1]);
3523 if (set == NULL) {
ed9b544e 3524 set = createSetObject();
3305306f 3525 dictAdd(c->db->dict,c->argv[1],set);
ed9b544e 3526 incrRefCount(c->argv[1]);
3527 } else {
ed9b544e 3528 if (set->type != REDIS_SET) {
c937aa89 3529 addReply(c,shared.wrongtypeerr);
ed9b544e 3530 return;
3531 }
3532 }
3533 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
3534 incrRefCount(c->argv[2]);
3535 server.dirty++;
c937aa89 3536 addReply(c,shared.cone);
ed9b544e 3537 } else {
c937aa89 3538 addReply(c,shared.czero);
ed9b544e 3539 }
3540}
3541
3542static void sremCommand(redisClient *c) {
3305306f 3543 robj *set;
ed9b544e 3544
3305306f 3545 set = lookupKeyWrite(c->db,c->argv[1]);
3546 if (set == NULL) {
c937aa89 3547 addReply(c,shared.czero);
ed9b544e 3548 } else {
ed9b544e 3549 if (set->type != REDIS_SET) {
c937aa89 3550 addReply(c,shared.wrongtypeerr);
ed9b544e 3551 return;
3552 }
3553 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
3554 server.dirty++;
12fea928 3555 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
c937aa89 3556 addReply(c,shared.cone);
ed9b544e 3557 } else {
c937aa89 3558 addReply(c,shared.czero);
ed9b544e 3559 }
3560 }
3561}
3562
a4460ef4 3563static void smoveCommand(redisClient *c) {
3564 robj *srcset, *dstset;
3565
3566 srcset = lookupKeyWrite(c->db,c->argv[1]);
3567 dstset = lookupKeyWrite(c->db,c->argv[2]);
3568
3569 /* If the source key does not exist return 0, if it's of the wrong type
3570 * raise an error */
3571 if (srcset == NULL || srcset->type != REDIS_SET) {
3572 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
3573 return;
3574 }
3575 /* Error if the destination key is not a set as well */
3576 if (dstset && dstset->type != REDIS_SET) {
3577 addReply(c,shared.wrongtypeerr);
3578 return;
3579 }
3580 /* Remove the element from the source set */
3581 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
3582 /* Key not found in the src set! return zero */
3583 addReply(c,shared.czero);
3584 return;
3585 }
3586 server.dirty++;
3587 /* Add the element to the destination set */
3588 if (!dstset) {
3589 dstset = createSetObject();
3590 dictAdd(c->db->dict,c->argv[2],dstset);
3591 incrRefCount(c->argv[2]);
3592 }
3593 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
3594 incrRefCount(c->argv[3]);
3595 addReply(c,shared.cone);
3596}
3597
ed9b544e 3598static void sismemberCommand(redisClient *c) {
3305306f 3599 robj *set;
ed9b544e 3600
3305306f 3601 set = lookupKeyRead(c->db,c->argv[1]);
3602 if (set == NULL) {
c937aa89 3603 addReply(c,shared.czero);
ed9b544e 3604 } else {
ed9b544e 3605 if (set->type != REDIS_SET) {
c937aa89 3606 addReply(c,shared.wrongtypeerr);
ed9b544e 3607 return;
3608 }
3609 if (dictFind(set->ptr,c->argv[2]))
c937aa89 3610 addReply(c,shared.cone);
ed9b544e 3611 else
c937aa89 3612 addReply(c,shared.czero);
ed9b544e 3613 }
3614}
3615
3616static void scardCommand(redisClient *c) {
3305306f 3617 robj *o;
ed9b544e 3618 dict *s;
3619
3305306f 3620 o = lookupKeyRead(c->db,c->argv[1]);
3621 if (o == NULL) {
c937aa89 3622 addReply(c,shared.czero);
ed9b544e 3623 return;
3624 } else {
ed9b544e 3625 if (o->type != REDIS_SET) {
c937aa89 3626 addReply(c,shared.wrongtypeerr);
ed9b544e 3627 } else {
3628 s = o->ptr;
c937aa89 3629 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3305306f 3630 dictSize(s)));
ed9b544e 3631 }
3632 }
3633}
3634
12fea928 3635static void spopCommand(redisClient *c) {
3636 robj *set;
3637 dictEntry *de;
3638
3639 set = lookupKeyWrite(c->db,c->argv[1]);
3640 if (set == NULL) {
3641 addReply(c,shared.nullbulk);
3642 } else {
3643 if (set->type != REDIS_SET) {
3644 addReply(c,shared.wrongtypeerr);
3645 return;
3646 }
3647 de = dictGetRandomKey(set->ptr);
3648 if (de == NULL) {
3649 addReply(c,shared.nullbulk);
3650 } else {
3651 robj *ele = dictGetEntryKey(de);
3652
942a3961 3653 addReplyBulkLen(c,ele);
12fea928 3654 addReply(c,ele);
3655 addReply(c,shared.crlf);
3656 dictDelete(set->ptr,ele);
3657 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3658 server.dirty++;
3659 }
3660 }
3661}
3662
2abb95a9 3663static void srandmemberCommand(redisClient *c) {
3664 robj *set;
3665 dictEntry *de;
3666
3667 set = lookupKeyRead(c->db,c->argv[1]);
3668 if (set == NULL) {
3669 addReply(c,shared.nullbulk);
3670 } else {
3671 if (set->type != REDIS_SET) {
3672 addReply(c,shared.wrongtypeerr);
3673 return;
3674 }
3675 de = dictGetRandomKey(set->ptr);
3676 if (de == NULL) {
3677 addReply(c,shared.nullbulk);
3678 } else {
3679 robj *ele = dictGetEntryKey(de);
3680
3681 addReplyBulkLen(c,ele);
3682 addReply(c,ele);
3683 addReply(c,shared.crlf);
3684 }
3685 }
3686}
3687
ed9b544e 3688static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
3689 dict **d1 = (void*) s1, **d2 = (void*) s2;
3690
3305306f 3691 return dictSize(*d1)-dictSize(*d2);
ed9b544e 3692}
3693
3694static void sinterGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey) {
3695 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3696 dictIterator *di;
3697 dictEntry *de;
3698 robj *lenobj = NULL, *dstset = NULL;
3699 int j, cardinality = 0;
3700
ed9b544e 3701 for (j = 0; j < setsnum; j++) {
3702 robj *setobj;
3305306f 3703
3704 setobj = dstkey ?
3705 lookupKeyWrite(c->db,setskeys[j]) :
3706 lookupKeyRead(c->db,setskeys[j]);
3707 if (!setobj) {
ed9b544e 3708 zfree(dv);
5faa6025 3709 if (dstkey) {
3710 deleteKey(c->db,dstkey);
3711 addReply(c,shared.ok);
3712 } else {
3713 addReply(c,shared.nullmultibulk);
3714 }
ed9b544e 3715 return;
3716 }
ed9b544e 3717 if (setobj->type != REDIS_SET) {
3718 zfree(dv);
c937aa89 3719 addReply(c,shared.wrongtypeerr);
ed9b544e 3720 return;
3721 }
3722 dv[j] = setobj->ptr;
3723 }
3724 /* Sort sets from the smallest to largest, this will improve our
3725 * algorithm's performace */
3726 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3727
3728 /* The first thing we should output is the total number of elements...
3729 * since this is a multi-bulk write, but at this stage we don't know
3730 * the intersection set size, so we use a trick, append an empty object
3731 * to the output list and save the pointer to later modify it with the
3732 * right length */
3733 if (!dstkey) {
3734 lenobj = createObject(REDIS_STRING,NULL);
3735 addReply(c,lenobj);
3736 decrRefCount(lenobj);
3737 } else {
3738 /* If we have a target key where to store the resulting set
3739 * create this key with an empty set inside */
3740 dstset = createSetObject();
ed9b544e 3741 }
3742
3743 /* Iterate all the elements of the first (smallest) set, and test
3744 * the element against all the other sets, if at least one set does
3745 * not include the element it is discarded */
3746 di = dictGetIterator(dv[0]);
ed9b544e 3747
3748 while((de = dictNext(di)) != NULL) {
3749 robj *ele;
3750
3751 for (j = 1; j < setsnum; j++)
3752 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
3753 if (j != setsnum)
3754 continue; /* at least one set does not contain the member */
3755 ele = dictGetEntryKey(de);
3756 if (!dstkey) {
942a3961 3757 addReplyBulkLen(c,ele);
ed9b544e 3758 addReply(c,ele);
3759 addReply(c,shared.crlf);
3760 cardinality++;
3761 } else {
3762 dictAdd(dstset->ptr,ele,NULL);
3763 incrRefCount(ele);
3764 }
3765 }
3766 dictReleaseIterator(di);
3767
83cdfe18
AG
3768 if (dstkey) {
3769 /* Store the resulting set into the target */
3770 deleteKey(c->db,dstkey);
3771 dictAdd(c->db->dict,dstkey,dstset);
3772 incrRefCount(dstkey);
3773 }
3774
40d224a9 3775 if (!dstkey) {
c937aa89 3776 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",cardinality);
40d224a9 3777 } else {
03fd01c7 3778 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3779 dictSize((dict*)dstset->ptr)));
40d224a9 3780 server.dirty++;
3781 }
ed9b544e 3782 zfree(dv);
3783}
3784
3785static void sinterCommand(redisClient *c) {
3786 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
3787}
3788
3789static void sinterstoreCommand(redisClient *c) {
3790 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
3791}
3792
f4f56e1d 3793#define REDIS_OP_UNION 0
3794#define REDIS_OP_DIFF 1
3795
3796static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 3797 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3798 dictIterator *di;
3799 dictEntry *de;
f4f56e1d 3800 robj *dstset = NULL;
40d224a9 3801 int j, cardinality = 0;
3802
40d224a9 3803 for (j = 0; j < setsnum; j++) {
3804 robj *setobj;
3805
3806 setobj = dstkey ?
3807 lookupKeyWrite(c->db,setskeys[j]) :
3808 lookupKeyRead(c->db,setskeys[j]);
3809 if (!setobj) {
3810 dv[j] = NULL;
3811 continue;
3812 }
3813 if (setobj->type != REDIS_SET) {
3814 zfree(dv);
3815 addReply(c,shared.wrongtypeerr);
3816 return;
3817 }
3818 dv[j] = setobj->ptr;
3819 }
3820
3821 /* We need a temp set object to store our union. If the dstkey
3822 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
3823 * this set object will be the resulting object to set into the target key*/
3824 dstset = createSetObject();
3825
40d224a9 3826 /* Iterate all the elements of all the sets, add every element a single
3827 * time to the result set */
3828 for (j = 0; j < setsnum; j++) {
51829ed3 3829 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 3830 if (!dv[j]) continue; /* non existing keys are like empty sets */
3831
3832 di = dictGetIterator(dv[j]);
40d224a9 3833
3834 while((de = dictNext(di)) != NULL) {
3835 robj *ele;
3836
3837 /* dictAdd will not add the same element multiple times */
3838 ele = dictGetEntryKey(de);
f4f56e1d 3839 if (op == REDIS_OP_UNION || j == 0) {
3840 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
3841 incrRefCount(ele);
40d224a9 3842 cardinality++;
3843 }
f4f56e1d 3844 } else if (op == REDIS_OP_DIFF) {
3845 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
3846 cardinality--;
3847 }
40d224a9 3848 }
3849 }
3850 dictReleaseIterator(di);
51829ed3
AG
3851
3852 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
40d224a9 3853 }
3854
f4f56e1d 3855 /* Output the content of the resulting set, if not in STORE mode */
3856 if (!dstkey) {
3857 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
3858 di = dictGetIterator(dstset->ptr);
f4f56e1d 3859 while((de = dictNext(di)) != NULL) {
3860 robj *ele;
3861
3862 ele = dictGetEntryKey(de);
942a3961 3863 addReplyBulkLen(c,ele);
f4f56e1d 3864 addReply(c,ele);
3865 addReply(c,shared.crlf);
3866 }
3867 dictReleaseIterator(di);
83cdfe18
AG
3868 } else {
3869 /* If we have a target key where to store the resulting set
3870 * create this key with the result set inside */
3871 deleteKey(c->db,dstkey);
3872 dictAdd(c->db->dict,dstkey,dstset);
3873 incrRefCount(dstkey);
f4f56e1d 3874 }
3875
3876 /* Cleanup */
40d224a9 3877 if (!dstkey) {
40d224a9 3878 decrRefCount(dstset);
3879 } else {
03fd01c7 3880 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3881 dictSize((dict*)dstset->ptr)));
40d224a9 3882 server.dirty++;
3883 }
3884 zfree(dv);
3885}
3886
3887static void sunionCommand(redisClient *c) {
f4f56e1d 3888 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 3889}
3890
3891static void sunionstoreCommand(redisClient *c) {
f4f56e1d 3892 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
3893}
3894
3895static void sdiffCommand(redisClient *c) {
3896 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
3897}
3898
3899static void sdiffstoreCommand(redisClient *c) {
3900 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 3901}
3902
6b47e12e 3903/* ==================================== ZSets =============================== */
3904
3905/* ZSETs are ordered sets using two data structures to hold the same elements
3906 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
3907 * data structure.
3908 *
3909 * The elements are added to an hash table mapping Redis objects to scores.
3910 * At the same time the elements are added to a skip list mapping scores
3911 * to Redis objects (so objects are sorted by scores in this "view"). */
3912
3913/* This skiplist implementation is almost a C translation of the original
3914 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
3915 * Alternative to Balanced Trees", modified in three ways:
3916 * a) this implementation allows for repeated values.
3917 * b) the comparison is not just by key (our 'score') but by satellite data.
3918 * c) there is a back pointer, so it's a doubly linked list with the back
3919 * pointers being only at "level 1". This allows to traverse the list
3920 * from tail to head, useful for ZREVRANGE. */
3921
3922static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
3923 zskiplistNode *zn = zmalloc(sizeof(*zn));
3924
3925 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
3926 zn->score = score;
3927 zn->obj = obj;
3928 return zn;
3929}
3930
3931static zskiplist *zslCreate(void) {
3932 int j;
3933 zskiplist *zsl;
3934
3935 zsl = zmalloc(sizeof(*zsl));
3936 zsl->level = 1;
cc812361 3937 zsl->length = 0;
6b47e12e 3938 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
3939 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
3940 zsl->header->forward[j] = NULL;
e3870fab 3941 zsl->header->backward = NULL;
3942 zsl->tail = NULL;
6b47e12e 3943 return zsl;
3944}
3945
fd8ccf44 3946static void zslFreeNode(zskiplistNode *node) {
3947 decrRefCount(node->obj);
ad807e6f 3948 zfree(node->forward);
fd8ccf44 3949 zfree(node);
3950}
3951
3952static void zslFree(zskiplist *zsl) {
ad807e6f 3953 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 3954
ad807e6f 3955 zfree(zsl->header->forward);
3956 zfree(zsl->header);
fd8ccf44 3957 while(node) {
599379dd 3958 next = node->forward[0];
fd8ccf44 3959 zslFreeNode(node);
3960 node = next;
3961 }
ad807e6f 3962 zfree(zsl);
fd8ccf44 3963}
3964
6b47e12e 3965static int zslRandomLevel(void) {
3966 int level = 1;
3967 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
3968 level += 1;
3969 return level;
3970}
3971
3972static void zslInsert(zskiplist *zsl, double score, robj *obj) {
3973 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
3974 int i, level;
3975
3976 x = zsl->header;
3977 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 3978 while (x->forward[i] &&
3979 (x->forward[i]->score < score ||
3980 (x->forward[i]->score == score &&
3981 compareStringObjects(x->forward[i]->obj,obj) < 0)))
6b47e12e 3982 x = x->forward[i];
3983 update[i] = x;
3984 }
6b47e12e 3985 /* we assume the key is not already inside, since we allow duplicated
3986 * scores, and the re-insertion of score and redis object should never
3987 * happpen since the caller of zslInsert() should test in the hash table
3988 * if the element is already inside or not. */
3989 level = zslRandomLevel();
3990 if (level > zsl->level) {
3991 for (i = zsl->level; i < level; i++)
3992 update[i] = zsl->header;
3993 zsl->level = level;
3994 }
3995 x = zslCreateNode(level,score,obj);
3996 for (i = 0; i < level; i++) {
3997 x->forward[i] = update[i]->forward[i];
3998 update[i]->forward[i] = x;
3999 }
bb975144 4000 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 4001 if (x->forward[0])
4002 x->forward[0]->backward = x;
4003 else
4004 zsl->tail = x;
cc812361 4005 zsl->length++;
6b47e12e 4006}
4007
50c55df5 4008/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 4009static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 4010 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4011 int i;
4012
4013 x = zsl->header;
4014 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4015 while (x->forward[i] &&
4016 (x->forward[i]->score < score ||
4017 (x->forward[i]->score == score &&
4018 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 4019 x = x->forward[i];
4020 update[i] = x;
4021 }
4022 /* We may have multiple elements with the same score, what we need
4023 * is to find the element with both the right score and object. */
4024 x = x->forward[0];
50c55df5 4025 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
9d60e6e4 4026 for (i = 0; i < zsl->level; i++) {
4027 if (update[i]->forward[i] != x) break;
4028 update[i]->forward[i] = x->forward[i];
4029 }
4030 if (x->forward[0]) {
4031 x->forward[0]->backward = (x->backward == zsl->header) ?
4032 NULL : x->backward;
e197b441 4033 } else {
9d60e6e4 4034 zsl->tail = x->backward;
e197b441 4035 }
9d60e6e4 4036 zslFreeNode(x);
4037 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4038 zsl->level--;
4039 zsl->length--;
4040 return 1;
4041 } else {
4042 return 0; /* not found */
e197b441 4043 }
4044 return 0; /* not found */
fd8ccf44 4045}
4046
1807985b 4047/* Delete all the elements with score between min and max from the skiplist.
4048 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4049 * Note that this function takes the reference to the hash table view of the
4050 * sorted set, in order to remove the elements from the hash table too. */
4051static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4052 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4053 unsigned long removed = 0;
4054 int i;
4055
4056 x = zsl->header;
4057 for (i = zsl->level-1; i >= 0; i--) {
4058 while (x->forward[i] && x->forward[i]->score < min)
4059 x = x->forward[i];
4060 update[i] = x;
4061 }
4062 /* We may have multiple elements with the same score, what we need
4063 * is to find the element with both the right score and object. */
4064 x = x->forward[0];
4065 while (x && x->score <= max) {
4066 zskiplistNode *next;
4067
4068 for (i = 0; i < zsl->level; i++) {
4069 if (update[i]->forward[i] != x) break;
4070 update[i]->forward[i] = x->forward[i];
4071 }
4072 if (x->forward[0]) {
4073 x->forward[0]->backward = (x->backward == zsl->header) ?
4074 NULL : x->backward;
4075 } else {
4076 zsl->tail = x->backward;
4077 }
4078 next = x->forward[0];
4079 dictDelete(dict,x->obj);
4080 zslFreeNode(x);
4081 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4082 zsl->level--;
4083 zsl->length--;
4084 removed++;
4085 x = next;
4086 }
4087 return removed; /* not found */
4088}
4089
50c55df5 4090/* Find the first node having a score equal or greater than the specified one.
4091 * Returns NULL if there is no match. */
4092static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4093 zskiplistNode *x;
4094 int i;
4095
4096 x = zsl->header;
4097 for (i = zsl->level-1; i >= 0; i--) {
4098 while (x->forward[i] && x->forward[i]->score < score)
4099 x = x->forward[i];
4100 }
4101 /* We may have multiple elements with the same score, what we need
4102 * is to find the element with both the right score and object. */
4103 return x->forward[0];
4104}
4105
fd8ccf44 4106/* The actual Z-commands implementations */
4107
4108static void zaddCommand(redisClient *c) {
4109 robj *zsetobj;
4110 zset *zs;
4111 double *score;
4112
4113 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4114 if (zsetobj == NULL) {
4115 zsetobj = createZsetObject();
4116 dictAdd(c->db->dict,c->argv[1],zsetobj);
4117 incrRefCount(c->argv[1]);
4118 } else {
4119 if (zsetobj->type != REDIS_ZSET) {
4120 addReply(c,shared.wrongtypeerr);
4121 return;
4122 }
4123 }
4124 score = zmalloc(sizeof(double));
4125 *score = strtod(c->argv[2]->ptr,NULL);
4126 zs = zsetobj->ptr;
4127 if (dictAdd(zs->dict,c->argv[3],score) == DICT_OK) {
4128 /* case 1: New element */
4129 incrRefCount(c->argv[3]); /* added to hash */
4130 zslInsert(zs->zsl,*score,c->argv[3]);
4131 incrRefCount(c->argv[3]); /* added to skiplist */
4132 server.dirty++;
4133 addReply(c,shared.cone);
4134 } else {
4135 dictEntry *de;
4136 double *oldscore;
4137
4138 /* case 2: Score update operation */
4139 de = dictFind(zs->dict,c->argv[3]);
4140 assert(de != NULL);
4141 oldscore = dictGetEntryVal(de);
4142 if (*score != *oldscore) {
4143 int deleted;
4144
e197b441 4145 deleted = zslDelete(zs->zsl,*oldscore,c->argv[3]);
fd8ccf44 4146 assert(deleted != 0);
4147 zslInsert(zs->zsl,*score,c->argv[3]);
4148 incrRefCount(c->argv[3]);
2161a965 4149 dictReplace(zs->dict,c->argv[3],score);
fd8ccf44 4150 server.dirty++;
2161a965 4151 } else {
4152 zfree(score);
fd8ccf44 4153 }
4154 addReply(c,shared.czero);
4155 }
4156}
4157
1b7106e7 4158static void zremCommand(redisClient *c) {
4159 robj *zsetobj;
4160 zset *zs;
4161
4162 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4163 if (zsetobj == NULL) {
4164 addReply(c,shared.czero);
4165 } else {
4166 dictEntry *de;
4167 double *oldscore;
4168 int deleted;
4169
4170 if (zsetobj->type != REDIS_ZSET) {
4171 addReply(c,shared.wrongtypeerr);
4172 return;
4173 }
4174 zs = zsetobj->ptr;
4175 de = dictFind(zs->dict,c->argv[2]);
4176 if (de == NULL) {
4177 addReply(c,shared.czero);
4178 return;
4179 }
4180 /* Delete from the skiplist */
4181 oldscore = dictGetEntryVal(de);
4182 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
4183 assert(deleted != 0);
4184
4185 /* Delete from the hash table */
4186 dictDelete(zs->dict,c->argv[2]);
4187 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4188 server.dirty++;
4189 addReply(c,shared.cone);
4190 }
4191}
4192
1807985b 4193static void zremrangebyscoreCommand(redisClient *c) {
4194 double min = strtod(c->argv[2]->ptr,NULL);
4195 double max = strtod(c->argv[3]->ptr,NULL);
4196 robj *zsetobj;
4197 zset *zs;
4198
4199 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4200 if (zsetobj == NULL) {
4201 addReply(c,shared.czero);
4202 } else {
4203 long deleted;
4204
4205 if (zsetobj->type != REDIS_ZSET) {
4206 addReply(c,shared.wrongtypeerr);
4207 return;
4208 }
4209 zs = zsetobj->ptr;
4210 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4211 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4212 server.dirty += deleted;
4213 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4214 }
4215}
4216
e3870fab 4217static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 4218 robj *o;
4219 int start = atoi(c->argv[2]->ptr);
4220 int end = atoi(c->argv[3]->ptr);
4221
4222 o = lookupKeyRead(c->db,c->argv[1]);
4223 if (o == NULL) {
4224 addReply(c,shared.nullmultibulk);
4225 } else {
4226 if (o->type != REDIS_ZSET) {
4227 addReply(c,shared.wrongtypeerr);
4228 } else {
4229 zset *zsetobj = o->ptr;
4230 zskiplist *zsl = zsetobj->zsl;
4231 zskiplistNode *ln;
4232
4233 int llen = zsl->length;
4234 int rangelen, j;
4235 robj *ele;
4236
4237 /* convert negative indexes */
4238 if (start < 0) start = llen+start;
4239 if (end < 0) end = llen+end;
4240 if (start < 0) start = 0;
4241 if (end < 0) end = 0;
4242
4243 /* indexes sanity checks */
4244 if (start > end || start >= llen) {
4245 /* Out of range start or start > end result in empty list */
4246 addReply(c,shared.emptymultibulk);
4247 return;
4248 }
4249 if (end >= llen) end = llen-1;
4250 rangelen = (end-start)+1;
4251
4252 /* Return the result in form of a multi-bulk reply */
e3870fab 4253 if (reverse) {
4254 ln = zsl->tail;
4255 while (start--)
4256 ln = ln->backward;
4257 } else {
4258 ln = zsl->header->forward[0];
4259 while (start--)
4260 ln = ln->forward[0];
4261 }
cc812361 4262
4263 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
4264 for (j = 0; j < rangelen; j++) {
0aad7a19 4265 ele = ln->obj;
cc812361 4266 addReplyBulkLen(c,ele);
4267 addReply(c,ele);
4268 addReply(c,shared.crlf);
e3870fab 4269 ln = reverse ? ln->backward : ln->forward[0];
cc812361 4270 }
4271 }
4272 }
4273}
4274
e3870fab 4275static void zrangeCommand(redisClient *c) {
4276 zrangeGenericCommand(c,0);
4277}
4278
4279static void zrevrangeCommand(redisClient *c) {
4280 zrangeGenericCommand(c,1);
4281}
4282
50c55df5 4283static void zrangebyscoreCommand(redisClient *c) {
4284 robj *o;
4285 double min = strtod(c->argv[2]->ptr,NULL);
4286 double max = strtod(c->argv[3]->ptr,NULL);
4287
4288 o = lookupKeyRead(c->db,c->argv[1]);
4289 if (o == NULL) {
4290 addReply(c,shared.nullmultibulk);
4291 } else {
4292 if (o->type != REDIS_ZSET) {
4293 addReply(c,shared.wrongtypeerr);
4294 } else {
4295 zset *zsetobj = o->ptr;
4296 zskiplist *zsl = zsetobj->zsl;
4297 zskiplistNode *ln;
4298 robj *ele, *lenobj;
4299 unsigned int rangelen = 0;
4300
4301 /* Get the first node with the score >= min */
4302 ln = zslFirstWithScore(zsl,min);
4303 if (ln == NULL) {
4304 /* No element matching the speciifed interval */
4305 addReply(c,shared.emptymultibulk);
4306 return;
4307 }
4308
4309 /* We don't know in advance how many matching elements there
4310 * are in the list, so we push this object that will represent
4311 * the multi-bulk length in the output buffer, and will "fix"
4312 * it later */
4313 lenobj = createObject(REDIS_STRING,NULL);
4314 addReply(c,lenobj);
4315
dbbc7285 4316 while(ln && ln->score <= max) {
50c55df5 4317 ele = ln->obj;
4318 addReplyBulkLen(c,ele);
4319 addReply(c,ele);
4320 addReply(c,shared.crlf);
4321 ln = ln->forward[0];
4322 rangelen++;
4323 }
4324 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4325 }
4326 }
4327}
4328
3c41331e 4329static void zcardCommand(redisClient *c) {
e197b441 4330 robj *o;
4331 zset *zs;
4332
4333 o = lookupKeyRead(c->db,c->argv[1]);
4334 if (o == NULL) {
4335 addReply(c,shared.czero);
4336 return;
4337 } else {
4338 if (o->type != REDIS_ZSET) {
4339 addReply(c,shared.wrongtypeerr);
4340 } else {
4341 zs = o->ptr;
4342 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",zs->zsl->length));
4343 }
4344 }
4345}
4346
6e333bbe 4347static void zscoreCommand(redisClient *c) {
4348 robj *o;
4349 zset *zs;
4350
4351 o = lookupKeyRead(c->db,c->argv[1]);
4352 if (o == NULL) {
4353 addReply(c,shared.czero);
4354 return;
4355 } else {
4356 if (o->type != REDIS_ZSET) {
4357 addReply(c,shared.wrongtypeerr);
4358 } else {
4359 dictEntry *de;
4360
4361 zs = o->ptr;
4362 de = dictFind(zs->dict,c->argv[2]);
4363 if (!de) {
4364 addReply(c,shared.nullbulk);
4365 } else {
4366 char buf[128];
4367 double *score = dictGetEntryVal(de);
4368
4369 snprintf(buf,sizeof(buf),"%.16g",*score);
4370 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
4371 strlen(buf),buf));
4372 }
4373 }
4374 }
4375}
4376
6b47e12e 4377/* ========================= Non type-specific commands ==================== */
4378
ed9b544e 4379static void flushdbCommand(redisClient *c) {
ca37e9cd 4380 server.dirty += dictSize(c->db->dict);
3305306f 4381 dictEmpty(c->db->dict);
4382 dictEmpty(c->db->expires);
ed9b544e 4383 addReply(c,shared.ok);
ed9b544e 4384}
4385
4386static void flushallCommand(redisClient *c) {
ca37e9cd 4387 server.dirty += emptyDb();
ed9b544e 4388 addReply(c,shared.ok);
f78fd11b 4389 rdbSave(server.dbfilename);
ca37e9cd 4390 server.dirty++;
ed9b544e 4391}
4392
56906eef 4393static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 4394 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 4395 so->type = type;
4396 so->pattern = pattern;
4397 return so;
4398}
4399
4400/* Return the value associated to the key with a name obtained
4401 * substituting the first occurence of '*' in 'pattern' with 'subst' */
56906eef 4402static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
ed9b544e 4403 char *p;
4404 sds spat, ssub;
4405 robj keyobj;
4406 int prefixlen, sublen, postfixlen;
ed9b544e 4407 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
4408 struct {
f1017b3f 4409 long len;
4410 long free;
ed9b544e 4411 char buf[REDIS_SORTKEY_MAX+1];
4412 } keyname;
4413
942a3961 4414 if (subst->encoding == REDIS_ENCODING_RAW)
4415 incrRefCount(subst);
4416 else {
4417 subst = getDecodedObject(subst);
4418 }
4419
ed9b544e 4420 spat = pattern->ptr;
4421 ssub = subst->ptr;
4422 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
4423 p = strchr(spat,'*');
4424 if (!p) return NULL;
4425
4426 prefixlen = p-spat;
4427 sublen = sdslen(ssub);
4428 postfixlen = sdslen(spat)-(prefixlen+1);
4429 memcpy(keyname.buf,spat,prefixlen);
4430 memcpy(keyname.buf+prefixlen,ssub,sublen);
4431 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
4432 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
4433 keyname.len = prefixlen+sublen+postfixlen;
4434
4435 keyobj.refcount = 1;
4436 keyobj.type = REDIS_STRING;
4437 keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
4438
942a3961 4439 decrRefCount(subst);
4440
a4d1ba9a 4441 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3305306f 4442 return lookupKeyRead(db,&keyobj);
ed9b544e 4443}
4444
4445/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
4446 * the additional parameter is not standard but a BSD-specific we have to
4447 * pass sorting parameters via the global 'server' structure */
4448static int sortCompare(const void *s1, const void *s2) {
4449 const redisSortObject *so1 = s1, *so2 = s2;
4450 int cmp;
4451
4452 if (!server.sort_alpha) {
4453 /* Numeric sorting. Here it's trivial as we precomputed scores */
4454 if (so1->u.score > so2->u.score) {
4455 cmp = 1;
4456 } else if (so1->u.score < so2->u.score) {
4457 cmp = -1;
4458 } else {
4459 cmp = 0;
4460 }
4461 } else {
4462 /* Alphanumeric sorting */
4463 if (server.sort_bypattern) {
4464 if (!so1->u.cmpobj || !so2->u.cmpobj) {
4465 /* At least one compare object is NULL */
4466 if (so1->u.cmpobj == so2->u.cmpobj)
4467 cmp = 0;
4468 else if (so1->u.cmpobj == NULL)
4469 cmp = -1;
4470 else
4471 cmp = 1;
4472 } else {
4473 /* We have both the objects, use strcoll */
4474 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
4475 }
4476 } else {
4477 /* Compare elements directly */
942a3961 4478 if (so1->obj->encoding == REDIS_ENCODING_RAW &&
4479 so2->obj->encoding == REDIS_ENCODING_RAW) {
4480 cmp = strcoll(so1->obj->ptr,so2->obj->ptr);
4481 } else {
4482 robj *dec1, *dec2;
4483
4484 dec1 = so1->obj->encoding == REDIS_ENCODING_RAW ?
4485 so1->obj : getDecodedObject(so1->obj);
4486 dec2 = so2->obj->encoding == REDIS_ENCODING_RAW ?
4487 so2->obj : getDecodedObject(so2->obj);
4488 cmp = strcoll(dec1->ptr,dec2->ptr);
4489 if (dec1 != so1->obj) decrRefCount(dec1);
4490 if (dec2 != so2->obj) decrRefCount(dec2);
4491 }
ed9b544e 4492 }
4493 }
4494 return server.sort_desc ? -cmp : cmp;
4495}
4496
4497/* The SORT command is the most complex command in Redis. Warning: this code
4498 * is optimized for speed and a bit less for readability */
4499static void sortCommand(redisClient *c) {
ed9b544e 4500 list *operations;
4501 int outputlen = 0;
4502 int desc = 0, alpha = 0;
4503 int limit_start = 0, limit_count = -1, start, end;
4504 int j, dontsort = 0, vectorlen;
4505 int getop = 0; /* GET operation counter */
4506 robj *sortval, *sortby = NULL;
4507 redisSortObject *vector; /* Resulting vector to sort */
4508
4509 /* Lookup the key to sort. It must be of the right types */
3305306f 4510 sortval = lookupKeyRead(c->db,c->argv[1]);
4511 if (sortval == NULL) {
c937aa89 4512 addReply(c,shared.nokeyerr);
ed9b544e 4513 return;
4514 }
ed9b544e 4515 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
c937aa89 4516 addReply(c,shared.wrongtypeerr);
ed9b544e 4517 return;
4518 }
4519
4520 /* Create a list of operations to perform for every sorted element.
4521 * Operations can be GET/DEL/INCR/DECR */
4522 operations = listCreate();
092dac2a 4523 listSetFreeMethod(operations,zfree);
ed9b544e 4524 j = 2;
4525
4526 /* Now we need to protect sortval incrementing its count, in the future
4527 * SORT may have options able to overwrite/delete keys during the sorting
4528 * and the sorted key itself may get destroied */
4529 incrRefCount(sortval);
4530
4531 /* The SORT command has an SQL-alike syntax, parse it */
4532 while(j < c->argc) {
4533 int leftargs = c->argc-j-1;
4534 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
4535 desc = 0;
4536 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
4537 desc = 1;
4538 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
4539 alpha = 1;
4540 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
4541 limit_start = atoi(c->argv[j+1]->ptr);
4542 limit_count = atoi(c->argv[j+2]->ptr);
4543 j+=2;
4544 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
4545 sortby = c->argv[j+1];
4546 /* If the BY pattern does not contain '*', i.e. it is constant,
4547 * we don't need to sort nor to lookup the weight keys. */
4548 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
4549 j++;
4550 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4551 listAddNodeTail(operations,createSortOperation(
4552 REDIS_SORT_GET,c->argv[j+1]));
4553 getop++;
4554 j++;
4555 } else if (!strcasecmp(c->argv[j]->ptr,"del") && leftargs >= 1) {
4556 listAddNodeTail(operations,createSortOperation(
4557 REDIS_SORT_DEL,c->argv[j+1]));
4558 j++;
4559 } else if (!strcasecmp(c->argv[j]->ptr,"incr") && leftargs >= 1) {
4560 listAddNodeTail(operations,createSortOperation(
4561 REDIS_SORT_INCR,c->argv[j+1]));
4562 j++;
4563 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4564 listAddNodeTail(operations,createSortOperation(
4565 REDIS_SORT_DECR,c->argv[j+1]));
4566 j++;
4567 } else {
4568 decrRefCount(sortval);
4569 listRelease(operations);
c937aa89 4570 addReply(c,shared.syntaxerr);
ed9b544e 4571 return;
4572 }
4573 j++;
4574 }
4575
4576 /* Load the sorting vector with all the objects to sort */
4577 vectorlen = (sortval->type == REDIS_LIST) ?
4578 listLength((list*)sortval->ptr) :
3305306f 4579 dictSize((dict*)sortval->ptr);
ed9b544e 4580 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 4581 j = 0;
4582 if (sortval->type == REDIS_LIST) {
4583 list *list = sortval->ptr;
6208b3a7 4584 listNode *ln;
4585
4586 listRewind(list);
4587 while((ln = listYield(list))) {
ed9b544e 4588 robj *ele = ln->value;
4589 vector[j].obj = ele;
4590 vector[j].u.score = 0;
4591 vector[j].u.cmpobj = NULL;
ed9b544e 4592 j++;
4593 }
4594 } else {
4595 dict *set = sortval->ptr;
4596 dictIterator *di;
4597 dictEntry *setele;
4598
4599 di = dictGetIterator(set);
ed9b544e 4600 while((setele = dictNext(di)) != NULL) {
4601 vector[j].obj = dictGetEntryKey(setele);
4602 vector[j].u.score = 0;
4603 vector[j].u.cmpobj = NULL;
4604 j++;
4605 }
4606 dictReleaseIterator(di);
4607 }
4608 assert(j == vectorlen);
4609
4610 /* Now it's time to load the right scores in the sorting vector */
4611 if (dontsort == 0) {
4612 for (j = 0; j < vectorlen; j++) {
4613 if (sortby) {
4614 robj *byval;
4615
3305306f 4616 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
ed9b544e 4617 if (!byval || byval->type != REDIS_STRING) continue;
4618 if (alpha) {
942a3961 4619 if (byval->encoding == REDIS_ENCODING_RAW) {
4620 vector[j].u.cmpobj = byval;
4621 incrRefCount(byval);
4622 } else {
4623 vector[j].u.cmpobj = getDecodedObject(byval);
4624 }
ed9b544e 4625 } else {
942a3961 4626 if (byval->encoding == REDIS_ENCODING_RAW) {
4627 vector[j].u.score = strtod(byval->ptr,NULL);
4628 } else {
f1017b3f 4629 if (byval->encoding == REDIS_ENCODING_INT) {
942a3961 4630 vector[j].u.score = (long)byval->ptr;
f1017b3f 4631 } else
942a3961 4632 assert(1 != 1);
4633 }
ed9b544e 4634 }
4635 } else {
942a3961 4636 if (!alpha) {
4637 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
4638 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
4639 else {
4640 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
4641 vector[j].u.score = (long) vector[j].obj->ptr;
4642 else
4643 assert(1 != 1);
4644 }
4645 }
ed9b544e 4646 }
4647 }
4648 }
4649
4650 /* We are ready to sort the vector... perform a bit of sanity check
4651 * on the LIMIT option too. We'll use a partial version of quicksort. */
4652 start = (limit_start < 0) ? 0 : limit_start;
4653 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
4654 if (start >= vectorlen) {
4655 start = vectorlen-1;
4656 end = vectorlen-2;
4657 }
4658 if (end >= vectorlen) end = vectorlen-1;
4659
4660 if (dontsort == 0) {
4661 server.sort_desc = desc;
4662 server.sort_alpha = alpha;
4663 server.sort_bypattern = sortby ? 1 : 0;
5f5b9840 4664 if (sortby && (start != 0 || end != vectorlen-1))
4665 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
4666 else
4667 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 4668 }
4669
4670 /* Send command output to the output buffer, performing the specified
4671 * GET/DEL/INCR/DECR operations if any. */
4672 outputlen = getop ? getop*(end-start+1) : end-start+1;
c937aa89 4673 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
ed9b544e 4674 for (j = start; j <= end; j++) {
6208b3a7 4675 listNode *ln;
ed9b544e 4676 if (!getop) {
942a3961 4677 addReplyBulkLen(c,vector[j].obj);
ed9b544e 4678 addReply(c,vector[j].obj);
4679 addReply(c,shared.crlf);
4680 }
6208b3a7 4681 listRewind(operations);
4682 while((ln = listYield(operations))) {
ed9b544e 4683 redisSortOperation *sop = ln->value;
3305306f 4684 robj *val = lookupKeyByPattern(c->db,sop->pattern,
ed9b544e 4685 vector[j].obj);
4686
4687 if (sop->type == REDIS_SORT_GET) {
4688 if (!val || val->type != REDIS_STRING) {
9eb00f21 4689 addReply(c,shared.nullbulk);
ed9b544e 4690 } else {
942a3961 4691 addReplyBulkLen(c,val);
ed9b544e 4692 addReply(c,val);
4693 addReply(c,shared.crlf);
4694 }
4695 } else if (sop->type == REDIS_SORT_DEL) {
4696 /* TODO */
4697 }
ed9b544e 4698 }
4699 }
4700
4701 /* Cleanup */
4702 decrRefCount(sortval);
4703 listRelease(operations);
4704 for (j = 0; j < vectorlen; j++) {
4705 if (sortby && alpha && vector[j].u.cmpobj)
4706 decrRefCount(vector[j].u.cmpobj);
4707 }
4708 zfree(vector);
4709}
4710
4711static void infoCommand(redisClient *c) {
4712 sds info;
4713 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 4714 int j;
ed9b544e 4715
4716 info = sdscatprintf(sdsempty(),
4717 "redis_version:%s\r\n"
f1017b3f 4718 "arch_bits:%s\r\n"
a0f643ea 4719 "uptime_in_seconds:%d\r\n"
4720 "uptime_in_days:%d\r\n"
ed9b544e 4721 "connected_clients:%d\r\n"
4722 "connected_slaves:%d\r\n"
5fba9f71 4723 "used_memory:%zu\r\n"
ed9b544e 4724 "changes_since_last_save:%lld\r\n"
be2bb6b0 4725 "bgsave_in_progress:%d\r\n"
ed9b544e 4726 "last_save_time:%d\r\n"
4727 "total_connections_received:%lld\r\n"
4728 "total_commands_processed:%lld\r\n"
a0f643ea 4729 "role:%s\r\n"
ed9b544e 4730 ,REDIS_VERSION,
f1017b3f 4731 (sizeof(long) == 8) ? "64" : "32",
a0f643ea 4732 uptime,
4733 uptime/(3600*24),
ed9b544e 4734 listLength(server.clients)-listLength(server.slaves),
4735 listLength(server.slaves),
4736 server.usedmemory,
4737 server.dirty,
be2bb6b0 4738 server.bgsaveinprogress,
ed9b544e 4739 server.lastsave,
4740 server.stat_numconnections,
4741 server.stat_numcommands,
a0f643ea 4742 server.masterhost == NULL ? "master" : "slave"
ed9b544e 4743 );
a0f643ea 4744 if (server.masterhost) {
4745 info = sdscatprintf(info,
4746 "master_host:%s\r\n"
4747 "master_port:%d\r\n"
4748 "master_link_status:%s\r\n"
4749 "master_last_io_seconds_ago:%d\r\n"
4750 ,server.masterhost,
4751 server.masterport,
4752 (server.replstate == REDIS_REPL_CONNECTED) ?
4753 "up" : "down",
f72b934d 4754 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 4755 );
4756 }
c3cb078d 4757 for (j = 0; j < server.dbnum; j++) {
4758 long long keys, vkeys;
4759
4760 keys = dictSize(server.db[j].dict);
4761 vkeys = dictSize(server.db[j].expires);
4762 if (keys || vkeys) {
4763 info = sdscatprintf(info, "db%d: keys=%lld,expires=%lld\r\n",
4764 j, keys, vkeys);
4765 }
4766 }
c937aa89 4767 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
ed9b544e 4768 addReplySds(c,info);
70003d28 4769 addReply(c,shared.crlf);
ed9b544e 4770}
4771
3305306f 4772static void monitorCommand(redisClient *c) {
4773 /* ignore MONITOR if aleady slave or in monitor mode */
4774 if (c->flags & REDIS_SLAVE) return;
4775
4776 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
4777 c->slaveseldb = 0;
6b47e12e 4778 listAddNodeTail(server.monitors,c);
3305306f 4779 addReply(c,shared.ok);
4780}
4781
4782/* ================================= Expire ================================= */
4783static int removeExpire(redisDb *db, robj *key) {
4784 if (dictDelete(db->expires,key) == DICT_OK) {
4785 return 1;
4786 } else {
4787 return 0;
4788 }
4789}
4790
4791static int setExpire(redisDb *db, robj *key, time_t when) {
4792 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
4793 return 0;
4794 } else {
4795 incrRefCount(key);
4796 return 1;
4797 }
4798}
4799
bb32ede5 4800/* Return the expire time of the specified key, or -1 if no expire
4801 * is associated with this key (i.e. the key is non volatile) */
4802static time_t getExpire(redisDb *db, robj *key) {
4803 dictEntry *de;
4804
4805 /* No expire? return ASAP */
4806 if (dictSize(db->expires) == 0 ||
4807 (de = dictFind(db->expires,key)) == NULL) return -1;
4808
4809 return (time_t) dictGetEntryVal(de);
4810}
4811
3305306f 4812static int expireIfNeeded(redisDb *db, robj *key) {
4813 time_t when;
4814 dictEntry *de;
4815
4816 /* No expire? return ASAP */
4817 if (dictSize(db->expires) == 0 ||
4818 (de = dictFind(db->expires,key)) == NULL) return 0;
4819
4820 /* Lookup the expire */
4821 when = (time_t) dictGetEntryVal(de);
4822 if (time(NULL) <= when) return 0;
4823
4824 /* Delete the key */
4825 dictDelete(db->expires,key);
4826 return dictDelete(db->dict,key) == DICT_OK;
4827}
4828
4829static int deleteIfVolatile(redisDb *db, robj *key) {
4830 dictEntry *de;
4831
4832 /* No expire? return ASAP */
4833 if (dictSize(db->expires) == 0 ||
4834 (de = dictFind(db->expires,key)) == NULL) return 0;
4835
4836 /* Delete the key */
0c66a471 4837 server.dirty++;
3305306f 4838 dictDelete(db->expires,key);
4839 return dictDelete(db->dict,key) == DICT_OK;
4840}
4841
802e8373 4842static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
3305306f 4843 dictEntry *de;
3305306f 4844
802e8373 4845 de = dictFind(c->db->dict,key);
3305306f 4846 if (de == NULL) {
4847 addReply(c,shared.czero);
4848 return;
4849 }
43e5ccdf 4850 if (seconds < 0) {
4851 if (deleteKey(c->db,key)) server.dirty++;
4852 addReply(c, shared.cone);
3305306f 4853 return;
4854 } else {
4855 time_t when = time(NULL)+seconds;
802e8373 4856 if (setExpire(c->db,key,when)) {
3305306f 4857 addReply(c,shared.cone);
77423026 4858 server.dirty++;
4859 } else {
3305306f 4860 addReply(c,shared.czero);
77423026 4861 }
3305306f 4862 return;
4863 }
4864}
4865
802e8373 4866static void expireCommand(redisClient *c) {
4867 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
4868}
4869
4870static void expireatCommand(redisClient *c) {
4871 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
4872}
4873
fd88489a 4874static void ttlCommand(redisClient *c) {
4875 time_t expire;
4876 int ttl = -1;
4877
4878 expire = getExpire(c->db,c->argv[1]);
4879 if (expire != -1) {
4880 ttl = (int) (expire-time(NULL));
4881 if (ttl < 0) ttl = -1;
4882 }
4883 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
4884}
4885
f6b141c5 4886static void msetGenericCommand(redisClient *c, int nx) {
4887 int j;
4888
4889 if ((c->argc % 2) == 0) {
4890 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
4891 return;
4892 }
4893 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4894 * set nothing at all if at least one already key exists. */
4895 if (nx) {
4896 for (j = 1; j < c->argc; j += 2) {
4897 if (dictFind(c->db->dict,c->argv[j]) != NULL) {
4898 addReply(c, shared.czero);
4899 return;
4900 }
4901 }
4902 }
4903
4904 for (j = 1; j < c->argc; j += 2) {
2ed22c8b 4905 int retval;
4906
4907 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
4908 if (retval == DICT_ERR) {
4909 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
4910 incrRefCount(c->argv[j+1]);
4911 } else {
4912 incrRefCount(c->argv[j]);
4913 incrRefCount(c->argv[j+1]);
4914 }
f6b141c5 4915 removeExpire(c->db,c->argv[j]);
4916 }
4917 server.dirty += (c->argc-1)/2;
4918 addReply(c, nx ? shared.cone : shared.ok);
4919}
4920
4921static void msetCommand(redisClient *c) {
4922 msetGenericCommand(c,0);
4923}
4924
4925static void msetnxCommand(redisClient *c) {
4926 msetGenericCommand(c,1);
4927}
4928
ed9b544e 4929/* =============================== Replication ============================= */
4930
a4d1ba9a 4931static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 4932 ssize_t nwritten, ret = size;
4933 time_t start = time(NULL);
4934
4935 timeout++;
4936 while(size) {
4937 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
4938 nwritten = write(fd,ptr,size);
4939 if (nwritten == -1) return -1;
4940 ptr += nwritten;
4941 size -= nwritten;
4942 }
4943 if ((time(NULL)-start) > timeout) {
4944 errno = ETIMEDOUT;
4945 return -1;
4946 }
4947 }
4948 return ret;
4949}
4950
a4d1ba9a 4951static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 4952 ssize_t nread, totread = 0;
4953 time_t start = time(NULL);
4954
4955 timeout++;
4956 while(size) {
4957 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
4958 nread = read(fd,ptr,size);
4959 if (nread == -1) return -1;
4960 ptr += nread;
4961 size -= nread;
4962 totread += nread;
4963 }
4964 if ((time(NULL)-start) > timeout) {
4965 errno = ETIMEDOUT;
4966 return -1;
4967 }
4968 }
4969 return totread;
4970}
4971
4972static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
4973 ssize_t nread = 0;
4974
4975 size--;
4976 while(size) {
4977 char c;
4978
4979 if (syncRead(fd,&c,1,timeout) == -1) return -1;
4980 if (c == '\n') {
4981 *ptr = '\0';
4982 if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
4983 return nread;
4984 } else {
4985 *ptr++ = c;
4986 *ptr = '\0';
4987 nread++;
4988 }
4989 }
4990 return nread;
4991}
4992
4993static void syncCommand(redisClient *c) {
40d224a9 4994 /* ignore SYNC if aleady slave or in monitor mode */
4995 if (c->flags & REDIS_SLAVE) return;
4996
4997 /* SYNC can't be issued when the server has pending data to send to
4998 * the client about already issued commands. We need a fresh reply
4999 * buffer registering the differences between the BGSAVE and the current
5000 * dataset, so that we can copy to other slaves if needed. */
5001 if (listLength(c->reply) != 0) {
5002 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
5003 return;
5004 }
5005
5006 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
5007 /* Here we need to check if there is a background saving operation
5008 * in progress, or if it is required to start one */
5009 if (server.bgsaveinprogress) {
5010 /* Ok a background save is in progress. Let's check if it is a good
5011 * one for replication, i.e. if there is another slave that is
5012 * registering differences since the server forked to save */
5013 redisClient *slave;
5014 listNode *ln;
5015
6208b3a7 5016 listRewind(server.slaves);
5017 while((ln = listYield(server.slaves))) {
40d224a9 5018 slave = ln->value;
5019 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 5020 }
5021 if (ln) {
5022 /* Perfect, the server is already registering differences for
5023 * another slave. Set the right state, and copy the buffer. */
5024 listRelease(c->reply);
5025 c->reply = listDup(slave->reply);
40d224a9 5026 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5027 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
5028 } else {
5029 /* No way, we need to wait for the next BGSAVE in order to
5030 * register differences */
5031 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5032 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5033 }
5034 } else {
5035 /* Ok we don't have a BGSAVE in progress, let's start one */
5036 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
5037 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5038 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
5039 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
5040 return;
5041 }
5042 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5043 }
6208b3a7 5044 c->repldbfd = -1;
40d224a9 5045 c->flags |= REDIS_SLAVE;
5046 c->slaveseldb = 0;
6b47e12e 5047 listAddNodeTail(server.slaves,c);
40d224a9 5048 return;
5049}
5050
6208b3a7 5051static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
5052 redisClient *slave = privdata;
5053 REDIS_NOTUSED(el);
5054 REDIS_NOTUSED(mask);
5055 char buf[REDIS_IOBUF_LEN];
5056 ssize_t nwritten, buflen;
5057
5058 if (slave->repldboff == 0) {
5059 /* Write the bulk write count before to transfer the DB. In theory here
5060 * we don't know how much room there is in the output buffer of the
5061 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
5062 * operations) will never be smaller than the few bytes we need. */
5063 sds bulkcount;
5064
5065 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
5066 slave->repldbsize);
5067 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
5068 {
5069 sdsfree(bulkcount);
5070 freeClient(slave);
5071 return;
5072 }
5073 sdsfree(bulkcount);
5074 }
5075 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
5076 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
5077 if (buflen <= 0) {
5078 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
5079 (buflen == 0) ? "premature EOF" : strerror(errno));
5080 freeClient(slave);
5081 return;
5082 }
5083 if ((nwritten = write(fd,buf,buflen)) == -1) {
5084 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
5085 strerror(errno));
5086 freeClient(slave);
5087 return;
5088 }
5089 slave->repldboff += nwritten;
5090 if (slave->repldboff == slave->repldbsize) {
5091 close(slave->repldbfd);
5092 slave->repldbfd = -1;
5093 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5094 slave->replstate = REDIS_REPL_ONLINE;
5095 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
5096 sendReplyToClient, slave, NULL) == AE_ERR) {
5097 freeClient(slave);
5098 return;
5099 }
5100 addReplySds(slave,sdsempty());
5101 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
5102 }
5103}
ed9b544e 5104
a3b21203 5105/* This function is called at the end of every backgrond saving.
5106 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
5107 * otherwise REDIS_ERR is passed to the function.
5108 *
5109 * The goal of this function is to handle slaves waiting for a successful
5110 * background saving in order to perform non-blocking synchronization. */
5111static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 5112 listNode *ln;
5113 int startbgsave = 0;
ed9b544e 5114
6208b3a7 5115 listRewind(server.slaves);
5116 while((ln = listYield(server.slaves))) {
5117 redisClient *slave = ln->value;
ed9b544e 5118
6208b3a7 5119 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
5120 startbgsave = 1;
5121 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5122 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 5123 struct redis_stat buf;
6208b3a7 5124
5125 if (bgsaveerr != REDIS_OK) {
5126 freeClient(slave);
5127 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
5128 continue;
5129 }
5130 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 5131 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 5132 freeClient(slave);
5133 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
5134 continue;
5135 }
5136 slave->repldboff = 0;
5137 slave->repldbsize = buf.st_size;
5138 slave->replstate = REDIS_REPL_SEND_BULK;
5139 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5140 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave, NULL) == AE_ERR) {
5141 freeClient(slave);
5142 continue;
5143 }
5144 }
ed9b544e 5145 }
6208b3a7 5146 if (startbgsave) {
5147 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5148 listRewind(server.slaves);
5149 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
5150 while((ln = listYield(server.slaves))) {
5151 redisClient *slave = ln->value;
ed9b544e 5152
6208b3a7 5153 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
5154 freeClient(slave);
5155 }
5156 }
5157 }
ed9b544e 5158}
5159
5160static int syncWithMaster(void) {
5161 char buf[1024], tmpfile[256];
5162 int dumpsize;
5163 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
5164 int dfd;
5165
5166 if (fd == -1) {
5167 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
5168 strerror(errno));
5169 return REDIS_ERR;
5170 }
5171 /* Issue the SYNC command */
5172 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
5173 close(fd);
5174 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
5175 strerror(errno));
5176 return REDIS_ERR;
5177 }
5178 /* Read the bulk write count */
8c4d91fc 5179 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 5180 close(fd);
5181 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
5182 strerror(errno));
5183 return REDIS_ERR;
5184 }
4aa701c1 5185 if (buf[0] != '$') {
5186 close(fd);
5187 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
5188 return REDIS_ERR;
5189 }
c937aa89 5190 dumpsize = atoi(buf+1);
ed9b544e 5191 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
5192 /* Read the bulk write data on a temp file */
5193 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
5194 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
5195 if (dfd == -1) {
5196 close(fd);
5197 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
5198 return REDIS_ERR;
5199 }
5200 while(dumpsize) {
5201 int nread, nwritten;
5202
5203 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
5204 if (nread == -1) {
5205 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
5206 strerror(errno));
5207 close(fd);
5208 close(dfd);
5209 return REDIS_ERR;
5210 }
5211 nwritten = write(dfd,buf,nread);
5212 if (nwritten == -1) {
5213 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
5214 close(fd);
5215 close(dfd);
5216 return REDIS_ERR;
5217 }
5218 dumpsize -= nread;
5219 }
5220 close(dfd);
5221 if (rename(tmpfile,server.dbfilename) == -1) {
5222 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
5223 unlink(tmpfile);
5224 close(fd);
5225 return REDIS_ERR;
5226 }
5227 emptyDb();
f78fd11b 5228 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 5229 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
5230 close(fd);
5231 return REDIS_ERR;
5232 }
5233 server.master = createClient(fd);
5234 server.master->flags |= REDIS_MASTER;
5235 server.replstate = REDIS_REPL_CONNECTED;
5236 return REDIS_OK;
5237}
5238
321b0e13 5239static void slaveofCommand(redisClient *c) {
5240 if (!strcasecmp(c->argv[1]->ptr,"no") &&
5241 !strcasecmp(c->argv[2]->ptr,"one")) {
5242 if (server.masterhost) {
5243 sdsfree(server.masterhost);
5244 server.masterhost = NULL;
5245 if (server.master) freeClient(server.master);
5246 server.replstate = REDIS_REPL_NONE;
5247 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
5248 }
5249 } else {
5250 sdsfree(server.masterhost);
5251 server.masterhost = sdsdup(c->argv[1]->ptr);
5252 server.masterport = atoi(c->argv[2]->ptr);
5253 if (server.master) freeClient(server.master);
5254 server.replstate = REDIS_REPL_CONNECT;
5255 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
5256 server.masterhost, server.masterport);
5257 }
5258 addReply(c,shared.ok);
5259}
5260
3fd78bcd 5261/* ============================ Maxmemory directive ======================== */
5262
5263/* This function gets called when 'maxmemory' is set on the config file to limit
5264 * the max memory used by the server, and we are out of memory.
5265 * This function will try to, in order:
5266 *
5267 * - Free objects from the free list
5268 * - Try to remove keys with an EXPIRE set
5269 *
5270 * It is not possible to free enough memory to reach used-memory < maxmemory
5271 * the server will start refusing commands that will enlarge even more the
5272 * memory usage.
5273 */
5274static void freeMemoryIfNeeded(void) {
5275 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
5276 if (listLength(server.objfreelist)) {
5277 robj *o;
5278
5279 listNode *head = listFirst(server.objfreelist);
5280 o = listNodeValue(head);
5281 listDelNode(server.objfreelist,head);
5282 zfree(o);
5283 } else {
5284 int j, k, freed = 0;
5285
5286 for (j = 0; j < server.dbnum; j++) {
5287 int minttl = -1;
5288 robj *minkey = NULL;
5289 struct dictEntry *de;
5290
5291 if (dictSize(server.db[j].expires)) {
5292 freed = 1;
5293 /* From a sample of three keys drop the one nearest to
5294 * the natural expire */
5295 for (k = 0; k < 3; k++) {
5296 time_t t;
5297
5298 de = dictGetRandomKey(server.db[j].expires);
5299 t = (time_t) dictGetEntryVal(de);
5300 if (minttl == -1 || t < minttl) {
5301 minkey = dictGetEntryKey(de);
5302 minttl = t;
5303 }
5304 }
5305 deleteKey(server.db+j,minkey);
5306 }
5307 }
5308 if (!freed) return; /* nothing to free... */
5309 }
5310 }
5311}
5312
7f957c92 5313/* ================================= Debugging ============================== */
5314
5315static void debugCommand(redisClient *c) {
5316 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
5317 *((char*)-1) = 'x';
333298da 5318 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
5319 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
5320 robj *key, *val;
5321
5322 if (!de) {
5323 addReply(c,shared.nokeyerr);
5324 return;
5325 }
5326 key = dictGetEntryKey(de);
5327 val = dictGetEntryVal(de);
5328 addReplySds(c,sdscatprintf(sdsempty(),
942a3961 5329 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
5330 key, key->refcount, val, val->refcount, val->encoding));
7f957c92 5331 } else {
333298da 5332 addReplySds(c,sdsnew(
5333 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>]\r\n"));
7f957c92 5334 }
5335}
56906eef 5336
d76412d1 5337#ifdef HAVE_BACKTRACE
56906eef 5338static struct redisFunctionSym symsTable[] = {
724a51b1 5339{"compareStringObjects", (unsigned long)compareStringObjects},
5340{"isStringRepresentableAsLong", (unsigned long)isStringRepresentableAsLong},
942a3961 5341{"dictEncObjKeyCompare", (unsigned long)dictEncObjKeyCompare},
5342{"dictEncObjHash", (unsigned long)dictEncObjHash},
5343{"incrDecrCommand", (unsigned long)incrDecrCommand},
56906eef 5344{"freeStringObject", (unsigned long)freeStringObject},
5345{"freeListObject", (unsigned long)freeListObject},
5346{"freeSetObject", (unsigned long)freeSetObject},
5347{"decrRefCount", (unsigned long)decrRefCount},
5348{"createObject", (unsigned long)createObject},
5349{"freeClient", (unsigned long)freeClient},
5350{"rdbLoad", (unsigned long)rdbLoad},
942a3961 5351{"rdbSaveStringObject", (unsigned long)rdbSaveStringObject},
5352{"rdbSaveStringObjectRaw", (unsigned long)rdbSaveStringObjectRaw},
56906eef 5353{"addReply", (unsigned long)addReply},
5354{"addReplySds", (unsigned long)addReplySds},
5355{"incrRefCount", (unsigned long)incrRefCount},
5356{"rdbSaveBackground", (unsigned long)rdbSaveBackground},
5357{"createStringObject", (unsigned long)createStringObject},
5358{"replicationFeedSlaves", (unsigned long)replicationFeedSlaves},
5359{"syncWithMaster", (unsigned long)syncWithMaster},
5360{"tryObjectSharing", (unsigned long)tryObjectSharing},
942a3961 5361{"tryObjectEncoding", (unsigned long)tryObjectEncoding},
5362{"getDecodedObject", (unsigned long)getDecodedObject},
56906eef 5363{"removeExpire", (unsigned long)removeExpire},
5364{"expireIfNeeded", (unsigned long)expireIfNeeded},
5365{"deleteIfVolatile", (unsigned long)deleteIfVolatile},
5366{"deleteKey", (unsigned long)deleteKey},
5367{"getExpire", (unsigned long)getExpire},
5368{"setExpire", (unsigned long)setExpire},
a3b21203 5369{"updateSlavesWaitingBgsave", (unsigned long)updateSlavesWaitingBgsave},
56906eef 5370{"freeMemoryIfNeeded", (unsigned long)freeMemoryIfNeeded},
5371{"authCommand", (unsigned long)authCommand},
5372{"pingCommand", (unsigned long)pingCommand},
5373{"echoCommand", (unsigned long)echoCommand},
5374{"setCommand", (unsigned long)setCommand},
5375{"setnxCommand", (unsigned long)setnxCommand},
5376{"getCommand", (unsigned long)getCommand},
5377{"delCommand", (unsigned long)delCommand},
5378{"existsCommand", (unsigned long)existsCommand},
5379{"incrCommand", (unsigned long)incrCommand},
5380{"decrCommand", (unsigned long)decrCommand},
5381{"incrbyCommand", (unsigned long)incrbyCommand},
5382{"decrbyCommand", (unsigned long)decrbyCommand},
5383{"selectCommand", (unsigned long)selectCommand},
5384{"randomkeyCommand", (unsigned long)randomkeyCommand},
5385{"keysCommand", (unsigned long)keysCommand},
5386{"dbsizeCommand", (unsigned long)dbsizeCommand},
5387{"lastsaveCommand", (unsigned long)lastsaveCommand},
5388{"saveCommand", (unsigned long)saveCommand},
5389{"bgsaveCommand", (unsigned long)bgsaveCommand},
5390{"shutdownCommand", (unsigned long)shutdownCommand},
5391{"moveCommand", (unsigned long)moveCommand},
5392{"renameCommand", (unsigned long)renameCommand},
5393{"renamenxCommand", (unsigned long)renamenxCommand},
5394{"lpushCommand", (unsigned long)lpushCommand},
5395{"rpushCommand", (unsigned long)rpushCommand},
5396{"lpopCommand", (unsigned long)lpopCommand},
5397{"rpopCommand", (unsigned long)rpopCommand},
5398{"llenCommand", (unsigned long)llenCommand},
5399{"lindexCommand", (unsigned long)lindexCommand},
5400{"lrangeCommand", (unsigned long)lrangeCommand},
5401{"ltrimCommand", (unsigned long)ltrimCommand},
5402{"typeCommand", (unsigned long)typeCommand},
5403{"lsetCommand", (unsigned long)lsetCommand},
5404{"saddCommand", (unsigned long)saddCommand},
5405{"sremCommand", (unsigned long)sremCommand},
5406{"smoveCommand", (unsigned long)smoveCommand},
5407{"sismemberCommand", (unsigned long)sismemberCommand},
5408{"scardCommand", (unsigned long)scardCommand},
12fea928 5409{"spopCommand", (unsigned long)spopCommand},
2abb95a9 5410{"srandmemberCommand", (unsigned long)srandmemberCommand},
56906eef 5411{"sinterCommand", (unsigned long)sinterCommand},
5412{"sinterstoreCommand", (unsigned long)sinterstoreCommand},
5413{"sunionCommand", (unsigned long)sunionCommand},
5414{"sunionstoreCommand", (unsigned long)sunionstoreCommand},
5415{"sdiffCommand", (unsigned long)sdiffCommand},
5416{"sdiffstoreCommand", (unsigned long)sdiffstoreCommand},
5417{"syncCommand", (unsigned long)syncCommand},
5418{"flushdbCommand", (unsigned long)flushdbCommand},
5419{"flushallCommand", (unsigned long)flushallCommand},
5420{"sortCommand", (unsigned long)sortCommand},
5421{"lremCommand", (unsigned long)lremCommand},
5422{"infoCommand", (unsigned long)infoCommand},
5423{"mgetCommand", (unsigned long)mgetCommand},
5424{"monitorCommand", (unsigned long)monitorCommand},
5425{"expireCommand", (unsigned long)expireCommand},
802e8373 5426{"expireatCommand", (unsigned long)expireatCommand},
f6b141c5 5427{"getsetCommand", (unsigned long)getsetCommand},
56906eef 5428{"ttlCommand", (unsigned long)ttlCommand},
5429{"slaveofCommand", (unsigned long)slaveofCommand},
5430{"debugCommand", (unsigned long)debugCommand},
5431{"processCommand", (unsigned long)processCommand},
5432{"setupSigSegvAction", (unsigned long)setupSigSegvAction},
56906eef 5433{"readQueryFromClient", (unsigned long)readQueryFromClient},
a3b21203 5434{"rdbRemoveTempFile", (unsigned long)rdbRemoveTempFile},
f6b141c5 5435{"msetGenericCommand", (unsigned long)msetGenericCommand},
5436{"msetCommand", (unsigned long)msetCommand},
5437{"msetnxCommand", (unsigned long)msetnxCommand},
ace4ee54 5438{"zslCreateNode", (unsigned long)zslCreateNode},
5439{"zslCreate", (unsigned long)zslCreate},
5440{"zslFreeNode",(unsigned long)zslFreeNode},
5441{"zslFree",(unsigned long)zslFree},
5442{"zslRandomLevel",(unsigned long)zslRandomLevel},
5443{"zslInsert",(unsigned long)zslInsert},
5444{"zslDelete",(unsigned long)zslDelete},
5445{"createZsetObject",(unsigned long)createZsetObject},
5446{"zaddCommand",(unsigned long)zaddCommand},
e3870fab 5447{"zrangeGenericCommand",(unsigned long)zrangeGenericCommand},
cc812361 5448{"zrangeCommand",(unsigned long)zrangeCommand},
e3870fab 5449{"zrevrangeCommand",(unsigned long)zrevrangeCommand},
1b7106e7 5450{"zremCommand",(unsigned long)zremCommand},
2b59cfdf 5451{"rdbSaveDoubleValue",(unsigned long)rdbSaveDoubleValue},
5452{"rdbLoadDoubleValue",(unsigned long)rdbLoadDoubleValue},
56906eef 5453{NULL,0}
5454};
5455
5456/* This function try to convert a pointer into a function name. It's used in
5457 * oreder to provide a backtrace under segmentation fault that's able to
5458 * display functions declared as static (otherwise the backtrace is useless). */
5459static char *findFuncName(void *pointer, unsigned long *offset){
5460 int i, ret = -1;
5461 unsigned long off, minoff = 0;
5462
5463 /* Try to match against the Symbol with the smallest offset */
5464 for (i=0; symsTable[i].pointer; i++) {
5465 unsigned long lp = (unsigned long) pointer;
5466
5467 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
5468 off=lp-symsTable[i].pointer;
5469 if (ret < 0 || off < minoff) {
5470 minoff=off;
5471 ret=i;
5472 }
5473 }
5474 }
5475 if (ret == -1) return NULL;
5476 *offset = minoff;
5477 return symsTable[ret].name;
5478}
5479
5480static void *getMcontextEip(ucontext_t *uc) {
5481#if defined(__FreeBSD__)
5482 return (void*) uc->uc_mcontext.mc_eip;
5483#elif defined(__dietlibc__)
5484 return (void*) uc->uc_mcontext.eip;
06db1f50 5485#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
56906eef 5486 return (void*) uc->uc_mcontext->__ss.__eip;
06db1f50 5487#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
cb7e07cc 5488 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
06db1f50 5489 return (void*) uc->uc_mcontext->__ss.__rip;
cbc59b38 5490 #else
5491 return (void*) uc->uc_mcontext->__ss.__eip;
5492 #endif
b91cf5ef 5493#elif defined(__i386__) || defined(__X86_64__) /* Linux x86 */
56906eef 5494 return (void*) uc->uc_mcontext.gregs[REG_EIP];
b91cf5ef 5495#elif defined(__ia64__) /* Linux IA64 */
5496 return (void*) uc->uc_mcontext.sc_ip;
5497#else
5498 return NULL;
56906eef 5499#endif
5500}
5501
5502static void segvHandler(int sig, siginfo_t *info, void *secret) {
5503 void *trace[100];
5504 char **messages = NULL;
5505 int i, trace_size = 0;
5506 unsigned long offset=0;
5507 time_t uptime = time(NULL)-server.stat_starttime;
5508 ucontext_t *uc = (ucontext_t*) secret;
5509 REDIS_NOTUSED(info);
5510
5511 redisLog(REDIS_WARNING,
5512 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
5513 redisLog(REDIS_WARNING, "%s", sdscatprintf(sdsempty(),
433cc893 5514 "redis_version:%s; "
56906eef 5515 "uptime_in_seconds:%d; "
433cc893 5516 "connected_clients:%d; "
5517 "connected_slaves:%d; "
5518 "used_memory:%zu; "
5519 "changes_since_last_save:%lld; "
5520 "bgsave_in_progress:%d; "
5521 "last_save_time:%d; "
5522 "total_connections_received:%lld; "
5523 "total_commands_processed:%lld; "
5524 "role:%s;"
5525 ,REDIS_VERSION,
5526 uptime,
5527 listLength(server.clients)-listLength(server.slaves),
5528 listLength(server.slaves),
5529 server.usedmemory,
5530 server.dirty,
5531 server.bgsaveinprogress,
5532 server.lastsave,
5533 server.stat_numconnections,
5534 server.stat_numcommands,
5535 server.masterhost == NULL ? "master" : "slave"
5536 ));
56906eef 5537
5538 trace_size = backtrace(trace, 100);
de96dbfe 5539 /* overwrite sigaction with caller's address */
b91cf5ef 5540 if (getMcontextEip(uc) != NULL) {
5541 trace[1] = getMcontextEip(uc);
5542 }
56906eef 5543 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 5544
d76412d1 5545 for (i=1; i<trace_size; ++i) {
56906eef 5546 char *fn = findFuncName(trace[i], &offset), *p;
5547
5548 p = strchr(messages[i],'+');
5549 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
5550 redisLog(REDIS_WARNING,"%s", messages[i]);
5551 } else {
5552 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
5553 }
5554 }
5555 free(messages);
5556 exit(0);
fe3bbfbe 5557}
56906eef 5558
5559static void setupSigSegvAction(void) {
5560 struct sigaction act;
5561
5562 sigemptyset (&act.sa_mask);
5563 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
5564 * is used. Otherwise, sa_handler is used */
5565 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
5566 act.sa_sigaction = segvHandler;
5567 sigaction (SIGSEGV, &act, NULL);
5568 sigaction (SIGBUS, &act, NULL);
12fea928 5569 sigaction (SIGFPE, &act, NULL);
5570 sigaction (SIGILL, &act, NULL);
5571 sigaction (SIGBUS, &act, NULL);
e65fdc78 5572 return;
56906eef 5573}
d76412d1 5574#else /* HAVE_BACKTRACE */
5575static void setupSigSegvAction(void) {
5576}
5577#endif /* HAVE_BACKTRACE */
e65fdc78 5578
ed9b544e 5579/* =================================== Main! ================================ */
5580
0bc03378 5581#ifdef __linux__
5582int linuxOvercommitMemoryValue(void) {
5583 FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
5584 char buf[64];
5585
5586 if (!fp) return -1;
5587 if (fgets(buf,64,fp) == NULL) {
5588 fclose(fp);
5589 return -1;
5590 }
5591 fclose(fp);
5592
5593 return atoi(buf);
5594}
5595
5596void linuxOvercommitMemoryWarning(void) {
5597 if (linuxOvercommitMemoryValue() == 0) {
b91cf5ef 5598 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
0bc03378 5599 }
5600}
5601#endif /* __linux__ */
5602
ed9b544e 5603static void daemonize(void) {
5604 int fd;
5605 FILE *fp;
5606
5607 if (fork() != 0) exit(0); /* parent exits */
5608 setsid(); /* create a new session */
5609
5610 /* Every output goes to /dev/null. If Redis is daemonized but
5611 * the 'logfile' is set to 'stdout' in the configuration file
5612 * it will not log at all. */
5613 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
5614 dup2(fd, STDIN_FILENO);
5615 dup2(fd, STDOUT_FILENO);
5616 dup2(fd, STDERR_FILENO);
5617 if (fd > STDERR_FILENO) close(fd);
5618 }
5619 /* Try to write the pid file */
ed329fcf 5620 fp = fopen(server.pidfile,"w");
ed9b544e 5621 if (fp) {
5622 fprintf(fp,"%d\n",getpid());
5623 fclose(fp);
5624 }
5625}
5626
5627int main(int argc, char **argv) {
5628 initServerConfig();
5629 if (argc == 2) {
5630 ResetServerSaveParams();
5631 loadServerConfig(argv[1]);
5632 } else if (argc > 2) {
5633 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
5634 exit(1);
8cca9b82 5635 } else {
5636 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
ed9b544e 5637 }
5638 initServer();
5639 if (server.daemonize) daemonize();
5640 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
b91cf5ef 5641#ifdef __linux__
5642 linuxOvercommitMemoryWarning();
5643#endif
f78fd11b 5644 if (rdbLoad(server.dbfilename) == REDIS_OK)
ed9b544e 5645 redisLog(REDIS_NOTICE,"DB loaded from disk");
5646 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
5647 acceptHandler, NULL, NULL) == AE_ERR) oom("creating file event");
46713f83 5648 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
ed9b544e 5649 aeMain(server.el);
5650 aeDeleteEventLoop(server.el);
5651 return 0;
5652}