]> git.saurik.com Git - redis.git/blame - redis.c
Fixed compilation on Linux
[redis.git] / redis.c
CommitLineData
ed9b544e 1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
1812e024 30#define REDIS_VERSION "1.050"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
c9468bcf 40#define __USE_POSIX199309
ed9b544e 41#include <signal.h>
fbf9bcdb 42
43#ifdef HAVE_BACKTRACE
c9468bcf 44#include <execinfo.h>
45#include <ucontext.h>
fbf9bcdb 46#endif /* HAVE_BACKTRACE */
47
ed9b544e 48#include <sys/wait.h>
49#include <errno.h>
50#include <assert.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <inttypes.h>
54#include <arpa/inet.h>
55#include <sys/stat.h>
56#include <fcntl.h>
57#include <sys/time.h>
58#include <sys/resource.h>
f78fd11b 59#include <limits.h>
a7866db6 60#include <math.h>
0bc1b2f6 61
62#if defined(__sun)
5043dff3 63#include "solarisfixes.h"
64#endif
ed9b544e 65
c9468bcf 66#include "redis.h"
ed9b544e 67#include "ae.h" /* Event driven programming library */
68#include "sds.h" /* Dynamic safe strings */
69#include "anet.h" /* Networking the easy way */
70#include "dict.h" /* Hash tables */
71#include "adlist.h" /* Linked lists */
72#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 73#include "lzf.h" /* LZF compression library */
74#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ed9b544e 75
76/* Error codes */
77#define REDIS_OK 0
78#define REDIS_ERR -1
79
80/* Static server configuration */
81#define REDIS_SERVERPORT 6379 /* TCP port */
82#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
6208b3a7 83#define REDIS_IOBUF_LEN 1024
ed9b544e 84#define REDIS_LOADBUF_LEN 1024
93ea3759 85#define REDIS_STATIC_ARGS 4
ed9b544e 86#define REDIS_DEFAULT_DBNUM 16
87#define REDIS_CONFIGLINE_MAX 1024
88#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
89#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94754ccc 90#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
6f376729 91#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
cd194638 92#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
ed9b544e 93
94/* Hash table parameters */
95#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
ed9b544e 96
97/* Command flags */
3fd78bcd 98#define REDIS_CMD_BULK 1 /* Bulk write command */
99#define REDIS_CMD_INLINE 2 /* Inline command */
100/* REDIS_CMD_DENYOOM reserves a longer comment: all the commands marked with
101 this flags will return an error when the 'maxmemory' option is set in the
102 config file and the server is using more than maxmemory bytes of memory.
103 In short this commands are denied on low memory conditions. */
104#define REDIS_CMD_DENYOOM 4
ed9b544e 105
106/* Object types */
107#define REDIS_STRING 0
108#define REDIS_LIST 1
109#define REDIS_SET 2
1812e024 110#define REDIS_ZSET 3
111#define REDIS_HASH 4
f78fd11b 112
942a3961 113/* Objects encoding */
114#define REDIS_ENCODING_RAW 0 /* Raw representation */
115#define REDIS_ENCODING_INT 1 /* Encoded as integer */
116
f78fd11b 117/* Object types only used for dumping to disk */
bb32ede5 118#define REDIS_EXPIRETIME 253
ed9b544e 119#define REDIS_SELECTDB 254
120#define REDIS_EOF 255
121
f78fd11b 122/* Defines related to the dump file format. To store 32 bits lengths for short
123 * keys requires a lot of space, so we check the most significant 2 bits of
124 * the first byte to interpreter the length:
125 *
126 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
127 * 01|000000 00000000 => 01, the len is 14 byes, 6 bits + 8 bits of next byte
128 * 10|000000 [32 bit integer] => if it's 01, a full 32 bit len will follow
a4d1ba9a 129 * 11|000000 this means: specially encoded object will follow. The six bits
130 * number specify the kind of object that follows.
131 * See the REDIS_RDB_ENC_* defines.
f78fd11b 132 *
10c43610 133 * Lenghts up to 63 are stored using a single byte, most DB keys, and may
134 * values, will fit inside. */
f78fd11b 135#define REDIS_RDB_6BITLEN 0
136#define REDIS_RDB_14BITLEN 1
137#define REDIS_RDB_32BITLEN 2
17be1a4a 138#define REDIS_RDB_ENCVAL 3
f78fd11b 139#define REDIS_RDB_LENERR UINT_MAX
140
a4d1ba9a 141/* When a length of a string object stored on disk has the first two bits
142 * set, the remaining two bits specify a special encoding for the object
143 * accordingly to the following defines: */
144#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
145#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
146#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
774e3047 147#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
a4d1ba9a 148
ed9b544e 149/* Client flags */
150#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
151#define REDIS_SLAVE 2 /* This client is a slave server */
152#define REDIS_MASTER 4 /* This client is a master server */
87eca727 153#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
ed9b544e 154
40d224a9 155/* Slave replication state - slave side */
ed9b544e 156#define REDIS_REPL_NONE 0 /* No active replication */
157#define REDIS_REPL_CONNECT 1 /* Must connect to master */
158#define REDIS_REPL_CONNECTED 2 /* Connected to master */
159
40d224a9 160/* Slave replication state - from the point of view of master
161 * Note that in SEND_BULK and ONLINE state the slave receives new updates
162 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
163 * to start the next background saving in order to send updates to it. */
164#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
165#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
166#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
167#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
168
ed9b544e 169/* List related stuff */
170#define REDIS_HEAD 0
171#define REDIS_TAIL 1
172
173/* Sort operations */
174#define REDIS_SORT_GET 0
175#define REDIS_SORT_DEL 1
176#define REDIS_SORT_INCR 2
177#define REDIS_SORT_DECR 3
178#define REDIS_SORT_ASC 4
179#define REDIS_SORT_DESC 5
180#define REDIS_SORTKEY_MAX 1024
181
182/* Log levels */
183#define REDIS_DEBUG 0
184#define REDIS_NOTICE 1
185#define REDIS_WARNING 2
186
187/* Anti-warning macro... */
188#define REDIS_NOTUSED(V) ((void) V)
189
6b47e12e 190#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
191#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
ed9b544e 192
48f0308a 193/* Append only defines */
194#define APPENDFSYNC_NO 0
195#define APPENDFSYNC_ALWAYS 1
196#define APPENDFSYNC_EVERYSEC 2
197
ed9b544e 198/*================================= Data types ============================== */
199
200/* A redis object, that is a type able to hold a string / list / set */
201typedef struct redisObject {
ed9b544e 202 void *ptr;
942a3961 203 unsigned char type;
204 unsigned char encoding;
205 unsigned char notused[2];
ed9b544e 206 int refcount;
207} robj;
208
3305306f 209typedef struct redisDb {
210 dict *dict;
211 dict *expires;
212 int id;
213} redisDb;
214
ed9b544e 215/* With multiplexing we need to take per-clinet state.
216 * Clients are taken in a liked list. */
217typedef struct redisClient {
218 int fd;
3305306f 219 redisDb *db;
ed9b544e 220 int dictid;
221 sds querybuf;
e8a74421 222 robj **argv, **mbargv;
223 int argc, mbargc;
40d224a9 224 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 225 int multibulk; /* multi bulk command format active */
ed9b544e 226 list *reply;
227 int sentlen;
228 time_t lastinteraction; /* time of the last interaction, used for timeout */
40d224a9 229 int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */
230 int slaveseldb; /* slave selected db, if this client is a slave */
231 int authenticated; /* when requirepass is non-NULL */
232 int replstate; /* replication state if this is a slave */
233 int repldbfd; /* replication DB file descriptor */
6208b3a7 234 long repldboff; /* replication DB file offset */
40d224a9 235 off_t repldbsize; /* replication DB file size */
ed9b544e 236} redisClient;
237
238struct saveparam {
239 time_t seconds;
240 int changes;
241};
242
243/* Global server state structure */
244struct redisServer {
245 int port;
246 int fd;
3305306f 247 redisDb *db;
10c43610 248 dict *sharingpool;
249 unsigned int sharingpoolsize;
ed9b544e 250 long long dirty; /* changes to DB from the last save */
251 list *clients;
87eca727 252 list *slaves, *monitors;
ed9b544e 253 char neterr[ANET_ERR_LEN];
254 aeEventLoop *el;
255 int cronloops; /* number of times the cron function run */
256 list *objfreelist; /* A list of freed objects to avoid malloc() */
257 time_t lastsave; /* Unix time of last save succeeede */
5fba9f71 258 size_t usedmemory; /* Used memory in megabytes */
ed9b544e 259 /* Fields used only for stats */
260 time_t stat_starttime; /* server start time */
261 long long stat_numcommands; /* number of processed commands */
262 long long stat_numconnections; /* number of connections received */
263 /* Configuration */
264 int verbosity;
265 int glueoutputbuf;
266 int maxidletime;
267 int dbnum;
268 int daemonize;
44b38ef4 269 int appendonly;
48f0308a 270 int appendfsync;
271 time_t lastfsync;
44b38ef4 272 int appendfd;
273 int appendseldb;
ed329fcf 274 char *pidfile;
ed9b544e 275 int bgsaveinprogress;
9f3c422c 276 pid_t bgsavechildpid;
ed9b544e 277 struct saveparam *saveparams;
278 int saveparamslen;
279 char *logfile;
280 char *bindaddr;
281 char *dbfilename;
44b38ef4 282 char *appendfilename;
abcb223e 283 char *requirepass;
10c43610 284 int shareobjects;
ed9b544e 285 /* Replication related */
286 int isslave;
287 char *masterhost;
288 int masterport;
40d224a9 289 redisClient *master; /* client that is master for this slave */
ed9b544e 290 int replstate;
285add55 291 unsigned int maxclients;
d4465900 292 unsigned long maxmemory;
ed9b544e 293 /* Sort parameters - qsort_r() is only available under BSD so we
294 * have to take this state global, in order to pass it to sortCompare() */
295 int sort_desc;
296 int sort_alpha;
297 int sort_bypattern;
298};
299
300typedef void redisCommandProc(redisClient *c);
301struct redisCommand {
302 char *name;
303 redisCommandProc *proc;
304 int arity;
305 int flags;
306};
307
de96dbfe 308struct redisFunctionSym {
309 char *name;
56906eef 310 unsigned long pointer;
de96dbfe 311};
312
ed9b544e 313typedef struct _redisSortObject {
314 robj *obj;
315 union {
316 double score;
317 robj *cmpobj;
318 } u;
319} redisSortObject;
320
321typedef struct _redisSortOperation {
322 int type;
323 robj *pattern;
324} redisSortOperation;
325
6b47e12e 326/* ZSETs use a specialized version of Skiplists */
327
328typedef struct zskiplistNode {
329 struct zskiplistNode **forward;
e3870fab 330 struct zskiplistNode *backward;
6b47e12e 331 double score;
332 robj *obj;
333} zskiplistNode;
334
335typedef struct zskiplist {
e3870fab 336 struct zskiplistNode *header, *tail;
d13f767c 337 unsigned long length;
6b47e12e 338 int level;
339} zskiplist;
340
1812e024 341typedef struct zset {
342 dict *dict;
6b47e12e 343 zskiplist *zsl;
1812e024 344} zset;
345
6b47e12e 346/* Our shared "common" objects */
347
ed9b544e 348struct sharedObjectsStruct {
c937aa89 349 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
7b45bfb2 350 *colon, *nullbulk, *nullmultibulk,
c937aa89 351 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
352 *outofrangeerr, *plus,
ed9b544e 353 *select0, *select1, *select2, *select3, *select4,
354 *select5, *select6, *select7, *select8, *select9;
355} shared;
356
a7866db6 357/* Global vars that are actally used as constants. The following double
358 * values are used for double on-disk serialization, and are initialized
359 * at runtime to avoid strange compiler optimizations. */
360
361static double R_Zero, R_PosInf, R_NegInf, R_Nan;
362
ed9b544e 363/*================================ Prototypes =============================== */
364
365static void freeStringObject(robj *o);
366static void freeListObject(robj *o);
367static void freeSetObject(robj *o);
368static void decrRefCount(void *o);
369static robj *createObject(int type, void *ptr);
370static void freeClient(redisClient *c);
f78fd11b 371static int rdbLoad(char *filename);
ed9b544e 372static void addReply(redisClient *c, robj *obj);
373static void addReplySds(redisClient *c, sds s);
374static void incrRefCount(robj *o);
f78fd11b 375static int rdbSaveBackground(char *filename);
ed9b544e 376static robj *createStringObject(char *ptr, size_t len);
87eca727 377static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
44b38ef4 378static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 379static int syncWithMaster(void);
10c43610 380static robj *tryObjectSharing(robj *o);
942a3961 381static int tryObjectEncoding(robj *o);
382static robj *getDecodedObject(const robj *o);
3305306f 383static int removeExpire(redisDb *db, robj *key);
384static int expireIfNeeded(redisDb *db, robj *key);
385static int deleteIfVolatile(redisDb *db, robj *key);
94754ccc 386static int deleteKey(redisDb *db, robj *key);
bb32ede5 387static time_t getExpire(redisDb *db, robj *key);
388static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 389static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 390static void freeMemoryIfNeeded(void);
de96dbfe 391static int processCommand(redisClient *c);
56906eef 392static void setupSigSegvAction(void);
a3b21203 393static void rdbRemoveTempFile(pid_t childpid);
0ea663ea 394static size_t stringObjectLen(robj *o);
638e42ac 395static void processInputBuffer(redisClient *c);
6b47e12e 396static zskiplist *zslCreate(void);
fd8ccf44 397static void zslFree(zskiplist *zsl);
2b59cfdf 398static void zslInsert(zskiplist *zsl, double score, robj *obj);
ed9b544e 399
abcb223e 400static void authCommand(redisClient *c);
ed9b544e 401static void pingCommand(redisClient *c);
402static void echoCommand(redisClient *c);
403static void setCommand(redisClient *c);
404static void setnxCommand(redisClient *c);
405static void getCommand(redisClient *c);
406static void delCommand(redisClient *c);
407static void existsCommand(redisClient *c);
408static void incrCommand(redisClient *c);
409static void decrCommand(redisClient *c);
410static void incrbyCommand(redisClient *c);
411static void decrbyCommand(redisClient *c);
412static void selectCommand(redisClient *c);
413static void randomkeyCommand(redisClient *c);
414static void keysCommand(redisClient *c);
415static void dbsizeCommand(redisClient *c);
416static void lastsaveCommand(redisClient *c);
417static void saveCommand(redisClient *c);
418static void bgsaveCommand(redisClient *c);
419static void shutdownCommand(redisClient *c);
420static void moveCommand(redisClient *c);
421static void renameCommand(redisClient *c);
422static void renamenxCommand(redisClient *c);
423static void lpushCommand(redisClient *c);
424static void rpushCommand(redisClient *c);
425static void lpopCommand(redisClient *c);
426static void rpopCommand(redisClient *c);
427static void llenCommand(redisClient *c);
428static void lindexCommand(redisClient *c);
429static void lrangeCommand(redisClient *c);
430static void ltrimCommand(redisClient *c);
431static void typeCommand(redisClient *c);
432static void lsetCommand(redisClient *c);
433static void saddCommand(redisClient *c);
434static void sremCommand(redisClient *c);
a4460ef4 435static void smoveCommand(redisClient *c);
ed9b544e 436static void sismemberCommand(redisClient *c);
437static void scardCommand(redisClient *c);
12fea928 438static void spopCommand(redisClient *c);
2abb95a9 439static void srandmemberCommand(redisClient *c);
ed9b544e 440static void sinterCommand(redisClient *c);
441static void sinterstoreCommand(redisClient *c);
40d224a9 442static void sunionCommand(redisClient *c);
443static void sunionstoreCommand(redisClient *c);
f4f56e1d 444static void sdiffCommand(redisClient *c);
445static void sdiffstoreCommand(redisClient *c);
ed9b544e 446static void syncCommand(redisClient *c);
447static void flushdbCommand(redisClient *c);
448static void flushallCommand(redisClient *c);
449static void sortCommand(redisClient *c);
450static void lremCommand(redisClient *c);
451static void infoCommand(redisClient *c);
70003d28 452static void mgetCommand(redisClient *c);
87eca727 453static void monitorCommand(redisClient *c);
3305306f 454static void expireCommand(redisClient *c);
802e8373 455static void expireatCommand(redisClient *c);
f6b141c5 456static void getsetCommand(redisClient *c);
fd88489a 457static void ttlCommand(redisClient *c);
321b0e13 458static void slaveofCommand(redisClient *c);
7f957c92 459static void debugCommand(redisClient *c);
f6b141c5 460static void msetCommand(redisClient *c);
461static void msetnxCommand(redisClient *c);
fd8ccf44 462static void zaddCommand(redisClient *c);
cc812361 463static void zrangeCommand(redisClient *c);
50c55df5 464static void zrangebyscoreCommand(redisClient *c);
e3870fab 465static void zrevrangeCommand(redisClient *c);
3c41331e 466static void zcardCommand(redisClient *c);
1b7106e7 467static void zremCommand(redisClient *c);
6e333bbe 468static void zscoreCommand(redisClient *c);
1807985b 469static void zremrangebyscoreCommand(redisClient *c);
f6b141c5 470
ed9b544e 471/*================================= Globals ================================= */
472
473/* Global vars */
474static struct redisServer server; /* server global state */
475static struct redisCommand cmdTable[] = {
476 {"get",getCommand,2,REDIS_CMD_INLINE},
3fd78bcd 477 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
478 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
5109cdff 479 {"del",delCommand,-2,REDIS_CMD_INLINE},
ed9b544e 480 {"exists",existsCommand,2,REDIS_CMD_INLINE},
3fd78bcd 481 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
482 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
70003d28 483 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
3fd78bcd 484 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
485 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 486 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
487 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
488 {"llen",llenCommand,2,REDIS_CMD_INLINE},
489 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
3fd78bcd 490 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 491 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
492 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
493 {"lrem",lremCommand,4,REDIS_CMD_BULK},
3fd78bcd 494 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 495 {"srem",sremCommand,3,REDIS_CMD_BULK},
a4460ef4 496 {"smove",smoveCommand,4,REDIS_CMD_BULK},
ed9b544e 497 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
498 {"scard",scardCommand,2,REDIS_CMD_INLINE},
12fea928 499 {"spop",spopCommand,2,REDIS_CMD_INLINE},
2abb95a9 500 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
3fd78bcd 501 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
502 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
503 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
504 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
505 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
506 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 507 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
fd8ccf44 508 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
1b7106e7 509 {"zrem",zremCommand,3,REDIS_CMD_BULK},
1807985b 510 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
cc812361 511 {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
50c55df5 512 {"zrangebyscore",zrangebyscoreCommand,4,REDIS_CMD_INLINE},
e3870fab 513 {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
3c41331e 514 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
6e333bbe 515 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 516 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
517 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
f6b141c5 518 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
519 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
520 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 521 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
522 {"select",selectCommand,2,REDIS_CMD_INLINE},
523 {"move",moveCommand,3,REDIS_CMD_INLINE},
524 {"rename",renameCommand,3,REDIS_CMD_INLINE},
525 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
321b0e13 526 {"expire",expireCommand,3,REDIS_CMD_INLINE},
802e8373 527 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
ed9b544e 528 {"keys",keysCommand,2,REDIS_CMD_INLINE},
529 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
abcb223e 530 {"auth",authCommand,2,REDIS_CMD_INLINE},
ed9b544e 531 {"ping",pingCommand,1,REDIS_CMD_INLINE},
532 {"echo",echoCommand,2,REDIS_CMD_BULK},
533 {"save",saveCommand,1,REDIS_CMD_INLINE},
534 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
535 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
536 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
537 {"type",typeCommand,2,REDIS_CMD_INLINE},
538 {"sync",syncCommand,1,REDIS_CMD_INLINE},
539 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
540 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
3fd78bcd 541 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 542 {"info",infoCommand,1,REDIS_CMD_INLINE},
87eca727 543 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
fd88489a 544 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
321b0e13 545 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
7f957c92 546 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
ed9b544e 547 {NULL,NULL,0,0}
548};
ed9b544e 549/*============================ Utility functions ============================ */
550
551/* Glob-style pattern matching. */
552int stringmatchlen(const char *pattern, int patternLen,
553 const char *string, int stringLen, int nocase)
554{
555 while(patternLen) {
556 switch(pattern[0]) {
557 case '*':
558 while (pattern[1] == '*') {
559 pattern++;
560 patternLen--;
561 }
562 if (patternLen == 1)
563 return 1; /* match */
564 while(stringLen) {
565 if (stringmatchlen(pattern+1, patternLen-1,
566 string, stringLen, nocase))
567 return 1; /* match */
568 string++;
569 stringLen--;
570 }
571 return 0; /* no match */
572 break;
573 case '?':
574 if (stringLen == 0)
575 return 0; /* no match */
576 string++;
577 stringLen--;
578 break;
579 case '[':
580 {
581 int not, match;
582
583 pattern++;
584 patternLen--;
585 not = pattern[0] == '^';
586 if (not) {
587 pattern++;
588 patternLen--;
589 }
590 match = 0;
591 while(1) {
592 if (pattern[0] == '\\') {
593 pattern++;
594 patternLen--;
595 if (pattern[0] == string[0])
596 match = 1;
597 } else if (pattern[0] == ']') {
598 break;
599 } else if (patternLen == 0) {
600 pattern--;
601 patternLen++;
602 break;
603 } else if (pattern[1] == '-' && patternLen >= 3) {
604 int start = pattern[0];
605 int end = pattern[2];
606 int c = string[0];
607 if (start > end) {
608 int t = start;
609 start = end;
610 end = t;
611 }
612 if (nocase) {
613 start = tolower(start);
614 end = tolower(end);
615 c = tolower(c);
616 }
617 pattern += 2;
618 patternLen -= 2;
619 if (c >= start && c <= end)
620 match = 1;
621 } else {
622 if (!nocase) {
623 if (pattern[0] == string[0])
624 match = 1;
625 } else {
626 if (tolower((int)pattern[0]) == tolower((int)string[0]))
627 match = 1;
628 }
629 }
630 pattern++;
631 patternLen--;
632 }
633 if (not)
634 match = !match;
635 if (!match)
636 return 0; /* no match */
637 string++;
638 stringLen--;
639 break;
640 }
641 case '\\':
642 if (patternLen >= 2) {
643 pattern++;
644 patternLen--;
645 }
646 /* fall through */
647 default:
648 if (!nocase) {
649 if (pattern[0] != string[0])
650 return 0; /* no match */
651 } else {
652 if (tolower((int)pattern[0]) != tolower((int)string[0]))
653 return 0; /* no match */
654 }
655 string++;
656 stringLen--;
657 break;
658 }
659 pattern++;
660 patternLen--;
661 if (stringLen == 0) {
662 while(*pattern == '*') {
663 pattern++;
664 patternLen--;
665 }
666 break;
667 }
668 }
669 if (patternLen == 0 && stringLen == 0)
670 return 1;
671 return 0;
672}
673
56906eef 674static void redisLog(int level, const char *fmt, ...) {
ed9b544e 675 va_list ap;
676 FILE *fp;
677
678 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
679 if (!fp) return;
680
681 va_start(ap, fmt);
682 if (level >= server.verbosity) {
683 char *c = ".-*";
1904ecc1 684 char buf[64];
685 time_t now;
686
687 now = time(NULL);
6c9385e0 688 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
1904ecc1 689 fprintf(fp,"%s %c ",buf,c[level]);
ed9b544e 690 vfprintf(fp, fmt, ap);
691 fprintf(fp,"\n");
692 fflush(fp);
693 }
694 va_end(ap);
695
696 if (server.logfile) fclose(fp);
697}
698
699/*====================== Hash table type implementation ==================== */
700
701/* This is an hash table type that uses the SDS dynamic strings libary as
702 * keys and radis objects as values (objects can hold SDS strings,
703 * lists, sets). */
704
1812e024 705static void dictVanillaFree(void *privdata, void *val)
706{
707 DICT_NOTUSED(privdata);
708 zfree(val);
709}
710
ed9b544e 711static int sdsDictKeyCompare(void *privdata, const void *key1,
712 const void *key2)
713{
714 int l1,l2;
715 DICT_NOTUSED(privdata);
716
717 l1 = sdslen((sds)key1);
718 l2 = sdslen((sds)key2);
719 if (l1 != l2) return 0;
720 return memcmp(key1, key2, l1) == 0;
721}
722
723static void dictRedisObjectDestructor(void *privdata, void *val)
724{
725 DICT_NOTUSED(privdata);
726
727 decrRefCount(val);
728}
729
942a3961 730static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 731 const void *key2)
732{
733 const robj *o1 = key1, *o2 = key2;
734 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
735}
736
942a3961 737static unsigned int dictObjHash(const void *key) {
ed9b544e 738 const robj *o = key;
739 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
740}
741
942a3961 742static int dictEncObjKeyCompare(void *privdata, const void *key1,
743 const void *key2)
744{
745 const robj *o1 = key1, *o2 = key2;
746
747 if (o1->encoding == REDIS_ENCODING_RAW &&
748 o2->encoding == REDIS_ENCODING_RAW)
749 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
750 else {
751 robj *dec1, *dec2;
752 int cmp;
753
754 dec1 = o1->encoding != REDIS_ENCODING_RAW ?
755 getDecodedObject(o1) : (robj*)o1;
756 dec2 = o2->encoding != REDIS_ENCODING_RAW ?
757 getDecodedObject(o2) : (robj*)o2;
758 cmp = sdsDictKeyCompare(privdata,dec1->ptr,dec2->ptr);
759 if (dec1 != o1) decrRefCount(dec1);
760 if (dec2 != o2) decrRefCount(dec2);
761 return cmp;
762 }
763}
764
765static unsigned int dictEncObjHash(const void *key) {
766 const robj *o = key;
767
768 if (o->encoding == REDIS_ENCODING_RAW)
769 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
770 else {
771 robj *dec = getDecodedObject(o);
772 unsigned int hash = dictGenHashFunction(dec->ptr, sdslen((sds)dec->ptr));
773 decrRefCount(dec);
774 return hash;
775 }
776}
777
ed9b544e 778static dictType setDictType = {
942a3961 779 dictEncObjHash, /* hash function */
ed9b544e 780 NULL, /* key dup */
781 NULL, /* val dup */
942a3961 782 dictEncObjKeyCompare, /* key compare */
ed9b544e 783 dictRedisObjectDestructor, /* key destructor */
784 NULL /* val destructor */
785};
786
1812e024 787static dictType zsetDictType = {
788 dictEncObjHash, /* hash function */
789 NULL, /* key dup */
790 NULL, /* val dup */
791 dictEncObjKeyCompare, /* key compare */
792 dictRedisObjectDestructor, /* key destructor */
793 dictVanillaFree /* val destructor */
794};
795
ed9b544e 796static dictType hashDictType = {
942a3961 797 dictObjHash, /* hash function */
ed9b544e 798 NULL, /* key dup */
799 NULL, /* val dup */
942a3961 800 dictObjKeyCompare, /* key compare */
ed9b544e 801 dictRedisObjectDestructor, /* key destructor */
802 dictRedisObjectDestructor /* val destructor */
803};
804
805/* ========================= Random utility functions ======================= */
806
807/* Redis generally does not try to recover from out of memory conditions
808 * when allocating objects or strings, it is not clear if it will be possible
809 * to report this condition to the client since the networking layer itself
810 * is based on heap allocation for send buffers, so we simply abort.
811 * At least the code will be simpler to read... */
812static void oom(const char *msg) {
813 fprintf(stderr, "%s: Out of memory\n",msg);
814 fflush(stderr);
815 sleep(1);
816 abort();
817}
818
819/* ====================== Redis server networking stuff ===================== */
56906eef 820static void closeTimedoutClients(void) {
ed9b544e 821 redisClient *c;
ed9b544e 822 listNode *ln;
823 time_t now = time(NULL);
824
6208b3a7 825 listRewind(server.clients);
826 while ((ln = listYield(server.clients)) != NULL) {
ed9b544e 827 c = listNodeValue(ln);
828 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 829 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
ed9b544e 830 (now - c->lastinteraction > server.maxidletime)) {
831 redisLog(REDIS_DEBUG,"Closing idle client");
832 freeClient(c);
833 }
834 }
ed9b544e 835}
836
12fea928 837static int htNeedsResize(dict *dict) {
838 long long size, used;
839
840 size = dictSlots(dict);
841 used = dictSize(dict);
842 return (size && used && size > DICT_HT_INITIAL_SIZE &&
843 (used*100/size < REDIS_HT_MINFILL));
844}
845
0bc03378 846/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
847 * we resize the hash table to save memory */
56906eef 848static void tryResizeHashTables(void) {
0bc03378 849 int j;
850
851 for (j = 0; j < server.dbnum; j++) {
12fea928 852 if (htNeedsResize(server.db[j].dict)) {
853 redisLog(REDIS_DEBUG,"The hash table %d is too sparse, resize it...",j);
0bc03378 854 dictResize(server.db[j].dict);
12fea928 855 redisLog(REDIS_DEBUG,"Hash table %d resized.",j);
0bc03378 856 }
12fea928 857 if (htNeedsResize(server.db[j].expires))
858 dictResize(server.db[j].expires);
0bc03378 859 }
860}
861
56906eef 862static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 863 int j, loops = server.cronloops++;
ed9b544e 864 REDIS_NOTUSED(eventLoop);
865 REDIS_NOTUSED(id);
866 REDIS_NOTUSED(clientData);
867
868 /* Update the global state with the amount of used memory */
869 server.usedmemory = zmalloc_used_memory();
870
0bc03378 871 /* Show some info about non-empty databases */
ed9b544e 872 for (j = 0; j < server.dbnum; j++) {
dec423d9 873 long long size, used, vkeys;
94754ccc 874
3305306f 875 size = dictSlots(server.db[j].dict);
876 used = dictSize(server.db[j].dict);
94754ccc 877 vkeys = dictSize(server.db[j].expires);
c3cb078d 878 if (!(loops % 5) && (used || vkeys)) {
879 redisLog(REDIS_DEBUG,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 880 /* dictPrintStats(server.dict); */
ed9b544e 881 }
ed9b544e 882 }
883
0bc03378 884 /* We don't want to resize the hash tables while a bacground saving
885 * is in progress: the saving child is created using fork() that is
886 * implemented with a copy-on-write semantic in most modern systems, so
887 * if we resize the HT while there is the saving child at work actually
888 * a lot of memory movements in the parent will cause a lot of pages
889 * copied. */
890 if (!server.bgsaveinprogress) tryResizeHashTables();
891
ed9b544e 892 /* Show information about connected clients */
893 if (!(loops % 5)) {
21aecf4b 894 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
ed9b544e 895 listLength(server.clients)-listLength(server.slaves),
896 listLength(server.slaves),
10c43610 897 server.usedmemory,
3305306f 898 dictSize(server.sharingpool));
ed9b544e 899 }
900
901 /* Close connections of timedout clients */
0150db36 902 if (server.maxidletime && !(loops % 10))
ed9b544e 903 closeTimedoutClients();
904
905 /* Check if a background saving in progress terminated */
906 if (server.bgsaveinprogress) {
907 int statloc;
908 if (wait4(-1,&statloc,WNOHANG,NULL)) {
909 int exitcode = WEXITSTATUS(statloc);
9f3c422c 910 int bysignal = WIFSIGNALED(statloc);
911
912 if (!bysignal && exitcode == 0) {
ed9b544e 913 redisLog(REDIS_NOTICE,
914 "Background saving terminated with success");
915 server.dirty = 0;
916 server.lastsave = time(NULL);
9f3c422c 917 } else if (!bysignal && exitcode != 0) {
918 redisLog(REDIS_WARNING, "Background saving error");
ed9b544e 919 } else {
920 redisLog(REDIS_WARNING,
9f3c422c 921 "Background saving terminated by signal");
a3b21203 922 rdbRemoveTempFile(server.bgsavechildpid);
ed9b544e 923 }
924 server.bgsaveinprogress = 0;
9f3c422c 925 server.bgsavechildpid = -1;
a3b21203 926 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
ed9b544e 927 }
928 } else {
929 /* If there is not a background saving in progress check if
930 * we have to save now */
931 time_t now = time(NULL);
932 for (j = 0; j < server.saveparamslen; j++) {
933 struct saveparam *sp = server.saveparams+j;
934
935 if (server.dirty >= sp->changes &&
936 now-server.lastsave > sp->seconds) {
937 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
938 sp->changes, sp->seconds);
f78fd11b 939 rdbSaveBackground(server.dbfilename);
ed9b544e 940 break;
941 }
942 }
943 }
94754ccc 944
945 /* Try to expire a few timed out keys */
946 for (j = 0; j < server.dbnum; j++) {
947 redisDb *db = server.db+j;
948 int num = dictSize(db->expires);
949
950 if (num) {
951 time_t now = time(NULL);
952
953 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
954 num = REDIS_EXPIRELOOKUPS_PER_CRON;
955 while (num--) {
956 dictEntry *de;
957 time_t t;
958
959 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
960 t = (time_t) dictGetEntryVal(de);
961 if (now > t) {
962 deleteKey(db,dictGetEntryKey(de));
963 }
964 }
965 }
966 }
967
ed9b544e 968 /* Check if we should connect to a MASTER */
969 if (server.replstate == REDIS_REPL_CONNECT) {
970 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
971 if (syncWithMaster() == REDIS_OK) {
972 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
973 }
974 }
975 return 1000;
976}
977
978static void createSharedObjects(void) {
979 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
980 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
981 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 982 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
983 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
984 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
985 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
986 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
987 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 988 /* no such key */
ed9b544e 989 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
990 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
991 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 992 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
993 "-ERR no such key\r\n"));
ed9b544e 994 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
995 "-ERR syntax error\r\n"));
c937aa89 996 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
997 "-ERR source and destination objects are the same\r\n"));
998 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
999 "-ERR index out of range\r\n"));
ed9b544e 1000 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1001 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1002 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1003 shared.select0 = createStringObject("select 0\r\n",10);
1004 shared.select1 = createStringObject("select 1\r\n",10);
1005 shared.select2 = createStringObject("select 2\r\n",10);
1006 shared.select3 = createStringObject("select 3\r\n",10);
1007 shared.select4 = createStringObject("select 4\r\n",10);
1008 shared.select5 = createStringObject("select 5\r\n",10);
1009 shared.select6 = createStringObject("select 6\r\n",10);
1010 shared.select7 = createStringObject("select 7\r\n",10);
1011 shared.select8 = createStringObject("select 8\r\n",10);
1012 shared.select9 = createStringObject("select 9\r\n",10);
1013}
1014
1015static void appendServerSaveParams(time_t seconds, int changes) {
1016 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1017 server.saveparams[server.saveparamslen].seconds = seconds;
1018 server.saveparams[server.saveparamslen].changes = changes;
1019 server.saveparamslen++;
1020}
1021
1022static void ResetServerSaveParams() {
1023 zfree(server.saveparams);
1024 server.saveparams = NULL;
1025 server.saveparamslen = 0;
1026}
1027
1028static void initServerConfig() {
1029 server.dbnum = REDIS_DEFAULT_DBNUM;
1030 server.port = REDIS_SERVERPORT;
1031 server.verbosity = REDIS_DEBUG;
1032 server.maxidletime = REDIS_MAXIDLETIME;
1033 server.saveparams = NULL;
1034 server.logfile = NULL; /* NULL = log on standard output */
1035 server.bindaddr = NULL;
1036 server.glueoutputbuf = 1;
1037 server.daemonize = 0;
44b38ef4 1038 server.appendonly = 0;
4e141d5a 1039 server.appendfsync = APPENDFSYNC_ALWAYS;
48f0308a 1040 server.lastfsync = time(NULL);
44b38ef4 1041 server.appendfd = -1;
1042 server.appendseldb = -1; /* Make sure the first time will not match */
ed329fcf 1043 server.pidfile = "/var/run/redis.pid";
ed9b544e 1044 server.dbfilename = "dump.rdb";
44b38ef4 1045 server.appendfilename = "appendonly.log";
abcb223e 1046 server.requirepass = NULL;
10c43610 1047 server.shareobjects = 0;
21aecf4b 1048 server.sharingpoolsize = 1024;
285add55 1049 server.maxclients = 0;
3fd78bcd 1050 server.maxmemory = 0;
ed9b544e 1051 ResetServerSaveParams();
1052
1053 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1054 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1055 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1056 /* Replication related */
1057 server.isslave = 0;
1058 server.masterhost = NULL;
1059 server.masterport = 6379;
1060 server.master = NULL;
1061 server.replstate = REDIS_REPL_NONE;
a7866db6 1062
1063 /* Double constants initialization */
1064 R_Zero = 0.0;
1065 R_PosInf = 1.0/R_Zero;
1066 R_NegInf = -1.0/R_Zero;
1067 R_Nan = R_Zero/R_Zero;
ed9b544e 1068}
1069
1070static void initServer() {
1071 int j;
1072
1073 signal(SIGHUP, SIG_IGN);
1074 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1075 setupSigSegvAction();
ed9b544e 1076
1077 server.clients = listCreate();
1078 server.slaves = listCreate();
87eca727 1079 server.monitors = listCreate();
ed9b544e 1080 server.objfreelist = listCreate();
1081 createSharedObjects();
1082 server.el = aeCreateEventLoop();
3305306f 1083 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
10c43610 1084 server.sharingpool = dictCreate(&setDictType,NULL);
ed9b544e 1085 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1086 if (server.fd == -1) {
1087 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1088 exit(1);
1089 }
3305306f 1090 for (j = 0; j < server.dbnum; j++) {
1091 server.db[j].dict = dictCreate(&hashDictType,NULL);
1092 server.db[j].expires = dictCreate(&setDictType,NULL);
1093 server.db[j].id = j;
1094 }
ed9b544e 1095 server.cronloops = 0;
1096 server.bgsaveinprogress = 0;
9f3c422c 1097 server.bgsavechildpid = -1;
ed9b544e 1098 server.lastsave = time(NULL);
1099 server.dirty = 0;
1100 server.usedmemory = 0;
1101 server.stat_numcommands = 0;
1102 server.stat_numconnections = 0;
1103 server.stat_starttime = time(NULL);
1104 aeCreateTimeEvent(server.el, 1000, serverCron, NULL, NULL);
44b38ef4 1105
1106 if (server.appendonly) {
71eba477 1107 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
44b38ef4 1108 if (server.appendfd == -1) {
1109 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1110 strerror(errno));
1111 exit(1);
1112 }
1113 }
ed9b544e 1114}
1115
1116/* Empty the whole database */
ca37e9cd 1117static long long emptyDb() {
ed9b544e 1118 int j;
ca37e9cd 1119 long long removed = 0;
ed9b544e 1120
3305306f 1121 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1122 removed += dictSize(server.db[j].dict);
3305306f 1123 dictEmpty(server.db[j].dict);
1124 dictEmpty(server.db[j].expires);
1125 }
ca37e9cd 1126 return removed;
ed9b544e 1127}
1128
85dd2f3a 1129static int yesnotoi(char *s) {
1130 if (!strcasecmp(s,"yes")) return 1;
1131 else if (!strcasecmp(s,"no")) return 0;
1132 else return -1;
1133}
1134
ed9b544e 1135/* I agree, this is a very rudimental way to load a configuration...
1136 will improve later if the config gets more complex */
1137static void loadServerConfig(char *filename) {
c9a111ac 1138 FILE *fp;
ed9b544e 1139 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1140 int linenum = 0;
1141 sds line = NULL;
c9a111ac 1142
1143 if (filename[0] == '-' && filename[1] == '\0')
1144 fp = stdin;
1145 else {
1146 if ((fp = fopen(filename,"r")) == NULL) {
1147 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1148 exit(1);
1149 }
ed9b544e 1150 }
c9a111ac 1151
ed9b544e 1152 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1153 sds *argv;
1154 int argc, j;
1155
1156 linenum++;
1157 line = sdsnew(buf);
1158 line = sdstrim(line," \t\r\n");
1159
1160 /* Skip comments and blank lines*/
1161 if (line[0] == '#' || line[0] == '\0') {
1162 sdsfree(line);
1163 continue;
1164 }
1165
1166 /* Split into arguments */
1167 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1168 sdstolower(argv[0]);
1169
1170 /* Execute config directives */
bb0b03a3 1171 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1172 server.maxidletime = atoi(argv[1]);
0150db36 1173 if (server.maxidletime < 0) {
ed9b544e 1174 err = "Invalid timeout value"; goto loaderr;
1175 }
bb0b03a3 1176 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1177 server.port = atoi(argv[1]);
1178 if (server.port < 1 || server.port > 65535) {
1179 err = "Invalid port"; goto loaderr;
1180 }
bb0b03a3 1181 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1182 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1183 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1184 int seconds = atoi(argv[1]);
1185 int changes = atoi(argv[2]);
1186 if (seconds < 1 || changes < 0) {
1187 err = "Invalid save parameters"; goto loaderr;
1188 }
1189 appendServerSaveParams(seconds,changes);
bb0b03a3 1190 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1191 if (chdir(argv[1]) == -1) {
1192 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1193 argv[1], strerror(errno));
1194 exit(1);
1195 }
bb0b03a3 1196 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1197 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
1198 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1199 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1200 else {
1201 err = "Invalid log level. Must be one of debug, notice, warning";
1202 goto loaderr;
1203 }
bb0b03a3 1204 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1205 FILE *logfp;
ed9b544e 1206
1207 server.logfile = zstrdup(argv[1]);
bb0b03a3 1208 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1209 zfree(server.logfile);
1210 server.logfile = NULL;
1211 }
1212 if (server.logfile) {
1213 /* Test if we are able to open the file. The server will not
1214 * be able to abort just for this problem later... */
c9a111ac 1215 logfp = fopen(server.logfile,"a");
1216 if (logfp == NULL) {
ed9b544e 1217 err = sdscatprintf(sdsempty(),
1218 "Can't open the log file: %s", strerror(errno));
1219 goto loaderr;
1220 }
c9a111ac 1221 fclose(logfp);
ed9b544e 1222 }
bb0b03a3 1223 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1224 server.dbnum = atoi(argv[1]);
1225 if (server.dbnum < 1) {
1226 err = "Invalid number of databases"; goto loaderr;
1227 }
285add55 1228 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1229 server.maxclients = atoi(argv[1]);
3fd78bcd 1230 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
d4465900 1231 server.maxmemory = strtoll(argv[1], NULL, 10);
bb0b03a3 1232 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1233 server.masterhost = sdsnew(argv[1]);
1234 server.masterport = atoi(argv[2]);
1235 server.replstate = REDIS_REPL_CONNECT;
bb0b03a3 1236 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1237 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1238 err = "argument must be 'yes' or 'no'"; goto loaderr;
1239 }
bb0b03a3 1240 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
85dd2f3a 1241 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
10c43610 1242 err = "argument must be 'yes' or 'no'"; goto loaderr;
1243 }
e52c65b9 1244 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1245 server.sharingpoolsize = atoi(argv[1]);
1246 if (server.sharingpoolsize < 1) {
1247 err = "invalid object sharing pool size"; goto loaderr;
1248 }
bb0b03a3 1249 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1250 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1251 err = "argument must be 'yes' or 'no'"; goto loaderr;
1252 }
44b38ef4 1253 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1254 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1255 err = "argument must be 'yes' or 'no'"; goto loaderr;
1256 }
48f0308a 1257 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1258 if (strcasecmp(argv[1],"no")) {
1259 server.appendfsync = APPENDFSYNC_NO;
1260 } else if (strcasecmp(argv[1],"always")) {
1261 server.appendfsync = APPENDFSYNC_ALWAYS;
1262 } else if (strcasecmp(argv[1],"everysec")) {
1263 server.appendfsync = APPENDFSYNC_EVERYSEC;
1264 } else {
1265 err = "argument must be 'no', 'always' or 'everysec'";
1266 goto loaderr;
1267 }
bb0b03a3 1268 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
abcb223e 1269 server.requirepass = zstrdup(argv[1]);
bb0b03a3 1270 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
ed329fcf 1271 server.pidfile = zstrdup(argv[1]);
bb0b03a3 1272 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
b8b553c8 1273 server.dbfilename = zstrdup(argv[1]);
ed9b544e 1274 } else {
1275 err = "Bad directive or wrong number of arguments"; goto loaderr;
1276 }
1277 for (j = 0; j < argc; j++)
1278 sdsfree(argv[j]);
1279 zfree(argv);
1280 sdsfree(line);
1281 }
c9a111ac 1282 if (fp != stdin) fclose(fp);
ed9b544e 1283 return;
1284
1285loaderr:
1286 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1287 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1288 fprintf(stderr, ">>> '%s'\n", line);
1289 fprintf(stderr, "%s\n", err);
1290 exit(1);
1291}
1292
1293static void freeClientArgv(redisClient *c) {
1294 int j;
1295
1296 for (j = 0; j < c->argc; j++)
1297 decrRefCount(c->argv[j]);
e8a74421 1298 for (j = 0; j < c->mbargc; j++)
1299 decrRefCount(c->mbargv[j]);
ed9b544e 1300 c->argc = 0;
e8a74421 1301 c->mbargc = 0;
ed9b544e 1302}
1303
1304static void freeClient(redisClient *c) {
1305 listNode *ln;
1306
1307 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1308 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1309 sdsfree(c->querybuf);
1310 listRelease(c->reply);
1311 freeClientArgv(c);
1312 close(c->fd);
1313 ln = listSearchKey(server.clients,c);
1314 assert(ln != NULL);
1315 listDelNode(server.clients,ln);
1316 if (c->flags & REDIS_SLAVE) {
6208b3a7 1317 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1318 close(c->repldbfd);
87eca727 1319 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1320 ln = listSearchKey(l,c);
ed9b544e 1321 assert(ln != NULL);
87eca727 1322 listDelNode(l,ln);
ed9b544e 1323 }
1324 if (c->flags & REDIS_MASTER) {
1325 server.master = NULL;
1326 server.replstate = REDIS_REPL_CONNECT;
1327 }
93ea3759 1328 zfree(c->argv);
e8a74421 1329 zfree(c->mbargv);
ed9b544e 1330 zfree(c);
1331}
1332
1333static void glueReplyBuffersIfNeeded(redisClient *c) {
1334 int totlen = 0;
6208b3a7 1335 listNode *ln;
ed9b544e 1336 robj *o;
1337
6208b3a7 1338 listRewind(c->reply);
1339 while((ln = listYield(c->reply))) {
ed9b544e 1340 o = ln->value;
1341 totlen += sdslen(o->ptr);
ed9b544e 1342 /* This optimization makes more sense if we don't have to copy
1343 * too much data */
1344 if (totlen > 1024) return;
1345 }
1346 if (totlen > 0) {
1347 char buf[1024];
1348 int copylen = 0;
1349
6208b3a7 1350 listRewind(c->reply);
1351 while((ln = listYield(c->reply))) {
ed9b544e 1352 o = ln->value;
1353 memcpy(buf+copylen,o->ptr,sdslen(o->ptr));
1354 copylen += sdslen(o->ptr);
1355 listDelNode(c->reply,ln);
ed9b544e 1356 }
1357 /* Now the output buffer is empty, add the new single element */
6fdc78ac 1358 o = createObject(REDIS_STRING,sdsnewlen(buf,totlen));
6b47e12e 1359 listAddNodeTail(c->reply,o);
ed9b544e 1360 }
1361}
1362
1363static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1364 redisClient *c = privdata;
1365 int nwritten = 0, totwritten = 0, objlen;
1366 robj *o;
1367 REDIS_NOTUSED(el);
1368 REDIS_NOTUSED(mask);
1369
1370 if (server.glueoutputbuf && listLength(c->reply) > 1)
1371 glueReplyBuffersIfNeeded(c);
1372 while(listLength(c->reply)) {
1373 o = listNodeValue(listFirst(c->reply));
1374 objlen = sdslen(o->ptr);
1375
1376 if (objlen == 0) {
1377 listDelNode(c->reply,listFirst(c->reply));
1378 continue;
1379 }
1380
1381 if (c->flags & REDIS_MASTER) {
6f376729 1382 /* Don't reply to a master */
ed9b544e 1383 nwritten = objlen - c->sentlen;
1384 } else {
a4d1ba9a 1385 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 1386 if (nwritten <= 0) break;
1387 }
1388 c->sentlen += nwritten;
1389 totwritten += nwritten;
1390 /* If we fully sent the object on head go to the next one */
1391 if (c->sentlen == objlen) {
1392 listDelNode(c->reply,listFirst(c->reply));
1393 c->sentlen = 0;
1394 }
6f376729 1395 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
1396 * bytes, in a single threaded server it's a good idea to server
1397 * other clients as well, even if a very large request comes from
1398 * super fast link that is always able to accept data (in real world
1399 * terms think to 'KEYS *' against the loopback interfae) */
1400 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 1401 }
1402 if (nwritten == -1) {
1403 if (errno == EAGAIN) {
1404 nwritten = 0;
1405 } else {
1406 redisLog(REDIS_DEBUG,
1407 "Error writing to client: %s", strerror(errno));
1408 freeClient(c);
1409 return;
1410 }
1411 }
1412 if (totwritten > 0) c->lastinteraction = time(NULL);
1413 if (listLength(c->reply) == 0) {
1414 c->sentlen = 0;
1415 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1416 }
1417}
1418
1419static struct redisCommand *lookupCommand(char *name) {
1420 int j = 0;
1421 while(cmdTable[j].name != NULL) {
bb0b03a3 1422 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
ed9b544e 1423 j++;
1424 }
1425 return NULL;
1426}
1427
1428/* resetClient prepare the client to process the next command */
1429static void resetClient(redisClient *c) {
1430 freeClientArgv(c);
1431 c->bulklen = -1;
e8a74421 1432 c->multibulk = 0;
ed9b544e 1433}
1434
1435/* If this function gets called we already read a whole
1436 * command, argments are in the client argv/argc fields.
1437 * processCommand() execute the command or prepare the
1438 * server for a bulk read from the client.
1439 *
1440 * If 1 is returned the client is still alive and valid and
1441 * and other operations can be performed by the caller. Otherwise
1442 * if 0 is returned the client was destroied (i.e. after QUIT). */
1443static int processCommand(redisClient *c) {
1444 struct redisCommand *cmd;
1445 long long dirty;
1446
3fd78bcd 1447 /* Free some memory if needed (maxmemory setting) */
1448 if (server.maxmemory) freeMemoryIfNeeded();
1449
e8a74421 1450 /* Handle the multi bulk command type. This is an alternative protocol
1451 * supported by Redis in order to receive commands that are composed of
1452 * multiple binary-safe "bulk" arguments. The latency of processing is
1453 * a bit higher but this allows things like multi-sets, so if this
1454 * protocol is used only for MSET and similar commands this is a big win. */
1455 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1456 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1457 if (c->multibulk <= 0) {
1458 resetClient(c);
1459 return 1;
1460 } else {
1461 decrRefCount(c->argv[c->argc-1]);
1462 c->argc--;
1463 return 1;
1464 }
1465 } else if (c->multibulk) {
1466 if (c->bulklen == -1) {
1467 if (((char*)c->argv[0]->ptr)[0] != '$') {
1468 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1469 resetClient(c);
1470 return 1;
1471 } else {
1472 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1473 decrRefCount(c->argv[0]);
1474 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1475 c->argc--;
1476 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1477 resetClient(c);
1478 return 1;
1479 }
1480 c->argc--;
1481 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1482 return 1;
1483 }
1484 } else {
1485 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1486 c->mbargv[c->mbargc] = c->argv[0];
1487 c->mbargc++;
1488 c->argc--;
1489 c->multibulk--;
1490 if (c->multibulk == 0) {
1491 robj **auxargv;
1492 int auxargc;
1493
1494 /* Here we need to swap the multi-bulk argc/argv with the
1495 * normal argc/argv of the client structure. */
1496 auxargv = c->argv;
1497 c->argv = c->mbargv;
1498 c->mbargv = auxargv;
1499
1500 auxargc = c->argc;
1501 c->argc = c->mbargc;
1502 c->mbargc = auxargc;
1503
1504 /* We need to set bulklen to something different than -1
1505 * in order for the code below to process the command without
1506 * to try to read the last argument of a bulk command as
1507 * a special argument. */
1508 c->bulklen = 0;
1509 /* continue below and process the command */
1510 } else {
1511 c->bulklen = -1;
1512 return 1;
1513 }
1514 }
1515 }
1516 /* -- end of multi bulk commands processing -- */
1517
ed9b544e 1518 /* The QUIT command is handled as a special case. Normal command
1519 * procs are unable to close the client connection safely */
bb0b03a3 1520 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 1521 freeClient(c);
1522 return 0;
1523 }
1524 cmd = lookupCommand(c->argv[0]->ptr);
1525 if (!cmd) {
1526 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1527 resetClient(c);
1528 return 1;
1529 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1530 (c->argc < -cmd->arity)) {
1531 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
1532 resetClient(c);
1533 return 1;
3fd78bcd 1534 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1535 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1536 resetClient(c);
1537 return 1;
ed9b544e 1538 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1539 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1540
1541 decrRefCount(c->argv[c->argc-1]);
1542 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1543 c->argc--;
1544 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1545 resetClient(c);
1546 return 1;
1547 }
1548 c->argc--;
1549 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1550 /* It is possible that the bulk read is already in the
8d0490e7 1551 * buffer. Check this condition and handle it accordingly.
1552 * This is just a fast path, alternative to call processInputBuffer().
1553 * It's a good idea since the code is small and this condition
1554 * happens most of the times. */
ed9b544e 1555 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1556 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1557 c->argc++;
1558 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1559 } else {
1560 return 1;
1561 }
1562 }
10c43610 1563 /* Let's try to share objects on the command arguments vector */
1564 if (server.shareobjects) {
1565 int j;
1566 for(j = 1; j < c->argc; j++)
1567 c->argv[j] = tryObjectSharing(c->argv[j]);
1568 }
942a3961 1569 /* Let's try to encode the bulk object to save space. */
1570 if (cmd->flags & REDIS_CMD_BULK)
1571 tryObjectEncoding(c->argv[c->argc-1]);
1572
e63943a4 1573 /* Check if the user is authenticated */
1574 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1575 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1576 resetClient(c);
1577 return 1;
1578 }
1579
ed9b544e 1580 /* Exec the command */
1581 dirty = server.dirty;
1582 cmd->proc(c);
44b38ef4 1583 if (server.appendonly != 0)
1584 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
ed9b544e 1585 if (server.dirty-dirty != 0 && listLength(server.slaves))
3305306f 1586 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
87eca727 1587 if (listLength(server.monitors))
3305306f 1588 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
ed9b544e 1589 server.stat_numcommands++;
1590
1591 /* Prepare the client for the next command */
1592 if (c->flags & REDIS_CLOSE) {
1593 freeClient(c);
1594 return 0;
1595 }
1596 resetClient(c);
1597 return 1;
1598}
1599
87eca727 1600static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6208b3a7 1601 listNode *ln;
ed9b544e 1602 int outc = 0, j;
93ea3759 1603 robj **outv;
1604 /* (args*2)+1 is enough room for args, spaces, newlines */
1605 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1606
1607 if (argc <= REDIS_STATIC_ARGS) {
1608 outv = static_outv;
1609 } else {
1610 outv = zmalloc(sizeof(robj*)*(argc*2+1));
93ea3759 1611 }
ed9b544e 1612
1613 for (j = 0; j < argc; j++) {
1614 if (j != 0) outv[outc++] = shared.space;
1615 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1616 robj *lenobj;
1617
1618 lenobj = createObject(REDIS_STRING,
0ea663ea 1619 sdscatprintf(sdsempty(),"%d\r\n",
1620 stringObjectLen(argv[j])));
ed9b544e 1621 lenobj->refcount = 0;
1622 outv[outc++] = lenobj;
1623 }
1624 outv[outc++] = argv[j];
1625 }
1626 outv[outc++] = shared.crlf;
1627
40d224a9 1628 /* Increment all the refcounts at start and decrement at end in order to
1629 * be sure to free objects if there is no slave in a replication state
1630 * able to be feed with commands */
1631 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
6208b3a7 1632 listRewind(slaves);
1633 while((ln = listYield(slaves))) {
ed9b544e 1634 redisClient *slave = ln->value;
40d224a9 1635
1636 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 1637 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 1638
1639 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 1640 if (slave->slaveseldb != dictid) {
1641 robj *selectcmd;
1642
1643 switch(dictid) {
1644 case 0: selectcmd = shared.select0; break;
1645 case 1: selectcmd = shared.select1; break;
1646 case 2: selectcmd = shared.select2; break;
1647 case 3: selectcmd = shared.select3; break;
1648 case 4: selectcmd = shared.select4; break;
1649 case 5: selectcmd = shared.select5; break;
1650 case 6: selectcmd = shared.select6; break;
1651 case 7: selectcmd = shared.select7; break;
1652 case 8: selectcmd = shared.select8; break;
1653 case 9: selectcmd = shared.select9; break;
1654 default:
1655 selectcmd = createObject(REDIS_STRING,
1656 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1657 selectcmd->refcount = 0;
1658 break;
1659 }
1660 addReply(slave,selectcmd);
1661 slave->slaveseldb = dictid;
1662 }
1663 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 1664 }
40d224a9 1665 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 1666 if (outv != static_outv) zfree(outv);
ed9b544e 1667}
1668
44b38ef4 1669/* TODO: translate EXPIREs into EXPIRETOs */
1670static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
1671 sds buf = sdsempty();
1672 int j;
1673 ssize_t nwritten;
48f0308a 1674 time_t now;
4e141d5a 1675 robj *tmpargv[3];
44b38ef4 1676
1677 /* The DB this command was targetting is not the same as the last command
1678 * we appendend. To issue a SELECT command is needed. */
1679 if (dictid != server.appendseldb) {
1680 char seldb[64];
1681
1682 snprintf(seldb,sizeof(seldb),"%d",dictid);
1683 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
1684 strlen(seldb),seldb);
16f92547 1685 server.appendseldb = dictid;
44b38ef4 1686 }
4e141d5a 1687
1688 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
1689 * EXPIREs into EXPIREATs calls */
1690 if (cmd->proc == expireCommand) {
1691 long when;
1692
1693 tmpargv[0] = createStringObject("EXPIREAT",8);
1694 tmpargv[1] = argv[1];
1695 incrRefCount(argv[1]);
1696 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
1697 tmpargv[2] = createObject(REDIS_STRING,
1698 sdscatprintf(sdsempty(),"%ld",when));
1699 argv = tmpargv;
1700 }
1701
44b38ef4 1702 /* Append the actual command */
1703 buf = sdscatprintf(buf,"*%d\r\n",argc);
1704 for (j = 0; j < argc; j++) {
1705 robj *o = argv[j];
1706
1707 if (o->encoding != REDIS_ENCODING_RAW)
1708 o = getDecodedObject(o);
1709 buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr));
1710 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
1711 buf = sdscatlen(buf,"\r\n",2);
1712 if (o != argv[j])
1713 decrRefCount(o);
1714 }
4e141d5a 1715
1716 /* Free the objects from the modified argv for EXPIREAT */
1717 if (cmd->proc == expireCommand) {
1718 for (j = 0; j < 3; j++)
1719 decrRefCount(argv[j]);
1720 }
1721
44b38ef4 1722 /* We want to perform a single write. This should be guaranteed atomic
1723 * at least if the filesystem we are writing is a real physical one.
1724 * While this will save us against the server being killed I don't think
1725 * there is much to do about the whole server stopping for power problems
1726 * or alike */
1727 nwritten = write(server.appendfd,buf,sdslen(buf));
71eba477 1728 if (nwritten != (signed)sdslen(buf)) {
44b38ef4 1729 /* Ooops, we are in troubles. The best thing to do for now is
1730 * to simply exit instead to give the illusion that everything is
1731 * working as expected. */
1732 if (nwritten == -1) {
1733 redisLog(REDIS_WARNING,"Aborting on error writing to the append-only file: %s",strerror(errno));
1734 } else {
1735 redisLog(REDIS_WARNING,"Aborting on short write while writing to the append-only file: %s",strerror(errno));
1736 }
1737 abort();
48f0308a 1738 }
1739 now = time(NULL);
1740 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
1741 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
1742 now-server.lastfsync > 1))
1743 {
1744 fsync(server.appendfd); /* Let's try to get this data on the disk */
1745 server.lastfsync = now;
1746 }
44b38ef4 1747}
1748
638e42ac 1749static void processInputBuffer(redisClient *c) {
ed9b544e 1750again:
1751 if (c->bulklen == -1) {
1752 /* Read the first line of the query */
1753 char *p = strchr(c->querybuf,'\n');
1754 size_t querylen;
644fafa3 1755
ed9b544e 1756 if (p) {
1757 sds query, *argv;
1758 int argc, j;
1759
1760 query = c->querybuf;
1761 c->querybuf = sdsempty();
1762 querylen = 1+(p-(query));
1763 if (sdslen(query) > querylen) {
1764 /* leave data after the first line of the query in the buffer */
1765 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1766 }
1767 *p = '\0'; /* remove "\n" */
1768 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1769 sdsupdatelen(query);
1770
1771 /* Now we can split the query in arguments */
1772 if (sdslen(query) == 0) {
1773 /* Ignore empty query */
1774 sdsfree(query);
1775 return;
1776 }
1777 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 1778 sdsfree(query);
1779
1780 if (c->argv) zfree(c->argv);
1781 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 1782
1783 for (j = 0; j < argc; j++) {
ed9b544e 1784 if (sdslen(argv[j])) {
1785 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1786 c->argc++;
1787 } else {
1788 sdsfree(argv[j]);
1789 }
1790 }
1791 zfree(argv);
1792 /* Execute the command. If the client is still valid
1793 * after processCommand() return and there is something
1794 * on the query buffer try to process the next command. */
af807d87 1795 if (c->argc && processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1796 return;
644fafa3 1797 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
ed9b544e 1798 redisLog(REDIS_DEBUG, "Client protocol error");
1799 freeClient(c);
1800 return;
1801 }
1802 } else {
1803 /* Bulk read handling. Note that if we are at this point
1804 the client already sent a command terminated with a newline,
1805 we are reading the bulk data that is actually the last
1806 argument of the command. */
1807 int qbl = sdslen(c->querybuf);
1808
1809 if (c->bulklen <= qbl) {
1810 /* Copy everything but the final CRLF as final argument */
1811 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1812 c->argc++;
1813 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 1814 /* Process the command. If the client is still valid after
1815 * the processing and there is more data in the buffer
1816 * try to parse it. */
1817 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 1818 return;
1819 }
1820 }
1821}
1822
638e42ac 1823static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1824 redisClient *c = (redisClient*) privdata;
1825 char buf[REDIS_IOBUF_LEN];
1826 int nread;
1827 REDIS_NOTUSED(el);
1828 REDIS_NOTUSED(mask);
1829
1830 nread = read(fd, buf, REDIS_IOBUF_LEN);
1831 if (nread == -1) {
1832 if (errno == EAGAIN) {
1833 nread = 0;
1834 } else {
1835 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1836 freeClient(c);
1837 return;
1838 }
1839 } else if (nread == 0) {
1840 redisLog(REDIS_DEBUG, "Client closed connection");
1841 freeClient(c);
1842 return;
1843 }
1844 if (nread) {
1845 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1846 c->lastinteraction = time(NULL);
1847 } else {
1848 return;
1849 }
1850 processInputBuffer(c);
1851}
1852
ed9b544e 1853static int selectDb(redisClient *c, int id) {
1854 if (id < 0 || id >= server.dbnum)
1855 return REDIS_ERR;
3305306f 1856 c->db = &server.db[id];
ed9b544e 1857 return REDIS_OK;
1858}
1859
40d224a9 1860static void *dupClientReplyValue(void *o) {
1861 incrRefCount((robj*)o);
1862 return 0;
1863}
1864
ed9b544e 1865static redisClient *createClient(int fd) {
1866 redisClient *c = zmalloc(sizeof(*c));
1867
1868 anetNonBlock(NULL,fd);
1869 anetTcpNoDelay(NULL,fd);
1870 if (!c) return NULL;
1871 selectDb(c,0);
1872 c->fd = fd;
1873 c->querybuf = sdsempty();
1874 c->argc = 0;
93ea3759 1875 c->argv = NULL;
ed9b544e 1876 c->bulklen = -1;
e8a74421 1877 c->multibulk = 0;
1878 c->mbargc = 0;
1879 c->mbargv = NULL;
ed9b544e 1880 c->sentlen = 0;
1881 c->flags = 0;
1882 c->lastinteraction = time(NULL);
abcb223e 1883 c->authenticated = 0;
40d224a9 1884 c->replstate = REDIS_REPL_NONE;
6b47e12e 1885 c->reply = listCreate();
ed9b544e 1886 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 1887 listSetDupMethod(c->reply,dupClientReplyValue);
ed9b544e 1888 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1889 readQueryFromClient, c, NULL) == AE_ERR) {
1890 freeClient(c);
1891 return NULL;
1892 }
6b47e12e 1893 listAddNodeTail(server.clients,c);
ed9b544e 1894 return c;
1895}
1896
1897static void addReply(redisClient *c, robj *obj) {
1898 if (listLength(c->reply) == 0 &&
6208b3a7 1899 (c->replstate == REDIS_REPL_NONE ||
1900 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 1901 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
1902 sendReplyToClient, c, NULL) == AE_ERR) return;
942a3961 1903 if (obj->encoding != REDIS_ENCODING_RAW) {
1904 obj = getDecodedObject(obj);
1905 } else {
1906 incrRefCount(obj);
1907 }
6b47e12e 1908 listAddNodeTail(c->reply,obj);
ed9b544e 1909}
1910
1911static void addReplySds(redisClient *c, sds s) {
1912 robj *o = createObject(REDIS_STRING,s);
1913 addReply(c,o);
1914 decrRefCount(o);
1915}
1916
942a3961 1917static void addReplyBulkLen(redisClient *c, robj *obj) {
1918 size_t len;
1919
1920 if (obj->encoding == REDIS_ENCODING_RAW) {
1921 len = sdslen(obj->ptr);
1922 } else {
1923 long n = (long)obj->ptr;
1924
1925 len = 1;
1926 if (n < 0) {
1927 len++;
1928 n = -n;
1929 }
1930 while((n = n/10) != 0) {
1931 len++;
1932 }
1933 }
1934 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",len));
1935}
1936
ed9b544e 1937static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
1938 int cport, cfd;
1939 char cip[128];
285add55 1940 redisClient *c;
ed9b544e 1941 REDIS_NOTUSED(el);
1942 REDIS_NOTUSED(mask);
1943 REDIS_NOTUSED(privdata);
1944
1945 cfd = anetAccept(server.neterr, fd, cip, &cport);
1946 if (cfd == AE_ERR) {
1947 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
1948 return;
1949 }
1950 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
285add55 1951 if ((c = createClient(cfd)) == NULL) {
ed9b544e 1952 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
1953 close(cfd); /* May be already closed, just ingore errors */
1954 return;
1955 }
285add55 1956 /* If maxclient directive is set and this is one client more... close the
1957 * connection. Note that we create the client instead to check before
1958 * for this condition, since now the socket is already set in nonblocking
1959 * mode and we can send an error for free using the Kernel I/O */
1960 if (server.maxclients && listLength(server.clients) > server.maxclients) {
1961 char *err = "-ERR max number of clients reached\r\n";
1962
1963 /* That's a best effort error message, don't check write errors */
d7fc9edb 1964 (void) write(c->fd,err,strlen(err));
285add55 1965 freeClient(c);
1966 return;
1967 }
ed9b544e 1968 server.stat_numconnections++;
1969}
1970
1971/* ======================= Redis objects implementation ===================== */
1972
1973static robj *createObject(int type, void *ptr) {
1974 robj *o;
1975
1976 if (listLength(server.objfreelist)) {
1977 listNode *head = listFirst(server.objfreelist);
1978 o = listNodeValue(head);
1979 listDelNode(server.objfreelist,head);
1980 } else {
1981 o = zmalloc(sizeof(*o));
1982 }
ed9b544e 1983 o->type = type;
942a3961 1984 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 1985 o->ptr = ptr;
1986 o->refcount = 1;
1987 return o;
1988}
1989
1990static robj *createStringObject(char *ptr, size_t len) {
1991 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
1992}
1993
1994static robj *createListObject(void) {
1995 list *l = listCreate();
1996
ed9b544e 1997 listSetFreeMethod(l,decrRefCount);
1998 return createObject(REDIS_LIST,l);
1999}
2000
2001static robj *createSetObject(void) {
2002 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 2003 return createObject(REDIS_SET,d);
2004}
2005
1812e024 2006static robj *createZsetObject(void) {
6b47e12e 2007 zset *zs = zmalloc(sizeof(*zs));
2008
2009 zs->dict = dictCreate(&zsetDictType,NULL);
2010 zs->zsl = zslCreate();
2011 return createObject(REDIS_ZSET,zs);
1812e024 2012}
2013
ed9b544e 2014static void freeStringObject(robj *o) {
942a3961 2015 if (o->encoding == REDIS_ENCODING_RAW) {
2016 sdsfree(o->ptr);
2017 }
ed9b544e 2018}
2019
2020static void freeListObject(robj *o) {
2021 listRelease((list*) o->ptr);
2022}
2023
2024static void freeSetObject(robj *o) {
2025 dictRelease((dict*) o->ptr);
2026}
2027
fd8ccf44 2028static void freeZsetObject(robj *o) {
2029 zset *zs = o->ptr;
2030
2031 dictRelease(zs->dict);
2032 zslFree(zs->zsl);
2033 zfree(zs);
2034}
2035
ed9b544e 2036static void freeHashObject(robj *o) {
2037 dictRelease((dict*) o->ptr);
2038}
2039
2040static void incrRefCount(robj *o) {
2041 o->refcount++;
94754ccc 2042#ifdef DEBUG_REFCOUNT
2043 if (o->type == REDIS_STRING)
2044 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
2045#endif
ed9b544e 2046}
2047
2048static void decrRefCount(void *obj) {
2049 robj *o = obj;
94754ccc 2050
2051#ifdef DEBUG_REFCOUNT
2052 if (o->type == REDIS_STRING)
2053 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
2054#endif
ed9b544e 2055 if (--(o->refcount) == 0) {
2056 switch(o->type) {
2057 case REDIS_STRING: freeStringObject(o); break;
2058 case REDIS_LIST: freeListObject(o); break;
2059 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 2060 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 2061 case REDIS_HASH: freeHashObject(o); break;
2062 default: assert(0 != 0); break;
2063 }
2064 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2065 !listAddNodeHead(server.objfreelist,o))
2066 zfree(o);
2067 }
2068}
2069
942a3961 2070static robj *lookupKey(redisDb *db, robj *key) {
2071 dictEntry *de = dictFind(db->dict,key);
2072 return de ? dictGetEntryVal(de) : NULL;
2073}
2074
2075static robj *lookupKeyRead(redisDb *db, robj *key) {
2076 expireIfNeeded(db,key);
2077 return lookupKey(db,key);
2078}
2079
2080static robj *lookupKeyWrite(redisDb *db, robj *key) {
2081 deleteIfVolatile(db,key);
2082 return lookupKey(db,key);
2083}
2084
2085static int deleteKey(redisDb *db, robj *key) {
2086 int retval;
2087
2088 /* We need to protect key from destruction: after the first dictDelete()
2089 * it may happen that 'key' is no longer valid if we don't increment
2090 * it's count. This may happen when we get the object reference directly
2091 * from the hash table with dictRandomKey() or dict iterators */
2092 incrRefCount(key);
2093 if (dictSize(db->expires)) dictDelete(db->expires,key);
2094 retval = dictDelete(db->dict,key);
2095 decrRefCount(key);
2096
2097 return retval == DICT_OK;
2098}
2099
10c43610 2100/* Try to share an object against the shared objects pool */
2101static robj *tryObjectSharing(robj *o) {
2102 struct dictEntry *de;
2103 unsigned long c;
2104
3305306f 2105 if (o == NULL || server.shareobjects == 0) return o;
10c43610 2106
2107 assert(o->type == REDIS_STRING);
2108 de = dictFind(server.sharingpool,o);
2109 if (de) {
2110 robj *shared = dictGetEntryKey(de);
2111
2112 c = ((unsigned long) dictGetEntryVal(de))+1;
2113 dictGetEntryVal(de) = (void*) c;
2114 incrRefCount(shared);
2115 decrRefCount(o);
2116 return shared;
2117 } else {
2118 /* Here we are using a stream algorihtm: Every time an object is
2119 * shared we increment its count, everytime there is a miss we
2120 * recrement the counter of a random object. If this object reaches
2121 * zero we remove the object and put the current object instead. */
3305306f 2122 if (dictSize(server.sharingpool) >=
10c43610 2123 server.sharingpoolsize) {
2124 de = dictGetRandomKey(server.sharingpool);
2125 assert(de != NULL);
2126 c = ((unsigned long) dictGetEntryVal(de))-1;
2127 dictGetEntryVal(de) = (void*) c;
2128 if (c == 0) {
2129 dictDelete(server.sharingpool,de->key);
2130 }
2131 } else {
2132 c = 0; /* If the pool is empty we want to add this object */
2133 }
2134 if (c == 0) {
2135 int retval;
2136
2137 retval = dictAdd(server.sharingpool,o,(void*)1);
2138 assert(retval == DICT_OK);
2139 incrRefCount(o);
2140 }
2141 return o;
2142 }
2143}
2144
724a51b1 2145/* Check if the nul-terminated string 's' can be represented by a long
2146 * (that is, is a number that fits into long without any other space or
2147 * character before or after the digits).
2148 *
2149 * If so, the function returns REDIS_OK and *longval is set to the value
2150 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 2151static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 2152 char buf[32], *endptr;
2153 long value;
2154 int slen;
2155
2156 value = strtol(s, &endptr, 10);
2157 if (endptr[0] != '\0') return REDIS_ERR;
2158 slen = snprintf(buf,32,"%ld",value);
2159
2160 /* If the number converted back into a string is not identical
2161 * then it's not possible to encode the string as integer */
f69f2cba 2162 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 2163 if (longval) *longval = value;
2164 return REDIS_OK;
2165}
2166
942a3961 2167/* Try to encode a string object in order to save space */
2168static int tryObjectEncoding(robj *o) {
2169 long value;
942a3961 2170 sds s = o->ptr;
3305306f 2171
942a3961 2172 if (o->encoding != REDIS_ENCODING_RAW)
2173 return REDIS_ERR; /* Already encoded */
3305306f 2174
942a3961 2175 /* It's not save to encode shared objects: shared objects can be shared
2176 * everywhere in the "object space" of Redis. Encoded objects can only
2177 * appear as "values" (and not, for instance, as keys) */
2178 if (o->refcount > 1) return REDIS_ERR;
3305306f 2179
942a3961 2180 /* Currently we try to encode only strings */
2181 assert(o->type == REDIS_STRING);
94754ccc 2182
724a51b1 2183 /* Check if we can represent this string as a long integer */
2184 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
942a3961 2185
2186 /* Ok, this object can be encoded */
2187 o->encoding = REDIS_ENCODING_INT;
2188 sdsfree(o->ptr);
2189 o->ptr = (void*) value;
2190 return REDIS_OK;
2191}
2192
2193/* Get a decoded version of an encoded object (returned as a new object) */
2194static robj *getDecodedObject(const robj *o) {
2195 robj *dec;
2196
2197 assert(o->encoding != REDIS_ENCODING_RAW);
2198 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2199 char buf[32];
2200
2201 snprintf(buf,32,"%ld",(long)o->ptr);
2202 dec = createStringObject(buf,strlen(buf));
2203 return dec;
2204 } else {
2205 assert(1 != 1);
2206 }
3305306f 2207}
2208
d7f43c08 2209/* Compare two string objects via strcmp() or alike.
2210 * Note that the objects may be integer-encoded. In such a case we
2211 * use snprintf() to get a string representation of the numbers on the stack
2212 * and compare the strings, it's much faster than calling getDecodedObject(). */
724a51b1 2213static int compareStringObjects(robj *a, robj *b) {
2214 assert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 2215 char bufa[128], bufb[128], *astr, *bstr;
2216 int bothsds = 1;
724a51b1 2217
e197b441 2218 if (a == b) return 0;
d7f43c08 2219 if (a->encoding != REDIS_ENCODING_RAW) {
2220 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2221 astr = bufa;
2222 bothsds = 0;
724a51b1 2223 } else {
d7f43c08 2224 astr = a->ptr;
724a51b1 2225 }
d7f43c08 2226 if (b->encoding != REDIS_ENCODING_RAW) {
2227 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2228 bstr = bufb;
2229 bothsds = 0;
2230 } else {
2231 bstr = b->ptr;
2232 }
2233 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 2234}
2235
0ea663ea 2236static size_t stringObjectLen(robj *o) {
2237 assert(o->type == REDIS_STRING);
2238 if (o->encoding == REDIS_ENCODING_RAW) {
2239 return sdslen(o->ptr);
2240 } else {
2241 char buf[32];
2242
2243 return snprintf(buf,32,"%ld",(long)o->ptr);
2244 }
2245}
2246
ed9b544e 2247/*============================ DB saving/loading ============================ */
2248
f78fd11b 2249static int rdbSaveType(FILE *fp, unsigned char type) {
2250 if (fwrite(&type,1,1,fp) == 0) return -1;
2251 return 0;
2252}
2253
bb32ede5 2254static int rdbSaveTime(FILE *fp, time_t t) {
2255 int32_t t32 = (int32_t) t;
2256 if (fwrite(&t32,4,1,fp) == 0) return -1;
2257 return 0;
2258}
2259
e3566d4b 2260/* check rdbLoadLen() comments for more info */
f78fd11b 2261static int rdbSaveLen(FILE *fp, uint32_t len) {
2262 unsigned char buf[2];
2263
2264 if (len < (1<<6)) {
2265 /* Save a 6 bit len */
10c43610 2266 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 2267 if (fwrite(buf,1,1,fp) == 0) return -1;
2268 } else if (len < (1<<14)) {
2269 /* Save a 14 bit len */
10c43610 2270 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 2271 buf[1] = len&0xFF;
17be1a4a 2272 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 2273 } else {
2274 /* Save a 32 bit len */
10c43610 2275 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 2276 if (fwrite(buf,1,1,fp) == 0) return -1;
2277 len = htonl(len);
2278 if (fwrite(&len,4,1,fp) == 0) return -1;
2279 }
2280 return 0;
2281}
2282
e3566d4b 2283/* String objects in the form "2391" "-100" without any space and with a
2284 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2285 * encoded as integers to save space */
56906eef 2286static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
e3566d4b 2287 long long value;
2288 char *endptr, buf[32];
2289
2290 /* Check if it's possible to encode this value as a number */
2291 value = strtoll(s, &endptr, 10);
2292 if (endptr[0] != '\0') return 0;
2293 snprintf(buf,32,"%lld",value);
2294
2295 /* If the number converted back into a string is not identical
2296 * then it's not possible to encode the string as integer */
2297 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2298
2299 /* Finally check if it fits in our ranges */
2300 if (value >= -(1<<7) && value <= (1<<7)-1) {
2301 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2302 enc[1] = value&0xFF;
2303 return 2;
2304 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2305 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2306 enc[1] = value&0xFF;
2307 enc[2] = (value>>8)&0xFF;
2308 return 3;
2309 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2310 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2311 enc[1] = value&0xFF;
2312 enc[2] = (value>>8)&0xFF;
2313 enc[3] = (value>>16)&0xFF;
2314 enc[4] = (value>>24)&0xFF;
2315 return 5;
2316 } else {
2317 return 0;
2318 }
2319}
2320
774e3047 2321static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2322 unsigned int comprlen, outlen;
2323 unsigned char byte;
2324 void *out;
2325
2326 /* We require at least four bytes compression for this to be worth it */
2327 outlen = sdslen(obj->ptr)-4;
2328 if (outlen <= 0) return 0;
3a2694c4 2329 if ((out = zmalloc(outlen+1)) == NULL) return 0;
774e3047 2330 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2331 if (comprlen == 0) {
88e85998 2332 zfree(out);
774e3047 2333 return 0;
2334 }
2335 /* Data compressed! Let's save it on disk */
2336 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2337 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2338 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2339 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2340 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 2341 zfree(out);
774e3047 2342 return comprlen;
2343
2344writeerr:
88e85998 2345 zfree(out);
774e3047 2346 return -1;
2347}
2348
e3566d4b 2349/* Save a string objet as [len][data] on disk. If the object is a string
2350 * representation of an integer value we try to safe it in a special form */
942a3961 2351static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2352 size_t len;
e3566d4b 2353 int enclen;
10c43610 2354
942a3961 2355 len = sdslen(obj->ptr);
2356
774e3047 2357 /* Try integer encoding */
e3566d4b 2358 if (len <= 11) {
2359 unsigned char buf[5];
2360 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2361 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2362 return 0;
2363 }
2364 }
774e3047 2365
2366 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 2367 * aaaaaaaaaaaaaaaaaa so skip it */
942a3961 2368 if (len > 20) {
774e3047 2369 int retval;
2370
2371 retval = rdbSaveLzfStringObject(fp,obj);
2372 if (retval == -1) return -1;
2373 if (retval > 0) return 0;
2374 /* retval == 0 means data can't be compressed, save the old way */
2375 }
2376
2377 /* Store verbatim */
10c43610 2378 if (rdbSaveLen(fp,len) == -1) return -1;
2379 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2380 return 0;
2381}
2382
942a3961 2383/* Like rdbSaveStringObjectRaw() but handle encoded objects */
2384static int rdbSaveStringObject(FILE *fp, robj *obj) {
2385 int retval;
2386 robj *dec;
2387
2388 if (obj->encoding != REDIS_ENCODING_RAW) {
2389 dec = getDecodedObject(obj);
2390 retval = rdbSaveStringObjectRaw(fp,dec);
2391 decrRefCount(dec);
2392 return retval;
2393 } else {
2394 return rdbSaveStringObjectRaw(fp,obj);
2395 }
2396}
2397
a7866db6 2398/* Save a double value. Doubles are saved as strings prefixed by an unsigned
2399 * 8 bit integer specifing the length of the representation.
2400 * This 8 bit integer has special values in order to specify the following
2401 * conditions:
2402 * 253: not a number
2403 * 254: + inf
2404 * 255: - inf
2405 */
2406static int rdbSaveDoubleValue(FILE *fp, double val) {
2407 unsigned char buf[128];
2408 int len;
2409
2410 if (isnan(val)) {
2411 buf[0] = 253;
2412 len = 1;
2413 } else if (!isfinite(val)) {
2414 len = 1;
2415 buf[0] = (val < 0) ? 255 : 254;
2416 } else {
2417 snprintf((char*)buf+1,sizeof(buf)-1,"%.16g",val);
2418 buf[0] = strlen((char*)buf);
2419 len = buf[0]+1;
2420 }
2421 if (fwrite(buf,len,1,fp) == 0) return -1;
2422 return 0;
2423}
2424
ed9b544e 2425/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 2426static int rdbSave(char *filename) {
ed9b544e 2427 dictIterator *di = NULL;
2428 dictEntry *de;
ed9b544e 2429 FILE *fp;
2430 char tmpfile[256];
2431 int j;
bb32ede5 2432 time_t now = time(NULL);
ed9b544e 2433
a3b21203 2434 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 2435 fp = fopen(tmpfile,"w");
2436 if (!fp) {
2437 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2438 return REDIS_ERR;
2439 }
f78fd11b 2440 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 2441 for (j = 0; j < server.dbnum; j++) {
bb32ede5 2442 redisDb *db = server.db+j;
2443 dict *d = db->dict;
3305306f 2444 if (dictSize(d) == 0) continue;
ed9b544e 2445 di = dictGetIterator(d);
2446 if (!di) {
2447 fclose(fp);
2448 return REDIS_ERR;
2449 }
2450
2451 /* Write the SELECT DB opcode */
f78fd11b 2452 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2453 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 2454
2455 /* Iterate this DB writing every entry */
2456 while((de = dictNext(di)) != NULL) {
2457 robj *key = dictGetEntryKey(de);
2458 robj *o = dictGetEntryVal(de);
bb32ede5 2459 time_t expiretime = getExpire(db,key);
2460
2461 /* Save the expire time */
2462 if (expiretime != -1) {
2463 /* If this key is already expired skip it */
2464 if (expiretime < now) continue;
2465 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2466 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2467 }
2468 /* Save the key and associated value */
f78fd11b 2469 if (rdbSaveType(fp,o->type) == -1) goto werr;
10c43610 2470 if (rdbSaveStringObject(fp,key) == -1) goto werr;
f78fd11b 2471 if (o->type == REDIS_STRING) {
ed9b544e 2472 /* Save a string value */
10c43610 2473 if (rdbSaveStringObject(fp,o) == -1) goto werr;
f78fd11b 2474 } else if (o->type == REDIS_LIST) {
ed9b544e 2475 /* Save a list value */
2476 list *list = o->ptr;
6208b3a7 2477 listNode *ln;
ed9b544e 2478
6208b3a7 2479 listRewind(list);
f78fd11b 2480 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
6208b3a7 2481 while((ln = listYield(list))) {
ed9b544e 2482 robj *eleobj = listNodeValue(ln);
f78fd11b 2483
10c43610 2484 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2485 }
f78fd11b 2486 } else if (o->type == REDIS_SET) {
ed9b544e 2487 /* Save a set value */
2488 dict *set = o->ptr;
2489 dictIterator *di = dictGetIterator(set);
2490 dictEntry *de;
2491
3305306f 2492 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
ed9b544e 2493 while((de = dictNext(di)) != NULL) {
10c43610 2494 robj *eleobj = dictGetEntryKey(de);
ed9b544e 2495
10c43610 2496 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
ed9b544e 2497 }
2498 dictReleaseIterator(di);
2b59cfdf 2499 } else if (o->type == REDIS_ZSET) {
2500 /* Save a set value */
2501 zset *zs = o->ptr;
2502 dictIterator *di = dictGetIterator(zs->dict);
2503 dictEntry *de;
2504
2505 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
2506 while((de = dictNext(di)) != NULL) {
2507 robj *eleobj = dictGetEntryKey(de);
2508 double *score = dictGetEntryVal(de);
2509
2510 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2511 if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
2512 }
2513 dictReleaseIterator(di);
ed9b544e 2514 } else {
2515 assert(0 != 0);
2516 }
2517 }
2518 dictReleaseIterator(di);
2519 }
2520 /* EOF opcode */
f78fd11b 2521 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2522
2523 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 2524 fflush(fp);
2525 fsync(fileno(fp));
2526 fclose(fp);
2527
2528 /* Use RENAME to make sure the DB file is changed atomically only
2529 * if the generate DB file is ok. */
2530 if (rename(tmpfile,filename) == -1) {
325d1eb4 2531 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 2532 unlink(tmpfile);
2533 return REDIS_ERR;
2534 }
2535 redisLog(REDIS_NOTICE,"DB saved on disk");
2536 server.dirty = 0;
2537 server.lastsave = time(NULL);
2538 return REDIS_OK;
2539
2540werr:
2541 fclose(fp);
2542 unlink(tmpfile);
2543 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2544 if (di) dictReleaseIterator(di);
2545 return REDIS_ERR;
2546}
2547
f78fd11b 2548static int rdbSaveBackground(char *filename) {
ed9b544e 2549 pid_t childpid;
2550
2551 if (server.bgsaveinprogress) return REDIS_ERR;
2552 if ((childpid = fork()) == 0) {
2553 /* Child */
2554 close(server.fd);
f78fd11b 2555 if (rdbSave(filename) == REDIS_OK) {
ed9b544e 2556 exit(0);
2557 } else {
2558 exit(1);
2559 }
2560 } else {
2561 /* Parent */
5a7c647e 2562 if (childpid == -1) {
2563 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2564 strerror(errno));
2565 return REDIS_ERR;
2566 }
ed9b544e 2567 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
2568 server.bgsaveinprogress = 1;
9f3c422c 2569 server.bgsavechildpid = childpid;
ed9b544e 2570 return REDIS_OK;
2571 }
2572 return REDIS_OK; /* unreached */
2573}
2574
a3b21203 2575static void rdbRemoveTempFile(pid_t childpid) {
2576 char tmpfile[256];
2577
2578 snprintf(tmpfile,256,"temp-%d.rdb", (int) childpid);
2579 unlink(tmpfile);
2580}
2581
f78fd11b 2582static int rdbLoadType(FILE *fp) {
2583 unsigned char type;
7b45bfb2 2584 if (fread(&type,1,1,fp) == 0) return -1;
2585 return type;
2586}
2587
bb32ede5 2588static time_t rdbLoadTime(FILE *fp) {
2589 int32_t t32;
2590 if (fread(&t32,4,1,fp) == 0) return -1;
2591 return (time_t) t32;
2592}
2593
e3566d4b 2594/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
2595 * of this file for a description of how this are stored on disk.
2596 *
2597 * isencoded is set to 1 if the readed length is not actually a length but
2598 * an "encoding type", check the above comments for more info */
2599static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
f78fd11b 2600 unsigned char buf[2];
2601 uint32_t len;
2602
e3566d4b 2603 if (isencoded) *isencoded = 0;
f78fd11b 2604 if (rdbver == 0) {
2605 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2606 return ntohl(len);
2607 } else {
17be1a4a 2608 int type;
2609
f78fd11b 2610 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
17be1a4a 2611 type = (buf[0]&0xC0)>>6;
2612 if (type == REDIS_RDB_6BITLEN) {
f78fd11b 2613 /* Read a 6 bit len */
e3566d4b 2614 return buf[0]&0x3F;
2615 } else if (type == REDIS_RDB_ENCVAL) {
2616 /* Read a 6 bit len encoding type */
2617 if (isencoded) *isencoded = 1;
2618 return buf[0]&0x3F;
17be1a4a 2619 } else if (type == REDIS_RDB_14BITLEN) {
f78fd11b 2620 /* Read a 14 bit len */
2621 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
2622 return ((buf[0]&0x3F)<<8)|buf[1];
2623 } else {
2624 /* Read a 32 bit len */
2625 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2626 return ntohl(len);
2627 }
2628 }
f78fd11b 2629}
2630
e3566d4b 2631static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
2632 unsigned char enc[4];
2633 long long val;
2634
2635 if (enctype == REDIS_RDB_ENC_INT8) {
2636 if (fread(enc,1,1,fp) == 0) return NULL;
2637 val = (signed char)enc[0];
2638 } else if (enctype == REDIS_RDB_ENC_INT16) {
2639 uint16_t v;
2640 if (fread(enc,2,1,fp) == 0) return NULL;
2641 v = enc[0]|(enc[1]<<8);
2642 val = (int16_t)v;
2643 } else if (enctype == REDIS_RDB_ENC_INT32) {
2644 uint32_t v;
2645 if (fread(enc,4,1,fp) == 0) return NULL;
2646 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2647 val = (int32_t)v;
2648 } else {
2649 val = 0; /* anti-warning */
2650 assert(0!=0);
2651 }
2652 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2653}
2654
88e85998 2655static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2656 unsigned int len, clen;
2657 unsigned char *c = NULL;
2658 sds val = NULL;
2659
2660 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2661 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2662 if ((c = zmalloc(clen)) == NULL) goto err;
2663 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2664 if (fread(c,clen,1,fp) == 0) goto err;
2665 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 2666 zfree(c);
88e85998 2667 return createObject(REDIS_STRING,val);
2668err:
2669 zfree(c);
2670 sdsfree(val);
2671 return NULL;
2672}
2673
e3566d4b 2674static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2675 int isencoded;
2676 uint32_t len;
f78fd11b 2677 sds val;
2678
e3566d4b 2679 len = rdbLoadLen(fp,rdbver,&isencoded);
2680 if (isencoded) {
2681 switch(len) {
2682 case REDIS_RDB_ENC_INT8:
2683 case REDIS_RDB_ENC_INT16:
2684 case REDIS_RDB_ENC_INT32:
3305306f 2685 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
88e85998 2686 case REDIS_RDB_ENC_LZF:
2687 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
e3566d4b 2688 default:
2689 assert(0!=0);
2690 }
2691 }
2692
f78fd11b 2693 if (len == REDIS_RDB_LENERR) return NULL;
2694 val = sdsnewlen(NULL,len);
2695 if (len && fread(val,len,1,fp) == 0) {
2696 sdsfree(val);
2697 return NULL;
2698 }
10c43610 2699 return tryObjectSharing(createObject(REDIS_STRING,val));
f78fd11b 2700}
2701
a7866db6 2702/* For information about double serialization check rdbSaveDoubleValue() */
2703static int rdbLoadDoubleValue(FILE *fp, double *val) {
2704 char buf[128];
2705 unsigned char len;
2706
2707 if (fread(&len,1,1,fp) == 0) return -1;
2708 switch(len) {
2709 case 255: *val = R_NegInf; return 0;
2710 case 254: *val = R_PosInf; return 0;
2711 case 253: *val = R_Nan; return 0;
2712 default:
2713 if (fread(buf,len,1,fp) == 0) return -1;
2714 sscanf(buf, "%lg", val);
2715 return 0;
2716 }
2717}
2718
f78fd11b 2719static int rdbLoad(char *filename) {
ed9b544e 2720 FILE *fp;
f78fd11b 2721 robj *keyobj = NULL;
2722 uint32_t dbid;
bb32ede5 2723 int type, retval, rdbver;
3305306f 2724 dict *d = server.db[0].dict;
bb32ede5 2725 redisDb *db = server.db+0;
f78fd11b 2726 char buf[1024];
bb32ede5 2727 time_t expiretime = -1, now = time(NULL);
2728
ed9b544e 2729 fp = fopen(filename,"r");
2730 if (!fp) return REDIS_ERR;
2731 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 2732 buf[9] = '\0';
2733 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 2734 fclose(fp);
2735 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
2736 return REDIS_ERR;
2737 }
f78fd11b 2738 rdbver = atoi(buf+5);
2739 if (rdbver > 1) {
2740 fclose(fp);
2741 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
2742 return REDIS_ERR;
2743 }
ed9b544e 2744 while(1) {
2745 robj *o;
2746
2747 /* Read type. */
f78fd11b 2748 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 2749 if (type == REDIS_EXPIRETIME) {
2750 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
2751 /* We read the time so we need to read the object type again */
2752 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2753 }
ed9b544e 2754 if (type == REDIS_EOF) break;
2755 /* Handle SELECT DB opcode as a special case */
2756 if (type == REDIS_SELECTDB) {
e3566d4b 2757 if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2758 goto eoferr;
ed9b544e 2759 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 2760 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 2761 exit(1);
2762 }
bb32ede5 2763 db = server.db+dbid;
2764 d = db->dict;
ed9b544e 2765 continue;
2766 }
2767 /* Read key */
f78fd11b 2768 if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
ed9b544e 2769
2770 if (type == REDIS_STRING) {
2771 /* Read string value */
f78fd11b 2772 if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2773 tryObjectEncoding(o);
ed9b544e 2774 } else if (type == REDIS_LIST || type == REDIS_SET) {
2775 /* Read list/set value */
2776 uint32_t listlen;
f78fd11b 2777
e3566d4b 2778 if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
f78fd11b 2779 goto eoferr;
ed9b544e 2780 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
2781 /* Load every single element of the list/set */
2782 while(listlen--) {
2783 robj *ele;
2784
f78fd11b 2785 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
942a3961 2786 tryObjectEncoding(ele);
ed9b544e 2787 if (type == REDIS_LIST) {
6b47e12e 2788 listAddNodeTail((list*)o->ptr,ele);
ed9b544e 2789 } else {
6b47e12e 2790 dictAdd((dict*)o->ptr,ele,NULL);
ed9b544e 2791 }
ed9b544e 2792 }
2b59cfdf 2793 } else if (type == REDIS_ZSET) {
2794 /* Read list/set value */
2795 uint32_t zsetlen;
2796 zset *zs;
2797
2798 if ((zsetlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2799 goto eoferr;
2800 o = createZsetObject();
2801 zs = o->ptr;
2802 /* Load every single element of the list/set */
2803 while(zsetlen--) {
2804 robj *ele;
2805 double *score = zmalloc(sizeof(double));
2806
2807 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2808 tryObjectEncoding(ele);
2809 if (rdbLoadDoubleValue(fp,score) == -1) goto eoferr;
2810 dictAdd(zs->dict,ele,score);
2811 zslInsert(zs->zsl,*score,ele);
2812 incrRefCount(ele); /* added to skiplist */
2813 }
ed9b544e 2814 } else {
2815 assert(0 != 0);
2816 }
2817 /* Add the new object in the hash table */
f78fd11b 2818 retval = dictAdd(d,keyobj,o);
ed9b544e 2819 if (retval == DICT_ERR) {
f78fd11b 2820 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
ed9b544e 2821 exit(1);
2822 }
bb32ede5 2823 /* Set the expire time if needed */
2824 if (expiretime != -1) {
2825 setExpire(db,keyobj,expiretime);
2826 /* Delete this key if already expired */
2827 if (expiretime < now) deleteKey(db,keyobj);
2828 expiretime = -1;
2829 }
f78fd11b 2830 keyobj = o = NULL;
ed9b544e 2831 }
2832 fclose(fp);
2833 return REDIS_OK;
2834
2835eoferr: /* unexpected end of file is handled here with a fatal exit */
e3566d4b 2836 if (keyobj) decrRefCount(keyobj);
2837 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, exiting now.");
ed9b544e 2838 exit(1);
2839 return REDIS_ERR; /* Just to avoid warning */
2840}
2841
2842/*================================== Commands =============================== */
2843
abcb223e 2844static void authCommand(redisClient *c) {
2e77c2ee 2845 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
2846 c->authenticated = 1;
2847 addReply(c,shared.ok);
2848 } else {
2849 c->authenticated = 0;
fa4c0aba 2850 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
2851 }
2852}
2853
ed9b544e 2854static void pingCommand(redisClient *c) {
2855 addReply(c,shared.pong);
2856}
2857
2858static void echoCommand(redisClient *c) {
942a3961 2859 addReplyBulkLen(c,c->argv[1]);
ed9b544e 2860 addReply(c,c->argv[1]);
2861 addReply(c,shared.crlf);
2862}
2863
2864/*=================================== Strings =============================== */
2865
2866static void setGenericCommand(redisClient *c, int nx) {
2867 int retval;
2868
3305306f 2869 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2870 if (retval == DICT_ERR) {
2871 if (!nx) {
3305306f 2872 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 2873 incrRefCount(c->argv[2]);
2874 } else {
c937aa89 2875 addReply(c,shared.czero);
ed9b544e 2876 return;
2877 }
2878 } else {
2879 incrRefCount(c->argv[1]);
2880 incrRefCount(c->argv[2]);
2881 }
2882 server.dirty++;
3305306f 2883 removeExpire(c->db,c->argv[1]);
c937aa89 2884 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 2885}
2886
2887static void setCommand(redisClient *c) {
a4d1ba9a 2888 setGenericCommand(c,0);
ed9b544e 2889}
2890
2891static void setnxCommand(redisClient *c) {
a4d1ba9a 2892 setGenericCommand(c,1);
ed9b544e 2893}
2894
2895static void getCommand(redisClient *c) {
3305306f 2896 robj *o = lookupKeyRead(c->db,c->argv[1]);
2897
2898 if (o == NULL) {
c937aa89 2899 addReply(c,shared.nullbulk);
ed9b544e 2900 } else {
ed9b544e 2901 if (o->type != REDIS_STRING) {
c937aa89 2902 addReply(c,shared.wrongtypeerr);
ed9b544e 2903 } else {
942a3961 2904 addReplyBulkLen(c,o);
ed9b544e 2905 addReply(c,o);
2906 addReply(c,shared.crlf);
2907 }
2908 }
2909}
2910
f6b141c5 2911static void getsetCommand(redisClient *c) {
a431eb74 2912 getCommand(c);
2913 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
2914 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2915 } else {
2916 incrRefCount(c->argv[1]);
2917 }
2918 incrRefCount(c->argv[2]);
2919 server.dirty++;
2920 removeExpire(c->db,c->argv[1]);
2921}
2922
70003d28 2923static void mgetCommand(redisClient *c) {
70003d28 2924 int j;
2925
c937aa89 2926 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 2927 for (j = 1; j < c->argc; j++) {
3305306f 2928 robj *o = lookupKeyRead(c->db,c->argv[j]);
2929 if (o == NULL) {
c937aa89 2930 addReply(c,shared.nullbulk);
70003d28 2931 } else {
70003d28 2932 if (o->type != REDIS_STRING) {
c937aa89 2933 addReply(c,shared.nullbulk);
70003d28 2934 } else {
942a3961 2935 addReplyBulkLen(c,o);
70003d28 2936 addReply(c,o);
2937 addReply(c,shared.crlf);
2938 }
2939 }
2940 }
2941}
2942
d68ed120 2943static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 2944 long long value;
2945 int retval;
2946 robj *o;
2947
3305306f 2948 o = lookupKeyWrite(c->db,c->argv[1]);
2949 if (o == NULL) {
ed9b544e 2950 value = 0;
2951 } else {
ed9b544e 2952 if (o->type != REDIS_STRING) {
2953 value = 0;
2954 } else {
2955 char *eptr;
2956
942a3961 2957 if (o->encoding == REDIS_ENCODING_RAW)
2958 value = strtoll(o->ptr, &eptr, 10);
2959 else if (o->encoding == REDIS_ENCODING_INT)
2960 value = (long)o->ptr;
2961 else
2962 assert(1 != 1);
ed9b544e 2963 }
2964 }
2965
2966 value += incr;
2967 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
942a3961 2968 tryObjectEncoding(o);
3305306f 2969 retval = dictAdd(c->db->dict,c->argv[1],o);
ed9b544e 2970 if (retval == DICT_ERR) {
3305306f 2971 dictReplace(c->db->dict,c->argv[1],o);
2972 removeExpire(c->db,c->argv[1]);
ed9b544e 2973 } else {
2974 incrRefCount(c->argv[1]);
2975 }
2976 server.dirty++;
c937aa89 2977 addReply(c,shared.colon);
ed9b544e 2978 addReply(c,o);
2979 addReply(c,shared.crlf);
2980}
2981
2982static void incrCommand(redisClient *c) {
a4d1ba9a 2983 incrDecrCommand(c,1);
ed9b544e 2984}
2985
2986static void decrCommand(redisClient *c) {
a4d1ba9a 2987 incrDecrCommand(c,-1);
ed9b544e 2988}
2989
2990static void incrbyCommand(redisClient *c) {
d68ed120 2991 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 2992 incrDecrCommand(c,incr);
ed9b544e 2993}
2994
2995static void decrbyCommand(redisClient *c) {
d68ed120 2996 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 2997 incrDecrCommand(c,-incr);
ed9b544e 2998}
2999
3000/* ========================= Type agnostic commands ========================= */
3001
3002static void delCommand(redisClient *c) {
5109cdff 3003 int deleted = 0, j;
3004
3005 for (j = 1; j < c->argc; j++) {
3006 if (deleteKey(c->db,c->argv[j])) {
3007 server.dirty++;
3008 deleted++;
3009 }
3010 }
3011 switch(deleted) {
3012 case 0:
c937aa89 3013 addReply(c,shared.czero);
5109cdff 3014 break;
3015 case 1:
3016 addReply(c,shared.cone);
3017 break;
3018 default:
3019 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
3020 break;
ed9b544e 3021 }
3022}
3023
3024static void existsCommand(redisClient *c) {
3305306f 3025 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
ed9b544e 3026}
3027
3028static void selectCommand(redisClient *c) {
3029 int id = atoi(c->argv[1]->ptr);
3030
3031 if (selectDb(c,id) == REDIS_ERR) {
774e3047 3032 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 3033 } else {
3034 addReply(c,shared.ok);
3035 }
3036}
3037
3038static void randomkeyCommand(redisClient *c) {
3039 dictEntry *de;
3305306f 3040
3041 while(1) {
3042 de = dictGetRandomKey(c->db->dict);
ce7bef07 3043 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3305306f 3044 }
ed9b544e 3045 if (de == NULL) {
ce7bef07 3046 addReply(c,shared.plus);
ed9b544e 3047 addReply(c,shared.crlf);
3048 } else {
c937aa89 3049 addReply(c,shared.plus);
ed9b544e 3050 addReply(c,dictGetEntryKey(de));
3051 addReply(c,shared.crlf);
3052 }
3053}
3054
3055static void keysCommand(redisClient *c) {
3056 dictIterator *di;
3057 dictEntry *de;
3058 sds pattern = c->argv[1]->ptr;
3059 int plen = sdslen(pattern);
3060 int numkeys = 0, keyslen = 0;
3061 robj *lenobj = createObject(REDIS_STRING,NULL);
3062
3305306f 3063 di = dictGetIterator(c->db->dict);
ed9b544e 3064 addReply(c,lenobj);
3065 decrRefCount(lenobj);
3066 while((de = dictNext(di)) != NULL) {
3067 robj *keyobj = dictGetEntryKey(de);
3305306f 3068
ed9b544e 3069 sds key = keyobj->ptr;
3070 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3071 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3305306f 3072 if (expireIfNeeded(c->db,keyobj) == 0) {
3073 if (numkeys != 0)
3074 addReply(c,shared.space);
3075 addReply(c,keyobj);
3076 numkeys++;
3077 keyslen += sdslen(key);
3078 }
ed9b544e 3079 }
3080 }
3081 dictReleaseIterator(di);
c937aa89 3082 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
ed9b544e 3083 addReply(c,shared.crlf);
3084}
3085
3086static void dbsizeCommand(redisClient *c) {
3087 addReplySds(c,
3305306f 3088 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 3089}
3090
3091static void lastsaveCommand(redisClient *c) {
3092 addReplySds(c,
c937aa89 3093 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 3094}
3095
3096static void typeCommand(redisClient *c) {
3305306f 3097 robj *o;
ed9b544e 3098 char *type;
3305306f 3099
3100 o = lookupKeyRead(c->db,c->argv[1]);
3101 if (o == NULL) {
c937aa89 3102 type = "+none";
ed9b544e 3103 } else {
ed9b544e 3104 switch(o->type) {
c937aa89 3105 case REDIS_STRING: type = "+string"; break;
3106 case REDIS_LIST: type = "+list"; break;
3107 case REDIS_SET: type = "+set"; break;
ed9b544e 3108 default: type = "unknown"; break;
3109 }
3110 }
3111 addReplySds(c,sdsnew(type));
3112 addReply(c,shared.crlf);
3113}
3114
3115static void saveCommand(redisClient *c) {
05557f6d 3116 if (server.bgsaveinprogress) {
3117 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3118 return;
3119 }
f78fd11b 3120 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 3121 addReply(c,shared.ok);
3122 } else {
3123 addReply(c,shared.err);
3124 }
3125}
3126
3127static void bgsaveCommand(redisClient *c) {
3128 if (server.bgsaveinprogress) {
3129 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3130 return;
3131 }
f78fd11b 3132 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
ed9b544e 3133 addReply(c,shared.ok);
3134 } else {
3135 addReply(c,shared.err);
3136 }
3137}
3138
3139static void shutdownCommand(redisClient *c) {
3140 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
a3b21203 3141 /* Kill the saving child if there is a background saving in progress.
3142 We want to avoid race conditions, for instance our saving child may
3143 overwrite the synchronous saving did by SHUTDOWN. */
9f3c422c 3144 if (server.bgsaveinprogress) {
3145 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3146 kill(server.bgsavechildpid,SIGKILL);
a3b21203 3147 rdbRemoveTempFile(server.bgsavechildpid);
9f3c422c 3148 }
a3b21203 3149 /* SYNC SAVE */
f78fd11b 3150 if (rdbSave(server.dbfilename) == REDIS_OK) {
9f3c422c 3151 if (server.daemonize)
b284af55 3152 unlink(server.pidfile);
b284af55 3153 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
ed9b544e 3154 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3155 exit(1);
3156 } else {
a3b21203 3157 /* Ooops.. error saving! The best we can do is to continue operating.
3158 * Note that if there was a background saving process, in the next
3159 * cron() Redis will be notified that the background saving aborted,
3160 * handling special stuff like slaves pending for synchronization... */
ed9b544e 3161 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3162 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3163 }
3164}
3165
3166static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 3167 robj *o;
3168
3169 /* To use the same key as src and dst is probably an error */
3170 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 3171 addReply(c,shared.sameobjecterr);
ed9b544e 3172 return;
3173 }
3174
3305306f 3175 o = lookupKeyWrite(c->db,c->argv[1]);
3176 if (o == NULL) {
c937aa89 3177 addReply(c,shared.nokeyerr);
ed9b544e 3178 return;
3179 }
ed9b544e 3180 incrRefCount(o);
3305306f 3181 deleteIfVolatile(c->db,c->argv[2]);
3182 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
ed9b544e 3183 if (nx) {
3184 decrRefCount(o);
c937aa89 3185 addReply(c,shared.czero);
ed9b544e 3186 return;
3187 }
3305306f 3188 dictReplace(c->db->dict,c->argv[2],o);
ed9b544e 3189 } else {
3190 incrRefCount(c->argv[2]);
3191 }
3305306f 3192 deleteKey(c->db,c->argv[1]);
ed9b544e 3193 server.dirty++;
c937aa89 3194 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 3195}
3196
3197static void renameCommand(redisClient *c) {
3198 renameGenericCommand(c,0);
3199}
3200
3201static void renamenxCommand(redisClient *c) {
3202 renameGenericCommand(c,1);
3203}
3204
3205static void moveCommand(redisClient *c) {
3305306f 3206 robj *o;
3207 redisDb *src, *dst;
ed9b544e 3208 int srcid;
3209
3210 /* Obtain source and target DB pointers */
3305306f 3211 src = c->db;
3212 srcid = c->db->id;
ed9b544e 3213 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 3214 addReply(c,shared.outofrangeerr);
ed9b544e 3215 return;
3216 }
3305306f 3217 dst = c->db;
3218 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 3219
3220 /* If the user is moving using as target the same
3221 * DB as the source DB it is probably an error. */
3222 if (src == dst) {
c937aa89 3223 addReply(c,shared.sameobjecterr);
ed9b544e 3224 return;
3225 }
3226
3227 /* Check if the element exists and get a reference */
3305306f 3228 o = lookupKeyWrite(c->db,c->argv[1]);
3229 if (!o) {
c937aa89 3230 addReply(c,shared.czero);
ed9b544e 3231 return;
3232 }
3233
3234 /* Try to add the element to the target DB */
3305306f 3235 deleteIfVolatile(dst,c->argv[1]);
3236 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
c937aa89 3237 addReply(c,shared.czero);
ed9b544e 3238 return;
3239 }
3305306f 3240 incrRefCount(c->argv[1]);
ed9b544e 3241 incrRefCount(o);
3242
3243 /* OK! key moved, free the entry in the source DB */
3305306f 3244 deleteKey(src,c->argv[1]);
ed9b544e 3245 server.dirty++;
c937aa89 3246 addReply(c,shared.cone);
ed9b544e 3247}
3248
3249/* =================================== Lists ================================ */
3250static void pushGenericCommand(redisClient *c, int where) {
3251 robj *lobj;
ed9b544e 3252 list *list;
3305306f 3253
3254 lobj = lookupKeyWrite(c->db,c->argv[1]);
3255 if (lobj == NULL) {
ed9b544e 3256 lobj = createListObject();
3257 list = lobj->ptr;
3258 if (where == REDIS_HEAD) {
6b47e12e 3259 listAddNodeHead(list,c->argv[2]);
ed9b544e 3260 } else {
6b47e12e 3261 listAddNodeTail(list,c->argv[2]);
ed9b544e 3262 }
3305306f 3263 dictAdd(c->db->dict,c->argv[1],lobj);
ed9b544e 3264 incrRefCount(c->argv[1]);
3265 incrRefCount(c->argv[2]);
3266 } else {
ed9b544e 3267 if (lobj->type != REDIS_LIST) {
3268 addReply(c,shared.wrongtypeerr);
3269 return;
3270 }
3271 list = lobj->ptr;
3272 if (where == REDIS_HEAD) {
6b47e12e 3273 listAddNodeHead(list,c->argv[2]);
ed9b544e 3274 } else {
6b47e12e 3275 listAddNodeTail(list,c->argv[2]);
ed9b544e 3276 }
3277 incrRefCount(c->argv[2]);
3278 }
3279 server.dirty++;
3280 addReply(c,shared.ok);
3281}
3282
3283static void lpushCommand(redisClient *c) {
3284 pushGenericCommand(c,REDIS_HEAD);
3285}
3286
3287static void rpushCommand(redisClient *c) {
3288 pushGenericCommand(c,REDIS_TAIL);
3289}
3290
3291static void llenCommand(redisClient *c) {
3305306f 3292 robj *o;
ed9b544e 3293 list *l;
3294
3305306f 3295 o = lookupKeyRead(c->db,c->argv[1]);
3296 if (o == NULL) {
c937aa89 3297 addReply(c,shared.czero);
ed9b544e 3298 return;
3299 } else {
ed9b544e 3300 if (o->type != REDIS_LIST) {
c937aa89 3301 addReply(c,shared.wrongtypeerr);
ed9b544e 3302 } else {
3303 l = o->ptr;
c937aa89 3304 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
ed9b544e 3305 }
3306 }
3307}
3308
3309static void lindexCommand(redisClient *c) {
3305306f 3310 robj *o;
ed9b544e 3311 int index = atoi(c->argv[2]->ptr);
3312
3305306f 3313 o = lookupKeyRead(c->db,c->argv[1]);
3314 if (o == NULL) {
c937aa89 3315 addReply(c,shared.nullbulk);
ed9b544e 3316 } else {
ed9b544e 3317 if (o->type != REDIS_LIST) {
c937aa89 3318 addReply(c,shared.wrongtypeerr);
ed9b544e 3319 } else {
3320 list *list = o->ptr;
3321 listNode *ln;
3322
3323 ln = listIndex(list, index);
3324 if (ln == NULL) {
c937aa89 3325 addReply(c,shared.nullbulk);
ed9b544e 3326 } else {
3327 robj *ele = listNodeValue(ln);
942a3961 3328 addReplyBulkLen(c,ele);
ed9b544e 3329 addReply(c,ele);
3330 addReply(c,shared.crlf);
3331 }
3332 }
3333 }
3334}
3335
3336static void lsetCommand(redisClient *c) {
3305306f 3337 robj *o;
ed9b544e 3338 int index = atoi(c->argv[2]->ptr);
3339
3305306f 3340 o = lookupKeyWrite(c->db,c->argv[1]);
3341 if (o == NULL) {
ed9b544e 3342 addReply(c,shared.nokeyerr);
3343 } else {
ed9b544e 3344 if (o->type != REDIS_LIST) {
3345 addReply(c,shared.wrongtypeerr);
3346 } else {
3347 list *list = o->ptr;
3348 listNode *ln;
3349
3350 ln = listIndex(list, index);
3351 if (ln == NULL) {
c937aa89 3352 addReply(c,shared.outofrangeerr);
ed9b544e 3353 } else {
3354 robj *ele = listNodeValue(ln);
3355
3356 decrRefCount(ele);
3357 listNodeValue(ln) = c->argv[3];
3358 incrRefCount(c->argv[3]);
3359 addReply(c,shared.ok);
3360 server.dirty++;
3361 }
3362 }
3363 }
3364}
3365
3366static void popGenericCommand(redisClient *c, int where) {
3305306f 3367 robj *o;
3368
3369 o = lookupKeyWrite(c->db,c->argv[1]);
3370 if (o == NULL) {
c937aa89 3371 addReply(c,shared.nullbulk);
ed9b544e 3372 } else {
ed9b544e 3373 if (o->type != REDIS_LIST) {
c937aa89 3374 addReply(c,shared.wrongtypeerr);
ed9b544e 3375 } else {
3376 list *list = o->ptr;
3377 listNode *ln;
3378
3379 if (where == REDIS_HEAD)
3380 ln = listFirst(list);
3381 else
3382 ln = listLast(list);
3383
3384 if (ln == NULL) {
c937aa89 3385 addReply(c,shared.nullbulk);
ed9b544e 3386 } else {
3387 robj *ele = listNodeValue(ln);
942a3961 3388 addReplyBulkLen(c,ele);
ed9b544e 3389 addReply(c,ele);
3390 addReply(c,shared.crlf);
3391 listDelNode(list,ln);
3392 server.dirty++;
3393 }
3394 }
3395 }
3396}
3397
3398static void lpopCommand(redisClient *c) {
3399 popGenericCommand(c,REDIS_HEAD);
3400}
3401
3402static void rpopCommand(redisClient *c) {
3403 popGenericCommand(c,REDIS_TAIL);
3404}
3405
3406static void lrangeCommand(redisClient *c) {
3305306f 3407 robj *o;
ed9b544e 3408 int start = atoi(c->argv[2]->ptr);
3409 int end = atoi(c->argv[3]->ptr);
3305306f 3410
3411 o = lookupKeyRead(c->db,c->argv[1]);
3412 if (o == NULL) {
c937aa89 3413 addReply(c,shared.nullmultibulk);
ed9b544e 3414 } else {
ed9b544e 3415 if (o->type != REDIS_LIST) {
c937aa89 3416 addReply(c,shared.wrongtypeerr);
ed9b544e 3417 } else {
3418 list *list = o->ptr;
3419 listNode *ln;
3420 int llen = listLength(list);
3421 int rangelen, j;
3422 robj *ele;
3423
3424 /* convert negative indexes */
3425 if (start < 0) start = llen+start;
3426 if (end < 0) end = llen+end;
3427 if (start < 0) start = 0;
3428 if (end < 0) end = 0;
3429
3430 /* indexes sanity checks */
3431 if (start > end || start >= llen) {
3432 /* Out of range start or start > end result in empty list */
c937aa89 3433 addReply(c,shared.emptymultibulk);
ed9b544e 3434 return;
3435 }
3436 if (end >= llen) end = llen-1;
3437 rangelen = (end-start)+1;
3438
3439 /* Return the result in form of a multi-bulk reply */
3440 ln = listIndex(list, start);
c937aa89 3441 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
ed9b544e 3442 for (j = 0; j < rangelen; j++) {
3443 ele = listNodeValue(ln);
942a3961 3444 addReplyBulkLen(c,ele);
ed9b544e 3445 addReply(c,ele);
3446 addReply(c,shared.crlf);
3447 ln = ln->next;
3448 }
3449 }
3450 }
3451}
3452
3453static void ltrimCommand(redisClient *c) {
3305306f 3454 robj *o;
ed9b544e 3455 int start = atoi(c->argv[2]->ptr);
3456 int end = atoi(c->argv[3]->ptr);
3457
3305306f 3458 o = lookupKeyWrite(c->db,c->argv[1]);
3459 if (o == NULL) {
ed9b544e 3460 addReply(c,shared.nokeyerr);
3461 } else {
ed9b544e 3462 if (o->type != REDIS_LIST) {
3463 addReply(c,shared.wrongtypeerr);
3464 } else {
3465 list *list = o->ptr;
3466 listNode *ln;
3467 int llen = listLength(list);
3468 int j, ltrim, rtrim;
3469
3470 /* convert negative indexes */
3471 if (start < 0) start = llen+start;
3472 if (end < 0) end = llen+end;
3473 if (start < 0) start = 0;
3474 if (end < 0) end = 0;
3475
3476 /* indexes sanity checks */
3477 if (start > end || start >= llen) {
3478 /* Out of range start or start > end result in empty list */
3479 ltrim = llen;
3480 rtrim = 0;
3481 } else {
3482 if (end >= llen) end = llen-1;
3483 ltrim = start;
3484 rtrim = llen-end-1;
3485 }
3486
3487 /* Remove list elements to perform the trim */
3488 for (j = 0; j < ltrim; j++) {
3489 ln = listFirst(list);
3490 listDelNode(list,ln);
3491 }
3492 for (j = 0; j < rtrim; j++) {
3493 ln = listLast(list);
3494 listDelNode(list,ln);
3495 }
ed9b544e 3496 server.dirty++;
e59229a2 3497 addReply(c,shared.ok);
ed9b544e 3498 }
3499 }
3500}
3501
3502static void lremCommand(redisClient *c) {
3305306f 3503 robj *o;
ed9b544e 3504
3305306f 3505 o = lookupKeyWrite(c->db,c->argv[1]);
3506 if (o == NULL) {
33c08b39 3507 addReply(c,shared.czero);
ed9b544e 3508 } else {
ed9b544e 3509 if (o->type != REDIS_LIST) {
c937aa89 3510 addReply(c,shared.wrongtypeerr);
ed9b544e 3511 } else {
3512 list *list = o->ptr;
3513 listNode *ln, *next;
3514 int toremove = atoi(c->argv[2]->ptr);
3515 int removed = 0;
3516 int fromtail = 0;
3517
3518 if (toremove < 0) {
3519 toremove = -toremove;
3520 fromtail = 1;
3521 }
3522 ln = fromtail ? list->tail : list->head;
3523 while (ln) {
ed9b544e 3524 robj *ele = listNodeValue(ln);
a4d1ba9a 3525
3526 next = fromtail ? ln->prev : ln->next;
724a51b1 3527 if (compareStringObjects(ele,c->argv[3]) == 0) {
ed9b544e 3528 listDelNode(list,ln);
3529 server.dirty++;
3530 removed++;
3531 if (toremove && removed == toremove) break;
3532 }
3533 ln = next;
3534 }
c937aa89 3535 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 3536 }
3537 }
3538}
3539
3540/* ==================================== Sets ================================ */
3541
3542static void saddCommand(redisClient *c) {
ed9b544e 3543 robj *set;
3544
3305306f 3545 set = lookupKeyWrite(c->db,c->argv[1]);
3546 if (set == NULL) {
ed9b544e 3547 set = createSetObject();
3305306f 3548 dictAdd(c->db->dict,c->argv[1],set);
ed9b544e 3549 incrRefCount(c->argv[1]);
3550 } else {
ed9b544e 3551 if (set->type != REDIS_SET) {
c937aa89 3552 addReply(c,shared.wrongtypeerr);
ed9b544e 3553 return;
3554 }
3555 }
3556 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
3557 incrRefCount(c->argv[2]);
3558 server.dirty++;
c937aa89 3559 addReply(c,shared.cone);
ed9b544e 3560 } else {
c937aa89 3561 addReply(c,shared.czero);
ed9b544e 3562 }
3563}
3564
3565static void sremCommand(redisClient *c) {
3305306f 3566 robj *set;
ed9b544e 3567
3305306f 3568 set = lookupKeyWrite(c->db,c->argv[1]);
3569 if (set == NULL) {
c937aa89 3570 addReply(c,shared.czero);
ed9b544e 3571 } else {
ed9b544e 3572 if (set->type != REDIS_SET) {
c937aa89 3573 addReply(c,shared.wrongtypeerr);
ed9b544e 3574 return;
3575 }
3576 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
3577 server.dirty++;
12fea928 3578 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
c937aa89 3579 addReply(c,shared.cone);
ed9b544e 3580 } else {
c937aa89 3581 addReply(c,shared.czero);
ed9b544e 3582 }
3583 }
3584}
3585
a4460ef4 3586static void smoveCommand(redisClient *c) {
3587 robj *srcset, *dstset;
3588
3589 srcset = lookupKeyWrite(c->db,c->argv[1]);
3590 dstset = lookupKeyWrite(c->db,c->argv[2]);
3591
3592 /* If the source key does not exist return 0, if it's of the wrong type
3593 * raise an error */
3594 if (srcset == NULL || srcset->type != REDIS_SET) {
3595 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
3596 return;
3597 }
3598 /* Error if the destination key is not a set as well */
3599 if (dstset && dstset->type != REDIS_SET) {
3600 addReply(c,shared.wrongtypeerr);
3601 return;
3602 }
3603 /* Remove the element from the source set */
3604 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
3605 /* Key not found in the src set! return zero */
3606 addReply(c,shared.czero);
3607 return;
3608 }
3609 server.dirty++;
3610 /* Add the element to the destination set */
3611 if (!dstset) {
3612 dstset = createSetObject();
3613 dictAdd(c->db->dict,c->argv[2],dstset);
3614 incrRefCount(c->argv[2]);
3615 }
3616 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
3617 incrRefCount(c->argv[3]);
3618 addReply(c,shared.cone);
3619}
3620
ed9b544e 3621static void sismemberCommand(redisClient *c) {
3305306f 3622 robj *set;
ed9b544e 3623
3305306f 3624 set = lookupKeyRead(c->db,c->argv[1]);
3625 if (set == NULL) {
c937aa89 3626 addReply(c,shared.czero);
ed9b544e 3627 } else {
ed9b544e 3628 if (set->type != REDIS_SET) {
c937aa89 3629 addReply(c,shared.wrongtypeerr);
ed9b544e 3630 return;
3631 }
3632 if (dictFind(set->ptr,c->argv[2]))
c937aa89 3633 addReply(c,shared.cone);
ed9b544e 3634 else
c937aa89 3635 addReply(c,shared.czero);
ed9b544e 3636 }
3637}
3638
3639static void scardCommand(redisClient *c) {
3305306f 3640 robj *o;
ed9b544e 3641 dict *s;
3642
3305306f 3643 o = lookupKeyRead(c->db,c->argv[1]);
3644 if (o == NULL) {
c937aa89 3645 addReply(c,shared.czero);
ed9b544e 3646 return;
3647 } else {
ed9b544e 3648 if (o->type != REDIS_SET) {
c937aa89 3649 addReply(c,shared.wrongtypeerr);
ed9b544e 3650 } else {
3651 s = o->ptr;
c937aa89 3652 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3305306f 3653 dictSize(s)));
ed9b544e 3654 }
3655 }
3656}
3657
12fea928 3658static void spopCommand(redisClient *c) {
3659 robj *set;
3660 dictEntry *de;
3661
3662 set = lookupKeyWrite(c->db,c->argv[1]);
3663 if (set == NULL) {
3664 addReply(c,shared.nullbulk);
3665 } else {
3666 if (set->type != REDIS_SET) {
3667 addReply(c,shared.wrongtypeerr);
3668 return;
3669 }
3670 de = dictGetRandomKey(set->ptr);
3671 if (de == NULL) {
3672 addReply(c,shared.nullbulk);
3673 } else {
3674 robj *ele = dictGetEntryKey(de);
3675
942a3961 3676 addReplyBulkLen(c,ele);
12fea928 3677 addReply(c,ele);
3678 addReply(c,shared.crlf);
3679 dictDelete(set->ptr,ele);
3680 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3681 server.dirty++;
3682 }
3683 }
3684}
3685
2abb95a9 3686static void srandmemberCommand(redisClient *c) {
3687 robj *set;
3688 dictEntry *de;
3689
3690 set = lookupKeyRead(c->db,c->argv[1]);
3691 if (set == NULL) {
3692 addReply(c,shared.nullbulk);
3693 } else {
3694 if (set->type != REDIS_SET) {
3695 addReply(c,shared.wrongtypeerr);
3696 return;
3697 }
3698 de = dictGetRandomKey(set->ptr);
3699 if (de == NULL) {
3700 addReply(c,shared.nullbulk);
3701 } else {
3702 robj *ele = dictGetEntryKey(de);
3703
3704 addReplyBulkLen(c,ele);
3705 addReply(c,ele);
3706 addReply(c,shared.crlf);
3707 }
3708 }
3709}
3710
ed9b544e 3711static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
3712 dict **d1 = (void*) s1, **d2 = (void*) s2;
3713
3305306f 3714 return dictSize(*d1)-dictSize(*d2);
ed9b544e 3715}
3716
3717static void sinterGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey) {
3718 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3719 dictIterator *di;
3720 dictEntry *de;
3721 robj *lenobj = NULL, *dstset = NULL;
3722 int j, cardinality = 0;
3723
ed9b544e 3724 for (j = 0; j < setsnum; j++) {
3725 robj *setobj;
3305306f 3726
3727 setobj = dstkey ?
3728 lookupKeyWrite(c->db,setskeys[j]) :
3729 lookupKeyRead(c->db,setskeys[j]);
3730 if (!setobj) {
ed9b544e 3731 zfree(dv);
5faa6025 3732 if (dstkey) {
3733 deleteKey(c->db,dstkey);
3734 addReply(c,shared.ok);
3735 } else {
3736 addReply(c,shared.nullmultibulk);
3737 }
ed9b544e 3738 return;
3739 }
ed9b544e 3740 if (setobj->type != REDIS_SET) {
3741 zfree(dv);
c937aa89 3742 addReply(c,shared.wrongtypeerr);
ed9b544e 3743 return;
3744 }
3745 dv[j] = setobj->ptr;
3746 }
3747 /* Sort sets from the smallest to largest, this will improve our
3748 * algorithm's performace */
3749 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3750
3751 /* The first thing we should output is the total number of elements...
3752 * since this is a multi-bulk write, but at this stage we don't know
3753 * the intersection set size, so we use a trick, append an empty object
3754 * to the output list and save the pointer to later modify it with the
3755 * right length */
3756 if (!dstkey) {
3757 lenobj = createObject(REDIS_STRING,NULL);
3758 addReply(c,lenobj);
3759 decrRefCount(lenobj);
3760 } else {
3761 /* If we have a target key where to store the resulting set
3762 * create this key with an empty set inside */
3763 dstset = createSetObject();
ed9b544e 3764 }
3765
3766 /* Iterate all the elements of the first (smallest) set, and test
3767 * the element against all the other sets, if at least one set does
3768 * not include the element it is discarded */
3769 di = dictGetIterator(dv[0]);
ed9b544e 3770
3771 while((de = dictNext(di)) != NULL) {
3772 robj *ele;
3773
3774 for (j = 1; j < setsnum; j++)
3775 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
3776 if (j != setsnum)
3777 continue; /* at least one set does not contain the member */
3778 ele = dictGetEntryKey(de);
3779 if (!dstkey) {
942a3961 3780 addReplyBulkLen(c,ele);
ed9b544e 3781 addReply(c,ele);
3782 addReply(c,shared.crlf);
3783 cardinality++;
3784 } else {
3785 dictAdd(dstset->ptr,ele,NULL);
3786 incrRefCount(ele);
3787 }
3788 }
3789 dictReleaseIterator(di);
3790
83cdfe18
AG
3791 if (dstkey) {
3792 /* Store the resulting set into the target */
3793 deleteKey(c->db,dstkey);
3794 dictAdd(c->db->dict,dstkey,dstset);
3795 incrRefCount(dstkey);
3796 }
3797
40d224a9 3798 if (!dstkey) {
c937aa89 3799 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",cardinality);
40d224a9 3800 } else {
03fd01c7 3801 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3802 dictSize((dict*)dstset->ptr)));
40d224a9 3803 server.dirty++;
3804 }
ed9b544e 3805 zfree(dv);
3806}
3807
3808static void sinterCommand(redisClient *c) {
3809 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
3810}
3811
3812static void sinterstoreCommand(redisClient *c) {
3813 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
3814}
3815
f4f56e1d 3816#define REDIS_OP_UNION 0
3817#define REDIS_OP_DIFF 1
3818
3819static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 3820 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3821 dictIterator *di;
3822 dictEntry *de;
f4f56e1d 3823 robj *dstset = NULL;
40d224a9 3824 int j, cardinality = 0;
3825
40d224a9 3826 for (j = 0; j < setsnum; j++) {
3827 robj *setobj;
3828
3829 setobj = dstkey ?
3830 lookupKeyWrite(c->db,setskeys[j]) :
3831 lookupKeyRead(c->db,setskeys[j]);
3832 if (!setobj) {
3833 dv[j] = NULL;
3834 continue;
3835 }
3836 if (setobj->type != REDIS_SET) {
3837 zfree(dv);
3838 addReply(c,shared.wrongtypeerr);
3839 return;
3840 }
3841 dv[j] = setobj->ptr;
3842 }
3843
3844 /* We need a temp set object to store our union. If the dstkey
3845 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
3846 * this set object will be the resulting object to set into the target key*/
3847 dstset = createSetObject();
3848
40d224a9 3849 /* Iterate all the elements of all the sets, add every element a single
3850 * time to the result set */
3851 for (j = 0; j < setsnum; j++) {
51829ed3 3852 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 3853 if (!dv[j]) continue; /* non existing keys are like empty sets */
3854
3855 di = dictGetIterator(dv[j]);
40d224a9 3856
3857 while((de = dictNext(di)) != NULL) {
3858 robj *ele;
3859
3860 /* dictAdd will not add the same element multiple times */
3861 ele = dictGetEntryKey(de);
f4f56e1d 3862 if (op == REDIS_OP_UNION || j == 0) {
3863 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
3864 incrRefCount(ele);
40d224a9 3865 cardinality++;
3866 }
f4f56e1d 3867 } else if (op == REDIS_OP_DIFF) {
3868 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
3869 cardinality--;
3870 }
40d224a9 3871 }
3872 }
3873 dictReleaseIterator(di);
51829ed3
AG
3874
3875 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
40d224a9 3876 }
3877
f4f56e1d 3878 /* Output the content of the resulting set, if not in STORE mode */
3879 if (!dstkey) {
3880 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
3881 di = dictGetIterator(dstset->ptr);
f4f56e1d 3882 while((de = dictNext(di)) != NULL) {
3883 robj *ele;
3884
3885 ele = dictGetEntryKey(de);
942a3961 3886 addReplyBulkLen(c,ele);
f4f56e1d 3887 addReply(c,ele);
3888 addReply(c,shared.crlf);
3889 }
3890 dictReleaseIterator(di);
83cdfe18
AG
3891 } else {
3892 /* If we have a target key where to store the resulting set
3893 * create this key with the result set inside */
3894 deleteKey(c->db,dstkey);
3895 dictAdd(c->db->dict,dstkey,dstset);
3896 incrRefCount(dstkey);
f4f56e1d 3897 }
3898
3899 /* Cleanup */
40d224a9 3900 if (!dstkey) {
40d224a9 3901 decrRefCount(dstset);
3902 } else {
03fd01c7 3903 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3904 dictSize((dict*)dstset->ptr)));
40d224a9 3905 server.dirty++;
3906 }
3907 zfree(dv);
3908}
3909
3910static void sunionCommand(redisClient *c) {
f4f56e1d 3911 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 3912}
3913
3914static void sunionstoreCommand(redisClient *c) {
f4f56e1d 3915 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
3916}
3917
3918static void sdiffCommand(redisClient *c) {
3919 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
3920}
3921
3922static void sdiffstoreCommand(redisClient *c) {
3923 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 3924}
3925
6b47e12e 3926/* ==================================== ZSets =============================== */
3927
3928/* ZSETs are ordered sets using two data structures to hold the same elements
3929 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
3930 * data structure.
3931 *
3932 * The elements are added to an hash table mapping Redis objects to scores.
3933 * At the same time the elements are added to a skip list mapping scores
3934 * to Redis objects (so objects are sorted by scores in this "view"). */
3935
3936/* This skiplist implementation is almost a C translation of the original
3937 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
3938 * Alternative to Balanced Trees", modified in three ways:
3939 * a) this implementation allows for repeated values.
3940 * b) the comparison is not just by key (our 'score') but by satellite data.
3941 * c) there is a back pointer, so it's a doubly linked list with the back
3942 * pointers being only at "level 1". This allows to traverse the list
3943 * from tail to head, useful for ZREVRANGE. */
3944
3945static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
3946 zskiplistNode *zn = zmalloc(sizeof(*zn));
3947
3948 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
3949 zn->score = score;
3950 zn->obj = obj;
3951 return zn;
3952}
3953
3954static zskiplist *zslCreate(void) {
3955 int j;
3956 zskiplist *zsl;
3957
3958 zsl = zmalloc(sizeof(*zsl));
3959 zsl->level = 1;
cc812361 3960 zsl->length = 0;
6b47e12e 3961 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
3962 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
3963 zsl->header->forward[j] = NULL;
e3870fab 3964 zsl->header->backward = NULL;
3965 zsl->tail = NULL;
6b47e12e 3966 return zsl;
3967}
3968
fd8ccf44 3969static void zslFreeNode(zskiplistNode *node) {
3970 decrRefCount(node->obj);
ad807e6f 3971 zfree(node->forward);
fd8ccf44 3972 zfree(node);
3973}
3974
3975static void zslFree(zskiplist *zsl) {
ad807e6f 3976 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 3977
ad807e6f 3978 zfree(zsl->header->forward);
3979 zfree(zsl->header);
fd8ccf44 3980 while(node) {
599379dd 3981 next = node->forward[0];
fd8ccf44 3982 zslFreeNode(node);
3983 node = next;
3984 }
ad807e6f 3985 zfree(zsl);
fd8ccf44 3986}
3987
6b47e12e 3988static int zslRandomLevel(void) {
3989 int level = 1;
3990 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
3991 level += 1;
3992 return level;
3993}
3994
3995static void zslInsert(zskiplist *zsl, double score, robj *obj) {
3996 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
3997 int i, level;
3998
3999 x = zsl->header;
4000 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4001 while (x->forward[i] &&
4002 (x->forward[i]->score < score ||
4003 (x->forward[i]->score == score &&
4004 compareStringObjects(x->forward[i]->obj,obj) < 0)))
6b47e12e 4005 x = x->forward[i];
4006 update[i] = x;
4007 }
6b47e12e 4008 /* we assume the key is not already inside, since we allow duplicated
4009 * scores, and the re-insertion of score and redis object should never
4010 * happpen since the caller of zslInsert() should test in the hash table
4011 * if the element is already inside or not. */
4012 level = zslRandomLevel();
4013 if (level > zsl->level) {
4014 for (i = zsl->level; i < level; i++)
4015 update[i] = zsl->header;
4016 zsl->level = level;
4017 }
4018 x = zslCreateNode(level,score,obj);
4019 for (i = 0; i < level; i++) {
4020 x->forward[i] = update[i]->forward[i];
4021 update[i]->forward[i] = x;
4022 }
bb975144 4023 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 4024 if (x->forward[0])
4025 x->forward[0]->backward = x;
4026 else
4027 zsl->tail = x;
cc812361 4028 zsl->length++;
6b47e12e 4029}
4030
50c55df5 4031/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 4032static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 4033 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4034 int i;
4035
4036 x = zsl->header;
4037 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4038 while (x->forward[i] &&
4039 (x->forward[i]->score < score ||
4040 (x->forward[i]->score == score &&
4041 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 4042 x = x->forward[i];
4043 update[i] = x;
4044 }
4045 /* We may have multiple elements with the same score, what we need
4046 * is to find the element with both the right score and object. */
4047 x = x->forward[0];
50c55df5 4048 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
9d60e6e4 4049 for (i = 0; i < zsl->level; i++) {
4050 if (update[i]->forward[i] != x) break;
4051 update[i]->forward[i] = x->forward[i];
4052 }
4053 if (x->forward[0]) {
4054 x->forward[0]->backward = (x->backward == zsl->header) ?
4055 NULL : x->backward;
e197b441 4056 } else {
9d60e6e4 4057 zsl->tail = x->backward;
e197b441 4058 }
9d60e6e4 4059 zslFreeNode(x);
4060 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4061 zsl->level--;
4062 zsl->length--;
4063 return 1;
4064 } else {
4065 return 0; /* not found */
e197b441 4066 }
4067 return 0; /* not found */
fd8ccf44 4068}
4069
1807985b 4070/* Delete all the elements with score between min and max from the skiplist.
4071 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4072 * Note that this function takes the reference to the hash table view of the
4073 * sorted set, in order to remove the elements from the hash table too. */
4074static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4075 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4076 unsigned long removed = 0;
4077 int i;
4078
4079 x = zsl->header;
4080 for (i = zsl->level-1; i >= 0; i--) {
4081 while (x->forward[i] && x->forward[i]->score < min)
4082 x = x->forward[i];
4083 update[i] = x;
4084 }
4085 /* We may have multiple elements with the same score, what we need
4086 * is to find the element with both the right score and object. */
4087 x = x->forward[0];
4088 while (x && x->score <= max) {
4089 zskiplistNode *next;
4090
4091 for (i = 0; i < zsl->level; i++) {
4092 if (update[i]->forward[i] != x) break;
4093 update[i]->forward[i] = x->forward[i];
4094 }
4095 if (x->forward[0]) {
4096 x->forward[0]->backward = (x->backward == zsl->header) ?
4097 NULL : x->backward;
4098 } else {
4099 zsl->tail = x->backward;
4100 }
4101 next = x->forward[0];
4102 dictDelete(dict,x->obj);
4103 zslFreeNode(x);
4104 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4105 zsl->level--;
4106 zsl->length--;
4107 removed++;
4108 x = next;
4109 }
4110 return removed; /* not found */
4111}
4112
50c55df5 4113/* Find the first node having a score equal or greater than the specified one.
4114 * Returns NULL if there is no match. */
4115static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4116 zskiplistNode *x;
4117 int i;
4118
4119 x = zsl->header;
4120 for (i = zsl->level-1; i >= 0; i--) {
4121 while (x->forward[i] && x->forward[i]->score < score)
4122 x = x->forward[i];
4123 }
4124 /* We may have multiple elements with the same score, what we need
4125 * is to find the element with both the right score and object. */
4126 return x->forward[0];
4127}
4128
fd8ccf44 4129/* The actual Z-commands implementations */
4130
4131static void zaddCommand(redisClient *c) {
4132 robj *zsetobj;
4133 zset *zs;
4134 double *score;
4135
4136 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4137 if (zsetobj == NULL) {
4138 zsetobj = createZsetObject();
4139 dictAdd(c->db->dict,c->argv[1],zsetobj);
4140 incrRefCount(c->argv[1]);
4141 } else {
4142 if (zsetobj->type != REDIS_ZSET) {
4143 addReply(c,shared.wrongtypeerr);
4144 return;
4145 }
4146 }
4147 score = zmalloc(sizeof(double));
4148 *score = strtod(c->argv[2]->ptr,NULL);
4149 zs = zsetobj->ptr;
4150 if (dictAdd(zs->dict,c->argv[3],score) == DICT_OK) {
4151 /* case 1: New element */
4152 incrRefCount(c->argv[3]); /* added to hash */
4153 zslInsert(zs->zsl,*score,c->argv[3]);
4154 incrRefCount(c->argv[3]); /* added to skiplist */
4155 server.dirty++;
4156 addReply(c,shared.cone);
4157 } else {
4158 dictEntry *de;
4159 double *oldscore;
4160
4161 /* case 2: Score update operation */
4162 de = dictFind(zs->dict,c->argv[3]);
4163 assert(de != NULL);
4164 oldscore = dictGetEntryVal(de);
4165 if (*score != *oldscore) {
4166 int deleted;
4167
e197b441 4168 deleted = zslDelete(zs->zsl,*oldscore,c->argv[3]);
fd8ccf44 4169 assert(deleted != 0);
4170 zslInsert(zs->zsl,*score,c->argv[3]);
4171 incrRefCount(c->argv[3]);
2161a965 4172 dictReplace(zs->dict,c->argv[3],score);
fd8ccf44 4173 server.dirty++;
2161a965 4174 } else {
4175 zfree(score);
fd8ccf44 4176 }
4177 addReply(c,shared.czero);
4178 }
4179}
4180
1b7106e7 4181static void zremCommand(redisClient *c) {
4182 robj *zsetobj;
4183 zset *zs;
4184
4185 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4186 if (zsetobj == NULL) {
4187 addReply(c,shared.czero);
4188 } else {
4189 dictEntry *de;
4190 double *oldscore;
4191 int deleted;
4192
4193 if (zsetobj->type != REDIS_ZSET) {
4194 addReply(c,shared.wrongtypeerr);
4195 return;
4196 }
4197 zs = zsetobj->ptr;
4198 de = dictFind(zs->dict,c->argv[2]);
4199 if (de == NULL) {
4200 addReply(c,shared.czero);
4201 return;
4202 }
4203 /* Delete from the skiplist */
4204 oldscore = dictGetEntryVal(de);
4205 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
4206 assert(deleted != 0);
4207
4208 /* Delete from the hash table */
4209 dictDelete(zs->dict,c->argv[2]);
4210 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4211 server.dirty++;
4212 addReply(c,shared.cone);
4213 }
4214}
4215
1807985b 4216static void zremrangebyscoreCommand(redisClient *c) {
4217 double min = strtod(c->argv[2]->ptr,NULL);
4218 double max = strtod(c->argv[3]->ptr,NULL);
4219 robj *zsetobj;
4220 zset *zs;
4221
4222 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4223 if (zsetobj == NULL) {
4224 addReply(c,shared.czero);
4225 } else {
4226 long deleted;
4227
4228 if (zsetobj->type != REDIS_ZSET) {
4229 addReply(c,shared.wrongtypeerr);
4230 return;
4231 }
4232 zs = zsetobj->ptr;
4233 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4234 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4235 server.dirty += deleted;
4236 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4237 }
4238}
4239
e3870fab 4240static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 4241 robj *o;
4242 int start = atoi(c->argv[2]->ptr);
4243 int end = atoi(c->argv[3]->ptr);
4244
4245 o = lookupKeyRead(c->db,c->argv[1]);
4246 if (o == NULL) {
4247 addReply(c,shared.nullmultibulk);
4248 } else {
4249 if (o->type != REDIS_ZSET) {
4250 addReply(c,shared.wrongtypeerr);
4251 } else {
4252 zset *zsetobj = o->ptr;
4253 zskiplist *zsl = zsetobj->zsl;
4254 zskiplistNode *ln;
4255
4256 int llen = zsl->length;
4257 int rangelen, j;
4258 robj *ele;
4259
4260 /* convert negative indexes */
4261 if (start < 0) start = llen+start;
4262 if (end < 0) end = llen+end;
4263 if (start < 0) start = 0;
4264 if (end < 0) end = 0;
4265
4266 /* indexes sanity checks */
4267 if (start > end || start >= llen) {
4268 /* Out of range start or start > end result in empty list */
4269 addReply(c,shared.emptymultibulk);
4270 return;
4271 }
4272 if (end >= llen) end = llen-1;
4273 rangelen = (end-start)+1;
4274
4275 /* Return the result in form of a multi-bulk reply */
e3870fab 4276 if (reverse) {
4277 ln = zsl->tail;
4278 while (start--)
4279 ln = ln->backward;
4280 } else {
4281 ln = zsl->header->forward[0];
4282 while (start--)
4283 ln = ln->forward[0];
4284 }
cc812361 4285
4286 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
4287 for (j = 0; j < rangelen; j++) {
0aad7a19 4288 ele = ln->obj;
cc812361 4289 addReplyBulkLen(c,ele);
4290 addReply(c,ele);
4291 addReply(c,shared.crlf);
e3870fab 4292 ln = reverse ? ln->backward : ln->forward[0];
cc812361 4293 }
4294 }
4295 }
4296}
4297
e3870fab 4298static void zrangeCommand(redisClient *c) {
4299 zrangeGenericCommand(c,0);
4300}
4301
4302static void zrevrangeCommand(redisClient *c) {
4303 zrangeGenericCommand(c,1);
4304}
4305
50c55df5 4306static void zrangebyscoreCommand(redisClient *c) {
4307 robj *o;
4308 double min = strtod(c->argv[2]->ptr,NULL);
4309 double max = strtod(c->argv[3]->ptr,NULL);
4310
4311 o = lookupKeyRead(c->db,c->argv[1]);
4312 if (o == NULL) {
4313 addReply(c,shared.nullmultibulk);
4314 } else {
4315 if (o->type != REDIS_ZSET) {
4316 addReply(c,shared.wrongtypeerr);
4317 } else {
4318 zset *zsetobj = o->ptr;
4319 zskiplist *zsl = zsetobj->zsl;
4320 zskiplistNode *ln;
4321 robj *ele, *lenobj;
4322 unsigned int rangelen = 0;
4323
4324 /* Get the first node with the score >= min */
4325 ln = zslFirstWithScore(zsl,min);
4326 if (ln == NULL) {
4327 /* No element matching the speciifed interval */
4328 addReply(c,shared.emptymultibulk);
4329 return;
4330 }
4331
4332 /* We don't know in advance how many matching elements there
4333 * are in the list, so we push this object that will represent
4334 * the multi-bulk length in the output buffer, and will "fix"
4335 * it later */
4336 lenobj = createObject(REDIS_STRING,NULL);
4337 addReply(c,lenobj);
4338
dbbc7285 4339 while(ln && ln->score <= max) {
50c55df5 4340 ele = ln->obj;
4341 addReplyBulkLen(c,ele);
4342 addReply(c,ele);
4343 addReply(c,shared.crlf);
4344 ln = ln->forward[0];
4345 rangelen++;
4346 }
4347 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4348 }
4349 }
4350}
4351
3c41331e 4352static void zcardCommand(redisClient *c) {
e197b441 4353 robj *o;
4354 zset *zs;
4355
4356 o = lookupKeyRead(c->db,c->argv[1]);
4357 if (o == NULL) {
4358 addReply(c,shared.czero);
4359 return;
4360 } else {
4361 if (o->type != REDIS_ZSET) {
4362 addReply(c,shared.wrongtypeerr);
4363 } else {
4364 zs = o->ptr;
4365 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",zs->zsl->length));
4366 }
4367 }
4368}
4369
6e333bbe 4370static void zscoreCommand(redisClient *c) {
4371 robj *o;
4372 zset *zs;
4373
4374 o = lookupKeyRead(c->db,c->argv[1]);
4375 if (o == NULL) {
4376 addReply(c,shared.czero);
4377 return;
4378 } else {
4379 if (o->type != REDIS_ZSET) {
4380 addReply(c,shared.wrongtypeerr);
4381 } else {
4382 dictEntry *de;
4383
4384 zs = o->ptr;
4385 de = dictFind(zs->dict,c->argv[2]);
4386 if (!de) {
4387 addReply(c,shared.nullbulk);
4388 } else {
4389 char buf[128];
4390 double *score = dictGetEntryVal(de);
4391
4392 snprintf(buf,sizeof(buf),"%.16g",*score);
4393 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
4394 strlen(buf),buf));
4395 }
4396 }
4397 }
4398}
4399
6b47e12e 4400/* ========================= Non type-specific commands ==================== */
4401
ed9b544e 4402static void flushdbCommand(redisClient *c) {
ca37e9cd 4403 server.dirty += dictSize(c->db->dict);
3305306f 4404 dictEmpty(c->db->dict);
4405 dictEmpty(c->db->expires);
ed9b544e 4406 addReply(c,shared.ok);
ed9b544e 4407}
4408
4409static void flushallCommand(redisClient *c) {
ca37e9cd 4410 server.dirty += emptyDb();
ed9b544e 4411 addReply(c,shared.ok);
f78fd11b 4412 rdbSave(server.dbfilename);
ca37e9cd 4413 server.dirty++;
ed9b544e 4414}
4415
56906eef 4416static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 4417 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 4418 so->type = type;
4419 so->pattern = pattern;
4420 return so;
4421}
4422
4423/* Return the value associated to the key with a name obtained
4424 * substituting the first occurence of '*' in 'pattern' with 'subst' */
56906eef 4425static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
ed9b544e 4426 char *p;
4427 sds spat, ssub;
4428 robj keyobj;
4429 int prefixlen, sublen, postfixlen;
ed9b544e 4430 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
4431 struct {
f1017b3f 4432 long len;
4433 long free;
ed9b544e 4434 char buf[REDIS_SORTKEY_MAX+1];
4435 } keyname;
4436
942a3961 4437 if (subst->encoding == REDIS_ENCODING_RAW)
4438 incrRefCount(subst);
4439 else {
4440 subst = getDecodedObject(subst);
4441 }
4442
ed9b544e 4443 spat = pattern->ptr;
4444 ssub = subst->ptr;
4445 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
4446 p = strchr(spat,'*');
4447 if (!p) return NULL;
4448
4449 prefixlen = p-spat;
4450 sublen = sdslen(ssub);
4451 postfixlen = sdslen(spat)-(prefixlen+1);
4452 memcpy(keyname.buf,spat,prefixlen);
4453 memcpy(keyname.buf+prefixlen,ssub,sublen);
4454 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
4455 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
4456 keyname.len = prefixlen+sublen+postfixlen;
4457
4458 keyobj.refcount = 1;
4459 keyobj.type = REDIS_STRING;
4460 keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
4461
942a3961 4462 decrRefCount(subst);
4463
a4d1ba9a 4464 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3305306f 4465 return lookupKeyRead(db,&keyobj);
ed9b544e 4466}
4467
4468/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
4469 * the additional parameter is not standard but a BSD-specific we have to
4470 * pass sorting parameters via the global 'server' structure */
4471static int sortCompare(const void *s1, const void *s2) {
4472 const redisSortObject *so1 = s1, *so2 = s2;
4473 int cmp;
4474
4475 if (!server.sort_alpha) {
4476 /* Numeric sorting. Here it's trivial as we precomputed scores */
4477 if (so1->u.score > so2->u.score) {
4478 cmp = 1;
4479 } else if (so1->u.score < so2->u.score) {
4480 cmp = -1;
4481 } else {
4482 cmp = 0;
4483 }
4484 } else {
4485 /* Alphanumeric sorting */
4486 if (server.sort_bypattern) {
4487 if (!so1->u.cmpobj || !so2->u.cmpobj) {
4488 /* At least one compare object is NULL */
4489 if (so1->u.cmpobj == so2->u.cmpobj)
4490 cmp = 0;
4491 else if (so1->u.cmpobj == NULL)
4492 cmp = -1;
4493 else
4494 cmp = 1;
4495 } else {
4496 /* We have both the objects, use strcoll */
4497 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
4498 }
4499 } else {
4500 /* Compare elements directly */
942a3961 4501 if (so1->obj->encoding == REDIS_ENCODING_RAW &&
4502 so2->obj->encoding == REDIS_ENCODING_RAW) {
4503 cmp = strcoll(so1->obj->ptr,so2->obj->ptr);
4504 } else {
4505 robj *dec1, *dec2;
4506
4507 dec1 = so1->obj->encoding == REDIS_ENCODING_RAW ?
4508 so1->obj : getDecodedObject(so1->obj);
4509 dec2 = so2->obj->encoding == REDIS_ENCODING_RAW ?
4510 so2->obj : getDecodedObject(so2->obj);
4511 cmp = strcoll(dec1->ptr,dec2->ptr);
4512 if (dec1 != so1->obj) decrRefCount(dec1);
4513 if (dec2 != so2->obj) decrRefCount(dec2);
4514 }
ed9b544e 4515 }
4516 }
4517 return server.sort_desc ? -cmp : cmp;
4518}
4519
4520/* The SORT command is the most complex command in Redis. Warning: this code
4521 * is optimized for speed and a bit less for readability */
4522static void sortCommand(redisClient *c) {
ed9b544e 4523 list *operations;
4524 int outputlen = 0;
4525 int desc = 0, alpha = 0;
4526 int limit_start = 0, limit_count = -1, start, end;
4527 int j, dontsort = 0, vectorlen;
4528 int getop = 0; /* GET operation counter */
4529 robj *sortval, *sortby = NULL;
4530 redisSortObject *vector; /* Resulting vector to sort */
4531
4532 /* Lookup the key to sort. It must be of the right types */
3305306f 4533 sortval = lookupKeyRead(c->db,c->argv[1]);
4534 if (sortval == NULL) {
c937aa89 4535 addReply(c,shared.nokeyerr);
ed9b544e 4536 return;
4537 }
ed9b544e 4538 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
c937aa89 4539 addReply(c,shared.wrongtypeerr);
ed9b544e 4540 return;
4541 }
4542
4543 /* Create a list of operations to perform for every sorted element.
4544 * Operations can be GET/DEL/INCR/DECR */
4545 operations = listCreate();
092dac2a 4546 listSetFreeMethod(operations,zfree);
ed9b544e 4547 j = 2;
4548
4549 /* Now we need to protect sortval incrementing its count, in the future
4550 * SORT may have options able to overwrite/delete keys during the sorting
4551 * and the sorted key itself may get destroied */
4552 incrRefCount(sortval);
4553
4554 /* The SORT command has an SQL-alike syntax, parse it */
4555 while(j < c->argc) {
4556 int leftargs = c->argc-j-1;
4557 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
4558 desc = 0;
4559 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
4560 desc = 1;
4561 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
4562 alpha = 1;
4563 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
4564 limit_start = atoi(c->argv[j+1]->ptr);
4565 limit_count = atoi(c->argv[j+2]->ptr);
4566 j+=2;
4567 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
4568 sortby = c->argv[j+1];
4569 /* If the BY pattern does not contain '*', i.e. it is constant,
4570 * we don't need to sort nor to lookup the weight keys. */
4571 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
4572 j++;
4573 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4574 listAddNodeTail(operations,createSortOperation(
4575 REDIS_SORT_GET,c->argv[j+1]));
4576 getop++;
4577 j++;
4578 } else if (!strcasecmp(c->argv[j]->ptr,"del") && leftargs >= 1) {
4579 listAddNodeTail(operations,createSortOperation(
4580 REDIS_SORT_DEL,c->argv[j+1]));
4581 j++;
4582 } else if (!strcasecmp(c->argv[j]->ptr,"incr") && leftargs >= 1) {
4583 listAddNodeTail(operations,createSortOperation(
4584 REDIS_SORT_INCR,c->argv[j+1]));
4585 j++;
4586 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4587 listAddNodeTail(operations,createSortOperation(
4588 REDIS_SORT_DECR,c->argv[j+1]));
4589 j++;
4590 } else {
4591 decrRefCount(sortval);
4592 listRelease(operations);
c937aa89 4593 addReply(c,shared.syntaxerr);
ed9b544e 4594 return;
4595 }
4596 j++;
4597 }
4598
4599 /* Load the sorting vector with all the objects to sort */
4600 vectorlen = (sortval->type == REDIS_LIST) ?
4601 listLength((list*)sortval->ptr) :
3305306f 4602 dictSize((dict*)sortval->ptr);
ed9b544e 4603 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 4604 j = 0;
4605 if (sortval->type == REDIS_LIST) {
4606 list *list = sortval->ptr;
6208b3a7 4607 listNode *ln;
4608
4609 listRewind(list);
4610 while((ln = listYield(list))) {
ed9b544e 4611 robj *ele = ln->value;
4612 vector[j].obj = ele;
4613 vector[j].u.score = 0;
4614 vector[j].u.cmpobj = NULL;
ed9b544e 4615 j++;
4616 }
4617 } else {
4618 dict *set = sortval->ptr;
4619 dictIterator *di;
4620 dictEntry *setele;
4621
4622 di = dictGetIterator(set);
ed9b544e 4623 while((setele = dictNext(di)) != NULL) {
4624 vector[j].obj = dictGetEntryKey(setele);
4625 vector[j].u.score = 0;
4626 vector[j].u.cmpobj = NULL;
4627 j++;
4628 }
4629 dictReleaseIterator(di);
4630 }
4631 assert(j == vectorlen);
4632
4633 /* Now it's time to load the right scores in the sorting vector */
4634 if (dontsort == 0) {
4635 for (j = 0; j < vectorlen; j++) {
4636 if (sortby) {
4637 robj *byval;
4638
3305306f 4639 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
ed9b544e 4640 if (!byval || byval->type != REDIS_STRING) continue;
4641 if (alpha) {
942a3961 4642 if (byval->encoding == REDIS_ENCODING_RAW) {
4643 vector[j].u.cmpobj = byval;
4644 incrRefCount(byval);
4645 } else {
4646 vector[j].u.cmpobj = getDecodedObject(byval);
4647 }
ed9b544e 4648 } else {
942a3961 4649 if (byval->encoding == REDIS_ENCODING_RAW) {
4650 vector[j].u.score = strtod(byval->ptr,NULL);
4651 } else {
f1017b3f 4652 if (byval->encoding == REDIS_ENCODING_INT) {
942a3961 4653 vector[j].u.score = (long)byval->ptr;
f1017b3f 4654 } else
942a3961 4655 assert(1 != 1);
4656 }
ed9b544e 4657 }
4658 } else {
942a3961 4659 if (!alpha) {
4660 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
4661 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
4662 else {
4663 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
4664 vector[j].u.score = (long) vector[j].obj->ptr;
4665 else
4666 assert(1 != 1);
4667 }
4668 }
ed9b544e 4669 }
4670 }
4671 }
4672
4673 /* We are ready to sort the vector... perform a bit of sanity check
4674 * on the LIMIT option too. We'll use a partial version of quicksort. */
4675 start = (limit_start < 0) ? 0 : limit_start;
4676 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
4677 if (start >= vectorlen) {
4678 start = vectorlen-1;
4679 end = vectorlen-2;
4680 }
4681 if (end >= vectorlen) end = vectorlen-1;
4682
4683 if (dontsort == 0) {
4684 server.sort_desc = desc;
4685 server.sort_alpha = alpha;
4686 server.sort_bypattern = sortby ? 1 : 0;
5f5b9840 4687 if (sortby && (start != 0 || end != vectorlen-1))
4688 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
4689 else
4690 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 4691 }
4692
4693 /* Send command output to the output buffer, performing the specified
4694 * GET/DEL/INCR/DECR operations if any. */
4695 outputlen = getop ? getop*(end-start+1) : end-start+1;
c937aa89 4696 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
ed9b544e 4697 for (j = start; j <= end; j++) {
6208b3a7 4698 listNode *ln;
ed9b544e 4699 if (!getop) {
942a3961 4700 addReplyBulkLen(c,vector[j].obj);
ed9b544e 4701 addReply(c,vector[j].obj);
4702 addReply(c,shared.crlf);
4703 }
6208b3a7 4704 listRewind(operations);
4705 while((ln = listYield(operations))) {
ed9b544e 4706 redisSortOperation *sop = ln->value;
3305306f 4707 robj *val = lookupKeyByPattern(c->db,sop->pattern,
ed9b544e 4708 vector[j].obj);
4709
4710 if (sop->type == REDIS_SORT_GET) {
4711 if (!val || val->type != REDIS_STRING) {
9eb00f21 4712 addReply(c,shared.nullbulk);
ed9b544e 4713 } else {
942a3961 4714 addReplyBulkLen(c,val);
ed9b544e 4715 addReply(c,val);
4716 addReply(c,shared.crlf);
4717 }
4718 } else if (sop->type == REDIS_SORT_DEL) {
4719 /* TODO */
4720 }
ed9b544e 4721 }
4722 }
4723
4724 /* Cleanup */
4725 decrRefCount(sortval);
4726 listRelease(operations);
4727 for (j = 0; j < vectorlen; j++) {
4728 if (sortby && alpha && vector[j].u.cmpobj)
4729 decrRefCount(vector[j].u.cmpobj);
4730 }
4731 zfree(vector);
4732}
4733
4734static void infoCommand(redisClient *c) {
4735 sds info;
4736 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 4737 int j;
ed9b544e 4738
4739 info = sdscatprintf(sdsempty(),
4740 "redis_version:%s\r\n"
f1017b3f 4741 "arch_bits:%s\r\n"
a0f643ea 4742 "uptime_in_seconds:%d\r\n"
4743 "uptime_in_days:%d\r\n"
ed9b544e 4744 "connected_clients:%d\r\n"
4745 "connected_slaves:%d\r\n"
5fba9f71 4746 "used_memory:%zu\r\n"
ed9b544e 4747 "changes_since_last_save:%lld\r\n"
be2bb6b0 4748 "bgsave_in_progress:%d\r\n"
ed9b544e 4749 "last_save_time:%d\r\n"
4750 "total_connections_received:%lld\r\n"
4751 "total_commands_processed:%lld\r\n"
a0f643ea 4752 "role:%s\r\n"
ed9b544e 4753 ,REDIS_VERSION,
f1017b3f 4754 (sizeof(long) == 8) ? "64" : "32",
a0f643ea 4755 uptime,
4756 uptime/(3600*24),
ed9b544e 4757 listLength(server.clients)-listLength(server.slaves),
4758 listLength(server.slaves),
4759 server.usedmemory,
4760 server.dirty,
be2bb6b0 4761 server.bgsaveinprogress,
ed9b544e 4762 server.lastsave,
4763 server.stat_numconnections,
4764 server.stat_numcommands,
a0f643ea 4765 server.masterhost == NULL ? "master" : "slave"
ed9b544e 4766 );
a0f643ea 4767 if (server.masterhost) {
4768 info = sdscatprintf(info,
4769 "master_host:%s\r\n"
4770 "master_port:%d\r\n"
4771 "master_link_status:%s\r\n"
4772 "master_last_io_seconds_ago:%d\r\n"
4773 ,server.masterhost,
4774 server.masterport,
4775 (server.replstate == REDIS_REPL_CONNECTED) ?
4776 "up" : "down",
f72b934d 4777 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 4778 );
4779 }
c3cb078d 4780 for (j = 0; j < server.dbnum; j++) {
4781 long long keys, vkeys;
4782
4783 keys = dictSize(server.db[j].dict);
4784 vkeys = dictSize(server.db[j].expires);
4785 if (keys || vkeys) {
4786 info = sdscatprintf(info, "db%d: keys=%lld,expires=%lld\r\n",
4787 j, keys, vkeys);
4788 }
4789 }
c937aa89 4790 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
ed9b544e 4791 addReplySds(c,info);
70003d28 4792 addReply(c,shared.crlf);
ed9b544e 4793}
4794
3305306f 4795static void monitorCommand(redisClient *c) {
4796 /* ignore MONITOR if aleady slave or in monitor mode */
4797 if (c->flags & REDIS_SLAVE) return;
4798
4799 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
4800 c->slaveseldb = 0;
6b47e12e 4801 listAddNodeTail(server.monitors,c);
3305306f 4802 addReply(c,shared.ok);
4803}
4804
4805/* ================================= Expire ================================= */
4806static int removeExpire(redisDb *db, robj *key) {
4807 if (dictDelete(db->expires,key) == DICT_OK) {
4808 return 1;
4809 } else {
4810 return 0;
4811 }
4812}
4813
4814static int setExpire(redisDb *db, robj *key, time_t when) {
4815 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
4816 return 0;
4817 } else {
4818 incrRefCount(key);
4819 return 1;
4820 }
4821}
4822
bb32ede5 4823/* Return the expire time of the specified key, or -1 if no expire
4824 * is associated with this key (i.e. the key is non volatile) */
4825static time_t getExpire(redisDb *db, robj *key) {
4826 dictEntry *de;
4827
4828 /* No expire? return ASAP */
4829 if (dictSize(db->expires) == 0 ||
4830 (de = dictFind(db->expires,key)) == NULL) return -1;
4831
4832 return (time_t) dictGetEntryVal(de);
4833}
4834
3305306f 4835static int expireIfNeeded(redisDb *db, robj *key) {
4836 time_t when;
4837 dictEntry *de;
4838
4839 /* No expire? return ASAP */
4840 if (dictSize(db->expires) == 0 ||
4841 (de = dictFind(db->expires,key)) == NULL) return 0;
4842
4843 /* Lookup the expire */
4844 when = (time_t) dictGetEntryVal(de);
4845 if (time(NULL) <= when) return 0;
4846
4847 /* Delete the key */
4848 dictDelete(db->expires,key);
4849 return dictDelete(db->dict,key) == DICT_OK;
4850}
4851
4852static int deleteIfVolatile(redisDb *db, robj *key) {
4853 dictEntry *de;
4854
4855 /* No expire? return ASAP */
4856 if (dictSize(db->expires) == 0 ||
4857 (de = dictFind(db->expires,key)) == NULL) return 0;
4858
4859 /* Delete the key */
0c66a471 4860 server.dirty++;
3305306f 4861 dictDelete(db->expires,key);
4862 return dictDelete(db->dict,key) == DICT_OK;
4863}
4864
802e8373 4865static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
3305306f 4866 dictEntry *de;
3305306f 4867
802e8373 4868 de = dictFind(c->db->dict,key);
3305306f 4869 if (de == NULL) {
4870 addReply(c,shared.czero);
4871 return;
4872 }
43e5ccdf 4873 if (seconds < 0) {
4874 if (deleteKey(c->db,key)) server.dirty++;
4875 addReply(c, shared.cone);
3305306f 4876 return;
4877 } else {
4878 time_t when = time(NULL)+seconds;
802e8373 4879 if (setExpire(c->db,key,when)) {
3305306f 4880 addReply(c,shared.cone);
77423026 4881 server.dirty++;
4882 } else {
3305306f 4883 addReply(c,shared.czero);
77423026 4884 }
3305306f 4885 return;
4886 }
4887}
4888
802e8373 4889static void expireCommand(redisClient *c) {
4890 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
4891}
4892
4893static void expireatCommand(redisClient *c) {
4894 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
4895}
4896
fd88489a 4897static void ttlCommand(redisClient *c) {
4898 time_t expire;
4899 int ttl = -1;
4900
4901 expire = getExpire(c->db,c->argv[1]);
4902 if (expire != -1) {
4903 ttl = (int) (expire-time(NULL));
4904 if (ttl < 0) ttl = -1;
4905 }
4906 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
4907}
4908
f6b141c5 4909static void msetGenericCommand(redisClient *c, int nx) {
4910 int j;
4911
4912 if ((c->argc % 2) == 0) {
4913 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
4914 return;
4915 }
4916 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4917 * set nothing at all if at least one already key exists. */
4918 if (nx) {
4919 for (j = 1; j < c->argc; j += 2) {
4920 if (dictFind(c->db->dict,c->argv[j]) != NULL) {
4921 addReply(c, shared.czero);
4922 return;
4923 }
4924 }
4925 }
4926
4927 for (j = 1; j < c->argc; j += 2) {
2ed22c8b 4928 int retval;
4929
4930 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
4931 if (retval == DICT_ERR) {
4932 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
4933 incrRefCount(c->argv[j+1]);
4934 } else {
4935 incrRefCount(c->argv[j]);
4936 incrRefCount(c->argv[j+1]);
4937 }
f6b141c5 4938 removeExpire(c->db,c->argv[j]);
4939 }
4940 server.dirty += (c->argc-1)/2;
4941 addReply(c, nx ? shared.cone : shared.ok);
4942}
4943
4944static void msetCommand(redisClient *c) {
4945 msetGenericCommand(c,0);
4946}
4947
4948static void msetnxCommand(redisClient *c) {
4949 msetGenericCommand(c,1);
4950}
4951
ed9b544e 4952/* =============================== Replication ============================= */
4953
a4d1ba9a 4954static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 4955 ssize_t nwritten, ret = size;
4956 time_t start = time(NULL);
4957
4958 timeout++;
4959 while(size) {
4960 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
4961 nwritten = write(fd,ptr,size);
4962 if (nwritten == -1) return -1;
4963 ptr += nwritten;
4964 size -= nwritten;
4965 }
4966 if ((time(NULL)-start) > timeout) {
4967 errno = ETIMEDOUT;
4968 return -1;
4969 }
4970 }
4971 return ret;
4972}
4973
a4d1ba9a 4974static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 4975 ssize_t nread, totread = 0;
4976 time_t start = time(NULL);
4977
4978 timeout++;
4979 while(size) {
4980 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
4981 nread = read(fd,ptr,size);
4982 if (nread == -1) return -1;
4983 ptr += nread;
4984 size -= nread;
4985 totread += nread;
4986 }
4987 if ((time(NULL)-start) > timeout) {
4988 errno = ETIMEDOUT;
4989 return -1;
4990 }
4991 }
4992 return totread;
4993}
4994
4995static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
4996 ssize_t nread = 0;
4997
4998 size--;
4999 while(size) {
5000 char c;
5001
5002 if (syncRead(fd,&c,1,timeout) == -1) return -1;
5003 if (c == '\n') {
5004 *ptr = '\0';
5005 if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
5006 return nread;
5007 } else {
5008 *ptr++ = c;
5009 *ptr = '\0';
5010 nread++;
5011 }
5012 }
5013 return nread;
5014}
5015
5016static void syncCommand(redisClient *c) {
40d224a9 5017 /* ignore SYNC if aleady slave or in monitor mode */
5018 if (c->flags & REDIS_SLAVE) return;
5019
5020 /* SYNC can't be issued when the server has pending data to send to
5021 * the client about already issued commands. We need a fresh reply
5022 * buffer registering the differences between the BGSAVE and the current
5023 * dataset, so that we can copy to other slaves if needed. */
5024 if (listLength(c->reply) != 0) {
5025 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
5026 return;
5027 }
5028
5029 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
5030 /* Here we need to check if there is a background saving operation
5031 * in progress, or if it is required to start one */
5032 if (server.bgsaveinprogress) {
5033 /* Ok a background save is in progress. Let's check if it is a good
5034 * one for replication, i.e. if there is another slave that is
5035 * registering differences since the server forked to save */
5036 redisClient *slave;
5037 listNode *ln;
5038
6208b3a7 5039 listRewind(server.slaves);
5040 while((ln = listYield(server.slaves))) {
40d224a9 5041 slave = ln->value;
5042 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 5043 }
5044 if (ln) {
5045 /* Perfect, the server is already registering differences for
5046 * another slave. Set the right state, and copy the buffer. */
5047 listRelease(c->reply);
5048 c->reply = listDup(slave->reply);
40d224a9 5049 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5050 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
5051 } else {
5052 /* No way, we need to wait for the next BGSAVE in order to
5053 * register differences */
5054 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5055 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5056 }
5057 } else {
5058 /* Ok we don't have a BGSAVE in progress, let's start one */
5059 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
5060 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5061 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
5062 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
5063 return;
5064 }
5065 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5066 }
6208b3a7 5067 c->repldbfd = -1;
40d224a9 5068 c->flags |= REDIS_SLAVE;
5069 c->slaveseldb = 0;
6b47e12e 5070 listAddNodeTail(server.slaves,c);
40d224a9 5071 return;
5072}
5073
6208b3a7 5074static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
5075 redisClient *slave = privdata;
5076 REDIS_NOTUSED(el);
5077 REDIS_NOTUSED(mask);
5078 char buf[REDIS_IOBUF_LEN];
5079 ssize_t nwritten, buflen;
5080
5081 if (slave->repldboff == 0) {
5082 /* Write the bulk write count before to transfer the DB. In theory here
5083 * we don't know how much room there is in the output buffer of the
5084 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
5085 * operations) will never be smaller than the few bytes we need. */
5086 sds bulkcount;
5087
5088 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
5089 slave->repldbsize);
5090 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
5091 {
5092 sdsfree(bulkcount);
5093 freeClient(slave);
5094 return;
5095 }
5096 sdsfree(bulkcount);
5097 }
5098 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
5099 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
5100 if (buflen <= 0) {
5101 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
5102 (buflen == 0) ? "premature EOF" : strerror(errno));
5103 freeClient(slave);
5104 return;
5105 }
5106 if ((nwritten = write(fd,buf,buflen)) == -1) {
5107 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
5108 strerror(errno));
5109 freeClient(slave);
5110 return;
5111 }
5112 slave->repldboff += nwritten;
5113 if (slave->repldboff == slave->repldbsize) {
5114 close(slave->repldbfd);
5115 slave->repldbfd = -1;
5116 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5117 slave->replstate = REDIS_REPL_ONLINE;
5118 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
5119 sendReplyToClient, slave, NULL) == AE_ERR) {
5120 freeClient(slave);
5121 return;
5122 }
5123 addReplySds(slave,sdsempty());
5124 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
5125 }
5126}
ed9b544e 5127
a3b21203 5128/* This function is called at the end of every backgrond saving.
5129 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
5130 * otherwise REDIS_ERR is passed to the function.
5131 *
5132 * The goal of this function is to handle slaves waiting for a successful
5133 * background saving in order to perform non-blocking synchronization. */
5134static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 5135 listNode *ln;
5136 int startbgsave = 0;
ed9b544e 5137
6208b3a7 5138 listRewind(server.slaves);
5139 while((ln = listYield(server.slaves))) {
5140 redisClient *slave = ln->value;
ed9b544e 5141
6208b3a7 5142 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
5143 startbgsave = 1;
5144 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5145 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 5146 struct redis_stat buf;
6208b3a7 5147
5148 if (bgsaveerr != REDIS_OK) {
5149 freeClient(slave);
5150 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
5151 continue;
5152 }
5153 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 5154 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 5155 freeClient(slave);
5156 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
5157 continue;
5158 }
5159 slave->repldboff = 0;
5160 slave->repldbsize = buf.st_size;
5161 slave->replstate = REDIS_REPL_SEND_BULK;
5162 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5163 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave, NULL) == AE_ERR) {
5164 freeClient(slave);
5165 continue;
5166 }
5167 }
ed9b544e 5168 }
6208b3a7 5169 if (startbgsave) {
5170 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5171 listRewind(server.slaves);
5172 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
5173 while((ln = listYield(server.slaves))) {
5174 redisClient *slave = ln->value;
ed9b544e 5175
6208b3a7 5176 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
5177 freeClient(slave);
5178 }
5179 }
5180 }
ed9b544e 5181}
5182
5183static int syncWithMaster(void) {
5184 char buf[1024], tmpfile[256];
5185 int dumpsize;
5186 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
5187 int dfd;
5188
5189 if (fd == -1) {
5190 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
5191 strerror(errno));
5192 return REDIS_ERR;
5193 }
5194 /* Issue the SYNC command */
5195 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
5196 close(fd);
5197 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
5198 strerror(errno));
5199 return REDIS_ERR;
5200 }
5201 /* Read the bulk write count */
8c4d91fc 5202 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 5203 close(fd);
5204 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
5205 strerror(errno));
5206 return REDIS_ERR;
5207 }
4aa701c1 5208 if (buf[0] != '$') {
5209 close(fd);
5210 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
5211 return REDIS_ERR;
5212 }
c937aa89 5213 dumpsize = atoi(buf+1);
ed9b544e 5214 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
5215 /* Read the bulk write data on a temp file */
5216 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
5217 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
5218 if (dfd == -1) {
5219 close(fd);
5220 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
5221 return REDIS_ERR;
5222 }
5223 while(dumpsize) {
5224 int nread, nwritten;
5225
5226 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
5227 if (nread == -1) {
5228 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
5229 strerror(errno));
5230 close(fd);
5231 close(dfd);
5232 return REDIS_ERR;
5233 }
5234 nwritten = write(dfd,buf,nread);
5235 if (nwritten == -1) {
5236 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
5237 close(fd);
5238 close(dfd);
5239 return REDIS_ERR;
5240 }
5241 dumpsize -= nread;
5242 }
5243 close(dfd);
5244 if (rename(tmpfile,server.dbfilename) == -1) {
5245 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
5246 unlink(tmpfile);
5247 close(fd);
5248 return REDIS_ERR;
5249 }
5250 emptyDb();
f78fd11b 5251 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 5252 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
5253 close(fd);
5254 return REDIS_ERR;
5255 }
5256 server.master = createClient(fd);
5257 server.master->flags |= REDIS_MASTER;
5258 server.replstate = REDIS_REPL_CONNECTED;
5259 return REDIS_OK;
5260}
5261
321b0e13 5262static void slaveofCommand(redisClient *c) {
5263 if (!strcasecmp(c->argv[1]->ptr,"no") &&
5264 !strcasecmp(c->argv[2]->ptr,"one")) {
5265 if (server.masterhost) {
5266 sdsfree(server.masterhost);
5267 server.masterhost = NULL;
5268 if (server.master) freeClient(server.master);
5269 server.replstate = REDIS_REPL_NONE;
5270 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
5271 }
5272 } else {
5273 sdsfree(server.masterhost);
5274 server.masterhost = sdsdup(c->argv[1]->ptr);
5275 server.masterport = atoi(c->argv[2]->ptr);
5276 if (server.master) freeClient(server.master);
5277 server.replstate = REDIS_REPL_CONNECT;
5278 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
5279 server.masterhost, server.masterport);
5280 }
5281 addReply(c,shared.ok);
5282}
5283
3fd78bcd 5284/* ============================ Maxmemory directive ======================== */
5285
5286/* This function gets called when 'maxmemory' is set on the config file to limit
5287 * the max memory used by the server, and we are out of memory.
5288 * This function will try to, in order:
5289 *
5290 * - Free objects from the free list
5291 * - Try to remove keys with an EXPIRE set
5292 *
5293 * It is not possible to free enough memory to reach used-memory < maxmemory
5294 * the server will start refusing commands that will enlarge even more the
5295 * memory usage.
5296 */
5297static void freeMemoryIfNeeded(void) {
5298 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
5299 if (listLength(server.objfreelist)) {
5300 robj *o;
5301
5302 listNode *head = listFirst(server.objfreelist);
5303 o = listNodeValue(head);
5304 listDelNode(server.objfreelist,head);
5305 zfree(o);
5306 } else {
5307 int j, k, freed = 0;
5308
5309 for (j = 0; j < server.dbnum; j++) {
5310 int minttl = -1;
5311 robj *minkey = NULL;
5312 struct dictEntry *de;
5313
5314 if (dictSize(server.db[j].expires)) {
5315 freed = 1;
5316 /* From a sample of three keys drop the one nearest to
5317 * the natural expire */
5318 for (k = 0; k < 3; k++) {
5319 time_t t;
5320
5321 de = dictGetRandomKey(server.db[j].expires);
5322 t = (time_t) dictGetEntryVal(de);
5323 if (minttl == -1 || t < minttl) {
5324 minkey = dictGetEntryKey(de);
5325 minttl = t;
5326 }
5327 }
5328 deleteKey(server.db+j,minkey);
5329 }
5330 }
5331 if (!freed) return; /* nothing to free... */
5332 }
5333 }
5334}
5335
7f957c92 5336/* ================================= Debugging ============================== */
5337
5338static void debugCommand(redisClient *c) {
5339 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
5340 *((char*)-1) = 'x';
333298da 5341 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
5342 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
5343 robj *key, *val;
5344
5345 if (!de) {
5346 addReply(c,shared.nokeyerr);
5347 return;
5348 }
5349 key = dictGetEntryKey(de);
5350 val = dictGetEntryVal(de);
5351 addReplySds(c,sdscatprintf(sdsempty(),
942a3961 5352 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
5353 key, key->refcount, val, val->refcount, val->encoding));
7f957c92 5354 } else {
333298da 5355 addReplySds(c,sdsnew(
5356 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>]\r\n"));
7f957c92 5357 }
5358}
56906eef 5359
d76412d1 5360#ifdef HAVE_BACKTRACE
56906eef 5361static struct redisFunctionSym symsTable[] = {
724a51b1 5362{"compareStringObjects", (unsigned long)compareStringObjects},
5363{"isStringRepresentableAsLong", (unsigned long)isStringRepresentableAsLong},
942a3961 5364{"dictEncObjKeyCompare", (unsigned long)dictEncObjKeyCompare},
5365{"dictEncObjHash", (unsigned long)dictEncObjHash},
5366{"incrDecrCommand", (unsigned long)incrDecrCommand},
56906eef 5367{"freeStringObject", (unsigned long)freeStringObject},
5368{"freeListObject", (unsigned long)freeListObject},
5369{"freeSetObject", (unsigned long)freeSetObject},
5370{"decrRefCount", (unsigned long)decrRefCount},
5371{"createObject", (unsigned long)createObject},
5372{"freeClient", (unsigned long)freeClient},
5373{"rdbLoad", (unsigned long)rdbLoad},
942a3961 5374{"rdbSaveStringObject", (unsigned long)rdbSaveStringObject},
5375{"rdbSaveStringObjectRaw", (unsigned long)rdbSaveStringObjectRaw},
56906eef 5376{"addReply", (unsigned long)addReply},
5377{"addReplySds", (unsigned long)addReplySds},
5378{"incrRefCount", (unsigned long)incrRefCount},
5379{"rdbSaveBackground", (unsigned long)rdbSaveBackground},
5380{"createStringObject", (unsigned long)createStringObject},
5381{"replicationFeedSlaves", (unsigned long)replicationFeedSlaves},
5382{"syncWithMaster", (unsigned long)syncWithMaster},
5383{"tryObjectSharing", (unsigned long)tryObjectSharing},
942a3961 5384{"tryObjectEncoding", (unsigned long)tryObjectEncoding},
5385{"getDecodedObject", (unsigned long)getDecodedObject},
56906eef 5386{"removeExpire", (unsigned long)removeExpire},
5387{"expireIfNeeded", (unsigned long)expireIfNeeded},
5388{"deleteIfVolatile", (unsigned long)deleteIfVolatile},
5389{"deleteKey", (unsigned long)deleteKey},
5390{"getExpire", (unsigned long)getExpire},
5391{"setExpire", (unsigned long)setExpire},
a3b21203 5392{"updateSlavesWaitingBgsave", (unsigned long)updateSlavesWaitingBgsave},
56906eef 5393{"freeMemoryIfNeeded", (unsigned long)freeMemoryIfNeeded},
5394{"authCommand", (unsigned long)authCommand},
5395{"pingCommand", (unsigned long)pingCommand},
5396{"echoCommand", (unsigned long)echoCommand},
5397{"setCommand", (unsigned long)setCommand},
5398{"setnxCommand", (unsigned long)setnxCommand},
5399{"getCommand", (unsigned long)getCommand},
5400{"delCommand", (unsigned long)delCommand},
5401{"existsCommand", (unsigned long)existsCommand},
5402{"incrCommand", (unsigned long)incrCommand},
5403{"decrCommand", (unsigned long)decrCommand},
5404{"incrbyCommand", (unsigned long)incrbyCommand},
5405{"decrbyCommand", (unsigned long)decrbyCommand},
5406{"selectCommand", (unsigned long)selectCommand},
5407{"randomkeyCommand", (unsigned long)randomkeyCommand},
5408{"keysCommand", (unsigned long)keysCommand},
5409{"dbsizeCommand", (unsigned long)dbsizeCommand},
5410{"lastsaveCommand", (unsigned long)lastsaveCommand},
5411{"saveCommand", (unsigned long)saveCommand},
5412{"bgsaveCommand", (unsigned long)bgsaveCommand},
5413{"shutdownCommand", (unsigned long)shutdownCommand},
5414{"moveCommand", (unsigned long)moveCommand},
5415{"renameCommand", (unsigned long)renameCommand},
5416{"renamenxCommand", (unsigned long)renamenxCommand},
5417{"lpushCommand", (unsigned long)lpushCommand},
5418{"rpushCommand", (unsigned long)rpushCommand},
5419{"lpopCommand", (unsigned long)lpopCommand},
5420{"rpopCommand", (unsigned long)rpopCommand},
5421{"llenCommand", (unsigned long)llenCommand},
5422{"lindexCommand", (unsigned long)lindexCommand},
5423{"lrangeCommand", (unsigned long)lrangeCommand},
5424{"ltrimCommand", (unsigned long)ltrimCommand},
5425{"typeCommand", (unsigned long)typeCommand},
5426{"lsetCommand", (unsigned long)lsetCommand},
5427{"saddCommand", (unsigned long)saddCommand},
5428{"sremCommand", (unsigned long)sremCommand},
5429{"smoveCommand", (unsigned long)smoveCommand},
5430{"sismemberCommand", (unsigned long)sismemberCommand},
5431{"scardCommand", (unsigned long)scardCommand},
12fea928 5432{"spopCommand", (unsigned long)spopCommand},
2abb95a9 5433{"srandmemberCommand", (unsigned long)srandmemberCommand},
56906eef 5434{"sinterCommand", (unsigned long)sinterCommand},
5435{"sinterstoreCommand", (unsigned long)sinterstoreCommand},
5436{"sunionCommand", (unsigned long)sunionCommand},
5437{"sunionstoreCommand", (unsigned long)sunionstoreCommand},
5438{"sdiffCommand", (unsigned long)sdiffCommand},
5439{"sdiffstoreCommand", (unsigned long)sdiffstoreCommand},
5440{"syncCommand", (unsigned long)syncCommand},
5441{"flushdbCommand", (unsigned long)flushdbCommand},
5442{"flushallCommand", (unsigned long)flushallCommand},
5443{"sortCommand", (unsigned long)sortCommand},
5444{"lremCommand", (unsigned long)lremCommand},
5445{"infoCommand", (unsigned long)infoCommand},
5446{"mgetCommand", (unsigned long)mgetCommand},
5447{"monitorCommand", (unsigned long)monitorCommand},
5448{"expireCommand", (unsigned long)expireCommand},
802e8373 5449{"expireatCommand", (unsigned long)expireatCommand},
f6b141c5 5450{"getsetCommand", (unsigned long)getsetCommand},
56906eef 5451{"ttlCommand", (unsigned long)ttlCommand},
5452{"slaveofCommand", (unsigned long)slaveofCommand},
5453{"debugCommand", (unsigned long)debugCommand},
5454{"processCommand", (unsigned long)processCommand},
5455{"setupSigSegvAction", (unsigned long)setupSigSegvAction},
56906eef 5456{"readQueryFromClient", (unsigned long)readQueryFromClient},
a3b21203 5457{"rdbRemoveTempFile", (unsigned long)rdbRemoveTempFile},
f6b141c5 5458{"msetGenericCommand", (unsigned long)msetGenericCommand},
5459{"msetCommand", (unsigned long)msetCommand},
5460{"msetnxCommand", (unsigned long)msetnxCommand},
ace4ee54 5461{"zslCreateNode", (unsigned long)zslCreateNode},
5462{"zslCreate", (unsigned long)zslCreate},
5463{"zslFreeNode",(unsigned long)zslFreeNode},
5464{"zslFree",(unsigned long)zslFree},
5465{"zslRandomLevel",(unsigned long)zslRandomLevel},
5466{"zslInsert",(unsigned long)zslInsert},
5467{"zslDelete",(unsigned long)zslDelete},
5468{"createZsetObject",(unsigned long)createZsetObject},
5469{"zaddCommand",(unsigned long)zaddCommand},
e3870fab 5470{"zrangeGenericCommand",(unsigned long)zrangeGenericCommand},
cc812361 5471{"zrangeCommand",(unsigned long)zrangeCommand},
e3870fab 5472{"zrevrangeCommand",(unsigned long)zrevrangeCommand},
1b7106e7 5473{"zremCommand",(unsigned long)zremCommand},
2b59cfdf 5474{"rdbSaveDoubleValue",(unsigned long)rdbSaveDoubleValue},
5475{"rdbLoadDoubleValue",(unsigned long)rdbLoadDoubleValue},
4e141d5a 5476{"feedAppendOnlyFile",(unsigned long)feedAppendOnlyFile},
56906eef 5477{NULL,0}
5478};
5479
5480/* This function try to convert a pointer into a function name. It's used in
5481 * oreder to provide a backtrace under segmentation fault that's able to
5482 * display functions declared as static (otherwise the backtrace is useless). */
5483static char *findFuncName(void *pointer, unsigned long *offset){
5484 int i, ret = -1;
5485 unsigned long off, minoff = 0;
5486
5487 /* Try to match against the Symbol with the smallest offset */
5488 for (i=0; symsTable[i].pointer; i++) {
5489 unsigned long lp = (unsigned long) pointer;
5490
5491 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
5492 off=lp-symsTable[i].pointer;
5493 if (ret < 0 || off < minoff) {
5494 minoff=off;
5495 ret=i;
5496 }
5497 }
5498 }
5499 if (ret == -1) return NULL;
5500 *offset = minoff;
5501 return symsTable[ret].name;
5502}
5503
5504static void *getMcontextEip(ucontext_t *uc) {
5505#if defined(__FreeBSD__)
5506 return (void*) uc->uc_mcontext.mc_eip;
5507#elif defined(__dietlibc__)
5508 return (void*) uc->uc_mcontext.eip;
06db1f50 5509#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
56906eef 5510 return (void*) uc->uc_mcontext->__ss.__eip;
06db1f50 5511#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
cb7e07cc 5512 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
06db1f50 5513 return (void*) uc->uc_mcontext->__ss.__rip;
cbc59b38 5514 #else
5515 return (void*) uc->uc_mcontext->__ss.__eip;
5516 #endif
b91cf5ef 5517#elif defined(__i386__) || defined(__X86_64__) /* Linux x86 */
56906eef 5518 return (void*) uc->uc_mcontext.gregs[REG_EIP];
b91cf5ef 5519#elif defined(__ia64__) /* Linux IA64 */
5520 return (void*) uc->uc_mcontext.sc_ip;
5521#else
5522 return NULL;
56906eef 5523#endif
5524}
5525
5526static void segvHandler(int sig, siginfo_t *info, void *secret) {
5527 void *trace[100];
5528 char **messages = NULL;
5529 int i, trace_size = 0;
5530 unsigned long offset=0;
5531 time_t uptime = time(NULL)-server.stat_starttime;
5532 ucontext_t *uc = (ucontext_t*) secret;
5533 REDIS_NOTUSED(info);
5534
5535 redisLog(REDIS_WARNING,
5536 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
5537 redisLog(REDIS_WARNING, "%s", sdscatprintf(sdsempty(),
433cc893 5538 "redis_version:%s; "
56906eef 5539 "uptime_in_seconds:%d; "
433cc893 5540 "connected_clients:%d; "
5541 "connected_slaves:%d; "
5542 "used_memory:%zu; "
5543 "changes_since_last_save:%lld; "
5544 "bgsave_in_progress:%d; "
5545 "last_save_time:%d; "
5546 "total_connections_received:%lld; "
5547 "total_commands_processed:%lld; "
5548 "role:%s;"
5549 ,REDIS_VERSION,
5550 uptime,
5551 listLength(server.clients)-listLength(server.slaves),
5552 listLength(server.slaves),
5553 server.usedmemory,
5554 server.dirty,
5555 server.bgsaveinprogress,
5556 server.lastsave,
5557 server.stat_numconnections,
5558 server.stat_numcommands,
5559 server.masterhost == NULL ? "master" : "slave"
5560 ));
56906eef 5561
5562 trace_size = backtrace(trace, 100);
de96dbfe 5563 /* overwrite sigaction with caller's address */
b91cf5ef 5564 if (getMcontextEip(uc) != NULL) {
5565 trace[1] = getMcontextEip(uc);
5566 }
56906eef 5567 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 5568
d76412d1 5569 for (i=1; i<trace_size; ++i) {
56906eef 5570 char *fn = findFuncName(trace[i], &offset), *p;
5571
5572 p = strchr(messages[i],'+');
5573 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
5574 redisLog(REDIS_WARNING,"%s", messages[i]);
5575 } else {
5576 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
5577 }
5578 }
5579 free(messages);
5580 exit(0);
fe3bbfbe 5581}
56906eef 5582
5583static void setupSigSegvAction(void) {
5584 struct sigaction act;
5585
5586 sigemptyset (&act.sa_mask);
5587 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
5588 * is used. Otherwise, sa_handler is used */
5589 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
5590 act.sa_sigaction = segvHandler;
5591 sigaction (SIGSEGV, &act, NULL);
5592 sigaction (SIGBUS, &act, NULL);
12fea928 5593 sigaction (SIGFPE, &act, NULL);
5594 sigaction (SIGILL, &act, NULL);
5595 sigaction (SIGBUS, &act, NULL);
e65fdc78 5596 return;
56906eef 5597}
d76412d1 5598#else /* HAVE_BACKTRACE */
5599static void setupSigSegvAction(void) {
5600}
5601#endif /* HAVE_BACKTRACE */
e65fdc78 5602
ed9b544e 5603/* =================================== Main! ================================ */
5604
0bc03378 5605#ifdef __linux__
5606int linuxOvercommitMemoryValue(void) {
5607 FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
5608 char buf[64];
5609
5610 if (!fp) return -1;
5611 if (fgets(buf,64,fp) == NULL) {
5612 fclose(fp);
5613 return -1;
5614 }
5615 fclose(fp);
5616
5617 return atoi(buf);
5618}
5619
5620void linuxOvercommitMemoryWarning(void) {
5621 if (linuxOvercommitMemoryValue() == 0) {
b91cf5ef 5622 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
0bc03378 5623 }
5624}
5625#endif /* __linux__ */
5626
ed9b544e 5627static void daemonize(void) {
5628 int fd;
5629 FILE *fp;
5630
5631 if (fork() != 0) exit(0); /* parent exits */
5632 setsid(); /* create a new session */
5633
5634 /* Every output goes to /dev/null. If Redis is daemonized but
5635 * the 'logfile' is set to 'stdout' in the configuration file
5636 * it will not log at all. */
5637 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
5638 dup2(fd, STDIN_FILENO);
5639 dup2(fd, STDOUT_FILENO);
5640 dup2(fd, STDERR_FILENO);
5641 if (fd > STDERR_FILENO) close(fd);
5642 }
5643 /* Try to write the pid file */
ed329fcf 5644 fp = fopen(server.pidfile,"w");
ed9b544e 5645 if (fp) {
5646 fprintf(fp,"%d\n",getpid());
5647 fclose(fp);
5648 }
5649}
5650
5651int main(int argc, char **argv) {
5652 initServerConfig();
5653 if (argc == 2) {
5654 ResetServerSaveParams();
5655 loadServerConfig(argv[1]);
5656 } else if (argc > 2) {
5657 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
5658 exit(1);
8cca9b82 5659 } else {
5660 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
ed9b544e 5661 }
5662 initServer();
5663 if (server.daemonize) daemonize();
5664 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
b91cf5ef 5665#ifdef __linux__
5666 linuxOvercommitMemoryWarning();
5667#endif
f78fd11b 5668 if (rdbLoad(server.dbfilename) == REDIS_OK)
ed9b544e 5669 redisLog(REDIS_NOTICE,"DB loaded from disk");
5670 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
5671 acceptHandler, NULL, NULL) == AE_ERR) oom("creating file event");
46713f83 5672 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
ed9b544e 5673 aeMain(server.el);
5674 aeDeleteEventLoop(server.el);
5675 return 0;
5676}