]> git.saurik.com Git - redis.git/blame - redis.c
VM now swaps objects out while loading datasets not fitting into vm-max-memory bytes...
[redis.git] / redis.c
CommitLineData
ed9b544e 1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
5dc70bff 30#define REDIS_VERSION "1.3.2"
23d4709d 31
32#include "fmacros.h"
fbf9bcdb 33#include "config.h"
ed9b544e 34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <time.h>
39#include <unistd.h>
c9468bcf 40#define __USE_POSIX199309
ed9b544e 41#include <signal.h>
fbf9bcdb 42
43#ifdef HAVE_BACKTRACE
c9468bcf 44#include <execinfo.h>
45#include <ucontext.h>
fbf9bcdb 46#endif /* HAVE_BACKTRACE */
47
ed9b544e 48#include <sys/wait.h>
49#include <errno.h>
50#include <assert.h>
51#include <ctype.h>
52#include <stdarg.h>
53#include <inttypes.h>
54#include <arpa/inet.h>
55#include <sys/stat.h>
56#include <fcntl.h>
57#include <sys/time.h>
58#include <sys/resource.h>
2895e862 59#include <sys/uio.h>
f78fd11b 60#include <limits.h>
a7866db6 61#include <math.h>
0bc1b2f6 62
63#if defined(__sun)
5043dff3 64#include "solarisfixes.h"
65#endif
ed9b544e 66
c9468bcf 67#include "redis.h"
ed9b544e 68#include "ae.h" /* Event driven programming library */
69#include "sds.h" /* Dynamic safe strings */
70#include "anet.h" /* Networking the easy way */
71#include "dict.h" /* Hash tables */
72#include "adlist.h" /* Linked lists */
73#include "zmalloc.h" /* total memory usage aware version of malloc/free */
5f5b9840 74#include "lzf.h" /* LZF compression library */
75#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
ed9b544e 76
77/* Error codes */
78#define REDIS_OK 0
79#define REDIS_ERR -1
80
81/* Static server configuration */
82#define REDIS_SERVERPORT 6379 /* TCP port */
83#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
6208b3a7 84#define REDIS_IOBUF_LEN 1024
ed9b544e 85#define REDIS_LOADBUF_LEN 1024
93ea3759 86#define REDIS_STATIC_ARGS 4
ed9b544e 87#define REDIS_DEFAULT_DBNUM 16
88#define REDIS_CONFIGLINE_MAX 1024
89#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
90#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
94754ccc 91#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
6f376729 92#define REDIS_MAX_WRITE_PER_EVENT (1024*64)
2895e862 93#define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
94
95/* If more than REDIS_WRITEV_THRESHOLD write packets are pending use writev */
96#define REDIS_WRITEV_THRESHOLD 3
97/* Max number of iovecs used for each writev call */
98#define REDIS_WRITEV_IOVEC_COUNT 256
ed9b544e 99
100/* Hash table parameters */
101#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
ed9b544e 102
103/* Command flags */
3fd78bcd 104#define REDIS_CMD_BULK 1 /* Bulk write command */
105#define REDIS_CMD_INLINE 2 /* Inline command */
106/* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
107 this flag will return an error when the 'maxmemory' option is set in the
108 config file and the server is using more than maxmemory bytes of memory.
109 In short these commands are denied on low memory conditions. */
110#define REDIS_CMD_DENYOOM 4
ed9b544e 111
112/* Object types */
113#define REDIS_STRING 0
114#define REDIS_LIST 1
115#define REDIS_SET 2
1812e024 116#define REDIS_ZSET 3
117#define REDIS_HASH 4
f78fd11b 118
942a3961 119/* Objects encoding */
120#define REDIS_ENCODING_RAW 0 /* Raw representation */
121#define REDIS_ENCODING_INT 1 /* Encoded as integer */
122
f78fd11b 123/* Object types only used for dumping to disk */
bb32ede5 124#define REDIS_EXPIRETIME 253
ed9b544e 125#define REDIS_SELECTDB 254
126#define REDIS_EOF 255
127
f78fd11b 128/* Defines related to the dump file format. To store 32 bits lengths for short
129 * keys requires a lot of space, so we check the most significant 2 bits of
130 * the first byte to interpret the length:
131 *
132 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
133 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
134 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
a4d1ba9a 135 * 11|000000 this means: specially encoded object will follow. The six bits
136 * number specify the kind of object that follows.
137 * See the REDIS_RDB_ENC_* defines.
f78fd11b 138 *
10c43610 139 * Lengths up to 63 are stored using a single byte, most DB keys, and many
140 * values, will fit inside. */
f78fd11b 141#define REDIS_RDB_6BITLEN 0
142#define REDIS_RDB_14BITLEN 1
143#define REDIS_RDB_32BITLEN 2
17be1a4a 144#define REDIS_RDB_ENCVAL 3
f78fd11b 145#define REDIS_RDB_LENERR UINT_MAX
146
a4d1ba9a 147/* When a length of a string object stored on disk has the first two bits
148 * set, the remaining six bits specify a special encoding for the object
149 * accordingly to the following defines: */
150#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
151#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
152#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
774e3047 153#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
a4d1ba9a 154
75680a3c 155/* Virtual memory object->where field. */
156#define REDIS_VM_MEMORY 0 /* The object is on memory */
157#define REDIS_VM_SWAPPED 1 /* The object is on disk */
158#define REDIS_VM_SWAPPING 2 /* Redis is swapping this object on disk */
159#define REDIS_VM_LOADING 3 /* Redis is loading this object from disk */
160
06224fec 161/* Virtual memory static configuration stuff.
162 * Check vmFindContiguousPages() to know more about this magic numbers. */
163#define REDIS_VM_MAX_NEAR_PAGES 65536
164#define REDIS_VM_MAX_RANDOM_JUMP 4096
165
ed9b544e 166/* Client flags */
167#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
168#define REDIS_SLAVE 2 /* This client is a slave server */
169#define REDIS_MASTER 4 /* This client is a master server */
87eca727 170#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
6e469882 171#define REDIS_MULTI 16 /* This client is in a MULTI context */
4409877e 172#define REDIS_BLOCKED 32 /* The client is waiting in a blocking operation */
ed9b544e 173
40d224a9 174/* Slave replication state - slave side */
ed9b544e 175#define REDIS_REPL_NONE 0 /* No active replication */
176#define REDIS_REPL_CONNECT 1 /* Must connect to master */
177#define REDIS_REPL_CONNECTED 2 /* Connected to master */
178
40d224a9 179/* Slave replication state - from the point of view of master
180 * Note that in SEND_BULK and ONLINE state the slave receives new updates
181 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
182 * to start the next background saving in order to send updates to it. */
183#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
184#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
185#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
186#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
187
ed9b544e 188/* List related stuff */
189#define REDIS_HEAD 0
190#define REDIS_TAIL 1
191
192/* Sort operations */
193#define REDIS_SORT_GET 0
443c6409 194#define REDIS_SORT_ASC 1
195#define REDIS_SORT_DESC 2
ed9b544e 196#define REDIS_SORTKEY_MAX 1024
197
198/* Log levels */
199#define REDIS_DEBUG 0
f870935d 200#define REDIS_VERBOSE 1
201#define REDIS_NOTICE 2
202#define REDIS_WARNING 3
ed9b544e 203
204/* Anti-warning macro... */
205#define REDIS_NOTUSED(V) ((void) V)
206
6b47e12e 207#define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
208#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
ed9b544e 209
48f0308a 210/* Append only defines */
211#define APPENDFSYNC_NO 0
212#define APPENDFSYNC_ALWAYS 1
213#define APPENDFSYNC_EVERYSEC 2
214
dfc5e96c 215/* We can print the stacktrace, so our assert is defined this way: */
216#define redisAssert(_e) ((_e)?(void)0 : (_redisAssert(#_e),exit(1)))
217static void _redisAssert(char *estr);
218
ed9b544e 219/*================================= Data types ============================== */
220
221/* A redis object, that is a type able to hold a string / list / set */
75680a3c 222
223/* The VM object structure: where a value lives in the swap file and when
 * it was last accessed.
 * NOTE(review): the trailing `vm` declarator below also defines a file-scope
 * object named `vm` of this type. Nothing in the visible part of the file
 * uses it, so it looks accidental -- confirm before removing. */
224struct redisObjectVM {
3a66edc7 225 off_t page; /* the page at which the object is stored on disk */
226 off_t usedpages; /* number of pages used on disk */
227 time_t atime; /* Last access time */
75680a3c 228} vm;
229
230/* The actual Redis Object */
ed9b544e 231typedef struct redisObject {
ed9b544e 232 void *ptr; /* Payload; set from the pointer given to initStaticStringObject() */
942a3961 233 unsigned char type; /* One of the "Object types" defines (REDIS_STRING, ...) */
234 unsigned char encoding; /* One of the REDIS_ENCODING_* defines */
d894161b 235 unsigned char storage; /* If this object is a key, where is the value?
236 * REDIS_VM_MEMORY, REDIS_VM_SWAPPED, ... */
237 unsigned char vtype; /* If this object is a key, and value is swapped out,
238 * this is the type of the swapped out object. */
ed9b544e 239 int refcount; /* Reference count; see incrRefCount() / decrRefCount() */
75680a3c 240 /* VM fields, these are only allocated if VM is active, otherwise the
241 * object allocation function will just allocate
242 * sizeof(redisObject) minus sizeof(redisObjectVM), so using
243 * Redis without VM active will not have any overhead. */
244 struct redisObjectVM vm;
ed9b544e 245} robj;
246
/* Macro used to initialize a Redis object allocated on the stack as a raw
 * string with a reference count of 1 and 'ptr' set to _ptr. The 'storage'
 * field is written only when server.vm_enabled is set; the other fields are
 * left untouched.
 *
 * Note that this macro is kept near the structure definition to make sure
 * we'll update it when the structure is changed, to avoid bugs like
 * bug #85 introduced exactly in this way.
 *
 * Both parameters are parenthesized in the expansion, so lvalue expressions
 * such as *p or arr[i] can be passed as _var. _var is evaluated several
 * times: do not pass expressions with side effects.
 *
 * NOTE(review): the expansion ends with its own ';', so the macro cannot be
 * used as the unbraced body of an if that has an else branch. The semicolon
 * is kept because callers outside this part of the file may rely on it. */
#define initStaticStringObject(_var,_ptr) do { \
    (_var).refcount = 1; \
    (_var).type = REDIS_STRING; \
    (_var).encoding = REDIS_ENCODING_RAW; \
    (_var).ptr = (_ptr); \
    if (server.vm_enabled) (_var).storage = REDIS_VM_MEMORY; \
} while(0);
258
/* One database: its keyspace plus the side dictionaries kept for it. */
3305306f 259typedef struct redisDb {
4409877e 260 dict *dict; /* The keyspace for this DB */
261 dict *expires; /* Timeout of keys with a timeout set */
262 dict *blockingkeys; /* Keys with clients waiting for data (BLPOP) */
3305306f 263 int id; /* presumably the index of this DB -- TODO confirm */
264} redisDb;
265
6e469882 266/* Client MULTI/EXEC state */
/* One entry of multiState.commands: an argument vector, its length and the
 * command table entry. Presumably filled by queueMultiCommand() -- confirm. */
267typedef struct multiCmd {
268 robj **argv;
269 int argc;
270 struct redisCommand *cmd;
271} multiCmd;
272
/* The MULTI command list of one client; embedded in redisClient as 'mstate'. */
273typedef struct multiState {
274 multiCmd *commands; /* Array of MULTI commands */
275 int count; /* Total number of MULTI commands */
276} multiState;
277
ed9b544e 278/* With multiplexing we need to keep per-client state.
279 * Clients are kept in a linked list. */
280typedef struct redisClient {
281 int fd;
3305306f 282 redisDb *db;
ed9b544e 283 int dictid;
284 sds querybuf;
e8a74421 285 robj **argv, **mbargv;
286 int argc, mbargc;
40d224a9 287 int bulklen; /* bulk read len. -1 if not in bulk read mode */
e8a74421 288 int multibulk; /* multi bulk command format active */
ed9b544e 289 list *reply;
290 int sentlen;
291 time_t lastinteraction; /* time of the last interaction, used for timeout */
40d224a9 292 int flags; /* Bitwise OR of the "Client flags" defines: REDIS_CLOSE,
 * REDIS_SLAVE, REDIS_MASTER, REDIS_MONITOR, */
6e469882 293 /* REDIS_MULTI, REDIS_BLOCKED */
40d224a9 294 int slaveseldb; /* slave selected db, if this client is a slave */
295 int authenticated; /* when requirepass is non-NULL */
296 int replstate; /* replication state if this is a slave */
297 int repldbfd; /* replication DB file descriptor */
6e469882 298 long repldboff; /* replication DB file offset */
40d224a9 299 off_t repldbsize; /* replication DB file size */
6e469882 300 multiState mstate; /* MULTI/EXEC state */
b177fd30 301 robj **blockingkeys; /* The keys we are waiting on to terminate a blocking
4409877e 302 * operation such as BLPOP. Otherwise NULL. */
b177fd30 303 int blockingkeysnum; /* Number of blocking keys */
4409877e 304 time_t blockingto; /* Blocking operation timeout. If UNIX current time
305 * is >= blockingto then the operation timed out. */
ed9b544e 306} redisClient;
307
/* One element of the server.saveparams array (server.saveparamslen entries).
 * NOTE(review): presumably one "save <seconds> <changes>" rule -- confirm
 * against the configuration loader. */
308struct saveparam {
309 time_t seconds;
310 int changes;
311};
312
313/* Global server state structure */
314struct redisServer {
315 int port;
316 int fd;
3305306f 317 redisDb *db;
4409877e 318 dict *sharingpool; /* Pool used for object sharing */
10c43610 319 unsigned int sharingpoolsize;
ed9b544e 320 long long dirty; /* changes to DB from the last save */
321 list *clients;
87eca727 322 list *slaves, *monitors;
ed9b544e 323 char neterr[ANET_ERR_LEN];
324 aeEventLoop *el;
325 int cronloops; /* number of times the cron function run */
326 list *objfreelist; /* A list of freed objects to avoid malloc() */
327 time_t lastsave; /* Unix time of last successful save */
5fba9f71 328 size_t usedmemory; /* Used memory in megabytes */
ed9b544e 329 /* Fields used only for stats */
330 time_t stat_starttime; /* server start time */
331 long long stat_numcommands; /* number of processed commands */
332 long long stat_numconnections; /* number of connections received */
333 /* Configuration */
334 int verbosity; /* redisLog() prints only messages with level >= verbosity */
335 int glueoutputbuf;
336 int maxidletime;
337 int dbnum;
338 int daemonize;
44b38ef4 339 int appendonly;
48f0308a 340 int appendfsync;
341 time_t lastfsync;
44b38ef4 342 int appendfd;
343 int appendseldb;
ed329fcf 344 char *pidfile;
9f3c422c 345 pid_t bgsavechildpid;
9d65a1bb 346 pid_t bgrewritechildpid;
347 sds bgrewritebuf; /* buffer taken by parent during append only rewrite */
ed9b544e 348 struct saveparam *saveparams;
349 int saveparamslen;
350 char *logfile; /* redisLog() appends to this file; NULL means stdout */
351 char *bindaddr;
352 char *dbfilename;
44b38ef4 353 char *appendfilename;
abcb223e 354 char *requirepass;
10c43610 355 int shareobjects;
121f70cf 356 int rdbcompression;
ed9b544e 357 /* Replication related */
358 int isslave;
d0ccebcf 359 char *masterauth;
ed9b544e 360 char *masterhost;
361 int masterport;
40d224a9 362 redisClient *master; /* client that is master for this slave */
ed9b544e 363 int replstate;
285add55 364 unsigned int maxclients;
4ef8de8a 365 unsigned long long maxmemory;
f86a74e9 366 unsigned int blockedclients;
ed9b544e 367 /* Sort parameters - qsort_r() is only available under BSD so we
368 * have to take this state global, in order to pass it to sortCompare() */
369 int sort_desc;
370 int sort_alpha;
371 int sort_bypattern;
75680a3c 372 /* Virtual memory configuration */
373 int vm_enabled;
374 off_t vm_page_size;
375 off_t vm_pages;
4ef8de8a 376 unsigned long long vm_max_memory;
75680a3c 377 /* Virtual memory state */
378 FILE *vm_fp;
379 int vm_fd;
380 off_t vm_next_page; /* Next probably empty page */
381 off_t vm_near_pages; /* Number of pages allocated sequentially */
06224fec 382 unsigned char *vm_bitmap; /* Bitmap of free/used pages */
3a66edc7 383 time_t unixtime; /* Unix time sampled every second. */
7d98e08c 384 /* Virtual memory stats */
385 unsigned long long vm_stats_used_pages;
386 unsigned long long vm_stats_swapped_objects;
387 unsigned long long vm_stats_swapouts;
388 unsigned long long vm_stats_swapins;
ed9b544e 389};
390
/* Signature shared by all the command implementations (the *Command
 * functions declared above). */
391typedef void redisCommandProc(redisClient *c);
/* One row of the command table (see cmdTable). */
392struct redisCommand {
393 char *name; /* Command name, lowercase in cmdTable */
394 redisCommandProc *proc; /* Function implementing the command */
395 int arity; /* Argument count; negative in some cmdTable rows --
 * presumably "at least -arity", TODO confirm */
396 int flags; /* Bitwise OR of the REDIS_CMD_* "Command flags" defines */
397};
398
/* A (name, address) pair.
 * NOTE(review): presumably used to map addresses to function names when
 * printing a stack trace -- confirm against the users of this type. */
de96dbfe 399struct redisFunctionSym {
400 char *name;
56906eef 401 unsigned long pointer;
de96dbfe 402};
403
/* One element handled by SORT: the object itself plus the value it is
 * ordered by, held in the union 'u' as either a double or an object.
 * NOTE(review): which union member is valid presumably depends on the
 * server.sort_* settings -- confirm in sortCompare(). */
ed9b544e 404typedef struct _redisSortObject {
405 robj *obj;
406 union {
407 double score;
408 robj *cmpobj;
409 } u;
410} redisSortObject;
411
/* An operation attached to a SORT call: a type plus a pattern object.
 * NOTE(review): 'type' presumably holds one of the "Sort operations"
 * defines (REDIS_SORT_GET, ...) -- confirm in sortCommand(). */
412typedef struct _redisSortOperation {
413 int type;
414 robj *pattern;
415} redisSortOperation;
416
6b47e12e 417/* ZSETs use a specialized version of Skiplists */
418
/* A skiplist node: an object with its score, a backward pointer, and a
 * 'forward' pointer to an array of node pointers.
 * NOTE(review): presumably one forward pointer per level of the node, with
 * at most ZSKIPLIST_MAXLEVEL levels -- confirm in the zsl* functions. */
419typedef struct zskiplistNode {
420 struct zskiplistNode **forward;
e3870fab 421 struct zskiplistNode *backward;
6b47e12e 422 double score;
423 robj *obj;
424} zskiplistNode;
425
/* A skiplist: header and tail nodes, a length and a level.
 * NOTE(review): 'length' is presumably the element count and 'level' the
 * highest level in use -- confirm in zslInsert(). */
426typedef struct zskiplist {
e3870fab 427 struct zskiplistNode *header, *tail;
d13f767c 428 unsigned long length;
6b47e12e 429 int level;
430} zskiplist;
431
/* A sorted set value: a hash table paired with a skiplist.
 * NOTE(review): presumably the dict maps members to scores while the
 * skiplist keeps them ordered by score -- confirm in the z*Command code. */
1812e024 432typedef struct zset {
433 dict *dict;
6b47e12e 434 zskiplist *zsl;
1812e024 435} zset;
436
6b47e12e 437/* Our shared "common" objects */
438
/* Pointers to objects that are shared instead of being created on every
 * use; the single instance of this struct is the global 'shared'.
 * NOTE(review): judging by the member names these are common protocol
 * replies, error replies and SELECT commands for DBs 0-9 -- their contents
 * are set up outside this part of the file. */
ed9b544e 439struct sharedObjectsStruct {
c937aa89 440 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
6e469882 441 *colon, *nullbulk, *nullmultibulk, *queued,
c937aa89 442 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
443 *outofrangeerr, *plus,
ed9b544e 444 *select0, *select1, *select2, *select3, *select4,
445 *select5, *select6, *select7, *select8, *select9;
446} shared;
447
a7866db6 448/* Global vars that are actually used as constants. The following double
449 * values are used for double on-disk serialization, and are initialized
450 * at runtime to avoid strange compiler optimizations. */
451
452static double R_Zero, R_PosInf, R_NegInf, R_Nan;
453
ed9b544e 454/*================================ Prototypes =============================== */
455
456static void freeStringObject(robj *o);
457static void freeListObject(robj *o);
458static void freeSetObject(robj *o);
459static void decrRefCount(void *o);
460static robj *createObject(int type, void *ptr);
461static void freeClient(redisClient *c);
f78fd11b 462static int rdbLoad(char *filename);
ed9b544e 463static void addReply(redisClient *c, robj *obj);
464static void addReplySds(redisClient *c, sds s);
465static void incrRefCount(robj *o);
f78fd11b 466static int rdbSaveBackground(char *filename);
ed9b544e 467static robj *createStringObject(char *ptr, size_t len);
4ef8de8a 468static robj *dupStringObject(robj *o);
87eca727 469static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
44b38ef4 470static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
ed9b544e 471static int syncWithMaster(void);
10c43610 472static robj *tryObjectSharing(robj *o);
942a3961 473static int tryObjectEncoding(robj *o);
9d65a1bb 474static robj *getDecodedObject(robj *o);
3305306f 475static int removeExpire(redisDb *db, robj *key);
476static int expireIfNeeded(redisDb *db, robj *key);
477static int deleteIfVolatile(redisDb *db, robj *key);
1b03836c 478static int deleteIfSwapped(redisDb *db, robj *key);
94754ccc 479static int deleteKey(redisDb *db, robj *key);
bb32ede5 480static time_t getExpire(redisDb *db, robj *key);
481static int setExpire(redisDb *db, robj *key, time_t when);
a3b21203 482static void updateSlavesWaitingBgsave(int bgsaveerr);
3fd78bcd 483static void freeMemoryIfNeeded(void);
de96dbfe 484static int processCommand(redisClient *c);
56906eef 485static void setupSigSegvAction(void);
a3b21203 486static void rdbRemoveTempFile(pid_t childpid);
9d65a1bb 487static void aofRemoveTempFile(pid_t childpid);
0ea663ea 488static size_t stringObjectLen(robj *o);
638e42ac 489static void processInputBuffer(redisClient *c);
6b47e12e 490static zskiplist *zslCreate(void);
fd8ccf44 491static void zslFree(zskiplist *zsl);
2b59cfdf 492static void zslInsert(zskiplist *zsl, double score, robj *obj);
2895e862 493static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask);
6e469882 494static void initClientMultiState(redisClient *c);
495static void freeClientMultiState(redisClient *c);
496static void queueMultiCommand(redisClient *c, struct redisCommand *cmd);
4409877e 497static void unblockClient(redisClient *c);
498static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele);
75680a3c 499static void vmInit(void);
a35ddf12 500static void vmMarkPagesFree(off_t page, off_t count);
55cf8433 501static robj *vmLoadObject(robj *key);
7e69548d 502static robj *vmPreviewObject(robj *key);
4ef8de8a 503static int vmSwapOneObject(void);
7e69548d 504static int vmCanSwapOut(void);
f870935d 505static void freeOneObjectFromFreelist(void);
ed9b544e 506
abcb223e 507static void authCommand(redisClient *c);
ed9b544e 508static void pingCommand(redisClient *c);
509static void echoCommand(redisClient *c);
510static void setCommand(redisClient *c);
511static void setnxCommand(redisClient *c);
512static void getCommand(redisClient *c);
513static void delCommand(redisClient *c);
514static void existsCommand(redisClient *c);
515static void incrCommand(redisClient *c);
516static void decrCommand(redisClient *c);
517static void incrbyCommand(redisClient *c);
518static void decrbyCommand(redisClient *c);
519static void selectCommand(redisClient *c);
520static void randomkeyCommand(redisClient *c);
521static void keysCommand(redisClient *c);
522static void dbsizeCommand(redisClient *c);
523static void lastsaveCommand(redisClient *c);
524static void saveCommand(redisClient *c);
525static void bgsaveCommand(redisClient *c);
9d65a1bb 526static void bgrewriteaofCommand(redisClient *c);
ed9b544e 527static void shutdownCommand(redisClient *c);
528static void moveCommand(redisClient *c);
529static void renameCommand(redisClient *c);
530static void renamenxCommand(redisClient *c);
531static void lpushCommand(redisClient *c);
532static void rpushCommand(redisClient *c);
533static void lpopCommand(redisClient *c);
534static void rpopCommand(redisClient *c);
535static void llenCommand(redisClient *c);
536static void lindexCommand(redisClient *c);
537static void lrangeCommand(redisClient *c);
538static void ltrimCommand(redisClient *c);
539static void typeCommand(redisClient *c);
540static void lsetCommand(redisClient *c);
541static void saddCommand(redisClient *c);
542static void sremCommand(redisClient *c);
a4460ef4 543static void smoveCommand(redisClient *c);
ed9b544e 544static void sismemberCommand(redisClient *c);
545static void scardCommand(redisClient *c);
12fea928 546static void spopCommand(redisClient *c);
2abb95a9 547static void srandmemberCommand(redisClient *c);
ed9b544e 548static void sinterCommand(redisClient *c);
549static void sinterstoreCommand(redisClient *c);
40d224a9 550static void sunionCommand(redisClient *c);
551static void sunionstoreCommand(redisClient *c);
f4f56e1d 552static void sdiffCommand(redisClient *c);
553static void sdiffstoreCommand(redisClient *c);
ed9b544e 554static void syncCommand(redisClient *c);
555static void flushdbCommand(redisClient *c);
556static void flushallCommand(redisClient *c);
557static void sortCommand(redisClient *c);
558static void lremCommand(redisClient *c);
0f5f7e9a 559static void rpoplpushcommand(redisClient *c);
ed9b544e 560static void infoCommand(redisClient *c);
70003d28 561static void mgetCommand(redisClient *c);
87eca727 562static void monitorCommand(redisClient *c);
3305306f 563static void expireCommand(redisClient *c);
802e8373 564static void expireatCommand(redisClient *c);
f6b141c5 565static void getsetCommand(redisClient *c);
fd88489a 566static void ttlCommand(redisClient *c);
321b0e13 567static void slaveofCommand(redisClient *c);
7f957c92 568static void debugCommand(redisClient *c);
f6b141c5 569static void msetCommand(redisClient *c);
570static void msetnxCommand(redisClient *c);
fd8ccf44 571static void zaddCommand(redisClient *c);
7db723ad 572static void zincrbyCommand(redisClient *c);
cc812361 573static void zrangeCommand(redisClient *c);
50c55df5 574static void zrangebyscoreCommand(redisClient *c);
e3870fab 575static void zrevrangeCommand(redisClient *c);
3c41331e 576static void zcardCommand(redisClient *c);
1b7106e7 577static void zremCommand(redisClient *c);
6e333bbe 578static void zscoreCommand(redisClient *c);
1807985b 579static void zremrangebyscoreCommand(redisClient *c);
6e469882 580static void multiCommand(redisClient *c);
581static void execCommand(redisClient *c);
4409877e 582static void blpopCommand(redisClient *c);
583static void brpopCommand(redisClient *c);
f6b141c5 584
ed9b544e 585/*================================= Globals ================================= */
586
587/* Global vars */
588static struct redisServer server; /* server global state */
/* The command table. Every row is a struct redisCommand initializer in the
 * order {name, proc, arity, flags}; flags is REDIS_CMD_BULK or
 * REDIS_CMD_INLINE, optionally ORed with REDIS_CMD_DENYOOM. The table ends
 * with an all-zero sentinel row whose name is NULL.
 * NOTE(review): a negative arity presumably means "at least -arity
 * arguments" -- confirm against the code that looks commands up. */
589static struct redisCommand cmdTable[] = {
590 {"get",getCommand,2,REDIS_CMD_INLINE},
3fd78bcd 591 {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
592 {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
5109cdff 593 {"del",delCommand,-2,REDIS_CMD_INLINE},
ed9b544e 594 {"exists",existsCommand,2,REDIS_CMD_INLINE},
3fd78bcd 595 {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
596 {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
70003d28 597 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
3fd78bcd 598 {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
599 {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 600 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
601 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
b177fd30 602 {"brpop",brpopCommand,-3,REDIS_CMD_INLINE},
603 {"blpop",blpopCommand,-3,REDIS_CMD_INLINE},
ed9b544e 604 {"llen",llenCommand,2,REDIS_CMD_INLINE},
605 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
3fd78bcd 606 {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 607 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
608 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
609 {"lrem",lremCommand,4,REDIS_CMD_BULK},
0b13687c 610 {"rpoplpush",rpoplpushcommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 611 {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 612 {"srem",sremCommand,3,REDIS_CMD_BULK},
a4460ef4 613 {"smove",smoveCommand,4,REDIS_CMD_BULK},
ed9b544e 614 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
615 {"scard",scardCommand,2,REDIS_CMD_INLINE},
12fea928 616 {"spop",spopCommand,2,REDIS_CMD_INLINE},
2abb95a9 617 {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
3fd78bcd 618 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
619 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
620 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
621 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
622 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
623 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 624 {"smembers",sinterCommand,2,REDIS_CMD_INLINE}, /* same handler as "sinter" */
fd8ccf44 625 {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
7db723ad 626 {"zincrby",zincrbyCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
1b7106e7 627 {"zrem",zremCommand,3,REDIS_CMD_BULK},
1807985b 628 {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
752da584 629 {"zrange",zrangeCommand,-4,REDIS_CMD_INLINE},
80181f78 630 {"zrangebyscore",zrangebyscoreCommand,-4,REDIS_CMD_INLINE},
752da584 631 {"zrevrange",zrevrangeCommand,-4,REDIS_CMD_INLINE},
3c41331e 632 {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
6e333bbe 633 {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
3fd78bcd 634 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
635 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
f6b141c5 636 {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
637 {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
638 {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
ed9b544e 639 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
640 {"select",selectCommand,2,REDIS_CMD_INLINE},
641 {"move",moveCommand,3,REDIS_CMD_INLINE},
642 {"rename",renameCommand,3,REDIS_CMD_INLINE},
643 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
321b0e13 644 {"expire",expireCommand,3,REDIS_CMD_INLINE},
802e8373 645 {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
ed9b544e 646 {"keys",keysCommand,2,REDIS_CMD_INLINE},
647 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
abcb223e 648 {"auth",authCommand,2,REDIS_CMD_INLINE},
ed9b544e 649 {"ping",pingCommand,1,REDIS_CMD_INLINE},
650 {"echo",echoCommand,2,REDIS_CMD_BULK},
651 {"save",saveCommand,1,REDIS_CMD_INLINE},
652 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
9d65a1bb 653 {"bgrewriteaof",bgrewriteaofCommand,1,REDIS_CMD_INLINE},
ed9b544e 654 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
655 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
656 {"type",typeCommand,2,REDIS_CMD_INLINE},
6e469882 657 {"multi",multiCommand,1,REDIS_CMD_INLINE},
658 {"exec",execCommand,1,REDIS_CMD_INLINE},
ed9b544e 659 {"sync",syncCommand,1,REDIS_CMD_INLINE},
660 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
661 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
3fd78bcd 662 {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
ed9b544e 663 {"info",infoCommand,1,REDIS_CMD_INLINE},
87eca727 664 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
fd88489a 665 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
321b0e13 666 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
7f957c92 667 {"debug",debugCommand,-2,REDIS_CMD_INLINE},
ed9b544e 668 {NULL,NULL,0,0}
669};
bcfc686d 670
ed9b544e 671/*============================ Utility functions ============================ */
672
/* Lowercase helper for stringmatchlen(). tolower() is only defined for
 * arguments representable as unsigned char (or EOF), while the bytes
 * compared by the matcher come from plain char and may be negative.
 * Negative values are returned unchanged. */
static int stringmatchLower(int c) {
    return (c < 0) ? c : tolower(c);
}

/* Glob-style pattern matching.
 *
 * Matches the first stringLen bytes of 'string' against the first
 * patternLen bytes of 'pattern'. Supported syntax:
 *
 *   *       any run of bytes, including the empty one
 *   ?       exactly one byte
 *   [...]   one byte out of a set: a leading ^ negates the set, a-z is a
 *           range, \x inside the set is the byte x taken literally
 *           (always compared case-sensitively)
 *   \x      the byte x taken literally
 *
 * When 'nocase' is non-zero, literals and ranges are compared after
 * lowercasing both sides. Returns 1 on match, 0 otherwise.
 *
 * Only bytes inside the two given lengths are read, so neither buffer has
 * to be NUL terminated. A token that must consume a byte never matches
 * once the string is exhausted. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while (patternLen) {
        switch (pattern[0]) {
        case '*':
            /* Collapse a run of stars without looking past the pattern. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try the rest of the pattern at every remaining position. */
            while (stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            /* A class always consumes one byte of the string. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            not = patternLen && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back one byte so that the
                     * shared advance after the switch lands exactly on the
                     * end of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = stringmatchLower(start);
                        end = stringmatchLower(end);
                        c = stringmatchLower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    /* Plain member; a backslash that is the last byte of
                     * the pattern also ends up here as a literal. */
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (stringmatchLower(pattern[0]) ==
                            stringmatchLower(string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            /* Skip the backslash, unless it is the last pattern byte: in
             * that case it is matched as a literal backslash. */
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (stringmatchLower(pattern[0]) !=
                    stringmatchLower(string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* String exhausted: only trailing stars may be left over. */
            while (patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
795
56906eef 796static void redisLog(int level, const char *fmt, ...) {
ed9b544e 797 va_list ap;
798 FILE *fp;
799
800 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
801 if (!fp) return;
802
803 va_start(ap, fmt);
804 if (level >= server.verbosity) {
805 char *c = ".-*";
1904ecc1 806 char buf[64];
807 time_t now;
808
809 now = time(NULL);
6c9385e0 810 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
1904ecc1 811 fprintf(fp,"%s %c ",buf,c[level]);
ed9b544e 812 vfprintf(fp, fmt, ap);
813 fprintf(fp,"\n");
814 fflush(fp);
815 }
816 va_end(ap);
817
818 if (server.logfile) fclose(fp);
819}
820
821/*====================== Hash table type implementation ==================== */
822
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and Redis objects as values (objects can hold SDS strings,
 * lists, sets). */
826
/* Dictionary destructor callback that releases 'val' with zfree().
 * The 'privdata' argument is not used. */
static void dictVanillaFree(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    zfree(val);
}
832
4409877e 833static void dictListDestructor(void *privdata, void *val)
834{
835 DICT_NOTUSED(privdata);
836 listRelease((list*)val);
837}
838
ed9b544e 839static int sdsDictKeyCompare(void *privdata, const void *key1,
840 const void *key2)
841{
842 int l1,l2;
843 DICT_NOTUSED(privdata);
844
845 l1 = sdslen((sds)key1);
846 l2 = sdslen((sds)key2);
847 if (l1 != l2) return 0;
848 return memcmp(key1, key2, l1) == 0;
849}
850
/* Dictionary destructor callback for Redis objects: drops one reference
 * from 'val' via decrRefCount(). 'privdata' is unused. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    DICT_NOTUSED(privdata);

    /* Values of swapped out keys are set to NULL: nothing to release. */
    if (val != NULL) decrRefCount(val);
}
858
942a3961 859static int dictObjKeyCompare(void *privdata, const void *key1,
ed9b544e 860 const void *key2)
861{
862 const robj *o1 = key1, *o2 = key2;
863 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
864}
865
942a3961 866static unsigned int dictObjHash(const void *key) {
ed9b544e 867 const robj *o = key;
868 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
869}
870
942a3961 871static int dictEncObjKeyCompare(void *privdata, const void *key1,
872 const void *key2)
873{
9d65a1bb 874 robj *o1 = (robj*) key1, *o2 = (robj*) key2;
875 int cmp;
942a3961 876
9d65a1bb 877 o1 = getDecodedObject(o1);
878 o2 = getDecodedObject(o2);
879 cmp = sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
880 decrRefCount(o1);
881 decrRefCount(o2);
882 return cmp;
942a3961 883}
884
885static unsigned int dictEncObjHash(const void *key) {
9d65a1bb 886 robj *o = (robj*) key;
942a3961 887
9d65a1bb 888 o = getDecodedObject(o);
889 unsigned int hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
890 decrRefCount(o);
891 return hash;
942a3961 892}
893
ed9b544e 894static dictType setDictType = {
942a3961 895 dictEncObjHash, /* hash function */
ed9b544e 896 NULL, /* key dup */
897 NULL, /* val dup */
942a3961 898 dictEncObjKeyCompare, /* key compare */
ed9b544e 899 dictRedisObjectDestructor, /* key destructor */
900 NULL /* val destructor */
901};
902
1812e024 903static dictType zsetDictType = {
904 dictEncObjHash, /* hash function */
905 NULL, /* key dup */
906 NULL, /* val dup */
907 dictEncObjKeyCompare, /* key compare */
908 dictRedisObjectDestructor, /* key destructor */
da0a1620 909 dictVanillaFree /* val destructor of malloc(sizeof(double)) */
1812e024 910};
911
ed9b544e 912static dictType hashDictType = {
942a3961 913 dictObjHash, /* hash function */
ed9b544e 914 NULL, /* key dup */
915 NULL, /* val dup */
942a3961 916 dictObjKeyCompare, /* key compare */
ed9b544e 917 dictRedisObjectDestructor, /* key destructor */
918 dictRedisObjectDestructor /* val destructor */
919};
920
4409877e 921/* Keylist hash table type has unencoded redis objects as keys and
922 * lists as values. It's used for blocking operations (BLPOP) */
923static dictType keylistDictType = {
924 dictObjHash, /* hash function */
925 NULL, /* key dup */
926 NULL, /* val dup */
927 dictObjKeyCompare, /* key compare */
928 dictRedisObjectDestructor, /* key destructor */
929 dictListDestructor /* val destructor */
930};
931
ed9b544e 932/* ========================= Random utility functions ======================= */
933
934/* Redis generally does not try to recover from out of memory conditions
935 * when allocating objects or strings, it is not clear if it will be possible
936 * to report this condition to the client since the networking layer itself
937 * is based on heap allocation for send buffers, so we simply abort.
938 * At least the code will be simpler to read... */
939static void oom(const char *msg) {
71c54b21 940 redisLog(REDIS_WARNING, "%s: Out of memory\n",msg);
ed9b544e 941 sleep(1);
942 abort();
943}
944
945/* ====================== Redis server networking stuff ===================== */
56906eef 946static void closeTimedoutClients(void) {
ed9b544e 947 redisClient *c;
ed9b544e 948 listNode *ln;
949 time_t now = time(NULL);
950
6208b3a7 951 listRewind(server.clients);
952 while ((ln = listYield(server.clients)) != NULL) {
ed9b544e 953 c = listNodeValue(ln);
f86a74e9 954 if (server.maxidletime &&
955 !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
c7cf2ec9 956 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
f86a74e9 957 (now - c->lastinteraction > server.maxidletime))
958 {
f870935d 959 redisLog(REDIS_VERBOSE,"Closing idle client");
ed9b544e 960 freeClient(c);
f86a74e9 961 } else if (c->flags & REDIS_BLOCKED) {
58d976b8 962 if (c->blockingto != 0 && c->blockingto < now) {
b177fd30 963 addReply(c,shared.nullmultibulk);
f86a74e9 964 unblockClient(c);
965 }
ed9b544e 966 }
967 }
ed9b544e 968}
969
12fea928 970static int htNeedsResize(dict *dict) {
971 long long size, used;
972
973 size = dictSlots(dict);
974 used = dictSize(dict);
975 return (size && used && size > DICT_HT_INITIAL_SIZE &&
976 (used*100/size < REDIS_HT_MINFILL));
977}
978
0bc03378 979/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
980 * we resize the hash table to save memory */
56906eef 981static void tryResizeHashTables(void) {
0bc03378 982 int j;
983
984 for (j = 0; j < server.dbnum; j++) {
12fea928 985 if (htNeedsResize(server.db[j].dict)) {
f870935d 986 redisLog(REDIS_VERBOSE,"The hash table %d is too sparse, resize it...",j);
0bc03378 987 dictResize(server.db[j].dict);
f870935d 988 redisLog(REDIS_VERBOSE,"Hash table %d resized.",j);
0bc03378 989 }
12fea928 990 if (htNeedsResize(server.db[j].expires))
991 dictResize(server.db[j].expires);
0bc03378 992 }
993}
994
9d65a1bb 995/* A background saving child (BGSAVE) terminated its work. Handle this. */
996void backgroundSaveDoneHandler(int statloc) {
997 int exitcode = WEXITSTATUS(statloc);
998 int bysignal = WIFSIGNALED(statloc);
999
1000 if (!bysignal && exitcode == 0) {
1001 redisLog(REDIS_NOTICE,
1002 "Background saving terminated with success");
1003 server.dirty = 0;
1004 server.lastsave = time(NULL);
1005 } else if (!bysignal && exitcode != 0) {
1006 redisLog(REDIS_WARNING, "Background saving error");
1007 } else {
1008 redisLog(REDIS_WARNING,
1009 "Background saving terminated by signal");
1010 rdbRemoveTempFile(server.bgsavechildpid);
1011 }
1012 server.bgsavechildpid = -1;
1013 /* Possibly there are slaves waiting for a BGSAVE in order to be served
1014 * (the first stage of SYNC is a bulk transfer of dump.rdb) */
1015 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
1016}
1017
1018/* A background append only file rewriting (BGREWRITEAOF) terminated its work.
1019 * Handle this. */
1020void backgroundRewriteDoneHandler(int statloc) {
1021 int exitcode = WEXITSTATUS(statloc);
1022 int bysignal = WIFSIGNALED(statloc);
1023
1024 if (!bysignal && exitcode == 0) {
1025 int fd;
1026 char tmpfile[256];
1027
1028 redisLog(REDIS_NOTICE,
1029 "Background append only file rewriting terminated with success");
1030 /* Now it's time to flush the differences accumulated by the parent */
1031 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) server.bgrewritechildpid);
1032 fd = open(tmpfile,O_WRONLY|O_APPEND);
1033 if (fd == -1) {
1034 redisLog(REDIS_WARNING, "Not able to open the temp append only file produced by the child: %s", strerror(errno));
1035 goto cleanup;
1036 }
1037 /* Flush our data... */
1038 if (write(fd,server.bgrewritebuf,sdslen(server.bgrewritebuf)) !=
1039 (signed) sdslen(server.bgrewritebuf)) {
1040 redisLog(REDIS_WARNING, "Error or short write trying to flush the parent diff of the append log file in the child temp file: %s", strerror(errno));
1041 close(fd);
1042 goto cleanup;
1043 }
b32627cd 1044 redisLog(REDIS_NOTICE,"Parent diff flushed into the new append log file with success (%lu bytes)",sdslen(server.bgrewritebuf));
9d65a1bb 1045 /* Now our work is to rename the temp file into the stable file. And
1046 * switch the file descriptor used by the server for append only. */
1047 if (rename(tmpfile,server.appendfilename) == -1) {
1048 redisLog(REDIS_WARNING,"Can't rename the temp append only file into the stable one: %s", strerror(errno));
1049 close(fd);
1050 goto cleanup;
1051 }
1052 /* Mission completed... almost */
1053 redisLog(REDIS_NOTICE,"Append only file successfully rewritten.");
1054 if (server.appendfd != -1) {
1055 /* If append only is actually enabled... */
1056 close(server.appendfd);
1057 server.appendfd = fd;
1058 fsync(fd);
85a83172 1059 server.appendseldb = -1; /* Make sure it will issue SELECT */
9d65a1bb 1060 redisLog(REDIS_NOTICE,"The new append only file was selected for future appends.");
1061 } else {
1062 /* If append only is disabled we just generate a dump in this
1063 * format. Why not? */
1064 close(fd);
1065 }
1066 } else if (!bysignal && exitcode != 0) {
1067 redisLog(REDIS_WARNING, "Background append only file rewriting error");
1068 } else {
1069 redisLog(REDIS_WARNING,
1070 "Background append only file rewriting terminated by signal");
1071 }
1072cleanup:
1073 sdsfree(server.bgrewritebuf);
1074 server.bgrewritebuf = sdsempty();
1075 aofRemoveTempFile(server.bgrewritechildpid);
1076 server.bgrewritechildpid = -1;
1077}
1078
56906eef 1079static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
94754ccc 1080 int j, loops = server.cronloops++;
ed9b544e 1081 REDIS_NOTUSED(eventLoop);
1082 REDIS_NOTUSED(id);
1083 REDIS_NOTUSED(clientData);
1084
3a66edc7 1085 /* We take a cached value of the unix time in the global state because
1086 * with virtual memory and aging there is to store the current time
1087 * in objects at every object access, and accuracy is not needed.
1088 * To access a global var is faster than calling time(NULL) */
1089 server.unixtime = time(NULL);
1090
ed9b544e 1091 /* Update the global state with the amount of used memory */
1092 server.usedmemory = zmalloc_used_memory();
1093
0bc03378 1094 /* Show some info about non-empty databases */
ed9b544e 1095 for (j = 0; j < server.dbnum; j++) {
dec423d9 1096 long long size, used, vkeys;
94754ccc 1097
3305306f 1098 size = dictSlots(server.db[j].dict);
1099 used = dictSize(server.db[j].dict);
94754ccc 1100 vkeys = dictSize(server.db[j].expires);
c3cb078d 1101 if (!(loops % 5) && (used || vkeys)) {
f870935d 1102 redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
a4d1ba9a 1103 /* dictPrintStats(server.dict); */
ed9b544e 1104 }
ed9b544e 1105 }
1106
0bc03378 1107 /* We don't want to resize the hash tables while a bacground saving
1108 * is in progress: the saving child is created using fork() that is
1109 * implemented with a copy-on-write semantic in most modern systems, so
1110 * if we resize the HT while there is the saving child at work actually
1111 * a lot of memory movements in the parent will cause a lot of pages
1112 * copied. */
9d65a1bb 1113 if (server.bgsavechildpid == -1) tryResizeHashTables();
0bc03378 1114
ed9b544e 1115 /* Show information about connected clients */
1116 if (!(loops % 5)) {
f870935d 1117 redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
ed9b544e 1118 listLength(server.clients)-listLength(server.slaves),
1119 listLength(server.slaves),
10c43610 1120 server.usedmemory,
3305306f 1121 dictSize(server.sharingpool));
ed9b544e 1122 }
1123
1124 /* Close connections of timedout clients */
f86a74e9 1125 if ((server.maxidletime && !(loops % 10)) || server.blockedclients)
ed9b544e 1126 closeTimedoutClients();
1127
9d65a1bb 1128 /* Check if a background saving or AOF rewrite in progress terminated */
1129 if (server.bgsavechildpid != -1 || server.bgrewritechildpid != -1) {
ed9b544e 1130 int statloc;
9d65a1bb 1131 pid_t pid;
1132
1133 if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
1134 if (pid == server.bgsavechildpid) {
1135 backgroundSaveDoneHandler(statloc);
ed9b544e 1136 } else {
9d65a1bb 1137 backgroundRewriteDoneHandler(statloc);
ed9b544e 1138 }
ed9b544e 1139 }
1140 } else {
1141 /* If there is not a background saving in progress check if
1142 * we have to save now */
1143 time_t now = time(NULL);
1144 for (j = 0; j < server.saveparamslen; j++) {
1145 struct saveparam *sp = server.saveparams+j;
1146
1147 if (server.dirty >= sp->changes &&
1148 now-server.lastsave > sp->seconds) {
1149 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
1150 sp->changes, sp->seconds);
f78fd11b 1151 rdbSaveBackground(server.dbfilename);
ed9b544e 1152 break;
1153 }
1154 }
1155 }
94754ccc 1156
f2324293 1157 /* Try to expire a few timed out keys. The algorithm used is adaptive and
1158 * will use few CPU cycles if there are few expiring keys, otherwise
1159 * it will get more aggressive to avoid that too much memory is used by
1160 * keys that can be removed from the keyspace. */
94754ccc 1161 for (j = 0; j < server.dbnum; j++) {
f2324293 1162 int expired;
94754ccc 1163 redisDb *db = server.db+j;
94754ccc 1164
f2324293 1165 /* Continue to expire if at the end of the cycle more than 25%
1166 * of the keys were expired. */
1167 do {
4ef8de8a 1168 long num = dictSize(db->expires);
94754ccc 1169 time_t now = time(NULL);
1170
f2324293 1171 expired = 0;
94754ccc 1172 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
1173 num = REDIS_EXPIRELOOKUPS_PER_CRON;
1174 while (num--) {
1175 dictEntry *de;
1176 time_t t;
1177
1178 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
1179 t = (time_t) dictGetEntryVal(de);
1180 if (now > t) {
1181 deleteKey(db,dictGetEntryKey(de));
f2324293 1182 expired++;
94754ccc 1183 }
1184 }
f2324293 1185 } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4);
94754ccc 1186 }
1187
4ef8de8a 1188 /* Swap a few keys on disk if we are over the memory limit and VM
f870935d 1189 * is enbled. Try to free objects from the free list first. */
7e69548d 1190 if (vmCanSwapOut()) {
1191 while (server.vm_enabled && zmalloc_used_memory() >
f870935d 1192 server.vm_max_memory)
1193 {
1194 if (listLength(server.objfreelist)) {
1195 freeOneObjectFromFreelist();
1196 } else if (vmSwapOneObject() == REDIS_ERR) {
1197 if ((loops % 30) == 0 && zmalloc_used_memory() >
7e69548d 1198 (server.vm_max_memory+server.vm_max_memory/10)) {
1199 redisLog(REDIS_WARNING,"WARNING: vm-max-memory limit exceeded by more than 10%% but unable to swap more objects out!");
1200 }
1201 break;
1202 }
4ef8de8a 1203 }
1204 }
1205
ed9b544e 1206 /* Check if we should connect to a MASTER */
1207 if (server.replstate == REDIS_REPL_CONNECT) {
1208 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
1209 if (syncWithMaster() == REDIS_OK) {
1210 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
1211 }
1212 }
1213 return 1000;
1214}
1215
1216static void createSharedObjects(void) {
1217 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
1218 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
1219 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
c937aa89 1220 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
1221 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
1222 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
1223 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
1224 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
1225 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
ed9b544e 1226 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
6e469882 1227 shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n"));
ed9b544e 1228 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
1229 "-ERR Operation against a key holding the wrong kind of value\r\n"));
ed9b544e 1230 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
1231 "-ERR no such key\r\n"));
ed9b544e 1232 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
1233 "-ERR syntax error\r\n"));
c937aa89 1234 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
1235 "-ERR source and destination objects are the same\r\n"));
1236 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
1237 "-ERR index out of range\r\n"));
ed9b544e 1238 shared.space = createObject(REDIS_STRING,sdsnew(" "));
c937aa89 1239 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
1240 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
ed9b544e 1241 shared.select0 = createStringObject("select 0\r\n",10);
1242 shared.select1 = createStringObject("select 1\r\n",10);
1243 shared.select2 = createStringObject("select 2\r\n",10);
1244 shared.select3 = createStringObject("select 3\r\n",10);
1245 shared.select4 = createStringObject("select 4\r\n",10);
1246 shared.select5 = createStringObject("select 5\r\n",10);
1247 shared.select6 = createStringObject("select 6\r\n",10);
1248 shared.select7 = createStringObject("select 7\r\n",10);
1249 shared.select8 = createStringObject("select 8\r\n",10);
1250 shared.select9 = createStringObject("select 9\r\n",10);
1251}
1252
1253static void appendServerSaveParams(time_t seconds, int changes) {
1254 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
ed9b544e 1255 server.saveparams[server.saveparamslen].seconds = seconds;
1256 server.saveparams[server.saveparamslen].changes = changes;
1257 server.saveparamslen++;
1258}
1259
bcfc686d 1260static void resetServerSaveParams() {
ed9b544e 1261 zfree(server.saveparams);
1262 server.saveparams = NULL;
1263 server.saveparamslen = 0;
1264}
1265
1266static void initServerConfig() {
1267 server.dbnum = REDIS_DEFAULT_DBNUM;
1268 server.port = REDIS_SERVERPORT;
f870935d 1269 server.verbosity = REDIS_VERBOSE;
ed9b544e 1270 server.maxidletime = REDIS_MAXIDLETIME;
1271 server.saveparams = NULL;
1272 server.logfile = NULL; /* NULL = log on standard output */
1273 server.bindaddr = NULL;
1274 server.glueoutputbuf = 1;
1275 server.daemonize = 0;
44b38ef4 1276 server.appendonly = 0;
4e141d5a 1277 server.appendfsync = APPENDFSYNC_ALWAYS;
48f0308a 1278 server.lastfsync = time(NULL);
44b38ef4 1279 server.appendfd = -1;
1280 server.appendseldb = -1; /* Make sure the first time will not match */
ed329fcf 1281 server.pidfile = "/var/run/redis.pid";
ed9b544e 1282 server.dbfilename = "dump.rdb";
9d65a1bb 1283 server.appendfilename = "appendonly.aof";
abcb223e 1284 server.requirepass = NULL;
10c43610 1285 server.shareobjects = 0;
b0553789 1286 server.rdbcompression = 1;
21aecf4b 1287 server.sharingpoolsize = 1024;
285add55 1288 server.maxclients = 0;
f86a74e9 1289 server.blockedclients = 0;
3fd78bcd 1290 server.maxmemory = 0;
75680a3c 1291 server.vm_enabled = 0;
1292 server.vm_page_size = 256; /* 256 bytes per page */
1293 server.vm_pages = 1024*1024*100; /* 104 millions of pages */
1294 server.vm_max_memory = 1024LL*1024*1024*1; /* 1 GB of RAM */
1295
bcfc686d 1296 resetServerSaveParams();
ed9b544e 1297
1298 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1299 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1300 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1301 /* Replication related */
1302 server.isslave = 0;
d0ccebcf 1303 server.masterauth = NULL;
ed9b544e 1304 server.masterhost = NULL;
1305 server.masterport = 6379;
1306 server.master = NULL;
1307 server.replstate = REDIS_REPL_NONE;
a7866db6 1308
1309 /* Double constants initialization */
1310 R_Zero = 0.0;
1311 R_PosInf = 1.0/R_Zero;
1312 R_NegInf = -1.0/R_Zero;
1313 R_Nan = R_Zero/R_Zero;
ed9b544e 1314}
1315
1316static void initServer() {
1317 int j;
1318
1319 signal(SIGHUP, SIG_IGN);
1320 signal(SIGPIPE, SIG_IGN);
fe3bbfbe 1321 setupSigSegvAction();
ed9b544e 1322
1323 server.clients = listCreate();
1324 server.slaves = listCreate();
87eca727 1325 server.monitors = listCreate();
ed9b544e 1326 server.objfreelist = listCreate();
1327 createSharedObjects();
1328 server.el = aeCreateEventLoop();
3305306f 1329 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
10c43610 1330 server.sharingpool = dictCreate(&setDictType,NULL);
ed9b544e 1331 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1332 if (server.fd == -1) {
1333 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1334 exit(1);
1335 }
3305306f 1336 for (j = 0; j < server.dbnum; j++) {
1337 server.db[j].dict = dictCreate(&hashDictType,NULL);
1338 server.db[j].expires = dictCreate(&setDictType,NULL);
4409877e 1339 server.db[j].blockingkeys = dictCreate(&keylistDictType,NULL);
3305306f 1340 server.db[j].id = j;
1341 }
ed9b544e 1342 server.cronloops = 0;
9f3c422c 1343 server.bgsavechildpid = -1;
9d65a1bb 1344 server.bgrewritechildpid = -1;
1345 server.bgrewritebuf = sdsempty();
ed9b544e 1346 server.lastsave = time(NULL);
1347 server.dirty = 0;
1348 server.usedmemory = 0;
1349 server.stat_numcommands = 0;
1350 server.stat_numconnections = 0;
1351 server.stat_starttime = time(NULL);
3a66edc7 1352 server.unixtime = time(NULL);
d8f8b666 1353 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
44b38ef4 1354
1355 if (server.appendonly) {
71eba477 1356 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
44b38ef4 1357 if (server.appendfd == -1) {
1358 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1359 strerror(errno));
1360 exit(1);
1361 }
1362 }
75680a3c 1363
1364 if (server.vm_enabled) vmInit();
ed9b544e 1365}
1366
1367/* Empty the whole database */
ca37e9cd 1368static long long emptyDb() {
ed9b544e 1369 int j;
ca37e9cd 1370 long long removed = 0;
ed9b544e 1371
3305306f 1372 for (j = 0; j < server.dbnum; j++) {
ca37e9cd 1373 removed += dictSize(server.db[j].dict);
3305306f 1374 dictEmpty(server.db[j].dict);
1375 dictEmpty(server.db[j].expires);
1376 }
ca37e9cd 1377 return removed;
ed9b544e 1378}
1379
/* Convert a "yes" / "no" string (compared case insensitively) into 1 / 0.
 * Any other string yields -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1385
ed9b544e 1386/* I agree, this is a very rudimental way to load a configuration...
1387 will improve later if the config gets more complex */
1388static void loadServerConfig(char *filename) {
c9a111ac 1389 FILE *fp;
ed9b544e 1390 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1391 int linenum = 0;
1392 sds line = NULL;
c9a111ac 1393
1394 if (filename[0] == '-' && filename[1] == '\0')
1395 fp = stdin;
1396 else {
1397 if ((fp = fopen(filename,"r")) == NULL) {
1398 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1399 exit(1);
1400 }
ed9b544e 1401 }
c9a111ac 1402
ed9b544e 1403 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1404 sds *argv;
1405 int argc, j;
1406
1407 linenum++;
1408 line = sdsnew(buf);
1409 line = sdstrim(line," \t\r\n");
1410
1411 /* Skip comments and blank lines*/
1412 if (line[0] == '#' || line[0] == '\0') {
1413 sdsfree(line);
1414 continue;
1415 }
1416
1417 /* Split into arguments */
1418 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1419 sdstolower(argv[0]);
1420
1421 /* Execute config directives */
bb0b03a3 1422 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
ed9b544e 1423 server.maxidletime = atoi(argv[1]);
0150db36 1424 if (server.maxidletime < 0) {
ed9b544e 1425 err = "Invalid timeout value"; goto loaderr;
1426 }
bb0b03a3 1427 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
ed9b544e 1428 server.port = atoi(argv[1]);
1429 if (server.port < 1 || server.port > 65535) {
1430 err = "Invalid port"; goto loaderr;
1431 }
bb0b03a3 1432 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
ed9b544e 1433 server.bindaddr = zstrdup(argv[1]);
bb0b03a3 1434 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
ed9b544e 1435 int seconds = atoi(argv[1]);
1436 int changes = atoi(argv[2]);
1437 if (seconds < 1 || changes < 0) {
1438 err = "Invalid save parameters"; goto loaderr;
1439 }
1440 appendServerSaveParams(seconds,changes);
bb0b03a3 1441 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
ed9b544e 1442 if (chdir(argv[1]) == -1) {
1443 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1444 argv[1], strerror(errno));
1445 exit(1);
1446 }
bb0b03a3 1447 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1448 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
f870935d 1449 else if (!strcasecmp(argv[1],"verbose")) server.verbosity = REDIS_VERBOSE;
bb0b03a3 1450 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1451 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
ed9b544e 1452 else {
1453 err = "Invalid log level. Must be one of debug, notice, warning";
1454 goto loaderr;
1455 }
bb0b03a3 1456 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
c9a111ac 1457 FILE *logfp;
ed9b544e 1458
1459 server.logfile = zstrdup(argv[1]);
bb0b03a3 1460 if (!strcasecmp(server.logfile,"stdout")) {
ed9b544e 1461 zfree(server.logfile);
1462 server.logfile = NULL;
1463 }
1464 if (server.logfile) {
1465 /* Test if we are able to open the file. The server will not
1466 * be able to abort just for this problem later... */
c9a111ac 1467 logfp = fopen(server.logfile,"a");
1468 if (logfp == NULL) {
ed9b544e 1469 err = sdscatprintf(sdsempty(),
1470 "Can't open the log file: %s", strerror(errno));
1471 goto loaderr;
1472 }
c9a111ac 1473 fclose(logfp);
ed9b544e 1474 }
bb0b03a3 1475 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
ed9b544e 1476 server.dbnum = atoi(argv[1]);
1477 if (server.dbnum < 1) {
1478 err = "Invalid number of databases"; goto loaderr;
1479 }
285add55 1480 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1481 server.maxclients = atoi(argv[1]);
3fd78bcd 1482 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
d4465900 1483 server.maxmemory = strtoll(argv[1], NULL, 10);
bb0b03a3 1484 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
ed9b544e 1485 server.masterhost = sdsnew(argv[1]);
1486 server.masterport = atoi(argv[2]);
1487 server.replstate = REDIS_REPL_CONNECT;
d0ccebcf 1488 } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
1489 server.masterauth = zstrdup(argv[1]);
bb0b03a3 1490 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
85dd2f3a 1491 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
ed9b544e 1492 err = "argument must be 'yes' or 'no'"; goto loaderr;
1493 }
bb0b03a3 1494 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
85dd2f3a 1495 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
10c43610 1496 err = "argument must be 'yes' or 'no'"; goto loaderr;
1497 }
121f70cf 1498 } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) {
1499 if ((server.rdbcompression = yesnotoi(argv[1])) == -1) {
1500 err = "argument must be 'yes' or 'no'"; goto loaderr;
1501 }
e52c65b9 1502 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1503 server.sharingpoolsize = atoi(argv[1]);
1504 if (server.sharingpoolsize < 1) {
1505 err = "invalid object sharing pool size"; goto loaderr;
1506 }
bb0b03a3 1507 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
85dd2f3a 1508 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
ed9b544e 1509 err = "argument must be 'yes' or 'no'"; goto loaderr;
1510 }
44b38ef4 1511 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1512 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1513 err = "argument must be 'yes' or 'no'"; goto loaderr;
1514 }
48f0308a 1515 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1766c6da 1516 if (!strcasecmp(argv[1],"no")) {
48f0308a 1517 server.appendfsync = APPENDFSYNC_NO;
1766c6da 1518 } else if (!strcasecmp(argv[1],"always")) {
48f0308a 1519 server.appendfsync = APPENDFSYNC_ALWAYS;
1766c6da 1520 } else if (!strcasecmp(argv[1],"everysec")) {
48f0308a 1521 server.appendfsync = APPENDFSYNC_EVERYSEC;
1522 } else {
1523 err = "argument must be 'no', 'always' or 'everysec'";
1524 goto loaderr;
1525 }
bb0b03a3 1526 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
abcb223e 1527 server.requirepass = zstrdup(argv[1]);
bb0b03a3 1528 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
ed329fcf 1529 server.pidfile = zstrdup(argv[1]);
bb0b03a3 1530 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
b8b553c8 1531 server.dbfilename = zstrdup(argv[1]);
75680a3c 1532 } else if (!strcasecmp(argv[0],"vm-enabled") && argc == 2) {
1533 if ((server.vm_enabled = yesnotoi(argv[1])) == -1) {
1534 err = "argument must be 'yes' or 'no'"; goto loaderr;
1535 }
4ef8de8a 1536 } else if (!strcasecmp(argv[0],"vm-max-memory") && argc == 2) {
1537 server.vm_max_memory = strtoll(argv[1], NULL, 10);
1538 } else if (!strcasecmp(argv[0],"vm-page-size") && argc == 2) {
1539 server.vm_page_size = strtoll(argv[1], NULL, 10);
1540 } else if (!strcasecmp(argv[0],"vm-pages") && argc == 2) {
1541 server.vm_pages = strtoll(argv[1], NULL, 10);
ed9b544e 1542 } else {
1543 err = "Bad directive or wrong number of arguments"; goto loaderr;
1544 }
1545 for (j = 0; j < argc; j++)
1546 sdsfree(argv[j]);
1547 zfree(argv);
1548 sdsfree(line);
1549 }
c9a111ac 1550 if (fp != stdin) fclose(fp);
ed9b544e 1551 return;
1552
1553loaderr:
1554 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1555 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1556 fprintf(stderr, ">>> '%s'\n", line);
1557 fprintf(stderr, "%s\n", err);
1558 exit(1);
1559}
1560
1561static void freeClientArgv(redisClient *c) {
1562 int j;
1563
1564 for (j = 0; j < c->argc; j++)
1565 decrRefCount(c->argv[j]);
e8a74421 1566 for (j = 0; j < c->mbargc; j++)
1567 decrRefCount(c->mbargv[j]);
ed9b544e 1568 c->argc = 0;
e8a74421 1569 c->mbargc = 0;
ed9b544e 1570}
1571
1572static void freeClient(redisClient *c) {
1573 listNode *ln;
1574
4409877e 1575 /* Note that if the client we are freeing is blocked into a blocking
1576 * call, we have to set querybuf to NULL *before* to call unblockClient()
1577 * to avoid processInputBuffer() will get called. Also it is important
1578 * to remove the file events after this, because this call adds
1579 * the READABLE event. */
1580 sdsfree(c->querybuf);
1581 c->querybuf = NULL;
1582 if (c->flags & REDIS_BLOCKED)
1583 unblockClient(c);
1584
ed9b544e 1585 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1586 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
ed9b544e 1587 listRelease(c->reply);
1588 freeClientArgv(c);
1589 close(c->fd);
1590 ln = listSearchKey(server.clients,c);
dfc5e96c 1591 redisAssert(ln != NULL);
ed9b544e 1592 listDelNode(server.clients,ln);
1593 if (c->flags & REDIS_SLAVE) {
6208b3a7 1594 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1595 close(c->repldbfd);
87eca727 1596 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1597 ln = listSearchKey(l,c);
dfc5e96c 1598 redisAssert(ln != NULL);
87eca727 1599 listDelNode(l,ln);
ed9b544e 1600 }
1601 if (c->flags & REDIS_MASTER) {
1602 server.master = NULL;
1603 server.replstate = REDIS_REPL_CONNECT;
1604 }
93ea3759 1605 zfree(c->argv);
e8a74421 1606 zfree(c->mbargv);
6e469882 1607 freeClientMultiState(c);
ed9b544e 1608 zfree(c);
1609}
1610
cc30e368 1611#define GLUEREPLY_UP_TO (1024)
ed9b544e 1612static void glueReplyBuffersIfNeeded(redisClient *c) {
c28b42ac 1613 int copylen = 0;
1614 char buf[GLUEREPLY_UP_TO];
6208b3a7 1615 listNode *ln;
ed9b544e 1616 robj *o;
1617
6208b3a7 1618 listRewind(c->reply);
1619 while((ln = listYield(c->reply))) {
c28b42ac 1620 int objlen;
1621
ed9b544e 1622 o = ln->value;
c28b42ac 1623 objlen = sdslen(o->ptr);
1624 if (copylen + objlen <= GLUEREPLY_UP_TO) {
1625 memcpy(buf+copylen,o->ptr,objlen);
1626 copylen += objlen;
ed9b544e 1627 listDelNode(c->reply,ln);
c28b42ac 1628 } else {
1629 if (copylen == 0) return;
1630 break;
ed9b544e 1631 }
ed9b544e 1632 }
c28b42ac 1633 /* Now the output buffer is empty, add the new single element */
1634 o = createObject(REDIS_STRING,sdsnewlen(buf,copylen));
1635 listAddNodeHead(c->reply,o);
ed9b544e 1636}
1637
1638static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1639 redisClient *c = privdata;
1640 int nwritten = 0, totwritten = 0, objlen;
1641 robj *o;
1642 REDIS_NOTUSED(el);
1643 REDIS_NOTUSED(mask);
1644
2895e862 1645 /* Use writev() if we have enough buffers to send */
7ea870c0 1646 if (!server.glueoutputbuf &&
1647 listLength(c->reply) > REDIS_WRITEV_THRESHOLD &&
1648 !(c->flags & REDIS_MASTER))
2895e862 1649 {
1650 sendReplyToClientWritev(el, fd, privdata, mask);
1651 return;
1652 }
2895e862 1653
ed9b544e 1654 while(listLength(c->reply)) {
c28b42ac 1655 if (server.glueoutputbuf && listLength(c->reply) > 1)
1656 glueReplyBuffersIfNeeded(c);
1657
ed9b544e 1658 o = listNodeValue(listFirst(c->reply));
1659 objlen = sdslen(o->ptr);
1660
1661 if (objlen == 0) {
1662 listDelNode(c->reply,listFirst(c->reply));
1663 continue;
1664 }
1665
1666 if (c->flags & REDIS_MASTER) {
6f376729 1667 /* Don't reply to a master */
ed9b544e 1668 nwritten = objlen - c->sentlen;
1669 } else {
a4d1ba9a 1670 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
ed9b544e 1671 if (nwritten <= 0) break;
1672 }
1673 c->sentlen += nwritten;
1674 totwritten += nwritten;
1675 /* If we fully sent the object on head go to the next one */
1676 if (c->sentlen == objlen) {
1677 listDelNode(c->reply,listFirst(c->reply));
1678 c->sentlen = 0;
1679 }
6f376729 1680 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
12f9d551 1681 * bytes, in a single threaded server it's a good idea to serve
6f376729 1682 * other clients as well, even if a very large request comes from
1683 * super fast link that is always able to accept data (in real world
12f9d551 1684 * scenario think about 'KEYS *' against the loopback interfae) */
6f376729 1685 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
ed9b544e 1686 }
1687 if (nwritten == -1) {
1688 if (errno == EAGAIN) {
1689 nwritten = 0;
1690 } else {
f870935d 1691 redisLog(REDIS_VERBOSE,
ed9b544e 1692 "Error writing to client: %s", strerror(errno));
1693 freeClient(c);
1694 return;
1695 }
1696 }
1697 if (totwritten > 0) c->lastinteraction = time(NULL);
1698 if (listLength(c->reply) == 0) {
1699 c->sentlen = 0;
1700 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1701 }
1702}
1703
2895e862 1704static void sendReplyToClientWritev(aeEventLoop *el, int fd, void *privdata, int mask)
1705{
1706 redisClient *c = privdata;
1707 int nwritten = 0, totwritten = 0, objlen, willwrite;
1708 robj *o;
1709 struct iovec iov[REDIS_WRITEV_IOVEC_COUNT];
1710 int offset, ion = 0;
1711 REDIS_NOTUSED(el);
1712 REDIS_NOTUSED(mask);
1713
1714 listNode *node;
1715 while (listLength(c->reply)) {
1716 offset = c->sentlen;
1717 ion = 0;
1718 willwrite = 0;
1719
1720 /* fill-in the iov[] array */
1721 for(node = listFirst(c->reply); node; node = listNextNode(node)) {
1722 o = listNodeValue(node);
1723 objlen = sdslen(o->ptr);
1724
1725 if (totwritten + objlen - offset > REDIS_MAX_WRITE_PER_EVENT)
1726 break;
1727
1728 if(ion == REDIS_WRITEV_IOVEC_COUNT)
1729 break; /* no more iovecs */
1730
1731 iov[ion].iov_base = ((char*)o->ptr) + offset;
1732 iov[ion].iov_len = objlen - offset;
1733 willwrite += objlen - offset;
1734 offset = 0; /* just for the first item */
1735 ion++;
1736 }
1737
1738 if(willwrite == 0)
1739 break;
1740
1741 /* write all collected blocks at once */
1742 if((nwritten = writev(fd, iov, ion)) < 0) {
1743 if (errno != EAGAIN) {
f870935d 1744 redisLog(REDIS_VERBOSE,
2895e862 1745 "Error writing to client: %s", strerror(errno));
1746 freeClient(c);
1747 return;
1748 }
1749 break;
1750 }
1751
1752 totwritten += nwritten;
1753 offset = c->sentlen;
1754
1755 /* remove written robjs from c->reply */
1756 while (nwritten && listLength(c->reply)) {
1757 o = listNodeValue(listFirst(c->reply));
1758 objlen = sdslen(o->ptr);
1759
1760 if(nwritten >= objlen - offset) {
1761 listDelNode(c->reply, listFirst(c->reply));
1762 nwritten -= objlen - offset;
1763 c->sentlen = 0;
1764 } else {
1765 /* partial write */
1766 c->sentlen += nwritten;
1767 break;
1768 }
1769 offset = 0;
1770 }
1771 }
1772
1773 if (totwritten > 0)
1774 c->lastinteraction = time(NULL);
1775
1776 if (listLength(c->reply) == 0) {
1777 c->sentlen = 0;
1778 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1779 }
1780}
1781
ed9b544e 1782static struct redisCommand *lookupCommand(char *name) {
1783 int j = 0;
1784 while(cmdTable[j].name != NULL) {
bb0b03a3 1785 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
ed9b544e 1786 j++;
1787 }
1788 return NULL;
1789}
1790
1791/* resetClient prepare the client to process the next command */
1792static void resetClient(redisClient *c) {
1793 freeClientArgv(c);
1794 c->bulklen = -1;
e8a74421 1795 c->multibulk = 0;
ed9b544e 1796}
1797
6e469882 1798/* Call() is the core of Redis execution of a command */
1799static void call(redisClient *c, struct redisCommand *cmd) {
1800 long long dirty;
1801
1802 dirty = server.dirty;
1803 cmd->proc(c);
1804 if (server.appendonly && server.dirty-dirty)
1805 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
1806 if (server.dirty-dirty && listLength(server.slaves))
1807 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
1808 if (listLength(server.monitors))
1809 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
1810 server.stat_numcommands++;
1811}
1812
ed9b544e 1813/* If this function gets called we already read a whole
1814 * command, argments are in the client argv/argc fields.
1815 * processCommand() execute the command or prepare the
1816 * server for a bulk read from the client.
1817 *
1818 * If 1 is returned the client is still alive and valid and
1819 * and other operations can be performed by the caller. Otherwise
1820 * if 0 is returned the client was destroied (i.e. after QUIT). */
1821static int processCommand(redisClient *c) {
1822 struct redisCommand *cmd;
ed9b544e 1823
3fd78bcd 1824 /* Free some memory if needed (maxmemory setting) */
1825 if (server.maxmemory) freeMemoryIfNeeded();
1826
e8a74421 1827 /* Handle the multi bulk command type. This is an alternative protocol
1828 * supported by Redis in order to receive commands that are composed of
1829 * multiple binary-safe "bulk" arguments. The latency of processing is
1830 * a bit higher but this allows things like multi-sets, so if this
1831 * protocol is used only for MSET and similar commands this is a big win. */
1832 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1833 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1834 if (c->multibulk <= 0) {
1835 resetClient(c);
1836 return 1;
1837 } else {
1838 decrRefCount(c->argv[c->argc-1]);
1839 c->argc--;
1840 return 1;
1841 }
1842 } else if (c->multibulk) {
1843 if (c->bulklen == -1) {
1844 if (((char*)c->argv[0]->ptr)[0] != '$') {
1845 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1846 resetClient(c);
1847 return 1;
1848 } else {
1849 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1850 decrRefCount(c->argv[0]);
1851 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1852 c->argc--;
1853 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1854 resetClient(c);
1855 return 1;
1856 }
1857 c->argc--;
1858 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1859 return 1;
1860 }
1861 } else {
1862 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1863 c->mbargv[c->mbargc] = c->argv[0];
1864 c->mbargc++;
1865 c->argc--;
1866 c->multibulk--;
1867 if (c->multibulk == 0) {
1868 robj **auxargv;
1869 int auxargc;
1870
1871 /* Here we need to swap the multi-bulk argc/argv with the
1872 * normal argc/argv of the client structure. */
1873 auxargv = c->argv;
1874 c->argv = c->mbargv;
1875 c->mbargv = auxargv;
1876
1877 auxargc = c->argc;
1878 c->argc = c->mbargc;
1879 c->mbargc = auxargc;
1880
1881 /* We need to set bulklen to something different than -1
1882 * in order for the code below to process the command without
1883 * to try to read the last argument of a bulk command as
1884 * a special argument. */
1885 c->bulklen = 0;
1886 /* continue below and process the command */
1887 } else {
1888 c->bulklen = -1;
1889 return 1;
1890 }
1891 }
1892 }
1893 /* -- end of multi bulk commands processing -- */
1894
ed9b544e 1895 /* The QUIT command is handled as a special case. Normal command
1896 * procs are unable to close the client connection safely */
bb0b03a3 1897 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
ed9b544e 1898 freeClient(c);
1899 return 0;
1900 }
1901 cmd = lookupCommand(c->argv[0]->ptr);
1902 if (!cmd) {
2c14807b 1903 addReplySds(c,
1904 sdscatprintf(sdsempty(), "-ERR unknown command '%s'\r\n",
1905 (char*)c->argv[0]->ptr));
ed9b544e 1906 resetClient(c);
1907 return 1;
1908 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1909 (c->argc < -cmd->arity)) {
454d4e43 1910 addReplySds(c,
1911 sdscatprintf(sdsempty(),
1912 "-ERR wrong number of arguments for '%s' command\r\n",
1913 cmd->name));
ed9b544e 1914 resetClient(c);
1915 return 1;
3fd78bcd 1916 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1917 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1918 resetClient(c);
1919 return 1;
ed9b544e 1920 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1921 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1922
1923 decrRefCount(c->argv[c->argc-1]);
1924 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1925 c->argc--;
1926 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1927 resetClient(c);
1928 return 1;
1929 }
1930 c->argc--;
1931 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1932 /* It is possible that the bulk read is already in the
8d0490e7 1933 * buffer. Check this condition and handle it accordingly.
1934 * This is just a fast path, alternative to call processInputBuffer().
1935 * It's a good idea since the code is small and this condition
1936 * happens most of the times. */
ed9b544e 1937 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1938 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1939 c->argc++;
1940 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1941 } else {
1942 return 1;
1943 }
1944 }
10c43610 1945 /* Let's try to share objects on the command arguments vector */
1946 if (server.shareobjects) {
1947 int j;
1948 for(j = 1; j < c->argc; j++)
1949 c->argv[j] = tryObjectSharing(c->argv[j]);
1950 }
942a3961 1951 /* Let's try to encode the bulk object to save space. */
1952 if (cmd->flags & REDIS_CMD_BULK)
1953 tryObjectEncoding(c->argv[c->argc-1]);
1954
e63943a4 1955 /* Check if the user is authenticated */
1956 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1957 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1958 resetClient(c);
1959 return 1;
1960 }
1961
ed9b544e 1962 /* Exec the command */
6e469882 1963 if (c->flags & REDIS_MULTI && cmd->proc != execCommand) {
1964 queueMultiCommand(c,cmd);
1965 addReply(c,shared.queued);
1966 } else {
1967 call(c,cmd);
1968 }
ed9b544e 1969
1970 /* Prepare the client for the next command */
1971 if (c->flags & REDIS_CLOSE) {
1972 freeClient(c);
1973 return 0;
1974 }
1975 resetClient(c);
1976 return 1;
1977}
1978
87eca727 1979static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6208b3a7 1980 listNode *ln;
ed9b544e 1981 int outc = 0, j;
93ea3759 1982 robj **outv;
1983 /* (args*2)+1 is enough room for args, spaces, newlines */
1984 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1985
1986 if (argc <= REDIS_STATIC_ARGS) {
1987 outv = static_outv;
1988 } else {
1989 outv = zmalloc(sizeof(robj*)*(argc*2+1));
93ea3759 1990 }
ed9b544e 1991
1992 for (j = 0; j < argc; j++) {
1993 if (j != 0) outv[outc++] = shared.space;
1994 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1995 robj *lenobj;
1996
1997 lenobj = createObject(REDIS_STRING,
682ac724 1998 sdscatprintf(sdsempty(),"%lu\r\n",
83c6a618 1999 (unsigned long) stringObjectLen(argv[j])));
ed9b544e 2000 lenobj->refcount = 0;
2001 outv[outc++] = lenobj;
2002 }
2003 outv[outc++] = argv[j];
2004 }
2005 outv[outc++] = shared.crlf;
2006
40d224a9 2007 /* Increment all the refcounts at start and decrement at end in order to
2008 * be sure to free objects if there is no slave in a replication state
2009 * able to be feed with commands */
2010 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
6208b3a7 2011 listRewind(slaves);
2012 while((ln = listYield(slaves))) {
ed9b544e 2013 redisClient *slave = ln->value;
40d224a9 2014
2015 /* Don't feed slaves that are still waiting for BGSAVE to start */
6208b3a7 2016 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
40d224a9 2017
2018 /* Feed all the other slaves, MONITORs and so on */
ed9b544e 2019 if (slave->slaveseldb != dictid) {
2020 robj *selectcmd;
2021
2022 switch(dictid) {
2023 case 0: selectcmd = shared.select0; break;
2024 case 1: selectcmd = shared.select1; break;
2025 case 2: selectcmd = shared.select2; break;
2026 case 3: selectcmd = shared.select3; break;
2027 case 4: selectcmd = shared.select4; break;
2028 case 5: selectcmd = shared.select5; break;
2029 case 6: selectcmd = shared.select6; break;
2030 case 7: selectcmd = shared.select7; break;
2031 case 8: selectcmd = shared.select8; break;
2032 case 9: selectcmd = shared.select9; break;
2033 default:
2034 selectcmd = createObject(REDIS_STRING,
2035 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
2036 selectcmd->refcount = 0;
2037 break;
2038 }
2039 addReply(slave,selectcmd);
2040 slave->slaveseldb = dictid;
2041 }
2042 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
ed9b544e 2043 }
40d224a9 2044 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
93ea3759 2045 if (outv != static_outv) zfree(outv);
ed9b544e 2046}
2047
638e42ac 2048static void processInputBuffer(redisClient *c) {
ed9b544e 2049again:
4409877e 2050 /* Before to process the input buffer, make sure the client is not
2051 * waitig for a blocking operation such as BLPOP. Note that the first
2052 * iteration the client is never blocked, otherwise the processInputBuffer
2053 * would not be called at all, but after the execution of the first commands
2054 * in the input buffer the client may be blocked, and the "goto again"
2055 * will try to reiterate. The following line will make it return asap. */
2056 if (c->flags & REDIS_BLOCKED) return;
ed9b544e 2057 if (c->bulklen == -1) {
2058 /* Read the first line of the query */
2059 char *p = strchr(c->querybuf,'\n');
2060 size_t querylen;
644fafa3 2061
ed9b544e 2062 if (p) {
2063 sds query, *argv;
2064 int argc, j;
2065
2066 query = c->querybuf;
2067 c->querybuf = sdsempty();
2068 querylen = 1+(p-(query));
2069 if (sdslen(query) > querylen) {
2070 /* leave data after the first line of the query in the buffer */
2071 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
2072 }
2073 *p = '\0'; /* remove "\n" */
2074 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
2075 sdsupdatelen(query);
2076
2077 /* Now we can split the query in arguments */
ed9b544e 2078 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
93ea3759 2079 sdsfree(query);
2080
2081 if (c->argv) zfree(c->argv);
2082 c->argv = zmalloc(sizeof(robj*)*argc);
93ea3759 2083
2084 for (j = 0; j < argc; j++) {
ed9b544e 2085 if (sdslen(argv[j])) {
2086 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
2087 c->argc++;
2088 } else {
2089 sdsfree(argv[j]);
2090 }
2091 }
2092 zfree(argv);
7c49733c 2093 if (c->argc) {
2094 /* Execute the command. If the client is still valid
2095 * after processCommand() return and there is something
2096 * on the query buffer try to process the next command. */
2097 if (processCommand(c) && sdslen(c->querybuf)) goto again;
2098 } else {
2099 /* Nothing to process, argc == 0. Just process the query
2100 * buffer if it's not empty or return to the caller */
2101 if (sdslen(c->querybuf)) goto again;
2102 }
ed9b544e 2103 return;
644fafa3 2104 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
f870935d 2105 redisLog(REDIS_VERBOSE, "Client protocol error");
ed9b544e 2106 freeClient(c);
2107 return;
2108 }
2109 } else {
2110 /* Bulk read handling. Note that if we are at this point
2111 the client already sent a command terminated with a newline,
2112 we are reading the bulk data that is actually the last
2113 argument of the command. */
2114 int qbl = sdslen(c->querybuf);
2115
2116 if (c->bulklen <= qbl) {
2117 /* Copy everything but the final CRLF as final argument */
2118 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
2119 c->argc++;
2120 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
638e42ac 2121 /* Process the command. If the client is still valid after
2122 * the processing and there is more data in the buffer
2123 * try to parse it. */
2124 if (processCommand(c) && sdslen(c->querybuf)) goto again;
ed9b544e 2125 return;
2126 }
2127 }
2128}
2129
638e42ac 2130static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
2131 redisClient *c = (redisClient*) privdata;
2132 char buf[REDIS_IOBUF_LEN];
2133 int nread;
2134 REDIS_NOTUSED(el);
2135 REDIS_NOTUSED(mask);
2136
2137 nread = read(fd, buf, REDIS_IOBUF_LEN);
2138 if (nread == -1) {
2139 if (errno == EAGAIN) {
2140 nread = 0;
2141 } else {
f870935d 2142 redisLog(REDIS_VERBOSE, "Reading from client: %s",strerror(errno));
638e42ac 2143 freeClient(c);
2144 return;
2145 }
2146 } else if (nread == 0) {
f870935d 2147 redisLog(REDIS_VERBOSE, "Client closed connection");
638e42ac 2148 freeClient(c);
2149 return;
2150 }
2151 if (nread) {
2152 c->querybuf = sdscatlen(c->querybuf, buf, nread);
2153 c->lastinteraction = time(NULL);
2154 } else {
2155 return;
2156 }
2157 processInputBuffer(c);
2158}
2159
ed9b544e 2160static int selectDb(redisClient *c, int id) {
2161 if (id < 0 || id >= server.dbnum)
2162 return REDIS_ERR;
3305306f 2163 c->db = &server.db[id];
ed9b544e 2164 return REDIS_OK;
2165}
2166
40d224a9 2167static void *dupClientReplyValue(void *o) {
2168 incrRefCount((robj*)o);
2169 return 0;
2170}
2171
ed9b544e 2172static redisClient *createClient(int fd) {
2173 redisClient *c = zmalloc(sizeof(*c));
2174
2175 anetNonBlock(NULL,fd);
2176 anetTcpNoDelay(NULL,fd);
2177 if (!c) return NULL;
2178 selectDb(c,0);
2179 c->fd = fd;
2180 c->querybuf = sdsempty();
2181 c->argc = 0;
93ea3759 2182 c->argv = NULL;
ed9b544e 2183 c->bulklen = -1;
e8a74421 2184 c->multibulk = 0;
2185 c->mbargc = 0;
2186 c->mbargv = NULL;
ed9b544e 2187 c->sentlen = 0;
2188 c->flags = 0;
2189 c->lastinteraction = time(NULL);
abcb223e 2190 c->authenticated = 0;
40d224a9 2191 c->replstate = REDIS_REPL_NONE;
6b47e12e 2192 c->reply = listCreate();
b177fd30 2193 c->blockingkeys = NULL;
2194 c->blockingkeysnum = 0;
ed9b544e 2195 listSetFreeMethod(c->reply,decrRefCount);
40d224a9 2196 listSetDupMethod(c->reply,dupClientReplyValue);
ed9b544e 2197 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
266373b2 2198 readQueryFromClient, c) == AE_ERR) {
ed9b544e 2199 freeClient(c);
2200 return NULL;
2201 }
6b47e12e 2202 listAddNodeTail(server.clients,c);
6e469882 2203 initClientMultiState(c);
ed9b544e 2204 return c;
2205}
2206
2207static void addReply(redisClient *c, robj *obj) {
2208 if (listLength(c->reply) == 0 &&
6208b3a7 2209 (c->replstate == REDIS_REPL_NONE ||
2210 c->replstate == REDIS_REPL_ONLINE) &&
ed9b544e 2211 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
266373b2 2212 sendReplyToClient, c) == AE_ERR) return;
e3cadb8a 2213
2214 if (server.vm_enabled && obj->storage != REDIS_VM_MEMORY) {
2215 obj = dupStringObject(obj);
2216 obj->refcount = 0; /* getDecodedObject() will increment the refcount */
2217 }
9d65a1bb 2218 listAddNodeTail(c->reply,getDecodedObject(obj));
ed9b544e 2219}
2220
2221static void addReplySds(redisClient *c, sds s) {
2222 robj *o = createObject(REDIS_STRING,s);
2223 addReply(c,o);
2224 decrRefCount(o);
2225}
2226
e2665397 2227static void addReplyDouble(redisClient *c, double d) {
2228 char buf[128];
2229
2230 snprintf(buf,sizeof(buf),"%.17g",d);
682ac724 2231 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n%s\r\n",
83c6a618 2232 (unsigned long) strlen(buf),buf));
e2665397 2233}
2234
942a3961 2235static void addReplyBulkLen(redisClient *c, robj *obj) {
2236 size_t len;
2237
2238 if (obj->encoding == REDIS_ENCODING_RAW) {
2239 len = sdslen(obj->ptr);
2240 } else {
2241 long n = (long)obj->ptr;
2242
e054afda 2243 /* Compute how many bytes will take this integer as a radix 10 string */
942a3961 2244 len = 1;
2245 if (n < 0) {
2246 len++;
2247 n = -n;
2248 }
2249 while((n = n/10) != 0) {
2250 len++;
2251 }
2252 }
83c6a618 2253 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",(unsigned long)len));
942a3961 2254}
2255
ed9b544e 2256static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
2257 int cport, cfd;
2258 char cip[128];
285add55 2259 redisClient *c;
ed9b544e 2260 REDIS_NOTUSED(el);
2261 REDIS_NOTUSED(mask);
2262 REDIS_NOTUSED(privdata);
2263
2264 cfd = anetAccept(server.neterr, fd, cip, &cport);
2265 if (cfd == AE_ERR) {
f870935d 2266 redisLog(REDIS_VERBOSE,"Accepting client connection: %s", server.neterr);
ed9b544e 2267 return;
2268 }
f870935d 2269 redisLog(REDIS_VERBOSE,"Accepted %s:%d", cip, cport);
285add55 2270 if ((c = createClient(cfd)) == NULL) {
ed9b544e 2271 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
2272 close(cfd); /* May be already closed, just ingore errors */
2273 return;
2274 }
285add55 2275 /* If maxclient directive is set and this is one client more... close the
2276 * connection. Note that we create the client instead to check before
2277 * for this condition, since now the socket is already set in nonblocking
2278 * mode and we can send an error for free using the Kernel I/O */
2279 if (server.maxclients && listLength(server.clients) > server.maxclients) {
2280 char *err = "-ERR max number of clients reached\r\n";
2281
2282 /* That's a best effort error message, don't check write errors */
fee803ba 2283 if (write(c->fd,err,strlen(err)) == -1) {
2284 /* Nothing to do, Just to avoid the warning... */
2285 }
285add55 2286 freeClient(c);
2287 return;
2288 }
ed9b544e 2289 server.stat_numconnections++;
2290}
2291
2292/* ======================= Redis objects implementation ===================== */
2293
2294static robj *createObject(int type, void *ptr) {
2295 robj *o;
2296
2297 if (listLength(server.objfreelist)) {
2298 listNode *head = listFirst(server.objfreelist);
2299 o = listNodeValue(head);
2300 listDelNode(server.objfreelist,head);
2301 } else {
75680a3c 2302 if (server.vm_enabled) {
2303 o = zmalloc(sizeof(*o));
2304 } else {
2305 o = zmalloc(sizeof(*o)-sizeof(struct redisObjectVM));
2306 }
ed9b544e 2307 }
ed9b544e 2308 o->type = type;
942a3961 2309 o->encoding = REDIS_ENCODING_RAW;
ed9b544e 2310 o->ptr = ptr;
2311 o->refcount = 1;
3a66edc7 2312 if (server.vm_enabled) {
2313 o->vm.atime = server.unixtime;
2314 o->storage = REDIS_VM_MEMORY;
2315 }
ed9b544e 2316 return o;
2317}
2318
2319static robj *createStringObject(char *ptr, size_t len) {
2320 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
2321}
2322
4ef8de8a 2323static robj *dupStringObject(robj *o) {
2324 return createStringObject(o->ptr,sdslen(o->ptr));
2325}
2326
ed9b544e 2327static robj *createListObject(void) {
2328 list *l = listCreate();
2329
ed9b544e 2330 listSetFreeMethod(l,decrRefCount);
2331 return createObject(REDIS_LIST,l);
2332}
2333
2334static robj *createSetObject(void) {
2335 dict *d = dictCreate(&setDictType,NULL);
ed9b544e 2336 return createObject(REDIS_SET,d);
2337}
2338
1812e024 2339static robj *createZsetObject(void) {
6b47e12e 2340 zset *zs = zmalloc(sizeof(*zs));
2341
2342 zs->dict = dictCreate(&zsetDictType,NULL);
2343 zs->zsl = zslCreate();
2344 return createObject(REDIS_ZSET,zs);
1812e024 2345}
2346
ed9b544e 2347static void freeStringObject(robj *o) {
942a3961 2348 if (o->encoding == REDIS_ENCODING_RAW) {
2349 sdsfree(o->ptr);
2350 }
ed9b544e 2351}
2352
2353static void freeListObject(robj *o) {
2354 listRelease((list*) o->ptr);
2355}
2356
2357static void freeSetObject(robj *o) {
2358 dictRelease((dict*) o->ptr);
2359}
2360
fd8ccf44 2361static void freeZsetObject(robj *o) {
2362 zset *zs = o->ptr;
2363
2364 dictRelease(zs->dict);
2365 zslFree(zs->zsl);
2366 zfree(zs);
2367}
2368
ed9b544e 2369static void freeHashObject(robj *o) {
2370 dictRelease((dict*) o->ptr);
2371}
2372
2373static void incrRefCount(robj *o) {
f2b8ab34 2374 redisAssert(!server.vm_enabled || o->storage == REDIS_VM_MEMORY);
ed9b544e 2375 o->refcount++;
2376}
2377
2378static void decrRefCount(void *obj) {
2379 robj *o = obj;
94754ccc 2380
a35ddf12 2381 /* REDIS_VM_SWAPPED */
2382 if (server.vm_enabled && o->storage == REDIS_VM_SWAPPED) {
f2b8ab34 2383 redisAssert(o->refcount == 1);
2384 redisAssert(o->type == REDIS_STRING);
a35ddf12 2385 freeStringObject(o);
2386 vmMarkPagesFree(o->vm.page,o->vm.usedpages);
2387 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2388 !listAddNodeHead(server.objfreelist,o))
2389 zfree(o);
7d98e08c 2390 server.vm_stats_swapped_objects--;
a35ddf12 2391 return;
2392 }
2393 /* REDIS_VM_MEMORY */
ed9b544e 2394 if (--(o->refcount) == 0) {
2395 switch(o->type) {
2396 case REDIS_STRING: freeStringObject(o); break;
2397 case REDIS_LIST: freeListObject(o); break;
2398 case REDIS_SET: freeSetObject(o); break;
fd8ccf44 2399 case REDIS_ZSET: freeZsetObject(o); break;
ed9b544e 2400 case REDIS_HASH: freeHashObject(o); break;
dfc5e96c 2401 default: redisAssert(0 != 0); break;
ed9b544e 2402 }
2403 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
2404 !listAddNodeHead(server.objfreelist,o))
2405 zfree(o);
2406 }
2407}
2408
942a3961 2409static robj *lookupKey(redisDb *db, robj *key) {
2410 dictEntry *de = dictFind(db->dict,key);
3a66edc7 2411 if (de) {
55cf8433 2412 robj *key = dictGetEntryKey(de);
2413 robj *val = dictGetEntryVal(de);
3a66edc7 2414
55cf8433 2415 if (server.vm_enabled) {
2416 if (key->storage == REDIS_VM_MEMORY) {
2417 /* Update the access time of the key for the aging algorithm. */
2418 key->vm.atime = server.unixtime;
2419 } else {
2420 /* Our value was swapped on disk. Bring it at home. */
f2b8ab34 2421 redisAssert(val == NULL);
55cf8433 2422 val = vmLoadObject(key);
2423 dictGetEntryVal(de) = val;
2424 }
2425 }
2426 return val;
3a66edc7 2427 } else {
2428 return NULL;
2429 }
942a3961 2430}
2431
2432static robj *lookupKeyRead(redisDb *db, robj *key) {
2433 expireIfNeeded(db,key);
2434 return lookupKey(db,key);
2435}
2436
2437static robj *lookupKeyWrite(redisDb *db, robj *key) {
2438 deleteIfVolatile(db,key);
2439 return lookupKey(db,key);
2440}
2441
2442static int deleteKey(redisDb *db, robj *key) {
2443 int retval;
2444
2445 /* We need to protect key from destruction: after the first dictDelete()
2446 * it may happen that 'key' is no longer valid if we don't increment
2447 * it's count. This may happen when we get the object reference directly
2448 * from the hash table with dictRandomKey() or dict iterators */
2449 incrRefCount(key);
2450 if (dictSize(db->expires)) dictDelete(db->expires,key);
2451 retval = dictDelete(db->dict,key);
2452 decrRefCount(key);
2453
2454 return retval == DICT_OK;
2455}
2456
10c43610 2457/* Try to share an object against the shared objects pool */
2458static robj *tryObjectSharing(robj *o) {
2459 struct dictEntry *de;
2460 unsigned long c;
2461
3305306f 2462 if (o == NULL || server.shareobjects == 0) return o;
10c43610 2463
dfc5e96c 2464 redisAssert(o->type == REDIS_STRING);
10c43610 2465 de = dictFind(server.sharingpool,o);
2466 if (de) {
2467 robj *shared = dictGetEntryKey(de);
2468
2469 c = ((unsigned long) dictGetEntryVal(de))+1;
2470 dictGetEntryVal(de) = (void*) c;
2471 incrRefCount(shared);
2472 decrRefCount(o);
2473 return shared;
2474 } else {
2475 /* Here we are using a stream algorihtm: Every time an object is
2476 * shared we increment its count, everytime there is a miss we
2477 * recrement the counter of a random object. If this object reaches
2478 * zero we remove the object and put the current object instead. */
3305306f 2479 if (dictSize(server.sharingpool) >=
10c43610 2480 server.sharingpoolsize) {
2481 de = dictGetRandomKey(server.sharingpool);
dfc5e96c 2482 redisAssert(de != NULL);
10c43610 2483 c = ((unsigned long) dictGetEntryVal(de))-1;
2484 dictGetEntryVal(de) = (void*) c;
2485 if (c == 0) {
2486 dictDelete(server.sharingpool,de->key);
2487 }
2488 } else {
2489 c = 0; /* If the pool is empty we want to add this object */
2490 }
2491 if (c == 0) {
2492 int retval;
2493
2494 retval = dictAdd(server.sharingpool,o,(void*)1);
dfc5e96c 2495 redisAssert(retval == DICT_OK);
10c43610 2496 incrRefCount(o);
2497 }
2498 return o;
2499 }
2500}
2501
724a51b1 2502/* Check if the nul-terminated string 's' can be represented by a long
2503 * (that is, is a number that fits into long without any other space or
2504 * character before or after the digits).
2505 *
2506 * If so, the function returns REDIS_OK and *longval is set to the value
2507 * of the number. Otherwise REDIS_ERR is returned */
f69f2cba 2508static int isStringRepresentableAsLong(sds s, long *longval) {
724a51b1 2509 char buf[32], *endptr;
2510 long value;
2511 int slen;
2512
2513 value = strtol(s, &endptr, 10);
2514 if (endptr[0] != '\0') return REDIS_ERR;
2515 slen = snprintf(buf,32,"%ld",value);
2516
2517 /* If the number converted back into a string is not identical
2518 * then it's not possible to encode the string as integer */
f69f2cba 2519 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
724a51b1 2520 if (longval) *longval = value;
2521 return REDIS_OK;
2522}
2523
942a3961 2524/* Try to encode a string object in order to save space */
2525static int tryObjectEncoding(robj *o) {
2526 long value;
942a3961 2527 sds s = o->ptr;
3305306f 2528
942a3961 2529 if (o->encoding != REDIS_ENCODING_RAW)
2530 return REDIS_ERR; /* Already encoded */
3305306f 2531
942a3961 2532 /* It's not save to encode shared objects: shared objects can be shared
2533 * everywhere in the "object space" of Redis. Encoded objects can only
2534 * appear as "values" (and not, for instance, as keys) */
2535 if (o->refcount > 1) return REDIS_ERR;
3305306f 2536
942a3961 2537 /* Currently we try to encode only strings */
dfc5e96c 2538 redisAssert(o->type == REDIS_STRING);
94754ccc 2539
724a51b1 2540 /* Check if we can represent this string as a long integer */
2541 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
942a3961 2542
2543 /* Ok, this object can be encoded */
2544 o->encoding = REDIS_ENCODING_INT;
2545 sdsfree(o->ptr);
2546 o->ptr = (void*) value;
2547 return REDIS_OK;
2548}
2549
9d65a1bb 2550/* Get a decoded version of an encoded object (returned as a new object).
2551 * If the object is already raw-encoded just increment the ref count. */
2552static robj *getDecodedObject(robj *o) {
942a3961 2553 robj *dec;
2554
9d65a1bb 2555 if (o->encoding == REDIS_ENCODING_RAW) {
2556 incrRefCount(o);
2557 return o;
2558 }
942a3961 2559 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2560 char buf[32];
2561
2562 snprintf(buf,32,"%ld",(long)o->ptr);
2563 dec = createStringObject(buf,strlen(buf));
2564 return dec;
2565 } else {
dfc5e96c 2566 redisAssert(1 != 1);
942a3961 2567 }
3305306f 2568}
2569
d7f43c08 2570/* Compare two string objects via strcmp() or alike.
2571 * Note that the objects may be integer-encoded. In such a case we
2572 * use snprintf() to get a string representation of the numbers on the stack
1fd9bc8a 2573 * and compare the strings, it's much faster than calling getDecodedObject().
2574 *
2575 * Important note: if objects are not integer encoded, but binary-safe strings,
2576 * sdscmp() from sds.c will apply memcmp() so this function ca be considered
2577 * binary safe. */
724a51b1 2578static int compareStringObjects(robj *a, robj *b) {
dfc5e96c 2579 redisAssert(a->type == REDIS_STRING && b->type == REDIS_STRING);
d7f43c08 2580 char bufa[128], bufb[128], *astr, *bstr;
2581 int bothsds = 1;
724a51b1 2582
e197b441 2583 if (a == b) return 0;
d7f43c08 2584 if (a->encoding != REDIS_ENCODING_RAW) {
2585 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2586 astr = bufa;
2587 bothsds = 0;
724a51b1 2588 } else {
d7f43c08 2589 astr = a->ptr;
724a51b1 2590 }
d7f43c08 2591 if (b->encoding != REDIS_ENCODING_RAW) {
2592 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2593 bstr = bufb;
2594 bothsds = 0;
2595 } else {
2596 bstr = b->ptr;
2597 }
2598 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
724a51b1 2599}
2600
0ea663ea 2601static size_t stringObjectLen(robj *o) {
dfc5e96c 2602 redisAssert(o->type == REDIS_STRING);
0ea663ea 2603 if (o->encoding == REDIS_ENCODING_RAW) {
2604 return sdslen(o->ptr);
2605 } else {
2606 char buf[32];
2607
2608 return snprintf(buf,32,"%ld",(long)o->ptr);
2609 }
2610}
2611
06233c45 2612/*============================ RDB saving/loading =========================== */
ed9b544e 2613
/* Write a single type/opcode byte to 'fp'.
 * Returns 0 on success, -1 if the byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
2618
/* Write a time value to 'fp' as a 4 byte signed integer in host byte order
 * (the value is truncated to int32_t).
 * Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t secs = (int32_t) t;

    return (fwrite(&secs,4,1,fp) == 0) ? -1 : 0;
}
2624
e3566d4b 2625/* check rdbLoadLen() comments for more info */
f78fd11b 2626static int rdbSaveLen(FILE *fp, uint32_t len) {
2627 unsigned char buf[2];
2628
2629 if (len < (1<<6)) {
2630 /* Save a 6 bit len */
10c43610 2631 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
f78fd11b 2632 if (fwrite(buf,1,1,fp) == 0) return -1;
2633 } else if (len < (1<<14)) {
2634 /* Save a 14 bit len */
10c43610 2635 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
f78fd11b 2636 buf[1] = len&0xFF;
17be1a4a 2637 if (fwrite(buf,2,1,fp) == 0) return -1;
f78fd11b 2638 } else {
2639 /* Save a 32 bit len */
10c43610 2640 buf[0] = (REDIS_RDB_32BITLEN<<6);
f78fd11b 2641 if (fwrite(buf,1,1,fp) == 0) return -1;
2642 len = htonl(len);
2643 if (fwrite(&len,4,1,fp) == 0) return -1;
2644 }
2645 return 0;
2646}
2647
e3566d4b 2648/* String objects in the form "2391" "-100" without any space and with a
2649 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2650 * encoded as integers to save space */
56906eef 2651static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
e3566d4b 2652 long long value;
2653 char *endptr, buf[32];
2654
2655 /* Check if it's possible to encode this value as a number */
2656 value = strtoll(s, &endptr, 10);
2657 if (endptr[0] != '\0') return 0;
2658 snprintf(buf,32,"%lld",value);
2659
2660 /* If the number converted back into a string is not identical
2661 * then it's not possible to encode the string as integer */
2662 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2663
2664 /* Finally check if it fits in our ranges */
2665 if (value >= -(1<<7) && value <= (1<<7)-1) {
2666 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2667 enc[1] = value&0xFF;
2668 return 2;
2669 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2670 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2671 enc[1] = value&0xFF;
2672 enc[2] = (value>>8)&0xFF;
2673 return 3;
2674 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2675 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2676 enc[1] = value&0xFF;
2677 enc[2] = (value>>8)&0xFF;
2678 enc[3] = (value>>16)&0xFF;
2679 enc[4] = (value>>24)&0xFF;
2680 return 5;
2681 } else {
2682 return 0;
2683 }
2684}
2685
774e3047 2686static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2687 unsigned int comprlen, outlen;
2688 unsigned char byte;
2689 void *out;
2690
2691 /* We require at least four bytes compression for this to be worth it */
2692 outlen = sdslen(obj->ptr)-4;
2693 if (outlen <= 0) return 0;
3a2694c4 2694 if ((out = zmalloc(outlen+1)) == NULL) return 0;
774e3047 2695 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2696 if (comprlen == 0) {
88e85998 2697 zfree(out);
774e3047 2698 return 0;
2699 }
2700 /* Data compressed! Let's save it on disk */
2701 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2702 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2703 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2704 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2705 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
88e85998 2706 zfree(out);
774e3047 2707 return comprlen;
2708
2709writeerr:
88e85998 2710 zfree(out);
774e3047 2711 return -1;
2712}
2713
e3566d4b 2714/* Save a string objet as [len][data] on disk. If the object is a string
2715 * representation of an integer value we try to safe it in a special form */
942a3961 2716static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2717 size_t len;
e3566d4b 2718 int enclen;
10c43610 2719
942a3961 2720 len = sdslen(obj->ptr);
2721
774e3047 2722 /* Try integer encoding */
e3566d4b 2723 if (len <= 11) {
2724 unsigned char buf[5];
2725 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2726 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2727 return 0;
2728 }
2729 }
774e3047 2730
2731 /* Try LZF compression - under 20 bytes it's unable to compress even
88e85998 2732 * aaaaaaaaaaaaaaaaaa so skip it */
121f70cf 2733 if (server.rdbcompression && len > 20) {
774e3047 2734 int retval;
2735
2736 retval = rdbSaveLzfStringObject(fp,obj);
2737 if (retval == -1) return -1;
2738 if (retval > 0) return 0;
2739 /* retval == 0 means data can't be compressed, save the old way */
2740 }
2741
2742 /* Store verbatim */
10c43610 2743 if (rdbSaveLen(fp,len) == -1) return -1;
2744 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2745 return 0;
2746}
2747
942a3961 2748/* Like rdbSaveStringObjectRaw() but handle encoded objects */
2749static int rdbSaveStringObject(FILE *fp, robj *obj) {
2750 int retval;
942a3961 2751
9d65a1bb 2752 obj = getDecodedObject(obj);
2753 retval = rdbSaveStringObjectRaw(fp,obj);
2754 decrRefCount(obj);
2755 return retval;
942a3961 2756}
2757
/* Save a double value. The value is stored as a string ("%.17g" format)
 * prefixed by one unsigned byte holding the length of the string.
 * Three values of that byte are reserved and are not followed by any data:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char out[128];
    int total = 1;

    if (isnan(val)) {
        out[0] = 253;
    } else if (!isfinite(val)) {
        out[0] = (val < 0) ? 255 : 254;
    } else {
        char *text = (char*)out+1;

        snprintf(text,sizeof(out)-1,"%.17g",val);
        out[0] = strlen(text);
        total = out[0]+1;
    }
    return (fwrite(out,total,1,fp) == 0) ? -1 : 0;
}
2784
06233c45 2785/* Save a Redis object. */
2786static int rdbSaveObject(FILE *fp, robj *o) {
2787 if (o->type == REDIS_STRING) {
2788 /* Save a string value */
2789 if (rdbSaveStringObject(fp,o) == -1) return -1;
2790 } else if (o->type == REDIS_LIST) {
2791 /* Save a list value */
2792 list *list = o->ptr;
2793 listNode *ln;
2794
2795 listRewind(list);
2796 if (rdbSaveLen(fp,listLength(list)) == -1) return -1;
2797 while((ln = listYield(list))) {
2798 robj *eleobj = listNodeValue(ln);
2799
2800 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
2801 }
2802 } else if (o->type == REDIS_SET) {
2803 /* Save a set value */
2804 dict *set = o->ptr;
2805 dictIterator *di = dictGetIterator(set);
2806 dictEntry *de;
2807
2808 if (rdbSaveLen(fp,dictSize(set)) == -1) return -1;
2809 while((de = dictNext(di)) != NULL) {
2810 robj *eleobj = dictGetEntryKey(de);
2811
2812 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
2813 }
2814 dictReleaseIterator(di);
2815 } else if (o->type == REDIS_ZSET) {
2816 /* Save a set value */
2817 zset *zs = o->ptr;
2818 dictIterator *di = dictGetIterator(zs->dict);
2819 dictEntry *de;
2820
2821 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) return -1;
2822 while((de = dictNext(di)) != NULL) {
2823 robj *eleobj = dictGetEntryKey(de);
2824 double *score = dictGetEntryVal(de);
2825
2826 if (rdbSaveStringObject(fp,eleobj) == -1) return -1;
2827 if (rdbSaveDoubleValue(fp,*score) == -1) return -1;
2828 }
2829 dictReleaseIterator(di);
2830 } else {
2831 redisAssert(0 != 0);
2832 }
2833 return 0;
2834}
2835
2836/* Return the length the object will have on disk if saved with
2837 * the rdbSaveObject() function. Currently we use a trick to get
2838 * this length with very little changes to the code. In the future
2839 * we could switch to a faster solution. */
2840static off_t rdbSavedObjectLen(robj *o) {
2841 static FILE *fp = NULL;
2842
2843 if (fp == NULL) fp = fopen("/dev/null","w");
2844 assert(fp != NULL);
2845
2846 rewind(fp);
2847 assert(rdbSaveObject(fp,o) != 1);
2848 return ftello(fp);
2849}
2850
06224fec 2851/* Return the number of pages required to save this object in the swap file */
2852static off_t rdbSavedObjectPages(robj *o) {
2853 off_t bytes = rdbSavedObjectLen(o);
2854
2855 return (bytes+(server.vm_page_size-1))/server.vm_page_size;
2856}
2857
ed9b544e 2858/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
f78fd11b 2859static int rdbSave(char *filename) {
ed9b544e 2860 dictIterator *di = NULL;
2861 dictEntry *de;
ed9b544e 2862 FILE *fp;
2863 char tmpfile[256];
2864 int j;
bb32ede5 2865 time_t now = time(NULL);
ed9b544e 2866
a3b21203 2867 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
ed9b544e 2868 fp = fopen(tmpfile,"w");
2869 if (!fp) {
2870 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2871 return REDIS_ERR;
2872 }
f78fd11b 2873 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
ed9b544e 2874 for (j = 0; j < server.dbnum; j++) {
bb32ede5 2875 redisDb *db = server.db+j;
2876 dict *d = db->dict;
3305306f 2877 if (dictSize(d) == 0) continue;
ed9b544e 2878 di = dictGetIterator(d);
2879 if (!di) {
2880 fclose(fp);
2881 return REDIS_ERR;
2882 }
2883
2884 /* Write the SELECT DB opcode */
f78fd11b 2885 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2886 if (rdbSaveLen(fp,j) == -1) goto werr;
ed9b544e 2887
2888 /* Iterate this DB writing every entry */
2889 while((de = dictNext(di)) != NULL) {
2890 robj *key = dictGetEntryKey(de);
2891 robj *o = dictGetEntryVal(de);
bb32ede5 2892 time_t expiretime = getExpire(db,key);
2893
2894 /* Save the expire time */
2895 if (expiretime != -1) {
2896 /* If this key is already expired skip it */
2897 if (expiretime < now) continue;
2898 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2899 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2900 }
7e69548d 2901 /* Save the key and associated value. This requires special
2902 * handling if the value is swapped out. */
38823f08 2903 if (!server.vm_enabled || key->storage == REDIS_VM_MEMORY) {
7e69548d 2904 /* Save type, key, value */
2905 if (rdbSaveType(fp,o->type) == -1) goto werr;
2906 if (rdbSaveStringObject(fp,key) == -1) goto werr;
2907 if (rdbSaveObject(fp,o) == -1) goto werr;
2908 } else {
2909 robj *po, *newkey;
2910 /* Get a preview of the object in memory */
2911 po = vmPreviewObject(key);
2912 /* Also duplicate the key object, to pass around a standard
2913 * string object. */
2914 newkey = dupStringObject(key);
2915 /* Save type, key, value */
2916 if (rdbSaveType(fp,key->vtype) == -1) goto werr;
2917 if (rdbSaveStringObject(fp,newkey) == -1) goto werr;
2918 if (rdbSaveObject(fp,po) == -1) goto werr;
2919 /* Remove the loaded object from memory */
2920 decrRefCount(po);
2921 decrRefCount(newkey);
2922 }
ed9b544e 2923 }
2924 dictReleaseIterator(di);
2925 }
2926 /* EOF opcode */
f78fd11b 2927 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2928
2929 /* Make sure data will not remain on the OS's output buffers */
ed9b544e 2930 fflush(fp);
2931 fsync(fileno(fp));
2932 fclose(fp);
2933
2934 /* Use RENAME to make sure the DB file is changed atomically only
2935 * if the generate DB file is ok. */
2936 if (rename(tmpfile,filename) == -1) {
325d1eb4 2937 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
ed9b544e 2938 unlink(tmpfile);
2939 return REDIS_ERR;
2940 }
2941 redisLog(REDIS_NOTICE,"DB saved on disk");
2942 server.dirty = 0;
2943 server.lastsave = time(NULL);
2944 return REDIS_OK;
2945
2946werr:
2947 fclose(fp);
2948 unlink(tmpfile);
2949 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2950 if (di) dictReleaseIterator(di);
2951 return REDIS_ERR;
2952}
2953
f78fd11b 2954static int rdbSaveBackground(char *filename) {
ed9b544e 2955 pid_t childpid;
2956
9d65a1bb 2957 if (server.bgsavechildpid != -1) return REDIS_ERR;
ed9b544e 2958 if ((childpid = fork()) == 0) {
2959 /* Child */
2960 close(server.fd);
f78fd11b 2961 if (rdbSave(filename) == REDIS_OK) {
ed9b544e 2962 exit(0);
2963 } else {
2964 exit(1);
2965 }
2966 } else {
2967 /* Parent */
5a7c647e 2968 if (childpid == -1) {
2969 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2970 strerror(errno));
2971 return REDIS_ERR;
2972 }
ed9b544e 2973 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
9f3c422c 2974 server.bgsavechildpid = childpid;
ed9b544e 2975 return REDIS_OK;
2976 }
2977 return REDIS_OK; /* unreached */
2978}
2979
/* Remove the temporary dump file "temp-<pid>.rdb" (the name rdbSave() uses
 * for its output) belonging to the given process. The result of unlink()
 * is ignored. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
2986
/* Read a single type/opcode byte from 'fp'.
 * Returns the byte (0-255), or -1 if it could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    return (fread(&byte,1,1,fp) == 0) ? -1 : byte;
}
2992
/* Read a time value stored as a 4 byte signed integer in host byte order
 * (see rdbSaveTime()). Returns the time, or -1 if it could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t secs;

    if (fread(&secs,4,1,fp) != 0) return (time_t) secs;
    return -1;
}
2998
e3566d4b 2999/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
3000 * of this file for a description of how this are stored on disk.
3001 *
3002 * isencoded is set to 1 if the readed length is not actually a length but
3003 * an "encoding type", check the above comments for more info */
c78a8ccc 3004static uint32_t rdbLoadLen(FILE *fp, int *isencoded) {
f78fd11b 3005 unsigned char buf[2];
3006 uint32_t len;
c78a8ccc 3007 int type;
f78fd11b 3008
e3566d4b 3009 if (isencoded) *isencoded = 0;
c78a8ccc 3010 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
3011 type = (buf[0]&0xC0)>>6;
3012 if (type == REDIS_RDB_6BITLEN) {
3013 /* Read a 6 bit len */
3014 return buf[0]&0x3F;
3015 } else if (type == REDIS_RDB_ENCVAL) {
3016 /* Read a 6 bit len encoding type */
3017 if (isencoded) *isencoded = 1;
3018 return buf[0]&0x3F;
3019 } else if (type == REDIS_RDB_14BITLEN) {
3020 /* Read a 14 bit len */
3021 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
3022 return ((buf[0]&0x3F)<<8)|buf[1];
3023 } else {
3024 /* Read a 32 bit len */
f78fd11b 3025 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
3026 return ntohl(len);
f78fd11b 3027 }
f78fd11b 3028}
3029
e3566d4b 3030static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
3031 unsigned char enc[4];
3032 long long val;
3033
3034 if (enctype == REDIS_RDB_ENC_INT8) {
3035 if (fread(enc,1,1,fp) == 0) return NULL;
3036 val = (signed char)enc[0];
3037 } else if (enctype == REDIS_RDB_ENC_INT16) {
3038 uint16_t v;
3039 if (fread(enc,2,1,fp) == 0) return NULL;
3040 v = enc[0]|(enc[1]<<8);
3041 val = (int16_t)v;
3042 } else if (enctype == REDIS_RDB_ENC_INT32) {
3043 uint32_t v;
3044 if (fread(enc,4,1,fp) == 0) return NULL;
3045 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
3046 val = (int32_t)v;
3047 } else {
3048 val = 0; /* anti-warning */
dfc5e96c 3049 redisAssert(0!=0);
e3566d4b 3050 }
3051 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
3052}
3053
c78a8ccc 3054static robj *rdbLoadLzfStringObject(FILE*fp) {
88e85998 3055 unsigned int len, clen;
3056 unsigned char *c = NULL;
3057 sds val = NULL;
3058
c78a8ccc 3059 if ((clen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
3060 if ((len = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
88e85998 3061 if ((c = zmalloc(clen)) == NULL) goto err;
3062 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
3063 if (fread(c,clen,1,fp) == 0) goto err;
3064 if (lzf_decompress(c,clen,val,len) == 0) goto err;
5109cdff 3065 zfree(c);
88e85998 3066 return createObject(REDIS_STRING,val);
3067err:
3068 zfree(c);
3069 sdsfree(val);
3070 return NULL;
3071}
3072
c78a8ccc 3073static robj *rdbLoadStringObject(FILE*fp) {
e3566d4b 3074 int isencoded;
3075 uint32_t len;
f78fd11b 3076 sds val;
3077
c78a8ccc 3078 len = rdbLoadLen(fp,&isencoded);
e3566d4b 3079 if (isencoded) {
3080 switch(len) {
3081 case REDIS_RDB_ENC_INT8:
3082 case REDIS_RDB_ENC_INT16:
3083 case REDIS_RDB_ENC_INT32:
3305306f 3084 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
88e85998 3085 case REDIS_RDB_ENC_LZF:
c78a8ccc 3086 return tryObjectSharing(rdbLoadLzfStringObject(fp));
e3566d4b 3087 default:
dfc5e96c 3088 redisAssert(0!=0);
e3566d4b 3089 }
3090 }
3091
f78fd11b 3092 if (len == REDIS_RDB_LENERR) return NULL;
3093 val = sdsnewlen(NULL,len);
3094 if (len && fread(val,len,1,fp) == 0) {
3095 sdsfree(val);
3096 return NULL;
3097 }
10c43610 3098 return tryObjectSharing(createObject(REDIS_STRING,val));
f78fd11b 3099}
3100
a7866db6 3101/* For information about double serialization check rdbSaveDoubleValue() */
3102static int rdbLoadDoubleValue(FILE *fp, double *val) {
3103 char buf[128];
3104 unsigned char len;
3105
3106 if (fread(&len,1,1,fp) == 0) return -1;
3107 switch(len) {
3108 case 255: *val = R_NegInf; return 0;
3109 case 254: *val = R_PosInf; return 0;
3110 case 253: *val = R_Nan; return 0;
3111 default:
3112 if (fread(buf,len,1,fp) == 0) return -1;
231d758e 3113 buf[len] = '\0';
a7866db6 3114 sscanf(buf, "%lg", val);
3115 return 0;
3116 }
3117}
3118
c78a8ccc 3119/* Load a Redis object of the specified type from the specified file.
3120 * On success a newly allocated object is returned, otherwise NULL. */
3121static robj *rdbLoadObject(int type, FILE *fp) {
3122 robj *o;
3123
3124 if (type == REDIS_STRING) {
3125 /* Read string value */
3126 if ((o = rdbLoadStringObject(fp)) == NULL) return NULL;
3127 tryObjectEncoding(o);
3128 } else if (type == REDIS_LIST || type == REDIS_SET) {
3129 /* Read list/set value */
3130 uint32_t listlen;
3131
3132 if ((listlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
3133 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
3134 /* Load every single element of the list/set */
3135 while(listlen--) {
3136 robj *ele;
3137
3138 if ((ele = rdbLoadStringObject(fp)) == NULL) return NULL;
3139 tryObjectEncoding(ele);
3140 if (type == REDIS_LIST) {
3141 listAddNodeTail((list*)o->ptr,ele);
3142 } else {
3143 dictAdd((dict*)o->ptr,ele,NULL);
3144 }
3145 }
3146 } else if (type == REDIS_ZSET) {
3147 /* Read list/set value */
3148 uint32_t zsetlen;
3149 zset *zs;
3150
3151 if ((zsetlen = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR) return NULL;
3152 o = createZsetObject();
3153 zs = o->ptr;
3154 /* Load every single element of the list/set */
3155 while(zsetlen--) {
3156 robj *ele;
3157 double *score = zmalloc(sizeof(double));
3158
3159 if ((ele = rdbLoadStringObject(fp)) == NULL) return NULL;
3160 tryObjectEncoding(ele);
3161 if (rdbLoadDoubleValue(fp,score) == -1) return NULL;
3162 dictAdd(zs->dict,ele,score);
3163 zslInsert(zs->zsl,*score,ele);
3164 incrRefCount(ele); /* added to skiplist */
3165 }
3166 } else {
3167 redisAssert(0 != 0);
3168 }
3169 return o;
3170}
3171
f78fd11b 3172static int rdbLoad(char *filename) {
ed9b544e 3173 FILE *fp;
f78fd11b 3174 robj *keyobj = NULL;
3175 uint32_t dbid;
bb32ede5 3176 int type, retval, rdbver;
3305306f 3177 dict *d = server.db[0].dict;
bb32ede5 3178 redisDb *db = server.db+0;
f78fd11b 3179 char buf[1024];
bb32ede5 3180 time_t expiretime = -1, now = time(NULL);
b492cf00 3181 long long loadedkeys = 0;
bb32ede5 3182
ed9b544e 3183 fp = fopen(filename,"r");
3184 if (!fp) return REDIS_ERR;
3185 if (fread(buf,9,1,fp) == 0) goto eoferr;
f78fd11b 3186 buf[9] = '\0';
3187 if (memcmp(buf,"REDIS",5) != 0) {
ed9b544e 3188 fclose(fp);
3189 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
3190 return REDIS_ERR;
3191 }
f78fd11b 3192 rdbver = atoi(buf+5);
c78a8ccc 3193 if (rdbver != 1) {
f78fd11b 3194 fclose(fp);
3195 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
3196 return REDIS_ERR;
3197 }
ed9b544e 3198 while(1) {
3199 robj *o;
3200
3201 /* Read type. */
f78fd11b 3202 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
bb32ede5 3203 if (type == REDIS_EXPIRETIME) {
3204 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
3205 /* We read the time so we need to read the object type again */
3206 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
3207 }
ed9b544e 3208 if (type == REDIS_EOF) break;
3209 /* Handle SELECT DB opcode as a special case */
3210 if (type == REDIS_SELECTDB) {
c78a8ccc 3211 if ((dbid = rdbLoadLen(fp,NULL)) == REDIS_RDB_LENERR)
e3566d4b 3212 goto eoferr;
ed9b544e 3213 if (dbid >= (unsigned)server.dbnum) {
f78fd11b 3214 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
ed9b544e 3215 exit(1);
3216 }
bb32ede5 3217 db = server.db+dbid;
3218 d = db->dict;
ed9b544e 3219 continue;
3220 }
3221 /* Read key */
c78a8ccc 3222 if ((keyobj = rdbLoadStringObject(fp)) == NULL) goto eoferr;
3223 /* Read value */
3224 if ((o = rdbLoadObject(type,fp)) == NULL) goto eoferr;
ed9b544e 3225 /* Add the new object in the hash table */
f78fd11b 3226 retval = dictAdd(d,keyobj,o);
ed9b544e 3227 if (retval == DICT_ERR) {
f78fd11b 3228 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
ed9b544e 3229 exit(1);
3230 }
bb32ede5 3231 /* Set the expire time if needed */
3232 if (expiretime != -1) {
3233 setExpire(db,keyobj,expiretime);
3234 /* Delete this key if already expired */
3235 if (expiretime < now) deleteKey(db,keyobj);
3236 expiretime = -1;
3237 }
f78fd11b 3238 keyobj = o = NULL;
b492cf00 3239 /* Handle swapping while loading big datasets when VM is on */
3240 loadedkeys++;
3241 if (server.vm_enabled && (loadedkeys % 5000) == 0) {
3242 while (zmalloc_used_memory() > server.vm_max_memory) {
3243 if (vmSwapOneObject() == REDIS_ERR) break;
3244 }
3245 }
ed9b544e 3246 }
3247 fclose(fp);
3248 return REDIS_OK;
3249
3250eoferr: /* unexpected end of file is handled here with a fatal exit */
e3566d4b 3251 if (keyobj) decrRefCount(keyobj);
f80dff62 3252 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
ed9b544e 3253 exit(1);
3254 return REDIS_ERR; /* Just to avoid warning */
3255}
3256
3257/*================================== Commands =============================== */
3258
abcb223e 3259static void authCommand(redisClient *c) {
2e77c2ee 3260 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
abcb223e
BH
3261 c->authenticated = 1;
3262 addReply(c,shared.ok);
3263 } else {
3264 c->authenticated = 0;
fa4c0aba 3265 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
abcb223e
BH
3266 }
3267}
3268
ed9b544e 3269static void pingCommand(redisClient *c) {
3270 addReply(c,shared.pong);
3271}
3272
3273static void echoCommand(redisClient *c) {
942a3961 3274 addReplyBulkLen(c,c->argv[1]);
ed9b544e 3275 addReply(c,c->argv[1]);
3276 addReply(c,shared.crlf);
3277}
3278
3279/*=================================== Strings =============================== */
3280
3281static void setGenericCommand(redisClient *c, int nx) {
3282 int retval;
3283
333fd216 3284 if (nx) deleteIfVolatile(c->db,c->argv[1]);
3305306f 3285 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 3286 if (retval == DICT_ERR) {
3287 if (!nx) {
1b03836c 3288 /* If the key is about a swapped value, we want a new key object
3289 * to overwrite the old. So we delete the old key in the database.
3290 * This will also make sure that swap pages about the old object
3291 * will be marked as free. */
3292 if (deleteIfSwapped(c->db,c->argv[1]))
3293 incrRefCount(c->argv[1]);
3305306f 3294 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
ed9b544e 3295 incrRefCount(c->argv[2]);
3296 } else {
c937aa89 3297 addReply(c,shared.czero);
ed9b544e 3298 return;
3299 }
3300 } else {
3301 incrRefCount(c->argv[1]);
3302 incrRefCount(c->argv[2]);
3303 }
3304 server.dirty++;
3305306f 3305 removeExpire(c->db,c->argv[1]);
c937aa89 3306 addReply(c, nx ? shared.cone : shared.ok);
ed9b544e 3307}
3308
3309static void setCommand(redisClient *c) {
a4d1ba9a 3310 setGenericCommand(c,0);
ed9b544e 3311}
3312
3313static void setnxCommand(redisClient *c) {
a4d1ba9a 3314 setGenericCommand(c,1);
ed9b544e 3315}
3316
322fc7d8 3317static int getGenericCommand(redisClient *c) {
3305306f 3318 robj *o = lookupKeyRead(c->db,c->argv[1]);
3319
3320 if (o == NULL) {
c937aa89 3321 addReply(c,shared.nullbulk);
322fc7d8 3322 return REDIS_OK;
ed9b544e 3323 } else {
ed9b544e 3324 if (o->type != REDIS_STRING) {
c937aa89 3325 addReply(c,shared.wrongtypeerr);
322fc7d8 3326 return REDIS_ERR;
ed9b544e 3327 } else {
942a3961 3328 addReplyBulkLen(c,o);
ed9b544e 3329 addReply(c,o);
3330 addReply(c,shared.crlf);
322fc7d8 3331 return REDIS_OK;
ed9b544e 3332 }
3333 }
3334}
3335
322fc7d8 3336static void getCommand(redisClient *c) {
3337 getGenericCommand(c);
3338}
3339
f6b141c5 3340static void getsetCommand(redisClient *c) {
322fc7d8 3341 if (getGenericCommand(c) == REDIS_ERR) return;
a431eb74 3342 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
3343 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
3344 } else {
3345 incrRefCount(c->argv[1]);
3346 }
3347 incrRefCount(c->argv[2]);
3348 server.dirty++;
3349 removeExpire(c->db,c->argv[1]);
3350}
3351
70003d28 3352static void mgetCommand(redisClient *c) {
70003d28 3353 int j;
3354
c937aa89 3355 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
70003d28 3356 for (j = 1; j < c->argc; j++) {
3305306f 3357 robj *o = lookupKeyRead(c->db,c->argv[j]);
3358 if (o == NULL) {
c937aa89 3359 addReply(c,shared.nullbulk);
70003d28 3360 } else {
70003d28 3361 if (o->type != REDIS_STRING) {
c937aa89 3362 addReply(c,shared.nullbulk);
70003d28 3363 } else {
942a3961 3364 addReplyBulkLen(c,o);
70003d28 3365 addReply(c,o);
3366 addReply(c,shared.crlf);
3367 }
3368 }
3369 }
3370}
3371
6c446631 3372static void msetGenericCommand(redisClient *c, int nx) {
906573e7 3373 int j, busykeys = 0;
6c446631 3374
3375 if ((c->argc % 2) == 0) {
454d4e43 3376 addReplySds(c,sdsnew("-ERR wrong number of arguments for MSET\r\n"));
6c446631 3377 return;
3378 }
3379 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
3380 * set nothing at all if at least one already key exists. */
3381 if (nx) {
3382 for (j = 1; j < c->argc; j += 2) {
906573e7 3383 if (lookupKeyWrite(c->db,c->argv[j]) != NULL) {
3384 busykeys++;
6c446631 3385 }
3386 }
3387 }
906573e7 3388 if (busykeys) {
3389 addReply(c, shared.czero);
3390 return;
3391 }
6c446631 3392
3393 for (j = 1; j < c->argc; j += 2) {
3394 int retval;
3395
17511391 3396 tryObjectEncoding(c->argv[j+1]);
6c446631 3397 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
3398 if (retval == DICT_ERR) {
3399 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
3400 incrRefCount(c->argv[j+1]);
3401 } else {
3402 incrRefCount(c->argv[j]);
3403 incrRefCount(c->argv[j+1]);
3404 }
3405 removeExpire(c->db,c->argv[j]);
3406 }
3407 server.dirty += (c->argc-1)/2;
3408 addReply(c, nx ? shared.cone : shared.ok);
3409}
3410
3411static void msetCommand(redisClient *c) {
3412 msetGenericCommand(c,0);
3413}
3414
3415static void msetnxCommand(redisClient *c) {
3416 msetGenericCommand(c,1);
3417}
3418
d68ed120 3419static void incrDecrCommand(redisClient *c, long long incr) {
ed9b544e 3420 long long value;
3421 int retval;
3422 robj *o;
3423
3305306f 3424 o = lookupKeyWrite(c->db,c->argv[1]);
3425 if (o == NULL) {
ed9b544e 3426 value = 0;
3427 } else {
ed9b544e 3428 if (o->type != REDIS_STRING) {
3429 value = 0;
3430 } else {
3431 char *eptr;
3432
942a3961 3433 if (o->encoding == REDIS_ENCODING_RAW)
3434 value = strtoll(o->ptr, &eptr, 10);
3435 else if (o->encoding == REDIS_ENCODING_INT)
3436 value = (long)o->ptr;
3437 else
dfc5e96c 3438 redisAssert(1 != 1);
ed9b544e 3439 }
3440 }
3441
3442 value += incr;
3443 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
942a3961 3444 tryObjectEncoding(o);
3305306f 3445 retval = dictAdd(c->db->dict,c->argv[1],o);
ed9b544e 3446 if (retval == DICT_ERR) {
3305306f 3447 dictReplace(c->db->dict,c->argv[1],o);
3448 removeExpire(c->db,c->argv[1]);
ed9b544e 3449 } else {
3450 incrRefCount(c->argv[1]);
3451 }
3452 server.dirty++;
c937aa89 3453 addReply(c,shared.colon);
ed9b544e 3454 addReply(c,o);
3455 addReply(c,shared.crlf);
3456}
3457
3458static void incrCommand(redisClient *c) {
a4d1ba9a 3459 incrDecrCommand(c,1);
ed9b544e 3460}
3461
3462static void decrCommand(redisClient *c) {
a4d1ba9a 3463 incrDecrCommand(c,-1);
ed9b544e 3464}
3465
3466static void incrbyCommand(redisClient *c) {
d68ed120 3467 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3468 incrDecrCommand(c,incr);
ed9b544e 3469}
3470
3471static void decrbyCommand(redisClient *c) {
d68ed120 3472 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
a4d1ba9a 3473 incrDecrCommand(c,-incr);
ed9b544e 3474}
3475
3476/* ========================= Type agnostic commands ========================= */
3477
3478static void delCommand(redisClient *c) {
5109cdff 3479 int deleted = 0, j;
3480
3481 for (j = 1; j < c->argc; j++) {
3482 if (deleteKey(c->db,c->argv[j])) {
3483 server.dirty++;
3484 deleted++;
3485 }
3486 }
3487 switch(deleted) {
3488 case 0:
c937aa89 3489 addReply(c,shared.czero);
5109cdff 3490 break;
3491 case 1:
3492 addReply(c,shared.cone);
3493 break;
3494 default:
3495 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
3496 break;
ed9b544e 3497 }
3498}
3499
3500static void existsCommand(redisClient *c) {
3305306f 3501 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
ed9b544e 3502}
3503
3504static void selectCommand(redisClient *c) {
3505 int id = atoi(c->argv[1]->ptr);
3506
3507 if (selectDb(c,id) == REDIS_ERR) {
774e3047 3508 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
ed9b544e 3509 } else {
3510 addReply(c,shared.ok);
3511 }
3512}
3513
3514static void randomkeyCommand(redisClient *c) {
3515 dictEntry *de;
3305306f 3516
3517 while(1) {
3518 de = dictGetRandomKey(c->db->dict);
ce7bef07 3519 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
3305306f 3520 }
ed9b544e 3521 if (de == NULL) {
ce7bef07 3522 addReply(c,shared.plus);
ed9b544e 3523 addReply(c,shared.crlf);
3524 } else {
c937aa89 3525 addReply(c,shared.plus);
ed9b544e 3526 addReply(c,dictGetEntryKey(de));
3527 addReply(c,shared.crlf);
3528 }
3529}
3530
3531static void keysCommand(redisClient *c) {
3532 dictIterator *di;
3533 dictEntry *de;
3534 sds pattern = c->argv[1]->ptr;
3535 int plen = sdslen(pattern);
682ac724 3536 unsigned long numkeys = 0, keyslen = 0;
ed9b544e 3537 robj *lenobj = createObject(REDIS_STRING,NULL);
3538
3305306f 3539 di = dictGetIterator(c->db->dict);
ed9b544e 3540 addReply(c,lenobj);
3541 decrRefCount(lenobj);
3542 while((de = dictNext(di)) != NULL) {
3543 robj *keyobj = dictGetEntryKey(de);
3305306f 3544
ed9b544e 3545 sds key = keyobj->ptr;
3546 if ((pattern[0] == '*' && pattern[1] == '\0') ||
3547 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
3305306f 3548 if (expireIfNeeded(c->db,keyobj) == 0) {
3549 if (numkeys != 0)
3550 addReply(c,shared.space);
3551 addReply(c,keyobj);
3552 numkeys++;
3553 keyslen += sdslen(key);
3554 }
ed9b544e 3555 }
3556 }
3557 dictReleaseIterator(di);
c937aa89 3558 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
ed9b544e 3559 addReply(c,shared.crlf);
3560}
3561
3562static void dbsizeCommand(redisClient *c) {
3563 addReplySds(c,
3305306f 3564 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
ed9b544e 3565}
3566
3567static void lastsaveCommand(redisClient *c) {
3568 addReplySds(c,
c937aa89 3569 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
ed9b544e 3570}
3571
3572static void typeCommand(redisClient *c) {
3305306f 3573 robj *o;
ed9b544e 3574 char *type;
3305306f 3575
3576 o = lookupKeyRead(c->db,c->argv[1]);
3577 if (o == NULL) {
c937aa89 3578 type = "+none";
ed9b544e 3579 } else {
ed9b544e 3580 switch(o->type) {
c937aa89 3581 case REDIS_STRING: type = "+string"; break;
3582 case REDIS_LIST: type = "+list"; break;
3583 case REDIS_SET: type = "+set"; break;
412a8bce 3584 case REDIS_ZSET: type = "+zset"; break;
ed9b544e 3585 default: type = "unknown"; break;
3586 }
3587 }
3588 addReplySds(c,sdsnew(type));
3589 addReply(c,shared.crlf);
3590}
3591
3592static void saveCommand(redisClient *c) {
9d65a1bb 3593 if (server.bgsavechildpid != -1) {
05557f6d 3594 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3595 return;
3596 }
f78fd11b 3597 if (rdbSave(server.dbfilename) == REDIS_OK) {
ed9b544e 3598 addReply(c,shared.ok);
3599 } else {
3600 addReply(c,shared.err);
3601 }
3602}
3603
3604static void bgsaveCommand(redisClient *c) {
9d65a1bb 3605 if (server.bgsavechildpid != -1) {
ed9b544e 3606 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3607 return;
3608 }
f78fd11b 3609 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
49b99ab4 3610 char *status = "+Background saving started\r\n";
3611 addReplySds(c,sdsnew(status));
ed9b544e 3612 } else {
3613 addReply(c,shared.err);
3614 }
3615}
3616
3617static void shutdownCommand(redisClient *c) {
3618 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
a3b21203 3619 /* Kill the saving child if there is a background saving in progress.
3620 We want to avoid race conditions, for instance our saving child may
3621 overwrite the synchronous saving did by SHUTDOWN. */
9d65a1bb 3622 if (server.bgsavechildpid != -1) {
9f3c422c 3623 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3624 kill(server.bgsavechildpid,SIGKILL);
a3b21203 3625 rdbRemoveTempFile(server.bgsavechildpid);
9f3c422c 3626 }
ac945e2d 3627 if (server.appendonly) {
3628 /* Append only file: fsync() the AOF and exit */
3629 fsync(server.appendfd);
3630 exit(0);
ed9b544e 3631 } else {
ac945e2d 3632 /* Snapshotting. Perform a SYNC SAVE and exit */
3633 if (rdbSave(server.dbfilename) == REDIS_OK) {
3634 if (server.daemonize)
3635 unlink(server.pidfile);
3636 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
3637 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3638 exit(0);
3639 } else {
3640 /* Ooops.. error saving! The best we can do is to continue operating.
3641 * Note that if there was a background saving process, in the next
3642 * cron() Redis will be notified that the background saving aborted,
3643 * handling special stuff like slaves pending for synchronization... */
3644 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3645 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3646 }
ed9b544e 3647 }
3648}
3649
3650static void renameGenericCommand(redisClient *c, int nx) {
ed9b544e 3651 robj *o;
3652
3653 /* To use the same key as src and dst is probably an error */
3654 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
c937aa89 3655 addReply(c,shared.sameobjecterr);
ed9b544e 3656 return;
3657 }
3658
3305306f 3659 o = lookupKeyWrite(c->db,c->argv[1]);
3660 if (o == NULL) {
c937aa89 3661 addReply(c,shared.nokeyerr);
ed9b544e 3662 return;
3663 }
ed9b544e 3664 incrRefCount(o);
3305306f 3665 deleteIfVolatile(c->db,c->argv[2]);
3666 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
ed9b544e 3667 if (nx) {
3668 decrRefCount(o);
c937aa89 3669 addReply(c,shared.czero);
ed9b544e 3670 return;
3671 }
3305306f 3672 dictReplace(c->db->dict,c->argv[2],o);
ed9b544e 3673 } else {
3674 incrRefCount(c->argv[2]);
3675 }
3305306f 3676 deleteKey(c->db,c->argv[1]);
ed9b544e 3677 server.dirty++;
c937aa89 3678 addReply(c,nx ? shared.cone : shared.ok);
ed9b544e 3679}
3680
3681static void renameCommand(redisClient *c) {
3682 renameGenericCommand(c,0);
3683}
3684
3685static void renamenxCommand(redisClient *c) {
3686 renameGenericCommand(c,1);
3687}
3688
3689static void moveCommand(redisClient *c) {
3305306f 3690 robj *o;
3691 redisDb *src, *dst;
ed9b544e 3692 int srcid;
3693
3694 /* Obtain source and target DB pointers */
3305306f 3695 src = c->db;
3696 srcid = c->db->id;
ed9b544e 3697 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
c937aa89 3698 addReply(c,shared.outofrangeerr);
ed9b544e 3699 return;
3700 }
3305306f 3701 dst = c->db;
3702 selectDb(c,srcid); /* Back to the source DB */
ed9b544e 3703
3704 /* If the user is moving using as target the same
3705 * DB as the source DB it is probably an error. */
3706 if (src == dst) {
c937aa89 3707 addReply(c,shared.sameobjecterr);
ed9b544e 3708 return;
3709 }
3710
3711 /* Check if the element exists and get a reference */
3305306f 3712 o = lookupKeyWrite(c->db,c->argv[1]);
3713 if (!o) {
c937aa89 3714 addReply(c,shared.czero);
ed9b544e 3715 return;
3716 }
3717
3718 /* Try to add the element to the target DB */
3305306f 3719 deleteIfVolatile(dst,c->argv[1]);
3720 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
c937aa89 3721 addReply(c,shared.czero);
ed9b544e 3722 return;
3723 }
3305306f 3724 incrRefCount(c->argv[1]);
ed9b544e 3725 incrRefCount(o);
3726
3727 /* OK! key moved, free the entry in the source DB */
3305306f 3728 deleteKey(src,c->argv[1]);
ed9b544e 3729 server.dirty++;
c937aa89 3730 addReply(c,shared.cone);
ed9b544e 3731}
3732
3733/* =================================== Lists ================================ */
3734static void pushGenericCommand(redisClient *c, int where) {
3735 robj *lobj;
ed9b544e 3736 list *list;
3305306f 3737
3738 lobj = lookupKeyWrite(c->db,c->argv[1]);
3739 if (lobj == NULL) {
95242ab5 3740 if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
3741 addReply(c,shared.ok);
3742 return;
3743 }
ed9b544e 3744 lobj = createListObject();
3745 list = lobj->ptr;
3746 if (where == REDIS_HEAD) {
6b47e12e 3747 listAddNodeHead(list,c->argv[2]);
ed9b544e 3748 } else {
6b47e12e 3749 listAddNodeTail(list,c->argv[2]);
ed9b544e 3750 }
3305306f 3751 dictAdd(c->db->dict,c->argv[1],lobj);
ed9b544e 3752 incrRefCount(c->argv[1]);
3753 incrRefCount(c->argv[2]);
3754 } else {
ed9b544e 3755 if (lobj->type != REDIS_LIST) {
3756 addReply(c,shared.wrongtypeerr);
3757 return;
3758 }
95242ab5 3759 if (handleClientsWaitingListPush(c,c->argv[1],c->argv[2])) {
3760 addReply(c,shared.ok);
3761 return;
3762 }
ed9b544e 3763 list = lobj->ptr;
3764 if (where == REDIS_HEAD) {
6b47e12e 3765 listAddNodeHead(list,c->argv[2]);
ed9b544e 3766 } else {
6b47e12e 3767 listAddNodeTail(list,c->argv[2]);
ed9b544e 3768 }
3769 incrRefCount(c->argv[2]);
3770 }
3771 server.dirty++;
3772 addReply(c,shared.ok);
3773}
3774
3775static void lpushCommand(redisClient *c) {
3776 pushGenericCommand(c,REDIS_HEAD);
3777}
3778
3779static void rpushCommand(redisClient *c) {
3780 pushGenericCommand(c,REDIS_TAIL);
3781}
3782
3783static void llenCommand(redisClient *c) {
3305306f 3784 robj *o;
ed9b544e 3785 list *l;
3786
3305306f 3787 o = lookupKeyRead(c->db,c->argv[1]);
3788 if (o == NULL) {
c937aa89 3789 addReply(c,shared.czero);
ed9b544e 3790 return;
3791 } else {
ed9b544e 3792 if (o->type != REDIS_LIST) {
c937aa89 3793 addReply(c,shared.wrongtypeerr);
ed9b544e 3794 } else {
3795 l = o->ptr;
c937aa89 3796 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
ed9b544e 3797 }
3798 }
3799}
3800
3801static void lindexCommand(redisClient *c) {
3305306f 3802 robj *o;
ed9b544e 3803 int index = atoi(c->argv[2]->ptr);
3804
3305306f 3805 o = lookupKeyRead(c->db,c->argv[1]);
3806 if (o == NULL) {
c937aa89 3807 addReply(c,shared.nullbulk);
ed9b544e 3808 } else {
ed9b544e 3809 if (o->type != REDIS_LIST) {
c937aa89 3810 addReply(c,shared.wrongtypeerr);
ed9b544e 3811 } else {
3812 list *list = o->ptr;
3813 listNode *ln;
3814
3815 ln = listIndex(list, index);
3816 if (ln == NULL) {
c937aa89 3817 addReply(c,shared.nullbulk);
ed9b544e 3818 } else {
3819 robj *ele = listNodeValue(ln);
942a3961 3820 addReplyBulkLen(c,ele);
ed9b544e 3821 addReply(c,ele);
3822 addReply(c,shared.crlf);
3823 }
3824 }
3825 }
3826}
3827
3828static void lsetCommand(redisClient *c) {
3305306f 3829 robj *o;
ed9b544e 3830 int index = atoi(c->argv[2]->ptr);
3831
3305306f 3832 o = lookupKeyWrite(c->db,c->argv[1]);
3833 if (o == NULL) {
ed9b544e 3834 addReply(c,shared.nokeyerr);
3835 } else {
ed9b544e 3836 if (o->type != REDIS_LIST) {
3837 addReply(c,shared.wrongtypeerr);
3838 } else {
3839 list *list = o->ptr;
3840 listNode *ln;
3841
3842 ln = listIndex(list, index);
3843 if (ln == NULL) {
c937aa89 3844 addReply(c,shared.outofrangeerr);
ed9b544e 3845 } else {
3846 robj *ele = listNodeValue(ln);
3847
3848 decrRefCount(ele);
3849 listNodeValue(ln) = c->argv[3];
3850 incrRefCount(c->argv[3]);
3851 addReply(c,shared.ok);
3852 server.dirty++;
3853 }
3854 }
3855 }
3856}
3857
3858static void popGenericCommand(redisClient *c, int where) {
3305306f 3859 robj *o;
3860
3861 o = lookupKeyWrite(c->db,c->argv[1]);
3862 if (o == NULL) {
c937aa89 3863 addReply(c,shared.nullbulk);
ed9b544e 3864 } else {
ed9b544e 3865 if (o->type != REDIS_LIST) {
c937aa89 3866 addReply(c,shared.wrongtypeerr);
ed9b544e 3867 } else {
3868 list *list = o->ptr;
3869 listNode *ln;
3870
3871 if (where == REDIS_HEAD)
3872 ln = listFirst(list);
3873 else
3874 ln = listLast(list);
3875
3876 if (ln == NULL) {
c937aa89 3877 addReply(c,shared.nullbulk);
ed9b544e 3878 } else {
3879 robj *ele = listNodeValue(ln);
942a3961 3880 addReplyBulkLen(c,ele);
ed9b544e 3881 addReply(c,ele);
3882 addReply(c,shared.crlf);
3883 listDelNode(list,ln);
3884 server.dirty++;
3885 }
3886 }
3887 }
3888}
3889
3890static void lpopCommand(redisClient *c) {
3891 popGenericCommand(c,REDIS_HEAD);
3892}
3893
3894static void rpopCommand(redisClient *c) {
3895 popGenericCommand(c,REDIS_TAIL);
3896}
3897
3898static void lrangeCommand(redisClient *c) {
3305306f 3899 robj *o;
ed9b544e 3900 int start = atoi(c->argv[2]->ptr);
3901 int end = atoi(c->argv[3]->ptr);
3305306f 3902
3903 o = lookupKeyRead(c->db,c->argv[1]);
3904 if (o == NULL) {
c937aa89 3905 addReply(c,shared.nullmultibulk);
ed9b544e 3906 } else {
ed9b544e 3907 if (o->type != REDIS_LIST) {
c937aa89 3908 addReply(c,shared.wrongtypeerr);
ed9b544e 3909 } else {
3910 list *list = o->ptr;
3911 listNode *ln;
3912 int llen = listLength(list);
3913 int rangelen, j;
3914 robj *ele;
3915
3916 /* convert negative indexes */
3917 if (start < 0) start = llen+start;
3918 if (end < 0) end = llen+end;
3919 if (start < 0) start = 0;
3920 if (end < 0) end = 0;
3921
3922 /* indexes sanity checks */
3923 if (start > end || start >= llen) {
3924 /* Out of range start or start > end result in empty list */
c937aa89 3925 addReply(c,shared.emptymultibulk);
ed9b544e 3926 return;
3927 }
3928 if (end >= llen) end = llen-1;
3929 rangelen = (end-start)+1;
3930
3931 /* Return the result in form of a multi-bulk reply */
3932 ln = listIndex(list, start);
c937aa89 3933 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
ed9b544e 3934 for (j = 0; j < rangelen; j++) {
3935 ele = listNodeValue(ln);
942a3961 3936 addReplyBulkLen(c,ele);
ed9b544e 3937 addReply(c,ele);
3938 addReply(c,shared.crlf);
3939 ln = ln->next;
3940 }
3941 }
3942 }
3943}
3944
3945static void ltrimCommand(redisClient *c) {
3305306f 3946 robj *o;
ed9b544e 3947 int start = atoi(c->argv[2]->ptr);
3948 int end = atoi(c->argv[3]->ptr);
3949
3305306f 3950 o = lookupKeyWrite(c->db,c->argv[1]);
3951 if (o == NULL) {
ab9d4cb1 3952 addReply(c,shared.ok);
ed9b544e 3953 } else {
ed9b544e 3954 if (o->type != REDIS_LIST) {
3955 addReply(c,shared.wrongtypeerr);
3956 } else {
3957 list *list = o->ptr;
3958 listNode *ln;
3959 int llen = listLength(list);
3960 int j, ltrim, rtrim;
3961
3962 /* convert negative indexes */
3963 if (start < 0) start = llen+start;
3964 if (end < 0) end = llen+end;
3965 if (start < 0) start = 0;
3966 if (end < 0) end = 0;
3967
3968 /* indexes sanity checks */
3969 if (start > end || start >= llen) {
3970 /* Out of range start or start > end result in empty list */
3971 ltrim = llen;
3972 rtrim = 0;
3973 } else {
3974 if (end >= llen) end = llen-1;
3975 ltrim = start;
3976 rtrim = llen-end-1;
3977 }
3978
3979 /* Remove list elements to perform the trim */
3980 for (j = 0; j < ltrim; j++) {
3981 ln = listFirst(list);
3982 listDelNode(list,ln);
3983 }
3984 for (j = 0; j < rtrim; j++) {
3985 ln = listLast(list);
3986 listDelNode(list,ln);
3987 }
ed9b544e 3988 server.dirty++;
e59229a2 3989 addReply(c,shared.ok);
ed9b544e 3990 }
3991 }
3992}
3993
3994static void lremCommand(redisClient *c) {
3305306f 3995 robj *o;
ed9b544e 3996
3305306f 3997 o = lookupKeyWrite(c->db,c->argv[1]);
3998 if (o == NULL) {
33c08b39 3999 addReply(c,shared.czero);
ed9b544e 4000 } else {
ed9b544e 4001 if (o->type != REDIS_LIST) {
c937aa89 4002 addReply(c,shared.wrongtypeerr);
ed9b544e 4003 } else {
4004 list *list = o->ptr;
4005 listNode *ln, *next;
4006 int toremove = atoi(c->argv[2]->ptr);
4007 int removed = 0;
4008 int fromtail = 0;
4009
4010 if (toremove < 0) {
4011 toremove = -toremove;
4012 fromtail = 1;
4013 }
4014 ln = fromtail ? list->tail : list->head;
4015 while (ln) {
ed9b544e 4016 robj *ele = listNodeValue(ln);
a4d1ba9a 4017
4018 next = fromtail ? ln->prev : ln->next;
724a51b1 4019 if (compareStringObjects(ele,c->argv[3]) == 0) {
ed9b544e 4020 listDelNode(list,ln);
4021 server.dirty++;
4022 removed++;
4023 if (toremove && removed == toremove) break;
4024 }
4025 ln = next;
4026 }
c937aa89 4027 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
ed9b544e 4028 }
4029 }
4030}
4031
12f9d551 4032/* This is the semantic of this command:
0f5f7e9a 4033 * RPOPLPUSH srclist dstlist:
12f9d551 4034 * IF LLEN(srclist) > 0
4035 * element = RPOP srclist
4036 * LPUSH dstlist element
4037 * RETURN element
4038 * ELSE
4039 * RETURN nil
4040 * END
4041 * END
4042 *
4043 * The idea is to be able to get an element from a list in a reliable way
4044 * since the element is not just returned but pushed against another list
4045 * as well. This command was originally proposed by Ezra Zygmuntowicz.
4046 */
0f5f7e9a 4047static void rpoplpushcommand(redisClient *c) {
12f9d551 4048 robj *sobj;
4049
4050 sobj = lookupKeyWrite(c->db,c->argv[1]);
4051 if (sobj == NULL) {
4052 addReply(c,shared.nullbulk);
4053 } else {
4054 if (sobj->type != REDIS_LIST) {
4055 addReply(c,shared.wrongtypeerr);
4056 } else {
4057 list *srclist = sobj->ptr;
4058 listNode *ln = listLast(srclist);
4059
4060 if (ln == NULL) {
4061 addReply(c,shared.nullbulk);
4062 } else {
4063 robj *dobj = lookupKeyWrite(c->db,c->argv[2]);
4064 robj *ele = listNodeValue(ln);
4065 list *dstlist;
4066
e20fb74f 4067 if (dobj && dobj->type != REDIS_LIST) {
12f9d551 4068 addReply(c,shared.wrongtypeerr);
4069 return;
4070 }
e20fb74f 4071
4072 /* Add the element to the target list (unless it's directly
4073 * passed to some BLPOP-ing client */
4074 if (!handleClientsWaitingListPush(c,c->argv[2],ele)) {
4075 if (dobj == NULL) {
4076 /* Create the list if the key does not exist */
4077 dobj = createListObject();
4078 dictAdd(c->db->dict,c->argv[2],dobj);
4079 incrRefCount(c->argv[2]);
4080 }
4081 dstlist = dobj->ptr;
4082 listAddNodeHead(dstlist,ele);
4083 incrRefCount(ele);
4084 }
12f9d551 4085
4086 /* Send the element to the client as reply as well */
4087 addReplyBulkLen(c,ele);
4088 addReply(c,ele);
4089 addReply(c,shared.crlf);
4090
4091 /* Finally remove the element from the source list */
4092 listDelNode(srclist,ln);
4093 server.dirty++;
4094 }
4095 }
4096 }
4097}
4098
4099
ed9b544e 4100/* ==================================== Sets ================================ */
4101
4102static void saddCommand(redisClient *c) {
ed9b544e 4103 robj *set;
4104
3305306f 4105 set = lookupKeyWrite(c->db,c->argv[1]);
4106 if (set == NULL) {
ed9b544e 4107 set = createSetObject();
3305306f 4108 dictAdd(c->db->dict,c->argv[1],set);
ed9b544e 4109 incrRefCount(c->argv[1]);
4110 } else {
ed9b544e 4111 if (set->type != REDIS_SET) {
c937aa89 4112 addReply(c,shared.wrongtypeerr);
ed9b544e 4113 return;
4114 }
4115 }
4116 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
4117 incrRefCount(c->argv[2]);
4118 server.dirty++;
c937aa89 4119 addReply(c,shared.cone);
ed9b544e 4120 } else {
c937aa89 4121 addReply(c,shared.czero);
ed9b544e 4122 }
4123}
4124
4125static void sremCommand(redisClient *c) {
3305306f 4126 robj *set;
ed9b544e 4127
3305306f 4128 set = lookupKeyWrite(c->db,c->argv[1]);
4129 if (set == NULL) {
c937aa89 4130 addReply(c,shared.czero);
ed9b544e 4131 } else {
ed9b544e 4132 if (set->type != REDIS_SET) {
c937aa89 4133 addReply(c,shared.wrongtypeerr);
ed9b544e 4134 return;
4135 }
4136 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
4137 server.dirty++;
12fea928 4138 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
c937aa89 4139 addReply(c,shared.cone);
ed9b544e 4140 } else {
c937aa89 4141 addReply(c,shared.czero);
ed9b544e 4142 }
4143 }
4144}
4145
a4460ef4 4146static void smoveCommand(redisClient *c) {
4147 robj *srcset, *dstset;
4148
4149 srcset = lookupKeyWrite(c->db,c->argv[1]);
4150 dstset = lookupKeyWrite(c->db,c->argv[2]);
4151
4152 /* If the source key does not exist return 0, if it's of the wrong type
4153 * raise an error */
4154 if (srcset == NULL || srcset->type != REDIS_SET) {
4155 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
4156 return;
4157 }
4158 /* Error if the destination key is not a set as well */
4159 if (dstset && dstset->type != REDIS_SET) {
4160 addReply(c,shared.wrongtypeerr);
4161 return;
4162 }
4163 /* Remove the element from the source set */
4164 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
4165 /* Key not found in the src set! return zero */
4166 addReply(c,shared.czero);
4167 return;
4168 }
4169 server.dirty++;
4170 /* Add the element to the destination set */
4171 if (!dstset) {
4172 dstset = createSetObject();
4173 dictAdd(c->db->dict,c->argv[2],dstset);
4174 incrRefCount(c->argv[2]);
4175 }
4176 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
4177 incrRefCount(c->argv[3]);
4178 addReply(c,shared.cone);
4179}
4180
ed9b544e 4181static void sismemberCommand(redisClient *c) {
3305306f 4182 robj *set;
ed9b544e 4183
3305306f 4184 set = lookupKeyRead(c->db,c->argv[1]);
4185 if (set == NULL) {
c937aa89 4186 addReply(c,shared.czero);
ed9b544e 4187 } else {
ed9b544e 4188 if (set->type != REDIS_SET) {
c937aa89 4189 addReply(c,shared.wrongtypeerr);
ed9b544e 4190 return;
4191 }
4192 if (dictFind(set->ptr,c->argv[2]))
c937aa89 4193 addReply(c,shared.cone);
ed9b544e 4194 else
c937aa89 4195 addReply(c,shared.czero);
ed9b544e 4196 }
4197}
4198
4199static void scardCommand(redisClient *c) {
3305306f 4200 robj *o;
ed9b544e 4201 dict *s;
4202
3305306f 4203 o = lookupKeyRead(c->db,c->argv[1]);
4204 if (o == NULL) {
c937aa89 4205 addReply(c,shared.czero);
ed9b544e 4206 return;
4207 } else {
ed9b544e 4208 if (o->type != REDIS_SET) {
c937aa89 4209 addReply(c,shared.wrongtypeerr);
ed9b544e 4210 } else {
4211 s = o->ptr;
682ac724 4212 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
3305306f 4213 dictSize(s)));
ed9b544e 4214 }
4215 }
4216}
4217
12fea928 4218static void spopCommand(redisClient *c) {
4219 robj *set;
4220 dictEntry *de;
4221
4222 set = lookupKeyWrite(c->db,c->argv[1]);
4223 if (set == NULL) {
4224 addReply(c,shared.nullbulk);
4225 } else {
4226 if (set->type != REDIS_SET) {
4227 addReply(c,shared.wrongtypeerr);
4228 return;
4229 }
4230 de = dictGetRandomKey(set->ptr);
4231 if (de == NULL) {
4232 addReply(c,shared.nullbulk);
4233 } else {
4234 robj *ele = dictGetEntryKey(de);
4235
942a3961 4236 addReplyBulkLen(c,ele);
12fea928 4237 addReply(c,ele);
4238 addReply(c,shared.crlf);
4239 dictDelete(set->ptr,ele);
4240 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
4241 server.dirty++;
4242 }
4243 }
4244}
4245
2abb95a9 4246static void srandmemberCommand(redisClient *c) {
4247 robj *set;
4248 dictEntry *de;
4249
4250 set = lookupKeyRead(c->db,c->argv[1]);
4251 if (set == NULL) {
4252 addReply(c,shared.nullbulk);
4253 } else {
4254 if (set->type != REDIS_SET) {
4255 addReply(c,shared.wrongtypeerr);
4256 return;
4257 }
4258 de = dictGetRandomKey(set->ptr);
4259 if (de == NULL) {
4260 addReply(c,shared.nullbulk);
4261 } else {
4262 robj *ele = dictGetEntryKey(de);
4263
4264 addReplyBulkLen(c,ele);
4265 addReply(c,ele);
4266 addReply(c,shared.crlf);
4267 }
4268 }
4269}
4270
ed9b544e 4271static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
4272 dict **d1 = (void*) s1, **d2 = (void*) s2;
4273
3305306f 4274 return dictSize(*d1)-dictSize(*d2);
ed9b544e 4275}
4276
682ac724 4277static void sinterGenericCommand(redisClient *c, robj **setskeys, unsigned long setsnum, robj *dstkey) {
ed9b544e 4278 dict **dv = zmalloc(sizeof(dict*)*setsnum);
4279 dictIterator *di;
4280 dictEntry *de;
4281 robj *lenobj = NULL, *dstset = NULL;
682ac724 4282 unsigned long j, cardinality = 0;
ed9b544e 4283
ed9b544e 4284 for (j = 0; j < setsnum; j++) {
4285 robj *setobj;
3305306f 4286
4287 setobj = dstkey ?
4288 lookupKeyWrite(c->db,setskeys[j]) :
4289 lookupKeyRead(c->db,setskeys[j]);
4290 if (!setobj) {
ed9b544e 4291 zfree(dv);
5faa6025 4292 if (dstkey) {
fdcaae84 4293 if (deleteKey(c->db,dstkey))
4294 server.dirty++;
0d36ded0 4295 addReply(c,shared.czero);
5faa6025 4296 } else {
4297 addReply(c,shared.nullmultibulk);
4298 }
ed9b544e 4299 return;
4300 }
ed9b544e 4301 if (setobj->type != REDIS_SET) {
4302 zfree(dv);
c937aa89 4303 addReply(c,shared.wrongtypeerr);
ed9b544e 4304 return;
4305 }
4306 dv[j] = setobj->ptr;
4307 }
4308 /* Sort sets from the smallest to largest, this will improve our
4309 * algorithm's performace */
4310 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
4311
4312 /* The first thing we should output is the total number of elements...
4313 * since this is a multi-bulk write, but at this stage we don't know
4314 * the intersection set size, so we use a trick, append an empty object
4315 * to the output list and save the pointer to later modify it with the
4316 * right length */
4317 if (!dstkey) {
4318 lenobj = createObject(REDIS_STRING,NULL);
4319 addReply(c,lenobj);
4320 decrRefCount(lenobj);
4321 } else {
4322 /* If we have a target key where to store the resulting set
4323 * create this key with an empty set inside */
4324 dstset = createSetObject();
ed9b544e 4325 }
4326
4327 /* Iterate all the elements of the first (smallest) set, and test
4328 * the element against all the other sets, if at least one set does
4329 * not include the element it is discarded */
4330 di = dictGetIterator(dv[0]);
ed9b544e 4331
4332 while((de = dictNext(di)) != NULL) {
4333 robj *ele;
4334
4335 for (j = 1; j < setsnum; j++)
4336 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
4337 if (j != setsnum)
4338 continue; /* at least one set does not contain the member */
4339 ele = dictGetEntryKey(de);
4340 if (!dstkey) {
942a3961 4341 addReplyBulkLen(c,ele);
ed9b544e 4342 addReply(c,ele);
4343 addReply(c,shared.crlf);
4344 cardinality++;
4345 } else {
4346 dictAdd(dstset->ptr,ele,NULL);
4347 incrRefCount(ele);
4348 }
4349 }
4350 dictReleaseIterator(di);
4351
83cdfe18
AG
4352 if (dstkey) {
4353 /* Store the resulting set into the target */
4354 deleteKey(c->db,dstkey);
4355 dictAdd(c->db->dict,dstkey,dstset);
4356 incrRefCount(dstkey);
4357 }
4358
40d224a9 4359 if (!dstkey) {
682ac724 4360 lenobj->ptr = sdscatprintf(sdsempty(),"*%lu\r\n",cardinality);
40d224a9 4361 } else {
682ac724 4362 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
03fd01c7 4363 dictSize((dict*)dstset->ptr)));
40d224a9 4364 server.dirty++;
4365 }
ed9b544e 4366 zfree(dv);
4367}
4368
4369static void sinterCommand(redisClient *c) {
4370 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
4371}
4372
4373static void sinterstoreCommand(redisClient *c) {
4374 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
4375}
4376
f4f56e1d 4377#define REDIS_OP_UNION 0
4378#define REDIS_OP_DIFF 1
4379
4380static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
40d224a9 4381 dict **dv = zmalloc(sizeof(dict*)*setsnum);
4382 dictIterator *di;
4383 dictEntry *de;
f4f56e1d 4384 robj *dstset = NULL;
40d224a9 4385 int j, cardinality = 0;
4386
40d224a9 4387 for (j = 0; j < setsnum; j++) {
4388 robj *setobj;
4389
4390 setobj = dstkey ?
4391 lookupKeyWrite(c->db,setskeys[j]) :
4392 lookupKeyRead(c->db,setskeys[j]);
4393 if (!setobj) {
4394 dv[j] = NULL;
4395 continue;
4396 }
4397 if (setobj->type != REDIS_SET) {
4398 zfree(dv);
4399 addReply(c,shared.wrongtypeerr);
4400 return;
4401 }
4402 dv[j] = setobj->ptr;
4403 }
4404
4405 /* We need a temp set object to store our union. If the dstkey
4406 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
4407 * this set object will be the resulting object to set into the target key*/
4408 dstset = createSetObject();
4409
40d224a9 4410 /* Iterate all the elements of all the sets, add every element a single
4411 * time to the result set */
4412 for (j = 0; j < setsnum; j++) {
51829ed3 4413 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
40d224a9 4414 if (!dv[j]) continue; /* non existing keys are like empty sets */
4415
4416 di = dictGetIterator(dv[j]);
40d224a9 4417
4418 while((de = dictNext(di)) != NULL) {
4419 robj *ele;
4420
4421 /* dictAdd will not add the same element multiple times */
4422 ele = dictGetEntryKey(de);
f4f56e1d 4423 if (op == REDIS_OP_UNION || j == 0) {
4424 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
4425 incrRefCount(ele);
40d224a9 4426 cardinality++;
4427 }
f4f56e1d 4428 } else if (op == REDIS_OP_DIFF) {
4429 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
4430 cardinality--;
4431 }
40d224a9 4432 }
4433 }
4434 dictReleaseIterator(di);
51829ed3
AG
4435
4436 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
40d224a9 4437 }
4438
f4f56e1d 4439 /* Output the content of the resulting set, if not in STORE mode */
4440 if (!dstkey) {
4441 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
4442 di = dictGetIterator(dstset->ptr);
f4f56e1d 4443 while((de = dictNext(di)) != NULL) {
4444 robj *ele;
4445
4446 ele = dictGetEntryKey(de);
942a3961 4447 addReplyBulkLen(c,ele);
f4f56e1d 4448 addReply(c,ele);
4449 addReply(c,shared.crlf);
4450 }
4451 dictReleaseIterator(di);
83cdfe18
AG
4452 } else {
4453 /* If we have a target key where to store the resulting set
4454 * create this key with the result set inside */
4455 deleteKey(c->db,dstkey);
4456 dictAdd(c->db->dict,dstkey,dstset);
4457 incrRefCount(dstkey);
f4f56e1d 4458 }
4459
4460 /* Cleanup */
40d224a9 4461 if (!dstkey) {
40d224a9 4462 decrRefCount(dstset);
4463 } else {
682ac724 4464 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",
03fd01c7 4465 dictSize((dict*)dstset->ptr)));
40d224a9 4466 server.dirty++;
4467 }
4468 zfree(dv);
4469}
4470
4471static void sunionCommand(redisClient *c) {
f4f56e1d 4472 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
40d224a9 4473}
4474
4475static void sunionstoreCommand(redisClient *c) {
f4f56e1d 4476 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
4477}
4478
4479static void sdiffCommand(redisClient *c) {
4480 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
4481}
4482
4483static void sdiffstoreCommand(redisClient *c) {
4484 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
40d224a9 4485}
4486
6b47e12e 4487/* ==================================== ZSets =============================== */
4488
4489/* ZSETs are ordered sets using two data structures to hold the same elements
4490 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
4491 * data structure.
4492 *
4493 * The elements are added to a hash table mapping Redis objects to scores.
4494 * At the same time the elements are added to a skip list mapping scores
4495 * to Redis objects (so objects are sorted by scores in this "view"). */
4496
4497/* This skiplist implementation is almost a C translation of the original
4498 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
4499 * Alternative to Balanced Trees", modified in three ways:
4500 * a) this implementation allows for repeated values.
4501 * b) the comparison is not just by key (our 'score') but by satellite data.
4502 * c) there is a back pointer, so it's a doubly linked list with the back
4503 * pointers being only at "level 1". This allows to traverse the list
4504 * from tail to head, useful for ZREVRANGE. */
4505
4506static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
4507 zskiplistNode *zn = zmalloc(sizeof(*zn));
4508
4509 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
4510 zn->score = score;
4511 zn->obj = obj;
4512 return zn;
4513}
4514
4515static zskiplist *zslCreate(void) {
4516 int j;
4517 zskiplist *zsl;
4518
4519 zsl = zmalloc(sizeof(*zsl));
4520 zsl->level = 1;
cc812361 4521 zsl->length = 0;
6b47e12e 4522 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
4523 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
4524 zsl->header->forward[j] = NULL;
e3870fab 4525 zsl->header->backward = NULL;
4526 zsl->tail = NULL;
6b47e12e 4527 return zsl;
4528}
4529
fd8ccf44 4530static void zslFreeNode(zskiplistNode *node) {
4531 decrRefCount(node->obj);
ad807e6f 4532 zfree(node->forward);
fd8ccf44 4533 zfree(node);
4534}
4535
4536static void zslFree(zskiplist *zsl) {
ad807e6f 4537 zskiplistNode *node = zsl->header->forward[0], *next;
fd8ccf44 4538
ad807e6f 4539 zfree(zsl->header->forward);
4540 zfree(zsl->header);
fd8ccf44 4541 while(node) {
599379dd 4542 next = node->forward[0];
fd8ccf44 4543 zslFreeNode(node);
4544 node = next;
4545 }
ad807e6f 4546 zfree(zsl);
fd8ccf44 4547}
4548
6b47e12e 4549static int zslRandomLevel(void) {
4550 int level = 1;
4551 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
4552 level += 1;
4553 return level;
4554}
4555
4556static void zslInsert(zskiplist *zsl, double score, robj *obj) {
4557 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4558 int i, level;
4559
4560 x = zsl->header;
4561 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4562 while (x->forward[i] &&
4563 (x->forward[i]->score < score ||
4564 (x->forward[i]->score == score &&
4565 compareStringObjects(x->forward[i]->obj,obj) < 0)))
6b47e12e 4566 x = x->forward[i];
4567 update[i] = x;
4568 }
6b47e12e 4569 /* we assume the key is not already inside, since we allow duplicated
4570 * scores, and the re-insertion of score and redis object should never
4571 * happpen since the caller of zslInsert() should test in the hash table
4572 * if the element is already inside or not. */
4573 level = zslRandomLevel();
4574 if (level > zsl->level) {
4575 for (i = zsl->level; i < level; i++)
4576 update[i] = zsl->header;
4577 zsl->level = level;
4578 }
4579 x = zslCreateNode(level,score,obj);
4580 for (i = 0; i < level; i++) {
4581 x->forward[i] = update[i]->forward[i];
4582 update[i]->forward[i] = x;
4583 }
bb975144 4584 x->backward = (update[0] == zsl->header) ? NULL : update[0];
e3870fab 4585 if (x->forward[0])
4586 x->forward[0]->backward = x;
4587 else
4588 zsl->tail = x;
cc812361 4589 zsl->length++;
6b47e12e 4590}
4591
50c55df5 4592/* Delete an element with matching score/object from the skiplist. */
fd8ccf44 4593static int zslDelete(zskiplist *zsl, double score, robj *obj) {
e197b441 4594 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4595 int i;
4596
4597 x = zsl->header;
4598 for (i = zsl->level-1; i >= 0; i--) {
9d60e6e4 4599 while (x->forward[i] &&
4600 (x->forward[i]->score < score ||
4601 (x->forward[i]->score == score &&
4602 compareStringObjects(x->forward[i]->obj,obj) < 0)))
e197b441 4603 x = x->forward[i];
4604 update[i] = x;
4605 }
4606 /* We may have multiple elements with the same score, what we need
4607 * is to find the element with both the right score and object. */
4608 x = x->forward[0];
50c55df5 4609 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
9d60e6e4 4610 for (i = 0; i < zsl->level; i++) {
4611 if (update[i]->forward[i] != x) break;
4612 update[i]->forward[i] = x->forward[i];
4613 }
4614 if (x->forward[0]) {
4615 x->forward[0]->backward = (x->backward == zsl->header) ?
4616 NULL : x->backward;
e197b441 4617 } else {
9d60e6e4 4618 zsl->tail = x->backward;
e197b441 4619 }
9d60e6e4 4620 zslFreeNode(x);
4621 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4622 zsl->level--;
4623 zsl->length--;
4624 return 1;
4625 } else {
4626 return 0; /* not found */
e197b441 4627 }
4628 return 0; /* not found */
fd8ccf44 4629}
4630
1807985b 4631/* Delete all the elements with score between min and max from the skiplist.
4632 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
4633 * Note that this function takes the reference to the hash table view of the
4634 * sorted set, in order to remove the elements from the hash table too. */
4635static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
4636 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
4637 unsigned long removed = 0;
4638 int i;
4639
4640 x = zsl->header;
4641 for (i = zsl->level-1; i >= 0; i--) {
4642 while (x->forward[i] && x->forward[i]->score < min)
4643 x = x->forward[i];
4644 update[i] = x;
4645 }
4646 /* We may have multiple elements with the same score, what we need
4647 * is to find the element with both the right score and object. */
4648 x = x->forward[0];
4649 while (x && x->score <= max) {
4650 zskiplistNode *next;
4651
4652 for (i = 0; i < zsl->level; i++) {
4653 if (update[i]->forward[i] != x) break;
4654 update[i]->forward[i] = x->forward[i];
4655 }
4656 if (x->forward[0]) {
4657 x->forward[0]->backward = (x->backward == zsl->header) ?
4658 NULL : x->backward;
4659 } else {
4660 zsl->tail = x->backward;
4661 }
4662 next = x->forward[0];
4663 dictDelete(dict,x->obj);
4664 zslFreeNode(x);
4665 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4666 zsl->level--;
4667 zsl->length--;
4668 removed++;
4669 x = next;
4670 }
4671 return removed; /* not found */
4672}
4673
50c55df5 4674/* Find the first node having a score equal or greater than the specified one.
4675 * Returns NULL if there is no match. */
4676static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4677 zskiplistNode *x;
4678 int i;
4679
4680 x = zsl->header;
4681 for (i = zsl->level-1; i >= 0; i--) {
4682 while (x->forward[i] && x->forward[i]->score < score)
4683 x = x->forward[i];
4684 }
4685 /* We may have multiple elements with the same score, what we need
4686 * is to find the element with both the right score and object. */
4687 return x->forward[0];
4688}
4689
fd8ccf44 4690/* The actual Z-commands implementations */
4691
7db723ad 4692/* This generic command implements both ZADD and ZINCRBY.
e2665397 4693 * scoreval is the score if the operation is a ZADD (doincrement == 0) or
7db723ad 4694 * the increment if the operation is a ZINCRBY (doincrement == 1). */
e2665397 4695static void zaddGenericCommand(redisClient *c, robj *key, robj *ele, double scoreval, int doincrement) {
fd8ccf44 4696 robj *zsetobj;
4697 zset *zs;
4698 double *score;
4699
e2665397 4700 zsetobj = lookupKeyWrite(c->db,key);
fd8ccf44 4701 if (zsetobj == NULL) {
4702 zsetobj = createZsetObject();
e2665397 4703 dictAdd(c->db->dict,key,zsetobj);
4704 incrRefCount(key);
fd8ccf44 4705 } else {
4706 if (zsetobj->type != REDIS_ZSET) {
4707 addReply(c,shared.wrongtypeerr);
4708 return;
4709 }
4710 }
fd8ccf44 4711 zs = zsetobj->ptr;
e2665397 4712
7db723ad 4713 /* Ok now since we implement both ZADD and ZINCRBY here the code
e2665397 4714 * needs to handle the two different conditions. It's all about setting
4715 * '*score', that is, the new score to set, to the right value. */
4716 score = zmalloc(sizeof(double));
4717 if (doincrement) {
4718 dictEntry *de;
4719
4720 /* Read the old score. If the element was not present starts from 0 */
4721 de = dictFind(zs->dict,ele);
4722 if (de) {
4723 double *oldscore = dictGetEntryVal(de);
4724 *score = *oldscore + scoreval;
4725 } else {
4726 *score = scoreval;
4727 }
4728 } else {
4729 *score = scoreval;
4730 }
4731
4732 /* What follows is a simple remove and re-insert operation that is common
7db723ad 4733 * to both ZADD and ZINCRBY... */
e2665397 4734 if (dictAdd(zs->dict,ele,score) == DICT_OK) {
fd8ccf44 4735 /* case 1: New element */
e2665397 4736 incrRefCount(ele); /* added to hash */
4737 zslInsert(zs->zsl,*score,ele);
4738 incrRefCount(ele); /* added to skiplist */
fd8ccf44 4739 server.dirty++;
e2665397 4740 if (doincrement)
e2665397 4741 addReplyDouble(c,*score);
91d71bfc 4742 else
4743 addReply(c,shared.cone);
fd8ccf44 4744 } else {
4745 dictEntry *de;
4746 double *oldscore;
4747
4748 /* case 2: Score update operation */
e2665397 4749 de = dictFind(zs->dict,ele);
dfc5e96c 4750 redisAssert(de != NULL);
fd8ccf44 4751 oldscore = dictGetEntryVal(de);
4752 if (*score != *oldscore) {
4753 int deleted;
4754
e2665397 4755 /* Remove and insert the element in the skip list with new score */
4756 deleted = zslDelete(zs->zsl,*oldscore,ele);
dfc5e96c 4757 redisAssert(deleted != 0);
e2665397 4758 zslInsert(zs->zsl,*score,ele);
4759 incrRefCount(ele);
4760 /* Update the score in the hash table */
4761 dictReplace(zs->dict,ele,score);
fd8ccf44 4762 server.dirty++;
2161a965 4763 } else {
4764 zfree(score);
fd8ccf44 4765 }
e2665397 4766 if (doincrement)
4767 addReplyDouble(c,*score);
4768 else
4769 addReply(c,shared.czero);
fd8ccf44 4770 }
4771}
4772
e2665397 4773static void zaddCommand(redisClient *c) {
4774 double scoreval;
4775
4776 scoreval = strtod(c->argv[2]->ptr,NULL);
4777 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,0);
4778}
4779
7db723ad 4780static void zincrbyCommand(redisClient *c) {
e2665397 4781 double scoreval;
4782
4783 scoreval = strtod(c->argv[2]->ptr,NULL);
4784 zaddGenericCommand(c,c->argv[1],c->argv[3],scoreval,1);
4785}
4786
1b7106e7 4787static void zremCommand(redisClient *c) {
4788 robj *zsetobj;
4789 zset *zs;
4790
4791 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4792 if (zsetobj == NULL) {
4793 addReply(c,shared.czero);
4794 } else {
4795 dictEntry *de;
4796 double *oldscore;
4797 int deleted;
4798
4799 if (zsetobj->type != REDIS_ZSET) {
4800 addReply(c,shared.wrongtypeerr);
4801 return;
4802 }
4803 zs = zsetobj->ptr;
4804 de = dictFind(zs->dict,c->argv[2]);
4805 if (de == NULL) {
4806 addReply(c,shared.czero);
4807 return;
4808 }
4809 /* Delete from the skiplist */
4810 oldscore = dictGetEntryVal(de);
4811 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
dfc5e96c 4812 redisAssert(deleted != 0);
1b7106e7 4813
4814 /* Delete from the hash table */
4815 dictDelete(zs->dict,c->argv[2]);
4816 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4817 server.dirty++;
4818 addReply(c,shared.cone);
4819 }
4820}
4821
1807985b 4822static void zremrangebyscoreCommand(redisClient *c) {
4823 double min = strtod(c->argv[2]->ptr,NULL);
4824 double max = strtod(c->argv[3]->ptr,NULL);
4825 robj *zsetobj;
4826 zset *zs;
4827
4828 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4829 if (zsetobj == NULL) {
4830 addReply(c,shared.czero);
4831 } else {
4832 long deleted;
4833
4834 if (zsetobj->type != REDIS_ZSET) {
4835 addReply(c,shared.wrongtypeerr);
4836 return;
4837 }
4838 zs = zsetobj->ptr;
4839 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4840 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4841 server.dirty += deleted;
4842 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4843 }
4844}
4845
e3870fab 4846static void zrangeGenericCommand(redisClient *c, int reverse) {
cc812361 4847 robj *o;
4848 int start = atoi(c->argv[2]->ptr);
4849 int end = atoi(c->argv[3]->ptr);
752da584 4850 int withscores = 0;
4851
4852 if (c->argc == 5 && !strcasecmp(c->argv[4]->ptr,"withscores")) {
4853 withscores = 1;
4854 } else if (c->argc >= 5) {
4855 addReply(c,shared.syntaxerr);
4856 return;
4857 }
cc812361 4858
4859 o = lookupKeyRead(c->db,c->argv[1]);
4860 if (o == NULL) {
4861 addReply(c,shared.nullmultibulk);
4862 } else {
4863 if (o->type != REDIS_ZSET) {
4864 addReply(c,shared.wrongtypeerr);
4865 } else {
4866 zset *zsetobj = o->ptr;
4867 zskiplist *zsl = zsetobj->zsl;
4868 zskiplistNode *ln;
4869
4870 int llen = zsl->length;
4871 int rangelen, j;
4872 robj *ele;
4873
4874 /* convert negative indexes */
4875 if (start < 0) start = llen+start;
4876 if (end < 0) end = llen+end;
4877 if (start < 0) start = 0;
4878 if (end < 0) end = 0;
4879
4880 /* indexes sanity checks */
4881 if (start > end || start >= llen) {
4882 /* Out of range start or start > end result in empty list */
4883 addReply(c,shared.emptymultibulk);
4884 return;
4885 }
4886 if (end >= llen) end = llen-1;
4887 rangelen = (end-start)+1;
4888
4889 /* Return the result in form of a multi-bulk reply */
e3870fab 4890 if (reverse) {
4891 ln = zsl->tail;
4892 while (start--)
4893 ln = ln->backward;
4894 } else {
4895 ln = zsl->header->forward[0];
4896 while (start--)
4897 ln = ln->forward[0];
4898 }
cc812361 4899
752da584 4900 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",
4901 withscores ? (rangelen*2) : rangelen));
cc812361 4902 for (j = 0; j < rangelen; j++) {
0aad7a19 4903 ele = ln->obj;
cc812361 4904 addReplyBulkLen(c,ele);
4905 addReply(c,ele);
4906 addReply(c,shared.crlf);
752da584 4907 if (withscores)
4908 addReplyDouble(c,ln->score);
e3870fab 4909 ln = reverse ? ln->backward : ln->forward[0];
cc812361 4910 }
4911 }
4912 }
4913}
4914
e3870fab 4915static void zrangeCommand(redisClient *c) {
4916 zrangeGenericCommand(c,0);
4917}
4918
4919static void zrevrangeCommand(redisClient *c) {
4920 zrangeGenericCommand(c,1);
4921}
4922
50c55df5 4923static void zrangebyscoreCommand(redisClient *c) {
4924 robj *o;
4925 double min = strtod(c->argv[2]->ptr,NULL);
4926 double max = strtod(c->argv[3]->ptr,NULL);
80181f78 4927 int offset = 0, limit = -1;
4928
4929 if (c->argc != 4 && c->argc != 7) {
454d4e43 4930 addReplySds(c,
4931 sdsnew("-ERR wrong number of arguments for ZRANGEBYSCORE\r\n"));
80181f78 4932 return;
4933 } else if (c->argc == 7 && strcasecmp(c->argv[4]->ptr,"limit")) {
4934 addReply(c,shared.syntaxerr);
4935 return;
4936 } else if (c->argc == 7) {
4937 offset = atoi(c->argv[5]->ptr);
4938 limit = atoi(c->argv[6]->ptr);
0b13687c 4939 if (offset < 0) offset = 0;
80181f78 4940 }
50c55df5 4941
4942 o = lookupKeyRead(c->db,c->argv[1]);
4943 if (o == NULL) {
4944 addReply(c,shared.nullmultibulk);
4945 } else {
4946 if (o->type != REDIS_ZSET) {
4947 addReply(c,shared.wrongtypeerr);
4948 } else {
4949 zset *zsetobj = o->ptr;
4950 zskiplist *zsl = zsetobj->zsl;
4951 zskiplistNode *ln;
4952 robj *ele, *lenobj;
4953 unsigned int rangelen = 0;
4954
4955 /* Get the first node with the score >= min */
4956 ln = zslFirstWithScore(zsl,min);
4957 if (ln == NULL) {
4958 /* No element matching the speciifed interval */
4959 addReply(c,shared.emptymultibulk);
4960 return;
4961 }
4962
4963 /* We don't know in advance how many matching elements there
4964 * are in the list, so we push this object that will represent
4965 * the multi-bulk length in the output buffer, and will "fix"
4966 * it later */
4967 lenobj = createObject(REDIS_STRING,NULL);
4968 addReply(c,lenobj);
c74e7c77 4969 decrRefCount(lenobj);
50c55df5 4970
dbbc7285 4971 while(ln && ln->score <= max) {
80181f78 4972 if (offset) {
4973 offset--;
4974 ln = ln->forward[0];
4975 continue;
4976 }
4977 if (limit == 0) break;
50c55df5 4978 ele = ln->obj;
4979 addReplyBulkLen(c,ele);
4980 addReply(c,ele);
4981 addReply(c,shared.crlf);
4982 ln = ln->forward[0];
4983 rangelen++;
80181f78 4984 if (limit > 0) limit--;
50c55df5 4985 }
4986 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4987 }
4988 }
4989}
4990
3c41331e 4991static void zcardCommand(redisClient *c) {
e197b441 4992 robj *o;
4993 zset *zs;
4994
4995 o = lookupKeyRead(c->db,c->argv[1]);
4996 if (o == NULL) {
4997 addReply(c,shared.czero);
4998 return;
4999 } else {
5000 if (o->type != REDIS_ZSET) {
5001 addReply(c,shared.wrongtypeerr);
5002 } else {
5003 zs = o->ptr;
682ac724 5004 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",zs->zsl->length));
e197b441 5005 }
5006 }
5007}
5008
6e333bbe 5009static void zscoreCommand(redisClient *c) {
5010 robj *o;
5011 zset *zs;
5012
5013 o = lookupKeyRead(c->db,c->argv[1]);
5014 if (o == NULL) {
96d8b4ee 5015 addReply(c,shared.nullbulk);
6e333bbe 5016 return;
5017 } else {
5018 if (o->type != REDIS_ZSET) {
5019 addReply(c,shared.wrongtypeerr);
5020 } else {
5021 dictEntry *de;
5022
5023 zs = o->ptr;
5024 de = dictFind(zs->dict,c->argv[2]);
5025 if (!de) {
5026 addReply(c,shared.nullbulk);
5027 } else {
6e333bbe 5028 double *score = dictGetEntryVal(de);
5029
e2665397 5030 addReplyDouble(c,*score);
6e333bbe 5031 }
5032 }
5033 }
5034}
5035
6b47e12e 5036/* ========================= Non type-specific commands ==================== */
5037
ed9b544e 5038static void flushdbCommand(redisClient *c) {
ca37e9cd 5039 server.dirty += dictSize(c->db->dict);
3305306f 5040 dictEmpty(c->db->dict);
5041 dictEmpty(c->db->expires);
ed9b544e 5042 addReply(c,shared.ok);
ed9b544e 5043}
5044
5045static void flushallCommand(redisClient *c) {
ca37e9cd 5046 server.dirty += emptyDb();
ed9b544e 5047 addReply(c,shared.ok);
f78fd11b 5048 rdbSave(server.dbfilename);
ca37e9cd 5049 server.dirty++;
ed9b544e 5050}
5051
56906eef 5052static redisSortOperation *createSortOperation(int type, robj *pattern) {
ed9b544e 5053 redisSortOperation *so = zmalloc(sizeof(*so));
ed9b544e 5054 so->type = type;
5055 so->pattern = pattern;
5056 return so;
5057}
5058
5059/* Return the value associated to the key with a name obtained
5060 * substituting the first occurence of '*' in 'pattern' with 'subst' */
56906eef 5061static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
ed9b544e 5062 char *p;
5063 sds spat, ssub;
5064 robj keyobj;
5065 int prefixlen, sublen, postfixlen;
ed9b544e 5066 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
5067 struct {
f1017b3f 5068 long len;
5069 long free;
ed9b544e 5070 char buf[REDIS_SORTKEY_MAX+1];
5071 } keyname;
5072
28173a49 5073 /* If the pattern is "#" return the substitution object itself in order
5074 * to implement the "SORT ... GET #" feature. */
5075 spat = pattern->ptr;
5076 if (spat[0] == '#' && spat[1] == '\0') {
5077 return subst;
5078 }
5079
5080 /* The substitution object may be specially encoded. If so we create
9d65a1bb 5081 * a decoded object on the fly. Otherwise getDecodedObject will just
5082 * increment the ref count, that we'll decrement later. */
5083 subst = getDecodedObject(subst);
942a3961 5084
ed9b544e 5085 ssub = subst->ptr;
5086 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
5087 p = strchr(spat,'*');
ed5a857a 5088 if (!p) {
5089 decrRefCount(subst);
5090 return NULL;
5091 }
ed9b544e 5092
5093 prefixlen = p-spat;
5094 sublen = sdslen(ssub);
5095 postfixlen = sdslen(spat)-(prefixlen+1);
5096 memcpy(keyname.buf,spat,prefixlen);
5097 memcpy(keyname.buf+prefixlen,ssub,sublen);
5098 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
5099 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
5100 keyname.len = prefixlen+sublen+postfixlen;
5101
dfc5e96c 5102 initStaticStringObject(keyobj,((char*)&keyname)+(sizeof(long)*2))
942a3961 5103 decrRefCount(subst);
5104
a4d1ba9a 5105 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3305306f 5106 return lookupKeyRead(db,&keyobj);
ed9b544e 5107}
5108
5109/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
5110 * the additional parameter is not standard but a BSD-specific we have to
5111 * pass sorting parameters via the global 'server' structure */
5112static int sortCompare(const void *s1, const void *s2) {
5113 const redisSortObject *so1 = s1, *so2 = s2;
5114 int cmp;
5115
5116 if (!server.sort_alpha) {
5117 /* Numeric sorting. Here it's trivial as we precomputed scores */
5118 if (so1->u.score > so2->u.score) {
5119 cmp = 1;
5120 } else if (so1->u.score < so2->u.score) {
5121 cmp = -1;
5122 } else {
5123 cmp = 0;
5124 }
5125 } else {
5126 /* Alphanumeric sorting */
5127 if (server.sort_bypattern) {
5128 if (!so1->u.cmpobj || !so2->u.cmpobj) {
5129 /* At least one compare object is NULL */
5130 if (so1->u.cmpobj == so2->u.cmpobj)
5131 cmp = 0;
5132 else if (so1->u.cmpobj == NULL)
5133 cmp = -1;
5134 else
5135 cmp = 1;
5136 } else {
5137 /* We have both the objects, use strcoll */
5138 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
5139 }
5140 } else {
5141 /* Compare elements directly */
9d65a1bb 5142 robj *dec1, *dec2;
5143
5144 dec1 = getDecodedObject(so1->obj);
5145 dec2 = getDecodedObject(so2->obj);
5146 cmp = strcoll(dec1->ptr,dec2->ptr);
5147 decrRefCount(dec1);
5148 decrRefCount(dec2);
ed9b544e 5149 }
5150 }
5151 return server.sort_desc ? -cmp : cmp;
5152}
5153
5154/* The SORT command is the most complex command in Redis. Warning: this code
5155 * is optimized for speed and a bit less for readability */
5156static void sortCommand(redisClient *c) {
ed9b544e 5157 list *operations;
5158 int outputlen = 0;
5159 int desc = 0, alpha = 0;
5160 int limit_start = 0, limit_count = -1, start, end;
5161 int j, dontsort = 0, vectorlen;
5162 int getop = 0; /* GET operation counter */
443c6409 5163 robj *sortval, *sortby = NULL, *storekey = NULL;
ed9b544e 5164 redisSortObject *vector; /* Resulting vector to sort */
5165
5166 /* Lookup the key to sort. It must be of the right types */
3305306f 5167 sortval = lookupKeyRead(c->db,c->argv[1]);
5168 if (sortval == NULL) {
d922ae65 5169 addReply(c,shared.nullmultibulk);
ed9b544e 5170 return;
5171 }
a5eb649b 5172 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST &&
5173 sortval->type != REDIS_ZSET)
5174 {
c937aa89 5175 addReply(c,shared.wrongtypeerr);
ed9b544e 5176 return;
5177 }
5178
5179 /* Create a list of operations to perform for every sorted element.
5180 * Operations can be GET/DEL/INCR/DECR */
5181 operations = listCreate();
092dac2a 5182 listSetFreeMethod(operations,zfree);
ed9b544e 5183 j = 2;
5184
5185 /* Now we need to protect sortval incrementing its count, in the future
5186 * SORT may have options able to overwrite/delete keys during the sorting
5187 * and the sorted key itself may get destroied */
5188 incrRefCount(sortval);
5189
5190 /* The SORT command has an SQL-alike syntax, parse it */
5191 while(j < c->argc) {
5192 int leftargs = c->argc-j-1;
5193 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
5194 desc = 0;
5195 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
5196 desc = 1;
5197 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
5198 alpha = 1;
5199 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
5200 limit_start = atoi(c->argv[j+1]->ptr);
5201 limit_count = atoi(c->argv[j+2]->ptr);
5202 j+=2;
443c6409 5203 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
5204 storekey = c->argv[j+1];
5205 j++;
ed9b544e 5206 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
5207 sortby = c->argv[j+1];
5208 /* If the BY pattern does not contain '*', i.e. it is constant,
5209 * we don't need to sort nor to lookup the weight keys. */
5210 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
5211 j++;
5212 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
5213 listAddNodeTail(operations,createSortOperation(
5214 REDIS_SORT_GET,c->argv[j+1]));
5215 getop++;
5216 j++;
ed9b544e 5217 } else {
5218 decrRefCount(sortval);
5219 listRelease(operations);
c937aa89 5220 addReply(c,shared.syntaxerr);
ed9b544e 5221 return;
5222 }
5223 j++;
5224 }
5225
5226 /* Load the sorting vector with all the objects to sort */
a5eb649b 5227 switch(sortval->type) {
5228 case REDIS_LIST: vectorlen = listLength((list*)sortval->ptr); break;
5229 case REDIS_SET: vectorlen = dictSize((dict*)sortval->ptr); break;
5230 case REDIS_ZSET: vectorlen = dictSize(((zset*)sortval->ptr)->dict); break;
dfc5e96c 5231 default: vectorlen = 0; redisAssert(0); /* Avoid GCC warning */
a5eb649b 5232 }
ed9b544e 5233 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
ed9b544e 5234 j = 0;
a5eb649b 5235
ed9b544e 5236 if (sortval->type == REDIS_LIST) {
5237 list *list = sortval->ptr;
6208b3a7 5238 listNode *ln;
5239
5240 listRewind(list);
5241 while((ln = listYield(list))) {
ed9b544e 5242 robj *ele = ln->value;
5243 vector[j].obj = ele;
5244 vector[j].u.score = 0;
5245 vector[j].u.cmpobj = NULL;
ed9b544e 5246 j++;
5247 }
5248 } else {
a5eb649b 5249 dict *set;
ed9b544e 5250 dictIterator *di;
5251 dictEntry *setele;
5252
a5eb649b 5253 if (sortval->type == REDIS_SET) {
5254 set = sortval->ptr;
5255 } else {
5256 zset *zs = sortval->ptr;
5257 set = zs->dict;
5258 }
5259
ed9b544e 5260 di = dictGetIterator(set);
ed9b544e 5261 while((setele = dictNext(di)) != NULL) {
5262 vector[j].obj = dictGetEntryKey(setele);
5263 vector[j].u.score = 0;
5264 vector[j].u.cmpobj = NULL;
5265 j++;
5266 }
5267 dictReleaseIterator(di);
5268 }
dfc5e96c 5269 redisAssert(j == vectorlen);
ed9b544e 5270
5271 /* Now it's time to load the right scores in the sorting vector */
5272 if (dontsort == 0) {
5273 for (j = 0; j < vectorlen; j++) {
5274 if (sortby) {
5275 robj *byval;
5276
3305306f 5277 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
ed9b544e 5278 if (!byval || byval->type != REDIS_STRING) continue;
5279 if (alpha) {
9d65a1bb 5280 vector[j].u.cmpobj = getDecodedObject(byval);
ed9b544e 5281 } else {
942a3961 5282 if (byval->encoding == REDIS_ENCODING_RAW) {
5283 vector[j].u.score = strtod(byval->ptr,NULL);
5284 } else {
9d65a1bb 5285 /* Don't need to decode the object if it's
5286 * integer-encoded (the only encoding supported) so
5287 * far. We can just cast it */
f1017b3f 5288 if (byval->encoding == REDIS_ENCODING_INT) {
942a3961 5289 vector[j].u.score = (long)byval->ptr;
f1017b3f 5290 } else
dfc5e96c 5291 redisAssert(1 != 1);
942a3961 5292 }
ed9b544e 5293 }
5294 } else {
942a3961 5295 if (!alpha) {
5296 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
5297 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
5298 else {
5299 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
5300 vector[j].u.score = (long) vector[j].obj->ptr;
5301 else
dfc5e96c 5302 redisAssert(1 != 1);
942a3961 5303 }
5304 }
ed9b544e 5305 }
5306 }
5307 }
5308
5309 /* We are ready to sort the vector... perform a bit of sanity check
5310 * on the LIMIT option too. We'll use a partial version of quicksort. */
5311 start = (limit_start < 0) ? 0 : limit_start;
5312 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
5313 if (start >= vectorlen) {
5314 start = vectorlen-1;
5315 end = vectorlen-2;
5316 }
5317 if (end >= vectorlen) end = vectorlen-1;
5318
5319 if (dontsort == 0) {
5320 server.sort_desc = desc;
5321 server.sort_alpha = alpha;
5322 server.sort_bypattern = sortby ? 1 : 0;
5f5b9840 5323 if (sortby && (start != 0 || end != vectorlen-1))
5324 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
5325 else
5326 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
ed9b544e 5327 }
5328
5329 /* Send command output to the output buffer, performing the specified
5330 * GET/DEL/INCR/DECR operations if any. */
5331 outputlen = getop ? getop*(end-start+1) : end-start+1;
443c6409 5332 if (storekey == NULL) {
5333 /* STORE option not specified, sent the sorting result to client */
5334 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
5335 for (j = start; j <= end; j++) {
5336 listNode *ln;
5337 if (!getop) {
5338 addReplyBulkLen(c,vector[j].obj);
5339 addReply(c,vector[j].obj);
5340 addReply(c,shared.crlf);
5341 }
5342 listRewind(operations);
5343 while((ln = listYield(operations))) {
5344 redisSortOperation *sop = ln->value;
5345 robj *val = lookupKeyByPattern(c->db,sop->pattern,
5346 vector[j].obj);
5347
5348 if (sop->type == REDIS_SORT_GET) {
5349 if (!val || val->type != REDIS_STRING) {
5350 addReply(c,shared.nullbulk);
5351 } else {
5352 addReplyBulkLen(c,val);
5353 addReply(c,val);
5354 addReply(c,shared.crlf);
5355 }
5356 } else {
dfc5e96c 5357 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
443c6409 5358 }
5359 }
ed9b544e 5360 }
443c6409 5361 } else {
5362 robj *listObject = createListObject();
5363 list *listPtr = (list*) listObject->ptr;
5364
5365 /* STORE option specified, set the sorting result as a List object */
5366 for (j = start; j <= end; j++) {
5367 listNode *ln;
5368 if (!getop) {
5369 listAddNodeTail(listPtr,vector[j].obj);
5370 incrRefCount(vector[j].obj);
5371 }
5372 listRewind(operations);
5373 while((ln = listYield(operations))) {
5374 redisSortOperation *sop = ln->value;
5375 robj *val = lookupKeyByPattern(c->db,sop->pattern,
5376 vector[j].obj);
5377
5378 if (sop->type == REDIS_SORT_GET) {
5379 if (!val || val->type != REDIS_STRING) {
5380 listAddNodeTail(listPtr,createStringObject("",0));
5381 } else {
5382 listAddNodeTail(listPtr,val);
5383 incrRefCount(val);
5384 }
ed9b544e 5385 } else {
dfc5e96c 5386 redisAssert(sop->type == REDIS_SORT_GET); /* always fails */
ed9b544e 5387 }
ed9b544e 5388 }
ed9b544e 5389 }
121796f7 5390 if (dictReplace(c->db->dict,storekey,listObject)) {
5391 incrRefCount(storekey);
5392 }
443c6409 5393 /* Note: we add 1 because the DB is dirty anyway since even if the
5394 * SORT result is empty a new key is set and maybe the old content
5395 * replaced. */
5396 server.dirty += 1+outputlen;
5397 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
ed9b544e 5398 }
5399
5400 /* Cleanup */
5401 decrRefCount(sortval);
5402 listRelease(operations);
5403 for (j = 0; j < vectorlen; j++) {
5404 if (sortby && alpha && vector[j].u.cmpobj)
5405 decrRefCount(vector[j].u.cmpobj);
5406 }
5407 zfree(vector);
5408}
5409
1c85b79f 5410/* Create the string returned by the INFO command. This is decoupled
5411 * by the INFO command itself as we need to report the same information
5412 * on memory corruption problems. */
5413static sds genRedisInfoString(void) {
ed9b544e 5414 sds info;
5415 time_t uptime = time(NULL)-server.stat_starttime;
c3cb078d 5416 int j;
ed9b544e 5417
5418 info = sdscatprintf(sdsempty(),
5419 "redis_version:%s\r\n"
f1017b3f 5420 "arch_bits:%s\r\n"
7a932b74 5421 "multiplexing_api:%s\r\n"
0d7170a4 5422 "process_id:%ld\r\n"
682ac724 5423 "uptime_in_seconds:%ld\r\n"
5424 "uptime_in_days:%ld\r\n"
ed9b544e 5425 "connected_clients:%d\r\n"
5426 "connected_slaves:%d\r\n"
f86a74e9 5427 "blocked_clients:%d\r\n"
5fba9f71 5428 "used_memory:%zu\r\n"
ed9b544e 5429 "changes_since_last_save:%lld\r\n"
be2bb6b0 5430 "bgsave_in_progress:%d\r\n"
682ac724 5431 "last_save_time:%ld\r\n"
b3fad521 5432 "bgrewriteaof_in_progress:%d\r\n"
ed9b544e 5433 "total_connections_received:%lld\r\n"
5434 "total_commands_processed:%lld\r\n"
7d98e08c 5435 "vm_enabled:%d\r\n"
a0f643ea 5436 "role:%s\r\n"
ed9b544e 5437 ,REDIS_VERSION,
f1017b3f 5438 (sizeof(long) == 8) ? "64" : "32",
7a932b74 5439 aeGetApiName(),
0d7170a4 5440 (long) getpid(),
a0f643ea 5441 uptime,
5442 uptime/(3600*24),
ed9b544e 5443 listLength(server.clients)-listLength(server.slaves),
5444 listLength(server.slaves),
f86a74e9 5445 server.blockedclients,
ed9b544e 5446 server.usedmemory,
5447 server.dirty,
9d65a1bb 5448 server.bgsavechildpid != -1,
ed9b544e 5449 server.lastsave,
b3fad521 5450 server.bgrewritechildpid != -1,
ed9b544e 5451 server.stat_numconnections,
5452 server.stat_numcommands,
7d98e08c 5453 server.vm_enabled != 0,
a0f643ea 5454 server.masterhost == NULL ? "master" : "slave"
ed9b544e 5455 );
a0f643ea 5456 if (server.masterhost) {
5457 info = sdscatprintf(info,
5458 "master_host:%s\r\n"
5459 "master_port:%d\r\n"
5460 "master_link_status:%s\r\n"
5461 "master_last_io_seconds_ago:%d\r\n"
5462 ,server.masterhost,
5463 server.masterport,
5464 (server.replstate == REDIS_REPL_CONNECTED) ?
5465 "up" : "down",
f72b934d 5466 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
a0f643ea 5467 );
5468 }
7d98e08c 5469 if (server.vm_enabled) {
5470 info = sdscatprintf(info,
5471 "vm_conf_max_memory:%llu\r\n"
5472 "vm_conf_page_size:%llu\r\n"
5473 "vm_conf_pages:%llu\r\n"
5474 "vm_stats_used_pages:%llu\r\n"
5475 "vm_stats_swapped_objects:%llu\r\n"
5476 "vm_stats_swappin_count:%llu\r\n"
5477 "vm_stats_swappout_count:%llu\r\n"
5478 ,(unsigned long long) server.vm_max_memory,
5479 (unsigned long long) server.vm_page_size,
5480 (unsigned long long) server.vm_pages,
5481 (unsigned long long) server.vm_stats_used_pages,
5482 (unsigned long long) server.vm_stats_swapped_objects,
5483 (unsigned long long) server.vm_stats_swapins,
5484 (unsigned long long) server.vm_stats_swapouts
5485 );
5486 }
c3cb078d 5487 for (j = 0; j < server.dbnum; j++) {
5488 long long keys, vkeys;
5489
5490 keys = dictSize(server.db[j].dict);
5491 vkeys = dictSize(server.db[j].expires);
5492 if (keys || vkeys) {
9d65a1bb 5493 info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n",
c3cb078d 5494 j, keys, vkeys);
5495 }
5496 }
1c85b79f 5497 return info;
5498}
5499
5500static void infoCommand(redisClient *c) {
5501 sds info = genRedisInfoString();
83c6a618 5502 addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n",
5503 (unsigned long)sdslen(info)));
ed9b544e 5504 addReplySds(c,info);
70003d28 5505 addReply(c,shared.crlf);
ed9b544e 5506}
5507
3305306f 5508static void monitorCommand(redisClient *c) {
5509 /* ignore MONITOR if aleady slave or in monitor mode */
5510 if (c->flags & REDIS_SLAVE) return;
5511
5512 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
5513 c->slaveseldb = 0;
6b47e12e 5514 listAddNodeTail(server.monitors,c);
3305306f 5515 addReply(c,shared.ok);
5516}
5517
5518/* ================================= Expire ================================= */
5519static int removeExpire(redisDb *db, robj *key) {
5520 if (dictDelete(db->expires,key) == DICT_OK) {
5521 return 1;
5522 } else {
5523 return 0;
5524 }
5525}
5526
5527static int setExpire(redisDb *db, robj *key, time_t when) {
5528 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
5529 return 0;
5530 } else {
5531 incrRefCount(key);
5532 return 1;
5533 }
5534}
5535
bb32ede5 5536/* Return the expire time of the specified key, or -1 if no expire
5537 * is associated with this key (i.e. the key is non volatile) */
5538static time_t getExpire(redisDb *db, robj *key) {
5539 dictEntry *de;
5540
5541 /* No expire? return ASAP */
5542 if (dictSize(db->expires) == 0 ||
5543 (de = dictFind(db->expires,key)) == NULL) return -1;
5544
5545 return (time_t) dictGetEntryVal(de);
5546}
5547
3305306f 5548static int expireIfNeeded(redisDb *db, robj *key) {
5549 time_t when;
5550 dictEntry *de;
5551
5552 /* No expire? return ASAP */
5553 if (dictSize(db->expires) == 0 ||
5554 (de = dictFind(db->expires,key)) == NULL) return 0;
5555
5556 /* Lookup the expire */
5557 when = (time_t) dictGetEntryVal(de);
5558 if (time(NULL) <= when) return 0;
5559
5560 /* Delete the key */
5561 dictDelete(db->expires,key);
5562 return dictDelete(db->dict,key) == DICT_OK;
5563}
5564
5565static int deleteIfVolatile(redisDb *db, robj *key) {
5566 dictEntry *de;
5567
5568 /* No expire? return ASAP */
5569 if (dictSize(db->expires) == 0 ||
5570 (de = dictFind(db->expires,key)) == NULL) return 0;
5571
5572 /* Delete the key */
0c66a471 5573 server.dirty++;
3305306f 5574 dictDelete(db->expires,key);
5575 return dictDelete(db->dict,key) == DICT_OK;
5576}
5577
802e8373 5578static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
3305306f 5579 dictEntry *de;
3305306f 5580
802e8373 5581 de = dictFind(c->db->dict,key);
3305306f 5582 if (de == NULL) {
5583 addReply(c,shared.czero);
5584 return;
5585 }
43e5ccdf 5586 if (seconds < 0) {
5587 if (deleteKey(c->db,key)) server.dirty++;
5588 addReply(c, shared.cone);
3305306f 5589 return;
5590 } else {
5591 time_t when = time(NULL)+seconds;
802e8373 5592 if (setExpire(c->db,key,when)) {
3305306f 5593 addReply(c,shared.cone);
77423026 5594 server.dirty++;
5595 } else {
3305306f 5596 addReply(c,shared.czero);
77423026 5597 }
3305306f 5598 return;
5599 }
5600}
5601
802e8373 5602static void expireCommand(redisClient *c) {
5603 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
5604}
5605
5606static void expireatCommand(redisClient *c) {
5607 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
5608}
5609
fd88489a 5610static void ttlCommand(redisClient *c) {
5611 time_t expire;
5612 int ttl = -1;
5613
5614 expire = getExpire(c->db,c->argv[1]);
5615 if (expire != -1) {
5616 ttl = (int) (expire-time(NULL));
5617 if (ttl < 0) ttl = -1;
5618 }
5619 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
5620}
5621
6e469882 5622/* ================================ MULTI/EXEC ============================== */
5623
5624/* Client state initialization for MULTI/EXEC */
5625static void initClientMultiState(redisClient *c) {
5626 c->mstate.commands = NULL;
5627 c->mstate.count = 0;
5628}
5629
5630/* Release all the resources associated with MULTI/EXEC state */
5631static void freeClientMultiState(redisClient *c) {
5632 int j;
5633
5634 for (j = 0; j < c->mstate.count; j++) {
5635 int i;
5636 multiCmd *mc = c->mstate.commands+j;
5637
5638 for (i = 0; i < mc->argc; i++)
5639 decrRefCount(mc->argv[i]);
5640 zfree(mc->argv);
5641 }
5642 zfree(c->mstate.commands);
5643}
5644
5645/* Add a new command into the MULTI commands queue */
5646static void queueMultiCommand(redisClient *c, struct redisCommand *cmd) {
5647 multiCmd *mc;
5648 int j;
5649
5650 c->mstate.commands = zrealloc(c->mstate.commands,
5651 sizeof(multiCmd)*(c->mstate.count+1));
5652 mc = c->mstate.commands+c->mstate.count;
5653 mc->cmd = cmd;
5654 mc->argc = c->argc;
5655 mc->argv = zmalloc(sizeof(robj*)*c->argc);
5656 memcpy(mc->argv,c->argv,sizeof(robj*)*c->argc);
5657 for (j = 0; j < c->argc; j++)
5658 incrRefCount(mc->argv[j]);
5659 c->mstate.count++;
5660}
5661
5662static void multiCommand(redisClient *c) {
5663 c->flags |= REDIS_MULTI;
36c548f0 5664 addReply(c,shared.ok);
6e469882 5665}
5666
5667static void execCommand(redisClient *c) {
5668 int j;
5669 robj **orig_argv;
5670 int orig_argc;
5671
5672 if (!(c->flags & REDIS_MULTI)) {
5673 addReplySds(c,sdsnew("-ERR EXEC without MULTI\r\n"));
5674 return;
5675 }
5676
5677 orig_argv = c->argv;
5678 orig_argc = c->argc;
5679 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->mstate.count));
5680 for (j = 0; j < c->mstate.count; j++) {
5681 c->argc = c->mstate.commands[j].argc;
5682 c->argv = c->mstate.commands[j].argv;
5683 call(c,c->mstate.commands[j].cmd);
5684 }
5685 c->argv = orig_argv;
5686 c->argc = orig_argc;
5687 freeClientMultiState(c);
5688 initClientMultiState(c);
5689 c->flags &= (~REDIS_MULTI);
5690}
5691
4409877e 5692/* =========================== Blocking Operations ========================= */
5693
/* Currently Redis blocking operations support is limited to list POP ops,
 * so the current implementation is not fully generic, but it is also not
 * completely specific, so it will not require a rewrite to support new
 * kinds of blocking operations in the future.
 *
 * Still it's important to note that list blocking operations can already be
 * used as a notification mechanism in order to implement other blocking
 * operations at application level, so there must be very strong evidence
 * of usefulness and generality before new blocking operations are implemented.
 *
 * This is how the current blocking POP works, using BLPOP as an example:
 * - If the user calls BLPOP and the key exists and contains a non-empty list
 *   then LPOP is called instead. So BLPOP is semantically the same as LPOP
 *   if there is no need to block.
 * - If instead BLPOP is called and the key does not exist or the list is
 *   empty we need to block. In order to do so we remove the notification for
 *   new data to read in the client socket (so that we'll not serve new
 *   requests if the blocking request is not served). Also we put the client
 *   in a dictionary (db->blockingkeys) mapping keys to a list of clients
 *   blocking for these keys.
 * - If a PUSH operation against a key with blocked clients waiting is
 *   performed, we serve the first in the list: basically instead of pushing
 *   the new element inside the list we return it to the (first / oldest)
 *   blocking client, unblock the client, and remove it from the list.
 *
 * The above comment and the source code should be enough in order to understand
 * the implementation and modify / fix it later.
 */
5722
5723/* Set a client in blocking mode for the specified key, with the specified
5724 * timeout */
b177fd30 5725static void blockForKeys(redisClient *c, robj **keys, int numkeys, time_t timeout) {
4409877e 5726 dictEntry *de;
5727 list *l;
b177fd30 5728 int j;
4409877e 5729
b177fd30 5730 c->blockingkeys = zmalloc(sizeof(robj*)*numkeys);
5731 c->blockingkeysnum = numkeys;
4409877e 5732 c->blockingto = timeout;
b177fd30 5733 for (j = 0; j < numkeys; j++) {
5734 /* Add the key in the client structure, to map clients -> keys */
5735 c->blockingkeys[j] = keys[j];
5736 incrRefCount(keys[j]);
4409877e 5737
b177fd30 5738 /* And in the other "side", to map keys -> clients */
5739 de = dictFind(c->db->blockingkeys,keys[j]);
5740 if (de == NULL) {
5741 int retval;
5742
5743 /* For every key we take a list of clients blocked for it */
5744 l = listCreate();
5745 retval = dictAdd(c->db->blockingkeys,keys[j],l);
5746 incrRefCount(keys[j]);
5747 assert(retval == DICT_OK);
5748 } else {
5749 l = dictGetEntryVal(de);
5750 }
5751 listAddNodeTail(l,c);
4409877e 5752 }
b177fd30 5753 /* Mark the client as a blocked client */
4409877e 5754 c->flags |= REDIS_BLOCKED;
5755 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
f86a74e9 5756 server.blockedclients++;
4409877e 5757}
5758
5759/* Unblock a client that's waiting in a blocking operation such as BLPOP */
5760static void unblockClient(redisClient *c) {
5761 dictEntry *de;
5762 list *l;
b177fd30 5763 int j;
4409877e 5764
b177fd30 5765 assert(c->blockingkeys != NULL);
5766 /* The client may wait for multiple keys, so unblock it for every key. */
5767 for (j = 0; j < c->blockingkeysnum; j++) {
5768 /* Remove this client from the list of clients waiting for this key. */
5769 de = dictFind(c->db->blockingkeys,c->blockingkeys[j]);
5770 assert(de != NULL);
5771 l = dictGetEntryVal(de);
5772 listDelNode(l,listSearchKey(l,c));
5773 /* If the list is empty we need to remove it to avoid wasting memory */
5774 if (listLength(l) == 0)
5775 dictDelete(c->db->blockingkeys,c->blockingkeys[j]);
5776 decrRefCount(c->blockingkeys[j]);
5777 }
5778 /* Cleanup the client structure */
5779 zfree(c->blockingkeys);
5780 c->blockingkeys = NULL;
4409877e 5781 c->flags &= (~REDIS_BLOCKED);
f86a74e9 5782 server.blockedclients--;
4409877e 5783 /* Ok now we are ready to get read events from socket, note that we
5784 * can't trap errors here as it's possible that unblockClients() is
5785 * called from freeClient() itself, and the only thing we can do
5786 * if we failed to register the READABLE event is to kill the client.
5787 * Still the following function should never fail in the real world as
5788 * we are sure the file descriptor is sane, and we exit on out of mem. */
5789 aeCreateFileEvent(server.el, c->fd, AE_READABLE, readQueryFromClient, c);
5790 /* As a final step we want to process data if there is some command waiting
5791 * in the input buffer. Note that this is safe even if unblockClient()
5792 * gets called from freeClient() because freeClient() will be smart
5793 * enough to call this function *after* c->querybuf was set to NULL. */
5794 if (c->querybuf && sdslen(c->querybuf) > 0) processInputBuffer(c);
5795}
5796
5797/* This should be called from any function PUSHing into lists.
5798 * 'c' is the "pushing client", 'key' is the key it is pushing data against,
5799 * 'ele' is the element pushed.
5800 *
5801 * If the function returns 0 there was no client waiting for a list push
5802 * against this key.
5803 *
5804 * If the function returns 1 there was a client waiting for a list push
5805 * against this key, the element was passed to this client thus it's not
5806 * needed to actually add it to the list and the caller should return asap. */
5807static int handleClientsWaitingListPush(redisClient *c, robj *key, robj *ele) {
5808 struct dictEntry *de;
5809 redisClient *receiver;
5810 list *l;
5811 listNode *ln;
5812
5813 de = dictFind(c->db->blockingkeys,key);
5814 if (de == NULL) return 0;
5815 l = dictGetEntryVal(de);
5816 ln = listFirst(l);
5817 assert(ln != NULL);
5818 receiver = ln->value;
4409877e 5819
b177fd30 5820 addReplySds(receiver,sdsnew("*2\r\n"));
5821 addReplyBulkLen(receiver,key);
5822 addReply(receiver,key);
5823 addReply(receiver,shared.crlf);
4409877e 5824 addReplyBulkLen(receiver,ele);
5825 addReply(receiver,ele);
5826 addReply(receiver,shared.crlf);
5827 unblockClient(receiver);
5828 return 1;
5829}
5830
5831/* Blocking RPOP/LPOP */
5832static void blockingPopGenericCommand(redisClient *c, int where) {
5833 robj *o;
5834 time_t timeout;
b177fd30 5835 int j;
4409877e 5836
b177fd30 5837 for (j = 1; j < c->argc-1; j++) {
5838 o = lookupKeyWrite(c->db,c->argv[j]);
5839 if (o != NULL) {
5840 if (o->type != REDIS_LIST) {
5841 addReply(c,shared.wrongtypeerr);
4409877e 5842 return;
b177fd30 5843 } else {
5844 list *list = o->ptr;
5845 if (listLength(list) != 0) {
5846 /* If the list contains elements fall back to the usual
5847 * non-blocking POP operation */
5848 robj *argv[2], **orig_argv;
5849 int orig_argc;
5850
5851 /* We need to alter the command arguments before to call
5852 * popGenericCommand() as the command takes a single key. */
5853 orig_argv = c->argv;
5854 orig_argc = c->argc;
5855 argv[1] = c->argv[j];
5856 c->argv = argv;
5857 c->argc = 2;
5858
5859 /* Also the return value is different, we need to output
5860 * the multi bulk reply header and the key name. The
5861 * "real" command will add the last element (the value)
5862 * for us. If this souds like an hack to you it's just
5863 * because it is... */
5864 addReplySds(c,sdsnew("*2\r\n"));
5865 addReplyBulkLen(c,argv[1]);
5866 addReply(c,argv[1]);
5867 addReply(c,shared.crlf);
5868 popGenericCommand(c,where);
5869
5870 /* Fix the client structure with the original stuff */
5871 c->argv = orig_argv;
5872 c->argc = orig_argc;
5873 return;
5874 }
4409877e 5875 }
5876 }
5877 }
5878 /* If the list is empty or the key does not exists we must block */
b177fd30 5879 timeout = strtol(c->argv[c->argc-1]->ptr,NULL,10);
4409877e 5880 if (timeout > 0) timeout += time(NULL);
b177fd30 5881 blockForKeys(c,c->argv+1,c->argc-2,timeout);
4409877e 5882}
5883
5884static void blpopCommand(redisClient *c) {
5885 blockingPopGenericCommand(c,REDIS_HEAD);
5886}
5887
5888static void brpopCommand(redisClient *c) {
5889 blockingPopGenericCommand(c,REDIS_TAIL);
5890}
5891
ed9b544e 5892/* =============================== Replication ============================= */
5893
a4d1ba9a 5894static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5895 ssize_t nwritten, ret = size;
5896 time_t start = time(NULL);
5897
5898 timeout++;
5899 while(size) {
5900 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
5901 nwritten = write(fd,ptr,size);
5902 if (nwritten == -1) return -1;
5903 ptr += nwritten;
5904 size -= nwritten;
5905 }
5906 if ((time(NULL)-start) > timeout) {
5907 errno = ETIMEDOUT;
5908 return -1;
5909 }
5910 }
5911 return ret;
5912}
5913
a4d1ba9a 5914static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
ed9b544e 5915 ssize_t nread, totread = 0;
5916 time_t start = time(NULL);
5917
5918 timeout++;
5919 while(size) {
5920 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
5921 nread = read(fd,ptr,size);
5922 if (nread == -1) return -1;
5923 ptr += nread;
5924 size -= nread;
5925 totread += nread;
5926 }
5927 if ((time(NULL)-start) > timeout) {
5928 errno = ETIMEDOUT;
5929 return -1;
5930 }
5931 }
5932 return totread;
5933}
5934
/* Read a line (terminated by '\n') from 'fd' into 'ptr', one byte at a time
 * through syncRead(). 'size' is the capacity of the buffer including the
 * nul terminator: at most size-1 characters are stored, and the buffer is
 * always nul terminated when size > 0.
 *
 * The '\n' is not stored; a '\r' right before it is overwritten with a nul
 * byte (but still counted in the return value, as before).
 *
 * Returns the number of characters stored, or -1 if syncRead() failed. If
 * the buffer fills up before a newline is seen, reading stops and the
 * number of characters stored so far is returned. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    /* No room for even the terminator: nothing can be stored */
    if (size <= 0) return 0;
    *ptr = '\0';

    size--; /* reserve one byte for the nul terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        /* Account for the byte just stored; without this the loop never
         * terminates on long lines and writes past the end of the buffer. */
        size--;
    }
    return nread;
}
5955
5956static void syncCommand(redisClient *c) {
40d224a9 5957 /* ignore SYNC if aleady slave or in monitor mode */
5958 if (c->flags & REDIS_SLAVE) return;
5959
5960 /* SYNC can't be issued when the server has pending data to send to
5961 * the client about already issued commands. We need a fresh reply
5962 * buffer registering the differences between the BGSAVE and the current
5963 * dataset, so that we can copy to other slaves if needed. */
5964 if (listLength(c->reply) != 0) {
5965 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
5966 return;
5967 }
5968
5969 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
5970 /* Here we need to check if there is a background saving operation
5971 * in progress, or if it is required to start one */
9d65a1bb 5972 if (server.bgsavechildpid != -1) {
40d224a9 5973 /* Ok a background save is in progress. Let's check if it is a good
5974 * one for replication, i.e. if there is another slave that is
5975 * registering differences since the server forked to save */
5976 redisClient *slave;
5977 listNode *ln;
5978
6208b3a7 5979 listRewind(server.slaves);
5980 while((ln = listYield(server.slaves))) {
40d224a9 5981 slave = ln->value;
5982 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
40d224a9 5983 }
5984 if (ln) {
5985 /* Perfect, the server is already registering differences for
5986 * another slave. Set the right state, and copy the buffer. */
5987 listRelease(c->reply);
5988 c->reply = listDup(slave->reply);
40d224a9 5989 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5990 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
5991 } else {
5992 /* No way, we need to wait for the next BGSAVE in order to
5993 * register differences */
5994 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5995 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5996 }
5997 } else {
5998 /* Ok we don't have a BGSAVE in progress, let's start one */
5999 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
6000 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
6001 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
6002 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
6003 return;
6004 }
6005 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
6006 }
6208b3a7 6007 c->repldbfd = -1;
40d224a9 6008 c->flags |= REDIS_SLAVE;
6009 c->slaveseldb = 0;
6b47e12e 6010 listAddNodeTail(server.slaves,c);
40d224a9 6011 return;
6012}
6013
6208b3a7 6014static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
6015 redisClient *slave = privdata;
6016 REDIS_NOTUSED(el);
6017 REDIS_NOTUSED(mask);
6018 char buf[REDIS_IOBUF_LEN];
6019 ssize_t nwritten, buflen;
6020
6021 if (slave->repldboff == 0) {
6022 /* Write the bulk write count before to transfer the DB. In theory here
6023 * we don't know how much room there is in the output buffer of the
6024 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
6025 * operations) will never be smaller than the few bytes we need. */
6026 sds bulkcount;
6027
6028 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
6029 slave->repldbsize);
6030 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
6031 {
6032 sdsfree(bulkcount);
6033 freeClient(slave);
6034 return;
6035 }
6036 sdsfree(bulkcount);
6037 }
6038 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
6039 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
6040 if (buflen <= 0) {
6041 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
6042 (buflen == 0) ? "premature EOF" : strerror(errno));
6043 freeClient(slave);
6044 return;
6045 }
6046 if ((nwritten = write(fd,buf,buflen)) == -1) {
f870935d 6047 redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s",
6208b3a7 6048 strerror(errno));
6049 freeClient(slave);
6050 return;
6051 }
6052 slave->repldboff += nwritten;
6053 if (slave->repldboff == slave->repldbsize) {
6054 close(slave->repldbfd);
6055 slave->repldbfd = -1;
6056 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
6057 slave->replstate = REDIS_REPL_ONLINE;
6058 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
266373b2 6059 sendReplyToClient, slave) == AE_ERR) {
6208b3a7 6060 freeClient(slave);
6061 return;
6062 }
6063 addReplySds(slave,sdsempty());
6064 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
6065 }
6066}
ed9b544e 6067
a3b21203 6068/* This function is called at the end of every backgrond saving.
6069 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
6070 * otherwise REDIS_ERR is passed to the function.
6071 *
6072 * The goal of this function is to handle slaves waiting for a successful
6073 * background saving in order to perform non-blocking synchronization. */
6074static void updateSlavesWaitingBgsave(int bgsaveerr) {
6208b3a7 6075 listNode *ln;
6076 int startbgsave = 0;
ed9b544e 6077
6208b3a7 6078 listRewind(server.slaves);
6079 while((ln = listYield(server.slaves))) {
6080 redisClient *slave = ln->value;
ed9b544e 6081
6208b3a7 6082 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
6083 startbgsave = 1;
6084 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
6085 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
dde65f3f 6086 struct redis_stat buf;
6208b3a7 6087
6088 if (bgsaveerr != REDIS_OK) {
6089 freeClient(slave);
6090 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
6091 continue;
6092 }
6093 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
dde65f3f 6094 redis_fstat(slave->repldbfd,&buf) == -1) {
6208b3a7 6095 freeClient(slave);
6096 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
6097 continue;
6098 }
6099 slave->repldboff = 0;
6100 slave->repldbsize = buf.st_size;
6101 slave->replstate = REDIS_REPL_SEND_BULK;
6102 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
266373b2 6103 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
6208b3a7 6104 freeClient(slave);
6105 continue;
6106 }
6107 }
ed9b544e 6108 }
6208b3a7 6109 if (startbgsave) {
6110 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
6111 listRewind(server.slaves);
6112 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
6113 while((ln = listYield(server.slaves))) {
6114 redisClient *slave = ln->value;
ed9b544e 6115
6208b3a7 6116 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
6117 freeClient(slave);
6118 }
6119 }
6120 }
ed9b544e 6121}
6122
6123static int syncWithMaster(void) {
d0ccebcf 6124 char buf[1024], tmpfile[256], authcmd[1024];
ed9b544e 6125 int dumpsize;
6126 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
6127 int dfd;
6128
6129 if (fd == -1) {
6130 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
6131 strerror(errno));
6132 return REDIS_ERR;
6133 }
d0ccebcf 6134
6135 /* AUTH with the master if required. */
6136 if(server.masterauth) {
6137 snprintf(authcmd, 1024, "AUTH %s\r\n", server.masterauth);
6138 if (syncWrite(fd, authcmd, strlen(server.masterauth)+7, 5) == -1) {
6139 close(fd);
6140 redisLog(REDIS_WARNING,"Unable to AUTH to MASTER: %s",
6141 strerror(errno));
6142 return REDIS_ERR;
6143 }
6144 /* Read the AUTH result. */
6145 if (syncReadLine(fd,buf,1024,3600) == -1) {
6146 close(fd);
6147 redisLog(REDIS_WARNING,"I/O error reading auth result from MASTER: %s",
6148 strerror(errno));
6149 return REDIS_ERR;
6150 }
6151 if (buf[0] != '+') {
6152 close(fd);
6153 redisLog(REDIS_WARNING,"Cannot AUTH to MASTER, is the masterauth password correct?");
6154 return REDIS_ERR;
6155 }
6156 }
6157
ed9b544e 6158 /* Issue the SYNC command */
6159 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
6160 close(fd);
6161 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
6162 strerror(errno));
6163 return REDIS_ERR;
6164 }
6165 /* Read the bulk write count */
8c4d91fc 6166 if (syncReadLine(fd,buf,1024,3600) == -1) {
ed9b544e 6167 close(fd);
6168 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
6169 strerror(errno));
6170 return REDIS_ERR;
6171 }
4aa701c1 6172 if (buf[0] != '$') {
6173 close(fd);
6174 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
6175 return REDIS_ERR;
6176 }
c937aa89 6177 dumpsize = atoi(buf+1);
ed9b544e 6178 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
6179 /* Read the bulk write data on a temp file */
6180 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
6181 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
6182 if (dfd == -1) {
6183 close(fd);
6184 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
6185 return REDIS_ERR;
6186 }
6187 while(dumpsize) {
6188 int nread, nwritten;
6189
6190 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
6191 if (nread == -1) {
6192 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
6193 strerror(errno));
6194 close(fd);
6195 close(dfd);
6196 return REDIS_ERR;
6197 }
6198 nwritten = write(dfd,buf,nread);
6199 if (nwritten == -1) {
6200 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
6201 close(fd);
6202 close(dfd);
6203 return REDIS_ERR;
6204 }
6205 dumpsize -= nread;
6206 }
6207 close(dfd);
6208 if (rename(tmpfile,server.dbfilename) == -1) {
6209 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
6210 unlink(tmpfile);
6211 close(fd);
6212 return REDIS_ERR;
6213 }
6214 emptyDb();
f78fd11b 6215 if (rdbLoad(server.dbfilename) != REDIS_OK) {
ed9b544e 6216 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
6217 close(fd);
6218 return REDIS_ERR;
6219 }
6220 server.master = createClient(fd);
6221 server.master->flags |= REDIS_MASTER;
179b3952 6222 server.master->authenticated = 1;
ed9b544e 6223 server.replstate = REDIS_REPL_CONNECTED;
6224 return REDIS_OK;
6225}
6226
321b0e13 6227static void slaveofCommand(redisClient *c) {
6228 if (!strcasecmp(c->argv[1]->ptr,"no") &&
6229 !strcasecmp(c->argv[2]->ptr,"one")) {
6230 if (server.masterhost) {
6231 sdsfree(server.masterhost);
6232 server.masterhost = NULL;
6233 if (server.master) freeClient(server.master);
6234 server.replstate = REDIS_REPL_NONE;
6235 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
6236 }
6237 } else {
6238 sdsfree(server.masterhost);
6239 server.masterhost = sdsdup(c->argv[1]->ptr);
6240 server.masterport = atoi(c->argv[2]->ptr);
6241 if (server.master) freeClient(server.master);
6242 server.replstate = REDIS_REPL_CONNECT;
6243 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
6244 server.masterhost, server.masterport);
6245 }
6246 addReply(c,shared.ok);
6247}
6248
3fd78bcd 6249/* ============================ Maxmemory directive ======================== */
6250
f870935d 6251/* Free one object form the pre-allocated objects free list. This is useful
6252 * under low mem conditions as by default we take 1 million free objects
6253 * allocated. */
6254static void freeOneObjectFromFreelist(void) {
6255 robj *o;
6256
6257 listNode *head = listFirst(server.objfreelist);
6258 o = listNodeValue(head);
6259 listDelNode(server.objfreelist,head);
6260 zfree(o);
6261}
6262
3fd78bcd 6263/* This function gets called when 'maxmemory' is set on the config file to limit
6264 * the max memory used by the server, and we are out of memory.
6265 * This function will try to, in order:
6266 *
6267 * - Free objects from the free list
6268 * - Try to remove keys with an EXPIRE set
6269 *
6270 * It is not possible to free enough memory to reach used-memory < maxmemory
6271 * the server will start refusing commands that will enlarge even more the
6272 * memory usage.
6273 */
6274static void freeMemoryIfNeeded(void) {
6275 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
6276 if (listLength(server.objfreelist)) {
f870935d 6277 freeOneObjectFromFreelist();
3fd78bcd 6278 } else {
6279 int j, k, freed = 0;
6280
6281 for (j = 0; j < server.dbnum; j++) {
6282 int minttl = -1;
6283 robj *minkey = NULL;
6284 struct dictEntry *de;
6285
6286 if (dictSize(server.db[j].expires)) {
6287 freed = 1;
6288 /* From a sample of three keys drop the one nearest to
6289 * the natural expire */
6290 for (k = 0; k < 3; k++) {
6291 time_t t;
6292
6293 de = dictGetRandomKey(server.db[j].expires);
6294 t = (time_t) dictGetEntryVal(de);
6295 if (minttl == -1 || t < minttl) {
6296 minkey = dictGetEntryKey(de);
6297 minttl = t;
6298 }
6299 }
6300 deleteKey(server.db+j,minkey);
6301 }
6302 }
6303 if (!freed) return; /* nothing to free... */
6304 }
6305 }
6306}
6307
f80dff62 6308/* ============================== Append Only file ========================== */
6309
6310static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
6311 sds buf = sdsempty();
6312 int j;
6313 ssize_t nwritten;
6314 time_t now;
6315 robj *tmpargv[3];
6316
6317 /* The DB this command was targetting is not the same as the last command
6318 * we appendend. To issue a SELECT command is needed. */
6319 if (dictid != server.appendseldb) {
6320 char seldb[64];
6321
6322 snprintf(seldb,sizeof(seldb),"%d",dictid);
682ac724 6323 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
83c6a618 6324 (unsigned long)strlen(seldb),seldb);
f80dff62 6325 server.appendseldb = dictid;
6326 }
6327
6328 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
6329 * EXPIREs into EXPIREATs calls */
6330 if (cmd->proc == expireCommand) {
6331 long when;
6332
6333 tmpargv[0] = createStringObject("EXPIREAT",8);
6334 tmpargv[1] = argv[1];
6335 incrRefCount(argv[1]);
6336 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
6337 tmpargv[2] = createObject(REDIS_STRING,
6338 sdscatprintf(sdsempty(),"%ld",when));
6339 argv = tmpargv;
6340 }
6341
6342 /* Append the actual command */
6343 buf = sdscatprintf(buf,"*%d\r\n",argc);
6344 for (j = 0; j < argc; j++) {
6345 robj *o = argv[j];
6346
9d65a1bb 6347 o = getDecodedObject(o);
83c6a618 6348 buf = sdscatprintf(buf,"$%lu\r\n",(unsigned long)sdslen(o->ptr));
f80dff62 6349 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
6350 buf = sdscatlen(buf,"\r\n",2);
9d65a1bb 6351 decrRefCount(o);
f80dff62 6352 }
6353
6354 /* Free the objects from the modified argv for EXPIREAT */
6355 if (cmd->proc == expireCommand) {
6356 for (j = 0; j < 3; j++)
6357 decrRefCount(argv[j]);
6358 }
6359
6360 /* We want to perform a single write. This should be guaranteed atomic
6361 * at least if the filesystem we are writing is a real physical one.
6362 * While this will save us against the server being killed I don't think
6363 * there is much to do about the whole server stopping for power problems
6364 * or alike */
6365 nwritten = write(server.appendfd,buf,sdslen(buf));
6366 if (nwritten != (signed)sdslen(buf)) {
6367 /* Ooops, we are in troubles. The best thing to do for now is
6368 * to simply exit instead to give the illusion that everything is
6369 * working as expected. */
6370 if (nwritten == -1) {
6371 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
6372 } else {
6373 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
6374 }
6375 exit(1);
6376 }
85a83172 6377 /* If a background append only file rewriting is in progress we want to
6378 * accumulate the differences between the child DB and the current one
6379 * in a buffer, so that when the child process will do its work we
6380 * can append the differences to the new append only file. */
6381 if (server.bgrewritechildpid != -1)
6382 server.bgrewritebuf = sdscatlen(server.bgrewritebuf,buf,sdslen(buf));
6383
6384 sdsfree(buf);
f80dff62 6385 now = time(NULL);
6386 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
6387 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
6388 now-server.lastfsync > 1))
6389 {
6390 fsync(server.appendfd); /* Let's try to get this data on the disk */
6391 server.lastfsync = now;
6392 }
6393}
6394
6395/* In Redis commands are always executed in the context of a client, so in
6396 * order to load the append only file we need to create a fake client. */
6397static struct redisClient *createFakeClient(void) {
6398 struct redisClient *c = zmalloc(sizeof(*c));
6399
6400 selectDb(c,0);
6401 c->fd = -1;
6402 c->querybuf = sdsempty();
6403 c->argc = 0;
6404 c->argv = NULL;
6405 c->flags = 0;
9387d17d 6406 /* We set the fake client as a slave waiting for the synchronization
6407 * so that Redis will not try to send replies to this client. */
6408 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
f80dff62 6409 c->reply = listCreate();
6410 listSetFreeMethod(c->reply,decrRefCount);
6411 listSetDupMethod(c->reply,dupClientReplyValue);
6412 return c;
6413}
6414
6415static void freeFakeClient(struct redisClient *c) {
6416 sdsfree(c->querybuf);
6417 listRelease(c->reply);
6418 zfree(c);
6419}
6420
6421/* Replay the append log file. On error REDIS_OK is returned. On non fatal
6422 * error (the append only file is zero-length) REDIS_ERR is returned. On
6423 * fatal error an error message is logged and the program exists. */
6424int loadAppendOnlyFile(char *filename) {
6425 struct redisClient *fakeClient;
6426 FILE *fp = fopen(filename,"r");
6427 struct redis_stat sb;
b492cf00 6428 unsigned long long loadedkeys = 0;
f80dff62 6429
6430 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
6431 return REDIS_ERR;
6432
6433 if (fp == NULL) {
6434 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
6435 exit(1);
6436 }
6437
6438 fakeClient = createFakeClient();
6439 while(1) {
6440 int argc, j;
6441 unsigned long len;
6442 robj **argv;
6443 char buf[128];
6444 sds argsds;
6445 struct redisCommand *cmd;
6446
6447 if (fgets(buf,sizeof(buf),fp) == NULL) {
6448 if (feof(fp))
6449 break;
6450 else
6451 goto readerr;
6452 }
6453 if (buf[0] != '*') goto fmterr;
6454 argc = atoi(buf+1);
6455 argv = zmalloc(sizeof(robj*)*argc);
6456 for (j = 0; j < argc; j++) {
6457 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
6458 if (buf[0] != '$') goto fmterr;
6459 len = strtol(buf+1,NULL,10);
6460 argsds = sdsnewlen(NULL,len);
0f151ef1 6461 if (len && fread(argsds,len,1,fp) == 0) goto fmterr;
f80dff62 6462 argv[j] = createObject(REDIS_STRING,argsds);
6463 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
6464 }
6465
6466 /* Command lookup */
6467 cmd = lookupCommand(argv[0]->ptr);
6468 if (!cmd) {
6469 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
6470 exit(1);
6471 }
6472 /* Try object sharing and encoding */
6473 if (server.shareobjects) {
6474 int j;
6475 for(j = 1; j < argc; j++)
6476 argv[j] = tryObjectSharing(argv[j]);
6477 }
6478 if (cmd->flags & REDIS_CMD_BULK)
6479 tryObjectEncoding(argv[argc-1]);
6480 /* Run the command in the context of a fake client */
6481 fakeClient->argc = argc;
6482 fakeClient->argv = argv;
6483 cmd->proc(fakeClient);
6484 /* Discard the reply objects list from the fake client */
6485 while(listLength(fakeClient->reply))
6486 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
6487 /* Clean up, ready for the next command */
6488 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
6489 zfree(argv);
b492cf00 6490 /* Handle swapping while loading big datasets when VM is on */
6491 loadedkeys++;
6492 if (server.vm_enabled && (loadedkeys % 5000) == 0) {
6493 while (zmalloc_used_memory() > server.vm_max_memory) {
6494 if (vmSwapOneObject() == REDIS_ERR) break;
6495 }
6496 }
f80dff62 6497 }
6498 fclose(fp);
6499 freeFakeClient(fakeClient);
6500 return REDIS_OK;
6501
6502readerr:
6503 if (feof(fp)) {
6504 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
6505 } else {
6506 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
6507 }
6508 exit(1);
6509fmterr:
6510 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
6511 exit(1);
6512}
6513
9d65a1bb 6514/* Write an object into a file in the bulk format $<count>\r\n<payload>\r\n */
6515static int fwriteBulk(FILE *fp, robj *obj) {
6516 char buf[128];
6517 obj = getDecodedObject(obj);
6518 snprintf(buf,sizeof(buf),"$%ld\r\n",(long)sdslen(obj->ptr));
6519 if (fwrite(buf,strlen(buf),1,fp) == 0) goto err;
e96e4fbf 6520 if (sdslen(obj->ptr) && fwrite(obj->ptr,sdslen(obj->ptr),1,fp) == 0)
6521 goto err;
9d65a1bb 6522 if (fwrite("\r\n",2,1,fp) == 0) goto err;
6523 decrRefCount(obj);
6524 return 1;
6525err:
6526 decrRefCount(obj);
6527 return 0;
6528}
6529
/* Write a double value in bulk format $<count>\r\n<payload>\r\n
 * The payload is the "%.17g" representation of 'd'.
 * Returns 1 on success, 0 on write error. */
static int fwriteBulkDouble(FILE *fp, double d) {
    char payload[128], header[128];
    size_t payloadlen;

    snprintf(payload,sizeof(payload),"%.17g\r\n",d);
    payloadlen = strlen(payload);
    /* The advertised count does not include the trailing CRLF. */
    snprintf(header,sizeof(header),"$%lu\r\n",(unsigned long)payloadlen-2);
    if (fwrite(header,strlen(header),1,fp) == 0) return 0;
    return fwrite(payload,payloadlen,1,fp) != 0;
}
6540
6541/* Write a long value in bulk format $<count>\r\n<payload>\r\n */
6542static int fwriteBulkLong(FILE *fp, long l) {
6543 char buf[128], lbuf[128];
6544
6545 snprintf(lbuf,sizeof(lbuf),"%ld\r\n",l);
6546 snprintf(buf,sizeof(buf),"$%lu\r\n",(unsigned long)strlen(lbuf)-2);
6547 if (fwrite(buf,strlen(buf),1,fp) == 0) return 0;
6548 if (fwrite(lbuf,strlen(lbuf),1,fp) == 0) return 0;
6549 return 1;
6550}
6551
6552/* Write a sequence of commands able to fully rebuild the dataset into
6553 * "filename". Used both by REWRITEAOF and BGREWRITEAOF. */
6554static int rewriteAppendOnlyFile(char *filename) {
6555 dictIterator *di = NULL;
6556 dictEntry *de;
6557 FILE *fp;
6558 char tmpfile[256];
6559 int j;
6560 time_t now = time(NULL);
6561
6562 /* Note that we have to use a different temp name here compared to the
6563 * one used by rewriteAppendOnlyFileBackground() function. */
6564 snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());
6565 fp = fopen(tmpfile,"w");
6566 if (!fp) {
6567 redisLog(REDIS_WARNING, "Failed rewriting the append only file: %s", strerror(errno));
6568 return REDIS_ERR;
6569 }
6570 for (j = 0; j < server.dbnum; j++) {
6571 char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";
6572 redisDb *db = server.db+j;
6573 dict *d = db->dict;
6574 if (dictSize(d) == 0) continue;
6575 di = dictGetIterator(d);
6576 if (!di) {
6577 fclose(fp);
6578 return REDIS_ERR;
6579 }
6580
6581 /* SELECT the new DB */
6582 if (fwrite(selectcmd,sizeof(selectcmd)-1,1,fp) == 0) goto werr;
85a83172 6583 if (fwriteBulkLong(fp,j) == 0) goto werr;
9d65a1bb 6584
6585 /* Iterate this DB writing every entry */
6586 while((de = dictNext(di)) != NULL) {
e7546c63 6587 robj *key, *o;
6588 time_t expiretime;
6589 int swapped;
6590
6591 key = dictGetEntryKey(de);
38823f08 6592 if (!server.vm_enabled || key->storage == REDIS_VM_MEMORY) {
e7546c63 6593 o = dictGetEntryVal(de);
6594 swapped = 0;
6595 } else {
6596 o = vmPreviewObject(key);
6597 key = dupStringObject(key);
6598 swapped = 1;
6599 }
6600 expiretime = getExpire(db,key);
9d65a1bb 6601
6602 /* Save the key and associated value */
9d65a1bb 6603 if (o->type == REDIS_STRING) {
6604 /* Emit a SET command */
6605 char cmd[]="*3\r\n$3\r\nSET\r\n";
6606 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6607 /* Key and value */
6608 if (fwriteBulk(fp,key) == 0) goto werr;
6609 if (fwriteBulk(fp,o) == 0) goto werr;
6610 } else if (o->type == REDIS_LIST) {
6611 /* Emit the RPUSHes needed to rebuild the list */
6612 list *list = o->ptr;
6613 listNode *ln;
6614
6615 listRewind(list);
6616 while((ln = listYield(list))) {
6617 char cmd[]="*3\r\n$5\r\nRPUSH\r\n";
6618 robj *eleobj = listNodeValue(ln);
6619
6620 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6621 if (fwriteBulk(fp,key) == 0) goto werr;
6622 if (fwriteBulk(fp,eleobj) == 0) goto werr;
6623 }
6624 } else if (o->type == REDIS_SET) {
6625 /* Emit the SADDs needed to rebuild the set */
6626 dict *set = o->ptr;
6627 dictIterator *di = dictGetIterator(set);
6628 dictEntry *de;
6629
6630 while((de = dictNext(di)) != NULL) {
6631 char cmd[]="*3\r\n$4\r\nSADD\r\n";
6632 robj *eleobj = dictGetEntryKey(de);
6633
6634 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6635 if (fwriteBulk(fp,key) == 0) goto werr;
6636 if (fwriteBulk(fp,eleobj) == 0) goto werr;
6637 }
6638 dictReleaseIterator(di);
6639 } else if (o->type == REDIS_ZSET) {
6640 /* Emit the ZADDs needed to rebuild the sorted set */
6641 zset *zs = o->ptr;
6642 dictIterator *di = dictGetIterator(zs->dict);
6643 dictEntry *de;
6644
6645 while((de = dictNext(di)) != NULL) {
6646 char cmd[]="*4\r\n$4\r\nZADD\r\n";
6647 robj *eleobj = dictGetEntryKey(de);
6648 double *score = dictGetEntryVal(de);
6649
6650 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6651 if (fwriteBulk(fp,key) == 0) goto werr;
6652 if (fwriteBulkDouble(fp,*score) == 0) goto werr;
6653 if (fwriteBulk(fp,eleobj) == 0) goto werr;
6654 }
6655 dictReleaseIterator(di);
6656 } else {
dfc5e96c 6657 redisAssert(0 != 0);
9d65a1bb 6658 }
6659 /* Save the expire time */
6660 if (expiretime != -1) {
e96e4fbf 6661 char cmd[]="*3\r\n$8\r\nEXPIREAT\r\n";
9d65a1bb 6662 /* If this key is already expired skip it */
6663 if (expiretime < now) continue;
6664 if (fwrite(cmd,sizeof(cmd)-1,1,fp) == 0) goto werr;
6665 if (fwriteBulk(fp,key) == 0) goto werr;
6666 if (fwriteBulkLong(fp,expiretime) == 0) goto werr;
6667 }
e7546c63 6668 /* We created a few temp objects if the key->value pair
6669 * was about a swapped out object. Free both. */
6670 if (swapped) {
6671 decrRefCount(key);
6672 decrRefCount(o);
6673 }
9d65a1bb 6674 }
6675 dictReleaseIterator(di);
6676 }
6677
6678 /* Make sure data will not remain on the OS's output buffers */
6679 fflush(fp);
6680 fsync(fileno(fp));
6681 fclose(fp);
6682
6683 /* Use RENAME to make sure the DB file is changed atomically only
6684 * if the generate DB file is ok. */
6685 if (rename(tmpfile,filename) == -1) {
6686 redisLog(REDIS_WARNING,"Error moving temp append only file on the final destination: %s", strerror(errno));
6687 unlink(tmpfile);
6688 return REDIS_ERR;
6689 }
6690 redisLog(REDIS_NOTICE,"SYNC append only file rewrite performed");
6691 return REDIS_OK;
6692
6693werr:
6694 fclose(fp);
6695 unlink(tmpfile);
e96e4fbf 6696 redisLog(REDIS_WARNING,"Write error writing append only file on disk: %s", strerror(errno));
9d65a1bb 6697 if (di) dictReleaseIterator(di);
6698 return REDIS_ERR;
6699}
6700
6701/* This is how rewriting of the append only file in background works:
6702 *
6703 * 1) The user calls BGREWRITEAOF
6704 * 2) Redis calls this function, that forks():
6705 * 2a) the child rewrite the append only file in a temp file.
6706 * 2b) the parent accumulates differences in server.bgrewritebuf.
6707 * 3) When the child finished '2a' exists.
6708 * 4) The parent will trap the exit code, if it's OK, will append the
6709 * data accumulated into server.bgrewritebuf into the temp file, and
6710 * finally will rename(2) the temp file in the actual file name.
6711 * The the new file is reopened as the new append only file. Profit!
6712 */
6713static int rewriteAppendOnlyFileBackground(void) {
6714 pid_t childpid;
6715
6716 if (server.bgrewritechildpid != -1) return REDIS_ERR;
6717 if ((childpid = fork()) == 0) {
6718 /* Child */
6719 char tmpfile[256];
6720 close(server.fd);
6721
6722 snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());
6723 if (rewriteAppendOnlyFile(tmpfile) == REDIS_OK) {
6724 exit(0);
6725 } else {
6726 exit(1);
6727 }
6728 } else {
6729 /* Parent */
6730 if (childpid == -1) {
6731 redisLog(REDIS_WARNING,
6732 "Can't rewrite append only file in background: fork: %s",
6733 strerror(errno));
6734 return REDIS_ERR;
6735 }
6736 redisLog(REDIS_NOTICE,
6737 "Background append only file rewriting started by pid %d",childpid);
6738 server.bgrewritechildpid = childpid;
85a83172 6739 /* We set appendseldb to -1 in order to force the next call to the
6740 * feedAppendOnlyFile() to issue a SELECT command, so the differences
6741 * accumulated by the parent into server.bgrewritebuf will start
6742 * with a SELECT statement and it will be safe to merge. */
6743 server.appendseldb = -1;
9d65a1bb 6744 return REDIS_OK;
6745 }
6746 return REDIS_OK; /* unreached */
6747}
6748
6749static void bgrewriteaofCommand(redisClient *c) {
6750 if (server.bgrewritechildpid != -1) {
6751 addReplySds(c,sdsnew("-ERR background append only file rewriting already in progress\r\n"));
6752 return;
6753 }
6754 if (rewriteAppendOnlyFileBackground() == REDIS_OK) {
49b99ab4 6755 char *status = "+Background append only file rewriting started\r\n";
6756 addReplySds(c,sdsnew(status));
9d65a1bb 6757 } else {
6758 addReply(c,shared.err);
6759 }
6760}
6761
/* Remove the temp file "temp-rewriteaof-bg-<childpid>.aof" (the name used by
 * the background rewrite child) from the current directory. */
static void aofRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-rewriteaof-bg-%d.aof", (int) childpid);
    unlink(path);
}
6768
75680a3c 6769/* =============================== Virtual Memory =========================== */
6770static void vmInit(void) {
6771 off_t totsize;
6772
6773 server.vm_fp = fopen("/tmp/redisvm","w+b");
6774 if (server.vm_fp == NULL) {
6775 redisLog(REDIS_WARNING,"Impossible to open the swap file. Exiting.");
6776 exit(1);
6777 }
6778 server.vm_fd = fileno(server.vm_fp);
6779 server.vm_next_page = 0;
6780 server.vm_near_pages = 0;
7d98e08c 6781 server.vm_stats_used_pages = 0;
6782 server.vm_stats_swapped_objects = 0;
6783 server.vm_stats_swapouts = 0;
6784 server.vm_stats_swapins = 0;
75680a3c 6785 totsize = server.vm_pages*server.vm_page_size;
6786 redisLog(REDIS_NOTICE,"Allocating %lld bytes of swap file",totsize);
6787 if (ftruncate(server.vm_fd,totsize) == -1) {
6788 redisLog(REDIS_WARNING,"Can't ftruncate swap file: %s. Exiting.",
6789 strerror(errno));
6790 exit(1);
6791 } else {
6792 redisLog(REDIS_NOTICE,"Swap file allocated with success");
6793 }
7d30035d 6794 server.vm_bitmap = zmalloc((server.vm_pages+7)/8);
f870935d 6795 redisLog(REDIS_VERBOSE,"Allocated %lld bytes page table for %lld pages",
4ef8de8a 6796 (long long) (server.vm_pages+7)/8, server.vm_pages);
7d30035d 6797 memset(server.vm_bitmap,0,(server.vm_pages+7)/8);
75680a3c 6798 /* Try to remove the swap file, so the OS will really delete it from the
6799 * file system when Redis exists. */
6800 unlink("/tmp/redisvm");
6801}
6802
06224fec 6803/* Mark the page as used */
6804static void vmMarkPageUsed(off_t page) {
6805 off_t byte = page/8;
6806 int bit = page&7;
6807 server.vm_bitmap[byte] |= 1<<bit;
f870935d 6808 redisLog(REDIS_DEBUG,"Mark used: %lld (byte:%lld bit:%d)\n",
6809 (long long)page, (long long)byte, bit);
06224fec 6810}
6811
6812/* Mark N contiguous pages as used, with 'page' being the first. */
6813static void vmMarkPagesUsed(off_t page, off_t count) {
6814 off_t j;
6815
6816 for (j = 0; j < count; j++)
7d30035d 6817 vmMarkPageUsed(page+j);
7d98e08c 6818 server.vm_stats_used_pages += count;
06224fec 6819}
6820
6821/* Mark the page as free */
6822static void vmMarkPageFree(off_t page) {
6823 off_t byte = page/8;
6824 int bit = page&7;
6825 server.vm_bitmap[byte] &= ~(1<<bit);
6826}
6827
6828/* Mark N contiguous pages as free, with 'page' being the first. */
6829static void vmMarkPagesFree(off_t page, off_t count) {
6830 off_t j;
6831
6832 for (j = 0; j < count; j++)
7d30035d 6833 vmMarkPageFree(page+j);
7d98e08c 6834 server.vm_stats_used_pages -= count;
06224fec 6835}
6836
6837/* Test if the page is free */
6838static int vmFreePage(off_t page) {
6839 off_t byte = page/8;
6840 int bit = page&7;
7d30035d 6841 return (server.vm_bitmap[byte] & (1<<bit)) == 0;
06224fec 6842}
6843
6844/* Find N contiguous free pages storing the first page of the cluster in *first.
3a66edc7 6845 * Returns REDIS_OK if it was able to find N contiguous pages, otherwise
6846 * REDIS_ERR is returned.
06224fec 6847 *
6848 * This function uses a simple algorithm: we try to allocate
6849 * REDIS_VM_MAX_NEAR_PAGES sequentially, when we reach this limit we start
6850 * again from the start of the swap file searching for free spaces.
6851 *
6852 * If it looks pretty clear that there are no free pages near our offset
6853 * we try to find less populated places doing a forward jump of
6854 * REDIS_VM_MAX_RANDOM_JUMP, then we start scanning again a few pages
6855 * without hurry, and then we jump again and so forth...
6856 *
6857 * This function can be improved using a free list to avoid to guess
6858 * too much, since we could collect data about freed pages.
6859 *
6860 * note: I implemented this function just after watching an episode of
6861 * Battlestar Galactica, where the hybrid was continuing to say "JUMP!"
6862 */
6863static int vmFindContiguousPages(off_t *first, int n) {
6864 off_t base, offset = 0, since_jump = 0, numfree = 0;
6865
6866 if (server.vm_near_pages == REDIS_VM_MAX_NEAR_PAGES) {
6867 server.vm_near_pages = 0;
6868 server.vm_next_page = 0;
6869 }
6870 server.vm_near_pages++; /* Yet another try for pages near to the old ones */
6871 base = server.vm_next_page;
6872
6873 while(offset < server.vm_pages) {
6874 off_t this = base+offset;
6875
f870935d 6876 redisLog(REDIS_DEBUG, "THIS: %lld (%c)\n", (long long) this, vmFreePage(this) ? 'F' : 'X');
06224fec 6877 /* If we overflow, restart from page zero */
6878 if (this >= server.vm_pages) {
6879 this -= server.vm_pages;
6880 if (this == 0) {
6881 /* Just overflowed, what we found on tail is no longer
6882 * interesting, as it's no longer contiguous. */
6883 numfree = 0;
6884 }
6885 }
6886 if (vmFreePage(this)) {
6887 /* This is a free page */
6888 numfree++;
6889 /* Already got N free pages? Return to the caller, with success */
6890 if (numfree == n) {
7d30035d 6891 *first = this-(n-1);
6892 server.vm_next_page = this+1;
3a66edc7 6893 return REDIS_OK;
06224fec 6894 }
6895 } else {
6896 /* The current one is not a free page */
6897 numfree = 0;
6898 }
6899
6900 /* Fast-forward if the current page is not free and we already
6901 * searched enough near this place. */
6902 since_jump++;
6903 if (!numfree && since_jump >= REDIS_VM_MAX_RANDOM_JUMP/4) {
6904 offset += random() % REDIS_VM_MAX_RANDOM_JUMP;
6905 since_jump = 0;
6906 /* Note that even if we rewind after the jump, we are don't need
6907 * to make sure numfree is set to zero as we only jump *if* it
6908 * is set to zero. */
6909 } else {
6910 /* Otherwise just check the next page */
6911 offset++;
6912 }
6913 }
3a66edc7 6914 return REDIS_ERR;
6915}
6916
6917/* Swap the 'val' object relative to 'key' into disk. Store all the information
6918 * needed to later retrieve the object into the key object.
6919 * If we can't find enough contiguous empty pages to swap the object on disk
6920 * REDIS_ERR is returned. */
6921static int vmSwapObject(robj *key, robj *val) {
6922 off_t pages = rdbSavedObjectPages(val);
6923 off_t page;
6924
6925 assert(key->storage == REDIS_VM_MEMORY);
4ef8de8a 6926 assert(key->refcount == 1);
3a66edc7 6927 if (vmFindContiguousPages(&page,pages) == REDIS_ERR) return REDIS_ERR;
6928 if (fseeko(server.vm_fp,page*server.vm_page_size,SEEK_SET) == -1) {
6929 redisLog(REDIS_WARNING,
6930 "Critical VM problem in vmSwapObject(): can't seek: %s",
6931 strerror(errno));
6932 return REDIS_ERR;
6933 }
6934 rdbSaveObject(server.vm_fp,val);
6935 key->vm.page = page;
6936 key->vm.usedpages = pages;
6937 key->storage = REDIS_VM_SWAPPED;
d894161b 6938 key->vtype = val->type;
3a66edc7 6939 decrRefCount(val); /* Deallocate the object from memory. */
6940 vmMarkPagesUsed(page,pages);
7d30035d 6941 redisLog(REDIS_DEBUG,"VM: object %s swapped out at %lld (%lld pages)",
6942 (unsigned char*) key->ptr,
6943 (unsigned long long) page, (unsigned long long) pages);
7d98e08c 6944 server.vm_stats_swapped_objects++;
6945 server.vm_stats_swapouts++;
3a66edc7 6946 return REDIS_OK;
6947}
6948
6949/* Load the value object relative to the 'key' object from swap to memory.
7e69548d 6950 * The newly allocated object is returned.
6951 *
6952 * If preview is true the unserialized object is returned to the caller but
6953 * no changes are made to the key object, nor the pages are marked as freed */
6954static robj *vmGenericLoadObject(robj *key, int preview) {
3a66edc7 6955 robj *val;
6956
38823f08 6957 redisAssert(key->storage == REDIS_VM_SWAPPED);
3a66edc7 6958 if (fseeko(server.vm_fp,key->vm.page*server.vm_page_size,SEEK_SET) == -1) {
6959 redisLog(REDIS_WARNING,
6960 "Unrecoverable VM problem in vmLoadObject(): can't seek: %s",
6961 strerror(errno));
6962 exit(1);
6963 }
d894161b 6964 val = rdbLoadObject(key->vtype,server.vm_fp);
3a66edc7 6965 if (val == NULL) {
6966 redisLog(REDIS_WARNING, "Unrecoverable VM problem in vmLoadObject(): can't load object from swap file: %s", strerror(errno));
6967 exit(1);
6968 }
7e69548d 6969 if (!preview) {
6970 key->storage = REDIS_VM_MEMORY;
6971 key->vm.atime = server.unixtime;
6972 vmMarkPagesFree(key->vm.page,key->vm.usedpages);
6973 redisLog(REDIS_DEBUG, "VM: object %s loaded from disk",
6974 (unsigned char*) key->ptr);
7d98e08c 6975 server.vm_stats_swapped_objects--;
38aba9a1 6976 } else {
6977 redisLog(REDIS_DEBUG, "VM: object %s previewed from disk",
6978 (unsigned char*) key->ptr);
7e69548d 6979 }
7d98e08c 6980 server.vm_stats_swapins++;
3a66edc7 6981 return val;
06224fec 6982}
6983
7e69548d 6984/* Plain object loading, from swap to memory */
6985static robj *vmLoadObject(robj *key) {
6986 return vmGenericLoadObject(key,0);
6987}
6988
6989/* Just load the value on disk, without to modify the key.
6990 * This is useful when we want to perform some operation on the value
6991 * without to really bring it from swap to memory, like while saving the
6992 * dataset or rewriting the append only log. */
6993static robj *vmPreviewObject(robj *key) {
6994 return vmGenericLoadObject(key,1);
6995}
6996
4ef8de8a 6997/* How a good candidate is this object for swapping?
6998 * The better candidate it is, the greater the returned value.
6999 *
7000 * Currently we try to perform a fast estimation of the object size in
7001 * memory, and combine it with aging informations.
7002 *
7003 * Basically swappability = idle-time * log(estimated size)
7004 *
7005 * Bigger objects are preferred over smaller objects, but not
7006 * proportionally, this is why we use the logarithm. This algorithm is
7007 * just a first try and will probably be tuned later. */
7008static double computeObjectSwappability(robj *o) {
7009 time_t age = server.unixtime - o->vm.atime;
7010 long asize = 0;
7011 list *l;
7012 dict *d;
7013 struct dictEntry *de;
7014 int z;
7015
7016 if (age <= 0) return 0;
7017 switch(o->type) {
7018 case REDIS_STRING:
7019 if (o->encoding != REDIS_ENCODING_RAW) {
7020 asize = sizeof(*o);
7021 } else {
7022 asize = sdslen(o->ptr)+sizeof(*o)+sizeof(long)*2;
7023 }
7024 break;
7025 case REDIS_LIST:
7026 l = o->ptr;
7027 listNode *ln = listFirst(l);
7028
7029 asize = sizeof(list);
7030 if (ln) {
7031 robj *ele = ln->value;
7032 long elesize;
7033
7034 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
7035 (sizeof(*o)+sdslen(ele->ptr)) :
7036 sizeof(*o);
7037 asize += (sizeof(listNode)+elesize)*listLength(l);
7038 }
7039 break;
7040 case REDIS_SET:
7041 case REDIS_ZSET:
7042 z = (o->type == REDIS_ZSET);
7043 d = z ? ((zset*)o->ptr)->dict : o->ptr;
7044
7045 asize = sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
7046 if (z) asize += sizeof(zset)-sizeof(dict);
7047 if (dictSize(d)) {
7048 long elesize;
7049 robj *ele;
7050
7051 de = dictGetRandomKey(d);
7052 ele = dictGetEntryKey(de);
7053 elesize = (ele->encoding == REDIS_ENCODING_RAW) ?
7054 (sizeof(*o)+sdslen(ele->ptr)) :
7055 sizeof(*o);
7056 asize += (sizeof(struct dictEntry)+elesize)*dictSize(d);
7057 if (z) asize += sizeof(zskiplistNode)*dictSize(d);
7058 }
7059 break;
7060 }
7061 return (double)asize*log(1+asize);
7062}
7063
7064/* Try to swap an object that's a good candidate for swapping.
7065 * Returns REDIS_OK if the object was swapped, REDIS_ERR if it's not possible
7066 * to swap any object at all. */
7067static int vmSwapOneObject(void) {
7068 int j, i;
7069 struct dictEntry *best = NULL;
7070 double best_swappability = 0;
7071 robj *key, *val;
7072
7073 for (j = 0; j < server.dbnum; j++) {
7074 redisDb *db = server.db+j;
e3cadb8a 7075 int maxtries = 1000;
4ef8de8a 7076
7077 if (dictSize(db->dict) == 0) continue;
7078 for (i = 0; i < 5; i++) {
7079 dictEntry *de;
7080 double swappability;
7081
e3cadb8a 7082 if (maxtries) maxtries--;
4ef8de8a 7083 de = dictGetRandomKey(db->dict);
7084 key = dictGetEntryKey(de);
7085 val = dictGetEntryVal(de);
e3cadb8a 7086 if (key->storage != REDIS_VM_MEMORY) {
7087 if (maxtries) i--; /* don't count this try */
7088 continue;
7089 }
4ef8de8a 7090 swappability = computeObjectSwappability(val);
7091 if (!best || swappability > best_swappability) {
7092 best = de;
7093 best_swappability = swappability;
7094 }
7095 }
7096 }
e3cadb8a 7097 if (best == NULL) {
7098 redisLog(REDIS_DEBUG,"No swappable key found!");
7099 return REDIS_ERR;
7100 }
4ef8de8a 7101 key = dictGetEntryKey(best);
7102 val = dictGetEntryVal(best);
7103
e3cadb8a 7104 redisLog(REDIS_DEBUG,"Key with best swappability: %s, %f",
4ef8de8a 7105 key->ptr, best_swappability);
7106
7107 /* Unshare the key if needed */
7108 if (key->refcount > 1) {
7109 robj *newkey = dupStringObject(key);
7110 decrRefCount(key);
7111 key = dictGetEntryKey(best) = newkey;
7112 }
7113 /* Swap it */
7114 if (vmSwapObject(key,val) == REDIS_OK) {
7115 dictGetEntryVal(best) = NULL;
7116 return REDIS_OK;
7117 } else {
7118 return REDIS_ERR;
7119 }
7120}
7121
7e69548d 7122/* Return true if it's safe to swap out objects in a given moment.
7123 * Basically we don't want to swap objects out while there is a BGSAVE
7124 * or a BGAEOREWRITE running in backgroud. */
7125static int vmCanSwapOut(void) {
7126 return (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1);
7127}
7128
1b03836c 7129/* Delete a key if swapped. Returns 1 if the key was found, was swapped
7130 * and was deleted. Otherwise 0 is returned. */
7131static int deleteIfSwapped(redisDb *db, robj *key) {
7132 dictEntry *de;
7133 robj *foundkey;
7134
7135 if ((de = dictFind(db->dict,key)) == NULL) return 0;
7136 foundkey = dictGetEntryKey(de);
7137 if (foundkey->storage == REDIS_VM_MEMORY) return 0;
7138 deleteKey(db,key);
7139 return 1;
7140}
7141
7f957c92 7142/* ================================= Debugging ============================== */
7143
7144static void debugCommand(redisClient *c) {
7145 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
7146 *((char*)-1) = 'x';
210e29f7 7147 } else if (!strcasecmp(c->argv[1]->ptr,"reload")) {
7148 if (rdbSave(server.dbfilename) != REDIS_OK) {
7149 addReply(c,shared.err);
7150 return;
7151 }
7152 emptyDb();
7153 if (rdbLoad(server.dbfilename) != REDIS_OK) {
7154 addReply(c,shared.err);
7155 return;
7156 }
7157 redisLog(REDIS_WARNING,"DB reloaded by DEBUG RELOAD");
7158 addReply(c,shared.ok);
71c2b467 7159 } else if (!strcasecmp(c->argv[1]->ptr,"loadaof")) {
7160 emptyDb();
7161 if (loadAppendOnlyFile(server.appendfilename) != REDIS_OK) {
7162 addReply(c,shared.err);
7163 return;
7164 }
7165 redisLog(REDIS_WARNING,"Append Only File loaded by DEBUG LOADAOF");
7166 addReply(c,shared.ok);
333298da 7167 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
7168 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
7169 robj *key, *val;
7170
7171 if (!de) {
7172 addReply(c,shared.nokeyerr);
7173 return;
7174 }
7175 key = dictGetEntryKey(de);
7176 val = dictGetEntryVal(de);
7177 addReplySds(c,sdscatprintf(sdsempty(),
06233c45 7178 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d serializedlength:%lld\r\n",
682ac724 7179 (void*)key, key->refcount, (void*)val, val->refcount,
06233c45 7180 val->encoding, rdbSavedObjectLen(val)));
7d30035d 7181 } else if (!strcasecmp(c->argv[1]->ptr,"swapout") && c->argc == 3) {
7182 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
7183 robj *key, *val;
7184
7185 if (!server.vm_enabled) {
7186 addReplySds(c,sdsnew("-ERR Virtual Memory is disabled\r\n"));
7187 return;
7188 }
7189 if (!de) {
7190 addReply(c,shared.nokeyerr);
7191 return;
7192 }
7193 key = dictGetEntryKey(de);
7194 val = dictGetEntryVal(de);
4ef8de8a 7195 /* If the key is shared we want to create a copy */
7196 if (key->refcount > 1) {
7197 robj *newkey = dupStringObject(key);
7198 decrRefCount(key);
7199 key = dictGetEntryKey(de) = newkey;
7200 }
7201 /* Swap it */
7d30035d 7202 if (key->storage != REDIS_VM_MEMORY) {
7203 addReplySds(c,sdsnew("-ERR This key is not in memory\r\n"));
7204 } else if (vmSwapObject(key,val) == REDIS_OK) {
7205 dictGetEntryVal(de) = NULL;
7206 addReply(c,shared.ok);
7207 } else {
7208 addReply(c,shared.err);
7209 }
7f957c92 7210 } else {
333298da 7211 addReplySds(c,sdsnew(
7d30035d 7212 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>|SWAPOUT <key>|RELOAD]\r\n"));
7f957c92 7213 }
7214}
56906eef 7215
dfc5e96c 7216static void _redisAssert(char *estr) {
7217 redisLog(REDIS_WARNING,"=== ASSERTION FAILED ===");
7218 redisLog(REDIS_WARNING,"==> %s\n",estr);
7219#ifdef HAVE_BACKTRACE
7220 redisLog(REDIS_WARNING,"(forcing SIGSEGV in order to print the stack trace)");
7221 *((char*)-1) = 'x';
7222#endif
7223}
7224
bcfc686d 7225/* =================================== Main! ================================ */
56906eef 7226
bcfc686d 7227#ifdef __linux__
/* Return the integer found in /proc/sys/vm/overcommit_memory, or -1 if the
 * file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");

    if (fp == NULL) return -1;
    if (fgets(line,sizeof(line),fp) != NULL) value = atoi(line);
    fclose(fp);
    return value;
}
7241
7242void linuxOvercommitMemoryWarning(void) {
7243 if (linuxOvercommitMemoryValue() == 0) {
7244 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
7245 }
7246}
7247#endif /* __linux__ */
7248
7249static void daemonize(void) {
7250 int fd;
7251 FILE *fp;
7252
7253 if (fork() != 0) exit(0); /* parent exits */
71c54b21 7254 printf("New pid: %d\n", getpid());
bcfc686d 7255 setsid(); /* create a new session */
7256
7257 /* Every output goes to /dev/null. If Redis is daemonized but
7258 * the 'logfile' is set to 'stdout' in the configuration file
7259 * it will not log at all. */
7260 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
7261 dup2(fd, STDIN_FILENO);
7262 dup2(fd, STDOUT_FILENO);
7263 dup2(fd, STDERR_FILENO);
7264 if (fd > STDERR_FILENO) close(fd);
7265 }
7266 /* Try to write the pid file */
7267 fp = fopen(server.pidfile,"w");
7268 if (fp) {
7269 fprintf(fp,"%d\n",getpid());
7270 fclose(fp);
56906eef 7271 }
56906eef 7272}
7273
bcfc686d 7274int main(int argc, char **argv) {
7275 initServerConfig();
7276 if (argc == 2) {
7277 resetServerSaveParams();
7278 loadServerConfig(argv[1]);
7279 } else if (argc > 2) {
7280 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
7281 exit(1);
7282 } else {
7283 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
7284 }
bcfc686d 7285 if (server.daemonize) daemonize();
71c54b21 7286 initServer();
bcfc686d 7287 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
7288#ifdef __linux__
7289 linuxOvercommitMemoryWarning();
7290#endif
7291 if (server.appendonly) {
7292 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
7293 redisLog(REDIS_NOTICE,"DB loaded from append only file");
7294 } else {
7295 if (rdbLoad(server.dbfilename) == REDIS_OK)
7296 redisLog(REDIS_NOTICE,"DB loaded from disk");
7297 }
7298 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
266373b2 7299 acceptHandler, NULL) == AE_ERR) oom("creating file event");
bcfc686d 7300 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
7301 aeMain(server.el);
7302 aeDeleteEventLoop(server.el);
7303 return 0;
7304}
7305
7306/* ============================= Backtrace support ========================= */
7307
7308#ifdef HAVE_BACKTRACE
7309static char *findFuncName(void *pointer, unsigned long *offset);
7310
56906eef 7311static void *getMcontextEip(ucontext_t *uc) {
7312#if defined(__FreeBSD__)
7313 return (void*) uc->uc_mcontext.mc_eip;
7314#elif defined(__dietlibc__)
7315 return (void*) uc->uc_mcontext.eip;
06db1f50 7316#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
da0a1620 7317 #if __x86_64__
7318 return (void*) uc->uc_mcontext->__ss.__rip;
7319 #else
56906eef 7320 return (void*) uc->uc_mcontext->__ss.__eip;
da0a1620 7321 #endif
06db1f50 7322#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
cb7e07cc 7323 #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
06db1f50 7324 return (void*) uc->uc_mcontext->__ss.__rip;
cbc59b38 7325 #else
7326 return (void*) uc->uc_mcontext->__ss.__eip;
7327 #endif
c04c9ac9 7328#elif defined(__i386__) || defined(__X86_64__) || defined(__x86_64__)
7329 return (void*) uc->uc_mcontext.gregs[REG_EIP]; /* Linux 32/64 bit */
b91cf5ef 7330#elif defined(__ia64__) /* Linux IA64 */
7331 return (void*) uc->uc_mcontext.sc_ip;
7332#else
7333 return NULL;
56906eef 7334#endif
7335}
7336
7337static void segvHandler(int sig, siginfo_t *info, void *secret) {
7338 void *trace[100];
7339 char **messages = NULL;
7340 int i, trace_size = 0;
7341 unsigned long offset=0;
56906eef 7342 ucontext_t *uc = (ucontext_t*) secret;
1c85b79f 7343 sds infostring;
56906eef 7344 REDIS_NOTUSED(info);
7345
7346 redisLog(REDIS_WARNING,
7347 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
1c85b79f 7348 infostring = genRedisInfoString();
7349 redisLog(REDIS_WARNING, "%s",infostring);
7350 /* It's not safe to sdsfree() the returned string under memory
7351 * corruption conditions. Let it leak as we are going to abort */
56906eef 7352
7353 trace_size = backtrace(trace, 100);
de96dbfe 7354 /* overwrite sigaction with caller's address */
b91cf5ef 7355 if (getMcontextEip(uc) != NULL) {
7356 trace[1] = getMcontextEip(uc);
7357 }
56906eef 7358 messages = backtrace_symbols(trace, trace_size);
fe3bbfbe 7359
d76412d1 7360 for (i=1; i<trace_size; ++i) {
56906eef 7361 char *fn = findFuncName(trace[i], &offset), *p;
7362
7363 p = strchr(messages[i],'+');
7364 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
7365 redisLog(REDIS_WARNING,"%s", messages[i]);
7366 } else {
7367 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
7368 }
7369 }
b177fd30 7370 /* free(messages); Don't call free() with possibly corrupted memory. */
56906eef 7371 exit(0);
fe3bbfbe 7372}
56906eef 7373
7374static void setupSigSegvAction(void) {
7375 struct sigaction act;
7376
7377 sigemptyset (&act.sa_mask);
7378 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
7379 * is used. Otherwise, sa_handler is used */
7380 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
7381 act.sa_sigaction = segvHandler;
7382 sigaction (SIGSEGV, &act, NULL);
7383 sigaction (SIGBUS, &act, NULL);
12fea928 7384 sigaction (SIGFPE, &act, NULL);
7385 sigaction (SIGILL, &act, NULL);
7386 sigaction (SIGBUS, &act, NULL);
e65fdc78 7387 return;
56906eef 7388}
e65fdc78 7389
bcfc686d 7390#include "staticsymbols.h"
7391/* This function try to convert a pointer into a function name. It's used in
7392 * oreder to provide a backtrace under segmentation fault that's able to
7393 * display functions declared as static (otherwise the backtrace is useless). */
7394static char *findFuncName(void *pointer, unsigned long *offset){
7395 int i, ret = -1;
7396 unsigned long off, minoff = 0;
ed9b544e 7397
bcfc686d 7398 /* Try to match against the Symbol with the smallest offset */
7399 for (i=0; symsTable[i].pointer; i++) {
7400 unsigned long lp = (unsigned long) pointer;
0bc03378 7401
bcfc686d 7402 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
7403 off=lp-symsTable[i].pointer;
7404 if (ret < 0 || off < minoff) {
7405 minoff=off;
7406 ret=i;
7407 }
7408 }
0bc03378 7409 }
bcfc686d 7410 if (ret == -1) return NULL;
7411 *offset = minoff;
7412 return symsTable[ret].name;
0bc03378 7413}
bcfc686d 7414#else /* HAVE_BACKTRACE */
/* Variant compiled in the #else branch of the HAVE_BACKTRACE conditional:
 * it does nothing, so no crash handler is installed. */
static void setupSigSegvAction(void)
{
    return;
}
bcfc686d 7417#endif /* HAVE_BACKTRACE */
0bc03378 7418
ed9b544e 7419
ed9b544e 7420
bcfc686d 7421/* The End */
7422
7423
ed9b544e 7424