]> git.saurik.com Git - redis.git/blame_incremental - redis.c
SLAVEOF command implemented for replication remote control
[redis.git] / redis.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#define REDIS_VERSION "0.100"
31
32#include "fmacros.h"
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <time.h>
38#include <unistd.h>
39#include <signal.h>
40#include <sys/wait.h>
41#include <errno.h>
42#include <assert.h>
43#include <ctype.h>
44#include <stdarg.h>
45#include <inttypes.h>
46#include <arpa/inet.h>
47#include <sys/stat.h>
48#include <fcntl.h>
49#include <sys/time.h>
50#include <sys/resource.h>
51#include <limits.h>
52
53#include "ae.h" /* Event driven programming library */
54#include "sds.h" /* Dynamic safe strings */
55#include "anet.h" /* Networking the easy way */
56#include "dict.h" /* Hash tables */
57#include "adlist.h" /* Linked lists */
58#include "zmalloc.h" /* total memory usage aware version of malloc/free */
59#include "lzf.h" /* LZF compression library */
60#include "pqsort.h" /* Partial qsort for SORT+LIMIT */
61
62/* Error codes */
63#define REDIS_OK 0
64#define REDIS_ERR -1
65
66/* Static server configuration */
67#define REDIS_SERVERPORT 6379 /* TCP port */
68#define REDIS_MAXIDLETIME (60*5) /* default client timeout */
69#define REDIS_IOBUF_LEN 1024
70#define REDIS_LOADBUF_LEN 1024
71#define REDIS_STATIC_ARGS 4
72#define REDIS_DEFAULT_DBNUM 16
73#define REDIS_CONFIGLINE_MAX 1024
74#define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
75#define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
76#define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
77
78/* Hash table parameters */
79#define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
80#define REDIS_HT_MINSLOTS 16384 /* Never resize the HT under this */
81
82/* Command flags */
83#define REDIS_CMD_BULK 1
84#define REDIS_CMD_INLINE 2
85
86/* Object types */
87#define REDIS_STRING 0
88#define REDIS_LIST 1
89#define REDIS_SET 2
90#define REDIS_HASH 3
91
92/* Object types only used for dumping to disk */
93#define REDIS_EXPIRETIME 253
94#define REDIS_SELECTDB 254
95#define REDIS_EOF 255
96
/* Defines related to the dump file format. Storing every length as a 32 bit
 * integer would waste a lot of space for short keys, so we check the two most
 * significant bits of the first byte to interpret the length:
 *
 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
 * 11|000000 this means: specially encoded object will follow. The six bits
 *           number specify the kind of object that follows.
 *           See the REDIS_RDB_ENC_* defines.
 *
 * Lengths up to 63 are stored using a single byte, most DB keys, and many
 * values, will fit inside. */
110#define REDIS_RDB_6BITLEN 0
111#define REDIS_RDB_14BITLEN 1
112#define REDIS_RDB_32BITLEN 2
113#define REDIS_RDB_ENCVAL 3
114#define REDIS_RDB_LENERR UINT_MAX
115
/* When a length of a string object stored on disk has the first two bits
 * set, the remaining six bits specify a special encoding for the object
 * according to the following defines: */
119#define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
120#define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
121#define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
122#define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
123
124/* Client flags */
125#define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
126#define REDIS_SLAVE 2 /* This client is a slave server */
127#define REDIS_MASTER 4 /* This client is a master server */
128#define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
129
130/* Slave replication state - slave side */
131#define REDIS_REPL_NONE 0 /* No active replication */
132#define REDIS_REPL_CONNECT 1 /* Must connect to master */
133#define REDIS_REPL_CONNECTED 2 /* Connected to master */
134
135/* Slave replication state - from the point of view of master
136 * Note that in SEND_BULK and ONLINE state the slave receives new updates
137 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
138 * to start the next background saving in order to send updates to it. */
139#define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
140#define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
141#define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
142#define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
143
144/* List related stuff */
145#define REDIS_HEAD 0
146#define REDIS_TAIL 1
147
148/* Sort operations */
149#define REDIS_SORT_GET 0
150#define REDIS_SORT_DEL 1
151#define REDIS_SORT_INCR 2
152#define REDIS_SORT_DECR 3
153#define REDIS_SORT_ASC 4
154#define REDIS_SORT_DESC 5
155#define REDIS_SORTKEY_MAX 1024
156
157/* Log levels */
158#define REDIS_DEBUG 0
159#define REDIS_NOTICE 1
160#define REDIS_WARNING 2
161
/* Anti-warning macro: evaluates V and discards the result, so that unused
 * parameters do not trigger compiler warnings. The argument is parenthesized
 * so that compound expressions such as REDIS_NOTUSED(a + b) are accepted. */
#define REDIS_NOTUSED(V) ((void)(V))
164
165/*================================= Data types ============================== */
166
/* A redis object: a generic container able to hold a string / list / set.
 * 'type' is the object type passed to createObject() (e.g. REDIS_STRING) and
 * 'ptr' points to the payload. */
typedef struct redisObject {
    void *ptr;      /* payload */
    int type;       /* object type */
    int refcount;   /* reference count, see incrRefCount() / decrRefCount() */
} robj;
173
174typedef struct redisDb {
175 dict *dict;
176 dict *expires;
177 int id;
178} redisDb;
179
180/* With multiplexing we need to take per-clinet state.
181 * Clients are taken in a liked list. */
182typedef struct redisClient {
183 int fd;
184 redisDb *db;
185 int dictid;
186 sds querybuf;
187 robj **argv;
188 int argc;
189 int bulklen; /* bulk read len. -1 if not in bulk read mode */
190 list *reply;
191 int sentlen;
192 time_t lastinteraction; /* time of the last interaction, used for timeout */
193 int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */
194 int slaveseldb; /* slave selected db, if this client is a slave */
195 int authenticated; /* when requirepass is non-NULL */
196 int replstate; /* replication state if this is a slave */
197 int repldbfd; /* replication DB file descriptor */
198 long repldboff; /* replication DB file offset */
199 off_t repldbsize; /* replication DB file size */
200} redisClient;
201
/* One automatic save rule. serverCron() starts a background save when at
 * least 'changes' modifications are pending and more than 'seconds' seconds
 * elapsed since the last save. */
struct saveparam {
    time_t seconds;
    int changes;
};
206
207/* Global server state structure */
208struct redisServer {
209 int port;
210 int fd;
211 redisDb *db;
212 dict *sharingpool;
213 unsigned int sharingpoolsize;
214 long long dirty; /* changes to DB from the last save */
215 list *clients;
216 list *slaves, *monitors;
217 char neterr[ANET_ERR_LEN];
218 aeEventLoop *el;
219 int cronloops; /* number of times the cron function run */
220 list *objfreelist; /* A list of freed objects to avoid malloc() */
221 time_t lastsave; /* Unix time of last save succeeede */
222 size_t usedmemory; /* Used memory in megabytes */
223 /* Fields used only for stats */
224 time_t stat_starttime; /* server start time */
225 long long stat_numcommands; /* number of processed commands */
226 long long stat_numconnections; /* number of connections received */
227 /* Configuration */
228 int verbosity;
229 int glueoutputbuf;
230 int maxidletime;
231 int dbnum;
232 int daemonize;
233 char *pidfile;
234 int bgsaveinprogress;
235 struct saveparam *saveparams;
236 int saveparamslen;
237 char *logfile;
238 char *bindaddr;
239 char *dbfilename;
240 char *requirepass;
241 int shareobjects;
242 /* Replication related */
243 int isslave;
244 char *masterhost;
245 int masterport;
246 redisClient *master; /* client that is master for this slave */
247 int replstate;
248 /* Sort parameters - qsort_r() is only available under BSD so we
249 * have to take this state global, in order to pass it to sortCompare() */
250 int sort_desc;
251 int sort_alpha;
252 int sort_bypattern;
253};
254
255typedef void redisCommandProc(redisClient *c);
256struct redisCommand {
257 char *name;
258 redisCommandProc *proc;
259 int arity;
260 int flags;
261};
262
263typedef struct _redisSortObject {
264 robj *obj;
265 union {
266 double score;
267 robj *cmpobj;
268 } u;
269} redisSortObject;
270
271typedef struct _redisSortOperation {
272 int type;
273 robj *pattern;
274} redisSortOperation;
275
276struct sharedObjectsStruct {
277 robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
278 *colon, *nullbulk, *nullmultibulk,
279 *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
280 *outofrangeerr, *plus,
281 *select0, *select1, *select2, *select3, *select4,
282 *select5, *select6, *select7, *select8, *select9;
283} shared;
284
285/*================================ Prototypes =============================== */
286
287static void freeStringObject(robj *o);
288static void freeListObject(robj *o);
289static void freeSetObject(robj *o);
290static void decrRefCount(void *o);
291static robj *createObject(int type, void *ptr);
292static void freeClient(redisClient *c);
293static int rdbLoad(char *filename);
294static void addReply(redisClient *c, robj *obj);
295static void addReplySds(redisClient *c, sds s);
296static void incrRefCount(robj *o);
297static int rdbSaveBackground(char *filename);
298static robj *createStringObject(char *ptr, size_t len);
299static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
300static int syncWithMaster(void);
301static robj *tryObjectSharing(robj *o);
302static int removeExpire(redisDb *db, robj *key);
303static int expireIfNeeded(redisDb *db, robj *key);
304static int deleteIfVolatile(redisDb *db, robj *key);
305static int deleteKey(redisDb *db, robj *key);
306static time_t getExpire(redisDb *db, robj *key);
307static int setExpire(redisDb *db, robj *key, time_t when);
308static void updateSalvesWaitingBgsave(int bgsaveerr);
309
310static void authCommand(redisClient *c);
311static void pingCommand(redisClient *c);
312static void echoCommand(redisClient *c);
313static void setCommand(redisClient *c);
314static void setnxCommand(redisClient *c);
315static void getCommand(redisClient *c);
316static void delCommand(redisClient *c);
317static void existsCommand(redisClient *c);
318static void incrCommand(redisClient *c);
319static void decrCommand(redisClient *c);
320static void incrbyCommand(redisClient *c);
321static void decrbyCommand(redisClient *c);
322static void selectCommand(redisClient *c);
323static void randomkeyCommand(redisClient *c);
324static void keysCommand(redisClient *c);
325static void dbsizeCommand(redisClient *c);
326static void lastsaveCommand(redisClient *c);
327static void saveCommand(redisClient *c);
328static void bgsaveCommand(redisClient *c);
329static void shutdownCommand(redisClient *c);
330static void moveCommand(redisClient *c);
331static void renameCommand(redisClient *c);
332static void renamenxCommand(redisClient *c);
333static void lpushCommand(redisClient *c);
334static void rpushCommand(redisClient *c);
335static void lpopCommand(redisClient *c);
336static void rpopCommand(redisClient *c);
337static void llenCommand(redisClient *c);
338static void lindexCommand(redisClient *c);
339static void lrangeCommand(redisClient *c);
340static void ltrimCommand(redisClient *c);
341static void typeCommand(redisClient *c);
342static void lsetCommand(redisClient *c);
343static void saddCommand(redisClient *c);
344static void sremCommand(redisClient *c);
345static void smoveCommand(redisClient *c);
346static void sismemberCommand(redisClient *c);
347static void scardCommand(redisClient *c);
348static void sinterCommand(redisClient *c);
349static void sinterstoreCommand(redisClient *c);
350static void sunionCommand(redisClient *c);
351static void sunionstoreCommand(redisClient *c);
352static void sdiffCommand(redisClient *c);
353static void sdiffstoreCommand(redisClient *c);
354static void syncCommand(redisClient *c);
355static void flushdbCommand(redisClient *c);
356static void flushallCommand(redisClient *c);
357static void sortCommand(redisClient *c);
358static void lremCommand(redisClient *c);
359static void infoCommand(redisClient *c);
360static void mgetCommand(redisClient *c);
361static void monitorCommand(redisClient *c);
362static void expireCommand(redisClient *c);
363static void getSetCommand(redisClient *c);
364static void ttlCommand(redisClient *c);
365static void slaveofCommand(redisClient *c);
366
367/*================================= Globals ================================= */
368
369/* Global vars */
370static struct redisServer server; /* server global state */
371static struct redisCommand cmdTable[] = {
372 {"get",getCommand,2,REDIS_CMD_INLINE},
373 {"set",setCommand,3,REDIS_CMD_BULK},
374 {"setnx",setnxCommand,3,REDIS_CMD_BULK},
375 {"del",delCommand,-2,REDIS_CMD_INLINE},
376 {"exists",existsCommand,2,REDIS_CMD_INLINE},
377 {"incr",incrCommand,2,REDIS_CMD_INLINE},
378 {"decr",decrCommand,2,REDIS_CMD_INLINE},
379 {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
380 {"rpush",rpushCommand,3,REDIS_CMD_BULK},
381 {"lpush",lpushCommand,3,REDIS_CMD_BULK},
382 {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
383 {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
384 {"llen",llenCommand,2,REDIS_CMD_INLINE},
385 {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
386 {"lset",lsetCommand,4,REDIS_CMD_BULK},
387 {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
388 {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
389 {"lrem",lremCommand,4,REDIS_CMD_BULK},
390 {"sadd",saddCommand,3,REDIS_CMD_BULK},
391 {"srem",sremCommand,3,REDIS_CMD_BULK},
392 {"smove",smoveCommand,4,REDIS_CMD_BULK},
393 {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
394 {"scard",scardCommand,2,REDIS_CMD_INLINE},
395 {"sinter",sinterCommand,-2,REDIS_CMD_INLINE},
396 {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE},
397 {"sunion",sunionCommand,-2,REDIS_CMD_INLINE},
398 {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE},
399 {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE},
400 {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE},
401 {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
402 {"incrby",incrbyCommand,3,REDIS_CMD_INLINE},
403 {"decrby",decrbyCommand,3,REDIS_CMD_INLINE},
404 {"getset",getSetCommand,3,REDIS_CMD_BULK},
405 {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
406 {"select",selectCommand,2,REDIS_CMD_INLINE},
407 {"move",moveCommand,3,REDIS_CMD_INLINE},
408 {"rename",renameCommand,3,REDIS_CMD_INLINE},
409 {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
410 {"expire",expireCommand,3,REDIS_CMD_INLINE},
411 {"keys",keysCommand,2,REDIS_CMD_INLINE},
412 {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
413 {"auth",authCommand,2,REDIS_CMD_INLINE},
414 {"ping",pingCommand,1,REDIS_CMD_INLINE},
415 {"echo",echoCommand,2,REDIS_CMD_BULK},
416 {"save",saveCommand,1,REDIS_CMD_INLINE},
417 {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
418 {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
419 {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
420 {"type",typeCommand,2,REDIS_CMD_INLINE},
421 {"sync",syncCommand,1,REDIS_CMD_INLINE},
422 {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
423 {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
424 {"sort",sortCommand,-2,REDIS_CMD_INLINE},
425 {"info",infoCommand,1,REDIS_CMD_INLINE},
426 {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
427 {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
428 {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
429 {NULL,NULL,0,0}
430};
431
432/*============================ Utility functions ============================ */
433
/* Glob-style pattern matching.
 *
 * Matches the first 'stringLen' bytes of 'string' against the first
 * 'patternLen' bytes of 'pattern'. Supported syntax:
 *
 *   *       any sequence of bytes, including the empty one
 *   ?       exactly one byte
 *   [abc]   one byte out of a set; a leading ^ negates the set and a-z
 *           specifies a range
 *   \x      the byte x taken literally
 *
 * If 'nocase' is non-zero, literal bytes and ranges are compared after
 * tolower(). Returns 1 on match, 0 otherwise.
 *
 * Neither buffer has to be NUL terminated: bytes at or past the given
 * lengths are never read, and the lengths never go negative. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while(patternLen) {
        switch(pattern[0]) {
        case '*':
            /* Collapse runs of '*', without looking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            while(stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            /* A class always consumes one byte of the string. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            not = patternLen && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (patternLen == 0) {
                    /* Unterminated class: step back so that the common
                     * increment after the switch lands exactly on the end
                     * of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        /* tolower() is only defined for unsigned char
                         * values and EOF. */
                        start = tolower((unsigned char)start);
                        end = tolower((unsigned char)end);
                        c = tolower((unsigned char)c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            /* A literal needs a byte to compare against. */
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is over: only trailing '*' may still match. */
            while(patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
556
557void redisLog(int level, const char *fmt, ...)
558{
559 va_list ap;
560 FILE *fp;
561
562 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
563 if (!fp) return;
564
565 va_start(ap, fmt);
566 if (level >= server.verbosity) {
567 char *c = ".-*";
568 char buf[64];
569 time_t now;
570
571 now = time(NULL);
572 strftime(buf,64,"%d %b %H:%M:%S",gmtime(&now));
573 fprintf(fp,"%s %c ",buf,c[level]);
574 vfprintf(fp, fmt, ap);
575 fprintf(fp,"\n");
576 fflush(fp);
577 }
578 va_end(ap);
579
580 if (server.logfile) fclose(fp);
581}
582
583/*====================== Hash table type implementation ==================== */
584
/* This is a hash table type that uses the SDS dynamic strings library as
 * keys and redis objects as values (objects can hold SDS strings,
 * lists, sets). */
588
589static int sdsDictKeyCompare(void *privdata, const void *key1,
590 const void *key2)
591{
592 int l1,l2;
593 DICT_NOTUSED(privdata);
594
595 l1 = sdslen((sds)key1);
596 l2 = sdslen((sds)key2);
597 if (l1 != l2) return 0;
598 return memcmp(key1, key2, l1) == 0;
599}
600
/* Dict destructor callback: drops the reference the hash table holds on
 * the redis object 'val'. 'privdata' is unused. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    decrRefCount(val);
    DICT_NOTUSED(privdata);
}
607
608static int dictSdsKeyCompare(void *privdata, const void *key1,
609 const void *key2)
610{
611 const robj *o1 = key1, *o2 = key2;
612 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
613}
614
615static unsigned int dictSdsHash(const void *key) {
616 const robj *o = key;
617 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
618}
619
620static dictType setDictType = {
621 dictSdsHash, /* hash function */
622 NULL, /* key dup */
623 NULL, /* val dup */
624 dictSdsKeyCompare, /* key compare */
625 dictRedisObjectDestructor, /* key destructor */
626 NULL /* val destructor */
627};
628
629static dictType hashDictType = {
630 dictSdsHash, /* hash function */
631 NULL, /* key dup */
632 NULL, /* val dup */
633 dictSdsKeyCompare, /* key compare */
634 dictRedisObjectDestructor, /* key destructor */
635 dictRedisObjectDestructor /* val destructor */
636};
637
638/* ========================= Random utility functions ======================= */
639
/* Out of memory handler.
 *
 * Redis generally does not try to recover from out of memory conditions
 * when allocating objects or strings: it is not clear whether the condition
 * could even be reported to the client, since the networking layer itself
 * relies on heap allocation for its send buffers. So we simply report the
 * failing context 'msg' on standard error and abort.
 * At least the code will be simpler to read... */
static void oom(const char *msg) {
    fprintf(stderr, "%s: Out of memory\n",msg);
    fflush(stderr);

    /* Pause for one second, then terminate the process abnormally. */
    sleep(1);
    abort();
}
651
652/* ====================== Redis server networking stuff ===================== */
653void closeTimedoutClients(void) {
654 redisClient *c;
655 listNode *ln;
656 time_t now = time(NULL);
657
658 listRewind(server.clients);
659 while ((ln = listYield(server.clients)) != NULL) {
660 c = listNodeValue(ln);
661 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
662 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
663 (now - c->lastinteraction > server.maxidletime)) {
664 redisLog(REDIS_DEBUG,"Closing idle client");
665 freeClient(c);
666 }
667 }
668}
669
670/* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
671 * we resize the hash table to save memory */
672void tryResizeHashTables(void) {
673 int j;
674
675 for (j = 0; j < server.dbnum; j++) {
676 long long size, used;
677
678 size = dictSlots(server.db[j].dict);
679 used = dictSize(server.db[j].dict);
680 if (size && used && size > REDIS_HT_MINSLOTS &&
681 (used*100/size < REDIS_HT_MINFILL)) {
682 redisLog(REDIS_NOTICE,"The hash table %d is too sparse, resize it...",j);
683 dictResize(server.db[j].dict);
684 redisLog(REDIS_NOTICE,"Hash table %d resized.",j);
685 }
686 }
687}
688
689int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
690 int j, loops = server.cronloops++;
691 REDIS_NOTUSED(eventLoop);
692 REDIS_NOTUSED(id);
693 REDIS_NOTUSED(clientData);
694
695 /* Update the global state with the amount of used memory */
696 server.usedmemory = zmalloc_used_memory();
697
698 /* Show some info about non-empty databases */
699 for (j = 0; j < server.dbnum; j++) {
700 long long size, used, vkeys;
701
702 size = dictSlots(server.db[j].dict);
703 used = dictSize(server.db[j].dict);
704 vkeys = dictSize(server.db[j].expires);
705 if (!(loops % 5) && used > 0) {
706 redisLog(REDIS_DEBUG,"DB %d: %d keys (%d volatile) in %d slots HT.",j,used,vkeys,size);
707 /* dictPrintStats(server.dict); */
708 }
709 }
710
711 /* We don't want to resize the hash tables while a bacground saving
712 * is in progress: the saving child is created using fork() that is
713 * implemented with a copy-on-write semantic in most modern systems, so
714 * if we resize the HT while there is the saving child at work actually
715 * a lot of memory movements in the parent will cause a lot of pages
716 * copied. */
717 if (!server.bgsaveinprogress) tryResizeHashTables();
718
719 /* Show information about connected clients */
720 if (!(loops % 5)) {
721 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use",
722 listLength(server.clients)-listLength(server.slaves),
723 listLength(server.slaves),
724 server.usedmemory,
725 dictSize(server.sharingpool));
726 }
727
728 /* Close connections of timedout clients */
729 if (server.maxidletime && !(loops % 10))
730 closeTimedoutClients();
731
732 /* Check if a background saving in progress terminated */
733 if (server.bgsaveinprogress) {
734 int statloc;
735 /* XXX: TODO handle the case of the saving child killed */
736 if (wait4(-1,&statloc,WNOHANG,NULL)) {
737 int exitcode = WEXITSTATUS(statloc);
738 if (exitcode == 0) {
739 redisLog(REDIS_NOTICE,
740 "Background saving terminated with success");
741 server.dirty = 0;
742 server.lastsave = time(NULL);
743 } else {
744 redisLog(REDIS_WARNING,
745 "Background saving error");
746 }
747 server.bgsaveinprogress = 0;
748 updateSalvesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
749 }
750 } else {
751 /* If there is not a background saving in progress check if
752 * we have to save now */
753 time_t now = time(NULL);
754 for (j = 0; j < server.saveparamslen; j++) {
755 struct saveparam *sp = server.saveparams+j;
756
757 if (server.dirty >= sp->changes &&
758 now-server.lastsave > sp->seconds) {
759 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
760 sp->changes, sp->seconds);
761 rdbSaveBackground(server.dbfilename);
762 break;
763 }
764 }
765 }
766
767 /* Try to expire a few timed out keys */
768 for (j = 0; j < server.dbnum; j++) {
769 redisDb *db = server.db+j;
770 int num = dictSize(db->expires);
771
772 if (num) {
773 time_t now = time(NULL);
774
775 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
776 num = REDIS_EXPIRELOOKUPS_PER_CRON;
777 while (num--) {
778 dictEntry *de;
779 time_t t;
780
781 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
782 t = (time_t) dictGetEntryVal(de);
783 if (now > t) {
784 deleteKey(db,dictGetEntryKey(de));
785 }
786 }
787 }
788 }
789
790 /* Check if we should connect to a MASTER */
791 if (server.replstate == REDIS_REPL_CONNECT) {
792 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
793 if (syncWithMaster() == REDIS_OK) {
794 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
795 }
796 }
797 return 1000;
798}
799
800static void createSharedObjects(void) {
801 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
802 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
803 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
804 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
805 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
806 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
807 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
808 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
809 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
810 /* no such key */
811 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
812 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
813 "-ERR Operation against a key holding the wrong kind of value\r\n"));
814 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
815 "-ERR no such key\r\n"));
816 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
817 "-ERR syntax error\r\n"));
818 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
819 "-ERR source and destination objects are the same\r\n"));
820 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
821 "-ERR index out of range\r\n"));
822 shared.space = createObject(REDIS_STRING,sdsnew(" "));
823 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
824 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
825 shared.select0 = createStringObject("select 0\r\n",10);
826 shared.select1 = createStringObject("select 1\r\n",10);
827 shared.select2 = createStringObject("select 2\r\n",10);
828 shared.select3 = createStringObject("select 3\r\n",10);
829 shared.select4 = createStringObject("select 4\r\n",10);
830 shared.select5 = createStringObject("select 5\r\n",10);
831 shared.select6 = createStringObject("select 6\r\n",10);
832 shared.select7 = createStringObject("select 7\r\n",10);
833 shared.select8 = createStringObject("select 8\r\n",10);
834 shared.select9 = createStringObject("select 9\r\n",10);
835}
836
837static void appendServerSaveParams(time_t seconds, int changes) {
838 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
839 if (server.saveparams == NULL) oom("appendServerSaveParams");
840 server.saveparams[server.saveparamslen].seconds = seconds;
841 server.saveparams[server.saveparamslen].changes = changes;
842 server.saveparamslen++;
843}
844
845static void ResetServerSaveParams() {
846 zfree(server.saveparams);
847 server.saveparams = NULL;
848 server.saveparamslen = 0;
849}
850
851static void initServerConfig() {
852 server.dbnum = REDIS_DEFAULT_DBNUM;
853 server.port = REDIS_SERVERPORT;
854 server.verbosity = REDIS_DEBUG;
855 server.maxidletime = REDIS_MAXIDLETIME;
856 server.saveparams = NULL;
857 server.logfile = NULL; /* NULL = log on standard output */
858 server.bindaddr = NULL;
859 server.glueoutputbuf = 1;
860 server.daemonize = 0;
861 server.pidfile = "/var/run/redis.pid";
862 server.dbfilename = "dump.rdb";
863 server.requirepass = NULL;
864 server.shareobjects = 0;
865 ResetServerSaveParams();
866
867 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
868 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
869 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
870 /* Replication related */
871 server.isslave = 0;
872 server.masterhost = NULL;
873 server.masterport = 6379;
874 server.master = NULL;
875 server.replstate = REDIS_REPL_NONE;
876}
877
878static void initServer() {
879 int j;
880
881 signal(SIGHUP, SIG_IGN);
882 signal(SIGPIPE, SIG_IGN);
883
884 server.clients = listCreate();
885 server.slaves = listCreate();
886 server.monitors = listCreate();
887 server.objfreelist = listCreate();
888 createSharedObjects();
889 server.el = aeCreateEventLoop();
890 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
891 server.sharingpool = dictCreate(&setDictType,NULL);
892 server.sharingpoolsize = 1024;
893 if (!server.db || !server.clients || !server.slaves || !server.monitors || !server.el || !server.objfreelist)
894 oom("server initialization"); /* Fatal OOM */
895 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
896 if (server.fd == -1) {
897 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
898 exit(1);
899 }
900 for (j = 0; j < server.dbnum; j++) {
901 server.db[j].dict = dictCreate(&hashDictType,NULL);
902 server.db[j].expires = dictCreate(&setDictType,NULL);
903 server.db[j].id = j;
904 }
905 server.cronloops = 0;
906 server.bgsaveinprogress = 0;
907 server.lastsave = time(NULL);
908 server.dirty = 0;
909 server.usedmemory = 0;
910 server.stat_numcommands = 0;
911 server.stat_numconnections = 0;
912 server.stat_starttime = time(NULL);
913 aeCreateTimeEvent(server.el, 1000, serverCron, NULL, NULL);
914}
915
916/* Empty the whole database */
917static long long emptyDb() {
918 int j;
919 long long removed = 0;
920
921 for (j = 0; j < server.dbnum; j++) {
922 removed += dictSize(server.db[j].dict);
923 dictEmpty(server.db[j].dict);
924 dictEmpty(server.db[j].expires);
925 }
926 return removed;
927}
928
/* Convert a "yes" / "no" string (case insensitive) into an integer.
 * Returns 1 for "yes", 0 for "no", -1 for anything else. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
934
935/* I agree, this is a very rudimental way to load a configuration...
936 will improve later if the config gets more complex */
937static void loadServerConfig(char *filename) {
938 FILE *fp = fopen(filename,"r");
939 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
940 int linenum = 0;
941 sds line = NULL;
942
943 if (!fp) {
944 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
945 exit(1);
946 }
947 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
948 sds *argv;
949 int argc, j;
950
951 linenum++;
952 line = sdsnew(buf);
953 line = sdstrim(line," \t\r\n");
954
955 /* Skip comments and blank lines*/
956 if (line[0] == '#' || line[0] == '\0') {
957 sdsfree(line);
958 continue;
959 }
960
961 /* Split into arguments */
962 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
963 sdstolower(argv[0]);
964
965 /* Execute config directives */
966 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
967 server.maxidletime = atoi(argv[1]);
968 if (server.maxidletime < 0) {
969 err = "Invalid timeout value"; goto loaderr;
970 }
971 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
972 server.port = atoi(argv[1]);
973 if (server.port < 1 || server.port > 65535) {
974 err = "Invalid port"; goto loaderr;
975 }
976 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
977 server.bindaddr = zstrdup(argv[1]);
978 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
979 int seconds = atoi(argv[1]);
980 int changes = atoi(argv[2]);
981 if (seconds < 1 || changes < 0) {
982 err = "Invalid save parameters"; goto loaderr;
983 }
984 appendServerSaveParams(seconds,changes);
985 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
986 if (chdir(argv[1]) == -1) {
987 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
988 argv[1], strerror(errno));
989 exit(1);
990 }
991 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
992 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
993 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
994 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
995 else {
996 err = "Invalid log level. Must be one of debug, notice, warning";
997 goto loaderr;
998 }
999 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
1000 FILE *fp;
1001
1002 server.logfile = zstrdup(argv[1]);
1003 if (!strcasecmp(server.logfile,"stdout")) {
1004 zfree(server.logfile);
1005 server.logfile = NULL;
1006 }
1007 if (server.logfile) {
1008 /* Test if we are able to open the file. The server will not
1009 * be able to abort just for this problem later... */
1010 fp = fopen(server.logfile,"a");
1011 if (fp == NULL) {
1012 err = sdscatprintf(sdsempty(),
1013 "Can't open the log file: %s", strerror(errno));
1014 goto loaderr;
1015 }
1016 fclose(fp);
1017 }
1018 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
1019 server.dbnum = atoi(argv[1]);
1020 if (server.dbnum < 1) {
1021 err = "Invalid number of databases"; goto loaderr;
1022 }
1023 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
1024 server.masterhost = sdsnew(argv[1]);
1025 server.masterport = atoi(argv[2]);
1026 server.replstate = REDIS_REPL_CONNECT;
1027 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
1028 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
1029 err = "argument must be 'yes' or 'no'"; goto loaderr;
1030 }
1031 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
1032 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
1033 err = "argument must be 'yes' or 'no'"; goto loaderr;
1034 }
1035 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
1036 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
1037 err = "argument must be 'yes' or 'no'"; goto loaderr;
1038 }
1039 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
1040 server.requirepass = zstrdup(argv[1]);
1041 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
1042 server.pidfile = zstrdup(argv[1]);
1043 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
1044 server.dbfilename = zstrdup(argv[1]);
1045 } else {
1046 err = "Bad directive or wrong number of arguments"; goto loaderr;
1047 }
1048 for (j = 0; j < argc; j++)
1049 sdsfree(argv[j]);
1050 zfree(argv);
1051 sdsfree(line);
1052 }
1053 fclose(fp);
1054 return;
1055
1056loaderr:
1057 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1058 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1059 fprintf(stderr, ">>> '%s'\n", line);
1060 fprintf(stderr, "%s\n", err);
1061 exit(1);
1062}
1063
1064static void freeClientArgv(redisClient *c) {
1065 int j;
1066
1067 for (j = 0; j < c->argc; j++)
1068 decrRefCount(c->argv[j]);
1069 c->argc = 0;
1070}
1071
1072static void freeClient(redisClient *c) {
1073 listNode *ln;
1074
1075 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1076 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1077 sdsfree(c->querybuf);
1078 listRelease(c->reply);
1079 freeClientArgv(c);
1080 close(c->fd);
1081 ln = listSearchKey(server.clients,c);
1082 assert(ln != NULL);
1083 listDelNode(server.clients,ln);
1084 if (c->flags & REDIS_SLAVE) {
1085 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1086 close(c->repldbfd);
1087 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1088 ln = listSearchKey(l,c);
1089 assert(ln != NULL);
1090 listDelNode(l,ln);
1091 }
1092 if (c->flags & REDIS_MASTER) {
1093 server.master = NULL;
1094 server.replstate = REDIS_REPL_CONNECT;
1095 }
1096 zfree(c->argv);
1097 zfree(c);
1098}
1099
1100static void glueReplyBuffersIfNeeded(redisClient *c) {
1101 int totlen = 0;
1102 listNode *ln;
1103 robj *o;
1104
1105 listRewind(c->reply);
1106 while((ln = listYield(c->reply))) {
1107 o = ln->value;
1108 totlen += sdslen(o->ptr);
1109 /* This optimization makes more sense if we don't have to copy
1110 * too much data */
1111 if (totlen > 1024) return;
1112 }
1113 if (totlen > 0) {
1114 char buf[1024];
1115 int copylen = 0;
1116
1117 listRewind(c->reply);
1118 while((ln = listYield(c->reply))) {
1119 o = ln->value;
1120 memcpy(buf+copylen,o->ptr,sdslen(o->ptr));
1121 copylen += sdslen(o->ptr);
1122 listDelNode(c->reply,ln);
1123 }
1124 /* Now the output buffer is empty, add the new single element */
1125 o = createObject(REDIS_STRING,sdsnewlen(buf,totlen));
1126 if (!listAddNodeTail(c->reply,o)) oom("listAddNodeTail");
1127 }
1128}
1129
1130static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1131 redisClient *c = privdata;
1132 int nwritten = 0, totwritten = 0, objlen;
1133 robj *o;
1134 REDIS_NOTUSED(el);
1135 REDIS_NOTUSED(mask);
1136
1137 if (server.glueoutputbuf && listLength(c->reply) > 1)
1138 glueReplyBuffersIfNeeded(c);
1139 while(listLength(c->reply)) {
1140 o = listNodeValue(listFirst(c->reply));
1141 objlen = sdslen(o->ptr);
1142
1143 if (objlen == 0) {
1144 listDelNode(c->reply,listFirst(c->reply));
1145 continue;
1146 }
1147
1148 if (c->flags & REDIS_MASTER) {
1149 nwritten = objlen - c->sentlen;
1150 } else {
1151 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
1152 if (nwritten <= 0) break;
1153 }
1154 c->sentlen += nwritten;
1155 totwritten += nwritten;
1156 /* If we fully sent the object on head go to the next one */
1157 if (c->sentlen == objlen) {
1158 listDelNode(c->reply,listFirst(c->reply));
1159 c->sentlen = 0;
1160 }
1161 }
1162 if (nwritten == -1) {
1163 if (errno == EAGAIN) {
1164 nwritten = 0;
1165 } else {
1166 redisLog(REDIS_DEBUG,
1167 "Error writing to client: %s", strerror(errno));
1168 freeClient(c);
1169 return;
1170 }
1171 }
1172 if (totwritten > 0) c->lastinteraction = time(NULL);
1173 if (listLength(c->reply) == 0) {
1174 c->sentlen = 0;
1175 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1176 }
1177}
1178
1179static struct redisCommand *lookupCommand(char *name) {
1180 int j = 0;
1181 while(cmdTable[j].name != NULL) {
1182 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
1183 j++;
1184 }
1185 return NULL;
1186}
1187
1188/* resetClient prepare the client to process the next command */
1189static void resetClient(redisClient *c) {
1190 freeClientArgv(c);
1191 c->bulklen = -1;
1192}
1193
1194/* If this function gets called we already read a whole
1195 * command, argments are in the client argv/argc fields.
1196 * processCommand() execute the command or prepare the
1197 * server for a bulk read from the client.
1198 *
1199 * If 1 is returned the client is still alive and valid and
1200 * and other operations can be performed by the caller. Otherwise
1201 * if 0 is returned the client was destroied (i.e. after QUIT). */
1202static int processCommand(redisClient *c) {
1203 struct redisCommand *cmd;
1204 long long dirty;
1205
1206 /* The QUIT command is handled as a special case. Normal command
1207 * procs are unable to close the client connection safely */
1208 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1209 freeClient(c);
1210 return 0;
1211 }
1212 cmd = lookupCommand(c->argv[0]->ptr);
1213 if (!cmd) {
1214 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1215 resetClient(c);
1216 return 1;
1217 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1218 (c->argc < -cmd->arity)) {
1219 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
1220 resetClient(c);
1221 return 1;
1222 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1223 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1224
1225 decrRefCount(c->argv[c->argc-1]);
1226 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1227 c->argc--;
1228 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1229 resetClient(c);
1230 return 1;
1231 }
1232 c->argc--;
1233 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1234 /* It is possible that the bulk read is already in the
1235 * buffer. Check this condition and handle it accordingly */
1236 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1237 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1238 c->argc++;
1239 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1240 } else {
1241 return 1;
1242 }
1243 }
1244 /* Let's try to share objects on the command arguments vector */
1245 if (server.shareobjects) {
1246 int j;
1247 for(j = 1; j < c->argc; j++)
1248 c->argv[j] = tryObjectSharing(c->argv[j]);
1249 }
1250 /* Check if the user is authenticated */
1251 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1252 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1253 resetClient(c);
1254 return 1;
1255 }
1256
1257 /* Exec the command */
1258 dirty = server.dirty;
1259 cmd->proc(c);
1260 if (server.dirty-dirty != 0 && listLength(server.slaves))
1261 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
1262 if (listLength(server.monitors))
1263 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
1264 server.stat_numcommands++;
1265
1266 /* Prepare the client for the next command */
1267 if (c->flags & REDIS_CLOSE) {
1268 freeClient(c);
1269 return 0;
1270 }
1271 resetClient(c);
1272 return 1;
1273}
1274
1275static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
1276 listNode *ln;
1277 int outc = 0, j;
1278 robj **outv;
1279 /* (args*2)+1 is enough room for args, spaces, newlines */
1280 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1281
1282 if (argc <= REDIS_STATIC_ARGS) {
1283 outv = static_outv;
1284 } else {
1285 outv = zmalloc(sizeof(robj*)*(argc*2+1));
1286 if (!outv) oom("replicationFeedSlaves");
1287 }
1288
1289 for (j = 0; j < argc; j++) {
1290 if (j != 0) outv[outc++] = shared.space;
1291 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1292 robj *lenobj;
1293
1294 lenobj = createObject(REDIS_STRING,
1295 sdscatprintf(sdsempty(),"%d\r\n",sdslen(argv[j]->ptr)));
1296 lenobj->refcount = 0;
1297 outv[outc++] = lenobj;
1298 }
1299 outv[outc++] = argv[j];
1300 }
1301 outv[outc++] = shared.crlf;
1302
1303 /* Increment all the refcounts at start and decrement at end in order to
1304 * be sure to free objects if there is no slave in a replication state
1305 * able to be feed with commands */
1306 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
1307 listRewind(slaves);
1308 while((ln = listYield(slaves))) {
1309 redisClient *slave = ln->value;
1310
1311 /* Don't feed slaves that are still waiting for BGSAVE to start */
1312 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
1313
1314 /* Feed all the other slaves, MONITORs and so on */
1315 if (slave->slaveseldb != dictid) {
1316 robj *selectcmd;
1317
1318 switch(dictid) {
1319 case 0: selectcmd = shared.select0; break;
1320 case 1: selectcmd = shared.select1; break;
1321 case 2: selectcmd = shared.select2; break;
1322 case 3: selectcmd = shared.select3; break;
1323 case 4: selectcmd = shared.select4; break;
1324 case 5: selectcmd = shared.select5; break;
1325 case 6: selectcmd = shared.select6; break;
1326 case 7: selectcmd = shared.select7; break;
1327 case 8: selectcmd = shared.select8; break;
1328 case 9: selectcmd = shared.select9; break;
1329 default:
1330 selectcmd = createObject(REDIS_STRING,
1331 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1332 selectcmd->refcount = 0;
1333 break;
1334 }
1335 addReply(slave,selectcmd);
1336 slave->slaveseldb = dictid;
1337 }
1338 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
1339 }
1340 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
1341 if (outv != static_outv) zfree(outv);
1342}
1343
1344static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1345 redisClient *c = (redisClient*) privdata;
1346 char buf[REDIS_IOBUF_LEN];
1347 int nread;
1348 REDIS_NOTUSED(el);
1349 REDIS_NOTUSED(mask);
1350
1351 nread = read(fd, buf, REDIS_IOBUF_LEN);
1352 if (nread == -1) {
1353 if (errno == EAGAIN) {
1354 nread = 0;
1355 } else {
1356 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1357 freeClient(c);
1358 return;
1359 }
1360 } else if (nread == 0) {
1361 redisLog(REDIS_DEBUG, "Client closed connection");
1362 freeClient(c);
1363 return;
1364 }
1365 if (nread) {
1366 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1367 c->lastinteraction = time(NULL);
1368 } else {
1369 return;
1370 }
1371
1372again:
1373 if (c->bulklen == -1) {
1374 /* Read the first line of the query */
1375 char *p = strchr(c->querybuf,'\n');
1376 size_t querylen;
1377 if (p) {
1378 sds query, *argv;
1379 int argc, j;
1380
1381 query = c->querybuf;
1382 c->querybuf = sdsempty();
1383 querylen = 1+(p-(query));
1384 if (sdslen(query) > querylen) {
1385 /* leave data after the first line of the query in the buffer */
1386 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1387 }
1388 *p = '\0'; /* remove "\n" */
1389 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1390 sdsupdatelen(query);
1391
1392 /* Now we can split the query in arguments */
1393 if (sdslen(query) == 0) {
1394 /* Ignore empty query */
1395 sdsfree(query);
1396 return;
1397 }
1398 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
1399 if (argv == NULL) oom("sdssplitlen");
1400 sdsfree(query);
1401
1402 if (c->argv) zfree(c->argv);
1403 c->argv = zmalloc(sizeof(robj*)*argc);
1404 if (c->argv == NULL) oom("allocating arguments list for client");
1405
1406 for (j = 0; j < argc; j++) {
1407 if (sdslen(argv[j])) {
1408 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1409 c->argc++;
1410 } else {
1411 sdsfree(argv[j]);
1412 }
1413 }
1414 zfree(argv);
1415 /* Execute the command. If the client is still valid
1416 * after processCommand() return and there is something
1417 * on the query buffer try to process the next command. */
1418 if (processCommand(c) && sdslen(c->querybuf)) goto again;
1419 return;
1420 } else if (sdslen(c->querybuf) >= 1024) {
1421 redisLog(REDIS_DEBUG, "Client protocol error");
1422 freeClient(c);
1423 return;
1424 }
1425 } else {
1426 /* Bulk read handling. Note that if we are at this point
1427 the client already sent a command terminated with a newline,
1428 we are reading the bulk data that is actually the last
1429 argument of the command. */
1430 int qbl = sdslen(c->querybuf);
1431
1432 if (c->bulklen <= qbl) {
1433 /* Copy everything but the final CRLF as final argument */
1434 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1435 c->argc++;
1436 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1437 processCommand(c);
1438 return;
1439 }
1440 }
1441}
1442
1443static int selectDb(redisClient *c, int id) {
1444 if (id < 0 || id >= server.dbnum)
1445 return REDIS_ERR;
1446 c->db = &server.db[id];
1447 return REDIS_OK;
1448}
1449
1450static void *dupClientReplyValue(void *o) {
1451 incrRefCount((robj*)o);
1452 return 0;
1453}
1454
1455static redisClient *createClient(int fd) {
1456 redisClient *c = zmalloc(sizeof(*c));
1457
1458 anetNonBlock(NULL,fd);
1459 anetTcpNoDelay(NULL,fd);
1460 if (!c) return NULL;
1461 selectDb(c,0);
1462 c->fd = fd;
1463 c->querybuf = sdsempty();
1464 c->argc = 0;
1465 c->argv = NULL;
1466 c->bulklen = -1;
1467 c->sentlen = 0;
1468 c->flags = 0;
1469 c->lastinteraction = time(NULL);
1470 c->authenticated = 0;
1471 c->replstate = REDIS_REPL_NONE;
1472 if ((c->reply = listCreate()) == NULL) oom("listCreate");
1473 listSetFreeMethod(c->reply,decrRefCount);
1474 listSetDupMethod(c->reply,dupClientReplyValue);
1475 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1476 readQueryFromClient, c, NULL) == AE_ERR) {
1477 freeClient(c);
1478 return NULL;
1479 }
1480 if (!listAddNodeTail(server.clients,c)) oom("listAddNodeTail");
1481 return c;
1482}
1483
1484static void addReply(redisClient *c, robj *obj) {
1485 if (listLength(c->reply) == 0 &&
1486 (c->replstate == REDIS_REPL_NONE ||
1487 c->replstate == REDIS_REPL_ONLINE) &&
1488 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
1489 sendReplyToClient, c, NULL) == AE_ERR) return;
1490 if (!listAddNodeTail(c->reply,obj)) oom("listAddNodeTail");
1491 incrRefCount(obj);
1492}
1493
1494static void addReplySds(redisClient *c, sds s) {
1495 robj *o = createObject(REDIS_STRING,s);
1496 addReply(c,o);
1497 decrRefCount(o);
1498}
1499
1500static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
1501 int cport, cfd;
1502 char cip[128];
1503 REDIS_NOTUSED(el);
1504 REDIS_NOTUSED(mask);
1505 REDIS_NOTUSED(privdata);
1506
1507 cfd = anetAccept(server.neterr, fd, cip, &cport);
1508 if (cfd == AE_ERR) {
1509 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
1510 return;
1511 }
1512 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
1513 if (createClient(cfd) == NULL) {
1514 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
1515 close(cfd); /* May be already closed, just ingore errors */
1516 return;
1517 }
1518 server.stat_numconnections++;
1519}
1520
1521/* ======================= Redis objects implementation ===================== */
1522
1523static robj *createObject(int type, void *ptr) {
1524 robj *o;
1525
1526 if (listLength(server.objfreelist)) {
1527 listNode *head = listFirst(server.objfreelist);
1528 o = listNodeValue(head);
1529 listDelNode(server.objfreelist,head);
1530 } else {
1531 o = zmalloc(sizeof(*o));
1532 }
1533 if (!o) oom("createObject");
1534 o->type = type;
1535 o->ptr = ptr;
1536 o->refcount = 1;
1537 return o;
1538}
1539
1540static robj *createStringObject(char *ptr, size_t len) {
1541 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
1542}
1543
1544static robj *createListObject(void) {
1545 list *l = listCreate();
1546
1547 if (!l) oom("listCreate");
1548 listSetFreeMethod(l,decrRefCount);
1549 return createObject(REDIS_LIST,l);
1550}
1551
1552static robj *createSetObject(void) {
1553 dict *d = dictCreate(&setDictType,NULL);
1554 if (!d) oom("dictCreate");
1555 return createObject(REDIS_SET,d);
1556}
1557
1558static void freeStringObject(robj *o) {
1559 sdsfree(o->ptr);
1560}
1561
1562static void freeListObject(robj *o) {
1563 listRelease((list*) o->ptr);
1564}
1565
1566static void freeSetObject(robj *o) {
1567 dictRelease((dict*) o->ptr);
1568}
1569
1570static void freeHashObject(robj *o) {
1571 dictRelease((dict*) o->ptr);
1572}
1573
1574static void incrRefCount(robj *o) {
1575 o->refcount++;
1576#ifdef DEBUG_REFCOUNT
1577 if (o->type == REDIS_STRING)
1578 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
1579#endif
1580}
1581
1582static void decrRefCount(void *obj) {
1583 robj *o = obj;
1584
1585#ifdef DEBUG_REFCOUNT
1586 if (o->type == REDIS_STRING)
1587 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
1588#endif
1589 if (--(o->refcount) == 0) {
1590 switch(o->type) {
1591 case REDIS_STRING: freeStringObject(o); break;
1592 case REDIS_LIST: freeListObject(o); break;
1593 case REDIS_SET: freeSetObject(o); break;
1594 case REDIS_HASH: freeHashObject(o); break;
1595 default: assert(0 != 0); break;
1596 }
1597 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
1598 !listAddNodeHead(server.objfreelist,o))
1599 zfree(o);
1600 }
1601}
1602
1603/* Try to share an object against the shared objects pool */
1604static robj *tryObjectSharing(robj *o) {
1605 struct dictEntry *de;
1606 unsigned long c;
1607
1608 if (o == NULL || server.shareobjects == 0) return o;
1609
1610 assert(o->type == REDIS_STRING);
1611 de = dictFind(server.sharingpool,o);
1612 if (de) {
1613 robj *shared = dictGetEntryKey(de);
1614
1615 c = ((unsigned long) dictGetEntryVal(de))+1;
1616 dictGetEntryVal(de) = (void*) c;
1617 incrRefCount(shared);
1618 decrRefCount(o);
1619 return shared;
1620 } else {
1621 /* Here we are using a stream algorihtm: Every time an object is
1622 * shared we increment its count, everytime there is a miss we
1623 * recrement the counter of a random object. If this object reaches
1624 * zero we remove the object and put the current object instead. */
1625 if (dictSize(server.sharingpool) >=
1626 server.sharingpoolsize) {
1627 de = dictGetRandomKey(server.sharingpool);
1628 assert(de != NULL);
1629 c = ((unsigned long) dictGetEntryVal(de))-1;
1630 dictGetEntryVal(de) = (void*) c;
1631 if (c == 0) {
1632 dictDelete(server.sharingpool,de->key);
1633 }
1634 } else {
1635 c = 0; /* If the pool is empty we want to add this object */
1636 }
1637 if (c == 0) {
1638 int retval;
1639
1640 retval = dictAdd(server.sharingpool,o,(void*)1);
1641 assert(retval == DICT_OK);
1642 incrRefCount(o);
1643 }
1644 return o;
1645 }
1646}
1647
1648static robj *lookupKey(redisDb *db, robj *key) {
1649 dictEntry *de = dictFind(db->dict,key);
1650 return de ? dictGetEntryVal(de) : NULL;
1651}
1652
1653static robj *lookupKeyRead(redisDb *db, robj *key) {
1654 expireIfNeeded(db,key);
1655 return lookupKey(db,key);
1656}
1657
1658static robj *lookupKeyWrite(redisDb *db, robj *key) {
1659 deleteIfVolatile(db,key);
1660 return lookupKey(db,key);
1661}
1662
1663static int deleteKey(redisDb *db, robj *key) {
1664 int retval;
1665
1666 /* We need to protect key from destruction: after the first dictDelete()
1667 * it may happen that 'key' is no longer valid if we don't increment
1668 * it's count. This may happen when we get the object reference directly
1669 * from the hash table with dictRandomKey() or dict iterators */
1670 incrRefCount(key);
1671 if (dictSize(db->expires)) dictDelete(db->expires,key);
1672 retval = dictDelete(db->dict,key);
1673 decrRefCount(key);
1674
1675 return retval == DICT_OK;
1676}
1677
1678/*============================ DB saving/loading ============================ */
1679
/* Write the one byte 'type' to 'fp'.
 * Returns 0 on success, -1 on write error. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
1684
/* Write the time 't' to 'fp' as a 32 bit signed integer, using the byte
 * order of the host. Returns 0 on success, -1 on write error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;

    return (fwrite(&stamp,4,1,fp) == 0) ? -1 : 0;
}
1690
1691/* check rdbLoadLen() comments for more info */
1692static int rdbSaveLen(FILE *fp, uint32_t len) {
1693 unsigned char buf[2];
1694
1695 if (len < (1<<6)) {
1696 /* Save a 6 bit len */
1697 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
1698 if (fwrite(buf,1,1,fp) == 0) return -1;
1699 } else if (len < (1<<14)) {
1700 /* Save a 14 bit len */
1701 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
1702 buf[1] = len&0xFF;
1703 if (fwrite(buf,2,1,fp) == 0) return -1;
1704 } else {
1705 /* Save a 32 bit len */
1706 buf[0] = (REDIS_RDB_32BITLEN<<6);
1707 if (fwrite(buf,1,1,fp) == 0) return -1;
1708 len = htonl(len);
1709 if (fwrite(&len,4,1,fp) == 0) return -1;
1710 }
1711 return 0;
1712}
1713
1714/* String objects in the form "2391" "-100" without any space and with a
1715 * range of values that can fit in an 8, 16 or 32 bit signed value can be
1716 * encoded as integers to save space */
1717int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
1718 long long value;
1719 char *endptr, buf[32];
1720
1721 /* Check if it's possible to encode this value as a number */
1722 value = strtoll(s, &endptr, 10);
1723 if (endptr[0] != '\0') return 0;
1724 snprintf(buf,32,"%lld",value);
1725
1726 /* If the number converted back into a string is not identical
1727 * then it's not possible to encode the string as integer */
1728 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
1729
1730 /* Finally check if it fits in our ranges */
1731 if (value >= -(1<<7) && value <= (1<<7)-1) {
1732 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
1733 enc[1] = value&0xFF;
1734 return 2;
1735 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
1736 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
1737 enc[1] = value&0xFF;
1738 enc[2] = (value>>8)&0xFF;
1739 return 3;
1740 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
1741 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
1742 enc[1] = value&0xFF;
1743 enc[2] = (value>>8)&0xFF;
1744 enc[3] = (value>>16)&0xFF;
1745 enc[4] = (value>>24)&0xFF;
1746 return 5;
1747 } else {
1748 return 0;
1749 }
1750}
1751
1752static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
1753 unsigned int comprlen, outlen;
1754 unsigned char byte;
1755 void *out;
1756
1757 /* We require at least four bytes compression for this to be worth it */
1758 outlen = sdslen(obj->ptr)-4;
1759 if (outlen <= 0) return 0;
1760 if ((out = zmalloc(outlen+1)) == NULL) return 0;
1761 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
1762 if (comprlen == 0) {
1763 zfree(out);
1764 return 0;
1765 }
1766 /* Data compressed! Let's save it on disk */
1767 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
1768 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
1769 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
1770 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
1771 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
1772 zfree(out);
1773 return comprlen;
1774
1775writeerr:
1776 zfree(out);
1777 return -1;
1778}
1779
1780/* Save a string objet as [len][data] on disk. If the object is a string
1781 * representation of an integer value we try to safe it in a special form */
1782static int rdbSaveStringObject(FILE *fp, robj *obj) {
1783 size_t len = sdslen(obj->ptr);
1784 int enclen;
1785
1786 /* Try integer encoding */
1787 if (len <= 11) {
1788 unsigned char buf[5];
1789 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
1790 if (fwrite(buf,enclen,1,fp) == 0) return -1;
1791 return 0;
1792 }
1793 }
1794
1795 /* Try LZF compression - under 20 bytes it's unable to compress even
1796 * aaaaaaaaaaaaaaaaaa so skip it */
1797 if (1 && len > 20) {
1798 int retval;
1799
1800 retval = rdbSaveLzfStringObject(fp,obj);
1801 if (retval == -1) return -1;
1802 if (retval > 0) return 0;
1803 /* retval == 0 means data can't be compressed, save the old way */
1804 }
1805
1806 /* Store verbatim */
1807 if (rdbSaveLen(fp,len) == -1) return -1;
1808 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
1809 return 0;
1810}
1811
1812/* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
1813static int rdbSave(char *filename) {
1814 dictIterator *di = NULL;
1815 dictEntry *de;
1816 FILE *fp;
1817 char tmpfile[256];
1818 int j;
1819 time_t now = time(NULL);
1820
1821 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
1822 fp = fopen(tmpfile,"w");
1823 if (!fp) {
1824 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
1825 return REDIS_ERR;
1826 }
1827 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
1828 for (j = 0; j < server.dbnum; j++) {
1829 redisDb *db = server.db+j;
1830 dict *d = db->dict;
1831 if (dictSize(d) == 0) continue;
1832 di = dictGetIterator(d);
1833 if (!di) {
1834 fclose(fp);
1835 return REDIS_ERR;
1836 }
1837
1838 /* Write the SELECT DB opcode */
1839 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
1840 if (rdbSaveLen(fp,j) == -1) goto werr;
1841
1842 /* Iterate this DB writing every entry */
1843 while((de = dictNext(di)) != NULL) {
1844 robj *key = dictGetEntryKey(de);
1845 robj *o = dictGetEntryVal(de);
1846 time_t expiretime = getExpire(db,key);
1847
1848 /* Save the expire time */
1849 if (expiretime != -1) {
1850 /* If this key is already expired skip it */
1851 if (expiretime < now) continue;
1852 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
1853 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
1854 }
1855 /* Save the key and associated value */
1856 if (rdbSaveType(fp,o->type) == -1) goto werr;
1857 if (rdbSaveStringObject(fp,key) == -1) goto werr;
1858 if (o->type == REDIS_STRING) {
1859 /* Save a string value */
1860 if (rdbSaveStringObject(fp,o) == -1) goto werr;
1861 } else if (o->type == REDIS_LIST) {
1862 /* Save a list value */
1863 list *list = o->ptr;
1864 listNode *ln;
1865
1866 listRewind(list);
1867 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
1868 while((ln = listYield(list))) {
1869 robj *eleobj = listNodeValue(ln);
1870
1871 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
1872 }
1873 } else if (o->type == REDIS_SET) {
1874 /* Save a set value */
1875 dict *set = o->ptr;
1876 dictIterator *di = dictGetIterator(set);
1877 dictEntry *de;
1878
1879 if (!set) oom("dictGetIteraotr");
1880 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
1881 while((de = dictNext(di)) != NULL) {
1882 robj *eleobj = dictGetEntryKey(de);
1883
1884 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
1885 }
1886 dictReleaseIterator(di);
1887 } else {
1888 assert(0 != 0);
1889 }
1890 }
1891 dictReleaseIterator(di);
1892 }
1893 /* EOF opcode */
1894 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
1895
1896 /* Make sure data will not remain on the OS's output buffers */
1897 fflush(fp);
1898 fsync(fileno(fp));
1899 fclose(fp);
1900
1901 /* Use RENAME to make sure the DB file is changed atomically only
1902 * if the generate DB file is ok. */
1903 if (rename(tmpfile,filename) == -1) {
1904 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destionation: %s", strerror(errno));
1905 unlink(tmpfile);
1906 return REDIS_ERR;
1907 }
1908 redisLog(REDIS_NOTICE,"DB saved on disk");
1909 server.dirty = 0;
1910 server.lastsave = time(NULL);
1911 return REDIS_OK;
1912
1913werr:
1914 fclose(fp);
1915 unlink(tmpfile);
1916 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
1917 if (di) dictReleaseIterator(di);
1918 return REDIS_ERR;
1919}
1920
1921static int rdbSaveBackground(char *filename) {
1922 pid_t childpid;
1923
1924 if (server.bgsaveinprogress) return REDIS_ERR;
1925 if ((childpid = fork()) == 0) {
1926 /* Child */
1927 close(server.fd);
1928 if (rdbSave(filename) == REDIS_OK) {
1929 exit(0);
1930 } else {
1931 exit(1);
1932 }
1933 } else {
1934 /* Parent */
1935 if (childpid == -1) {
1936 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
1937 strerror(errno));
1938 return REDIS_ERR;
1939 }
1940 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
1941 server.bgsaveinprogress = 1;
1942 return REDIS_OK;
1943 }
1944 return REDIS_OK; /* unreached */
1945}
1946
/* Read a single type/opcode byte from 'fp'.
 * Returns the byte value, or -1 if the byte could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char byte;

    if (fread(&byte,sizeof(byte),1,fp) != 1) return -1;
    return byte;
}
1952
/* Read a 4 byte timestamp from 'fp' (stored as a raw int32_t) and return it
 * as a time_t. Returns -1 if the four bytes could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t raw;

    if (fread(&raw,sizeof(raw),1,fp) != 1) return -1;
    return (time_t) raw;
}
1958
1959/* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
1960 * of this file for a description of how this are stored on disk.
1961 *
1962 * isencoded is set to 1 if the readed length is not actually a length but
1963 * an "encoding type", check the above comments for more info */
1964static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
1965 unsigned char buf[2];
1966 uint32_t len;
1967
1968 if (isencoded) *isencoded = 0;
1969 if (rdbver == 0) {
1970 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
1971 return ntohl(len);
1972 } else {
1973 int type;
1974
1975 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
1976 type = (buf[0]&0xC0)>>6;
1977 if (type == REDIS_RDB_6BITLEN) {
1978 /* Read a 6 bit len */
1979 return buf[0]&0x3F;
1980 } else if (type == REDIS_RDB_ENCVAL) {
1981 /* Read a 6 bit len encoding type */
1982 if (isencoded) *isencoded = 1;
1983 return buf[0]&0x3F;
1984 } else if (type == REDIS_RDB_14BITLEN) {
1985 /* Read a 14 bit len */
1986 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
1987 return ((buf[0]&0x3F)<<8)|buf[1];
1988 } else {
1989 /* Read a 32 bit len */
1990 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
1991 return ntohl(len);
1992 }
1993 }
1994}
1995
1996static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
1997 unsigned char enc[4];
1998 long long val;
1999
2000 if (enctype == REDIS_RDB_ENC_INT8) {
2001 if (fread(enc,1,1,fp) == 0) return NULL;
2002 val = (signed char)enc[0];
2003 } else if (enctype == REDIS_RDB_ENC_INT16) {
2004 uint16_t v;
2005 if (fread(enc,2,1,fp) == 0) return NULL;
2006 v = enc[0]|(enc[1]<<8);
2007 val = (int16_t)v;
2008 } else if (enctype == REDIS_RDB_ENC_INT32) {
2009 uint32_t v;
2010 if (fread(enc,4,1,fp) == 0) return NULL;
2011 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2012 val = (int32_t)v;
2013 } else {
2014 val = 0; /* anti-warning */
2015 assert(0!=0);
2016 }
2017 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2018}
2019
2020static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2021 unsigned int len, clen;
2022 unsigned char *c = NULL;
2023 sds val = NULL;
2024
2025 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2026 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2027 if ((c = zmalloc(clen)) == NULL) goto err;
2028 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2029 if (fread(c,clen,1,fp) == 0) goto err;
2030 if (lzf_decompress(c,clen,val,len) == 0) goto err;
2031 zfree(c);
2032 return createObject(REDIS_STRING,val);
2033err:
2034 zfree(c);
2035 sdsfree(val);
2036 return NULL;
2037}
2038
2039static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2040 int isencoded;
2041 uint32_t len;
2042 sds val;
2043
2044 len = rdbLoadLen(fp,rdbver,&isencoded);
2045 if (isencoded) {
2046 switch(len) {
2047 case REDIS_RDB_ENC_INT8:
2048 case REDIS_RDB_ENC_INT16:
2049 case REDIS_RDB_ENC_INT32:
2050 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
2051 case REDIS_RDB_ENC_LZF:
2052 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
2053 default:
2054 assert(0!=0);
2055 }
2056 }
2057
2058 if (len == REDIS_RDB_LENERR) return NULL;
2059 val = sdsnewlen(NULL,len);
2060 if (len && fread(val,len,1,fp) == 0) {
2061 sdsfree(val);
2062 return NULL;
2063 }
2064 return tryObjectSharing(createObject(REDIS_STRING,val));
2065}
2066
2067static int rdbLoad(char *filename) {
2068 FILE *fp;
2069 robj *keyobj = NULL;
2070 uint32_t dbid;
2071 int type, retval, rdbver;
2072 dict *d = server.db[0].dict;
2073 redisDb *db = server.db+0;
2074 char buf[1024];
2075 time_t expiretime = -1, now = time(NULL);
2076
2077 fp = fopen(filename,"r");
2078 if (!fp) return REDIS_ERR;
2079 if (fread(buf,9,1,fp) == 0) goto eoferr;
2080 buf[9] = '\0';
2081 if (memcmp(buf,"REDIS",5) != 0) {
2082 fclose(fp);
2083 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
2084 return REDIS_ERR;
2085 }
2086 rdbver = atoi(buf+5);
2087 if (rdbver > 1) {
2088 fclose(fp);
2089 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
2090 return REDIS_ERR;
2091 }
2092 while(1) {
2093 robj *o;
2094
2095 /* Read type. */
2096 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2097 if (type == REDIS_EXPIRETIME) {
2098 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
2099 /* We read the time so we need to read the object type again */
2100 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2101 }
2102 if (type == REDIS_EOF) break;
2103 /* Handle SELECT DB opcode as a special case */
2104 if (type == REDIS_SELECTDB) {
2105 if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2106 goto eoferr;
2107 if (dbid >= (unsigned)server.dbnum) {
2108 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
2109 exit(1);
2110 }
2111 db = server.db+dbid;
2112 d = db->dict;
2113 continue;
2114 }
2115 /* Read key */
2116 if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2117
2118 if (type == REDIS_STRING) {
2119 /* Read string value */
2120 if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2121 } else if (type == REDIS_LIST || type == REDIS_SET) {
2122 /* Read list/set value */
2123 uint32_t listlen;
2124
2125 if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2126 goto eoferr;
2127 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
2128 /* Load every single element of the list/set */
2129 while(listlen--) {
2130 robj *ele;
2131
2132 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2133 if (type == REDIS_LIST) {
2134 if (!listAddNodeTail((list*)o->ptr,ele))
2135 oom("listAddNodeTail");
2136 } else {
2137 if (dictAdd((dict*)o->ptr,ele,NULL) == DICT_ERR)
2138 oom("dictAdd");
2139 }
2140 }
2141 } else {
2142 assert(0 != 0);
2143 }
2144 /* Add the new object in the hash table */
2145 retval = dictAdd(d,keyobj,o);
2146 if (retval == DICT_ERR) {
2147 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
2148 exit(1);
2149 }
2150 /* Set the expire time if needed */
2151 if (expiretime != -1) {
2152 setExpire(db,keyobj,expiretime);
2153 /* Delete this key if already expired */
2154 if (expiretime < now) deleteKey(db,keyobj);
2155 expiretime = -1;
2156 }
2157 keyobj = o = NULL;
2158 }
2159 fclose(fp);
2160 return REDIS_OK;
2161
2162eoferr: /* unexpected end of file is handled here with a fatal exit */
2163 if (keyobj) decrRefCount(keyobj);
2164 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, exiting now.");
2165 exit(1);
2166 return REDIS_ERR; /* Just to avoid warning */
2167}
2168
2169/*================================== Commands =============================== */
2170
2171static void authCommand(redisClient *c) {
2172 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
2173 c->authenticated = 1;
2174 addReply(c,shared.ok);
2175 } else {
2176 c->authenticated = 0;
2177 addReply(c,shared.err);
2178 }
2179}
2180
2181static void pingCommand(redisClient *c) {
2182 addReply(c,shared.pong);
2183}
2184
2185static void echoCommand(redisClient *c) {
2186 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",
2187 (int)sdslen(c->argv[1]->ptr)));
2188 addReply(c,c->argv[1]);
2189 addReply(c,shared.crlf);
2190}
2191
2192/*=================================== Strings =============================== */
2193
2194static void setGenericCommand(redisClient *c, int nx) {
2195 int retval;
2196
2197 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
2198 if (retval == DICT_ERR) {
2199 if (!nx) {
2200 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2201 incrRefCount(c->argv[2]);
2202 } else {
2203 addReply(c,shared.czero);
2204 return;
2205 }
2206 } else {
2207 incrRefCount(c->argv[1]);
2208 incrRefCount(c->argv[2]);
2209 }
2210 server.dirty++;
2211 removeExpire(c->db,c->argv[1]);
2212 addReply(c, nx ? shared.cone : shared.ok);
2213}
2214
2215static void setCommand(redisClient *c) {
2216 setGenericCommand(c,0);
2217}
2218
2219static void setnxCommand(redisClient *c) {
2220 setGenericCommand(c,1);
2221}
2222
2223static void getCommand(redisClient *c) {
2224 robj *o = lookupKeyRead(c->db,c->argv[1]);
2225
2226 if (o == NULL) {
2227 addReply(c,shared.nullbulk);
2228 } else {
2229 if (o->type != REDIS_STRING) {
2230 addReply(c,shared.wrongtypeerr);
2231 } else {
2232 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",(int)sdslen(o->ptr)));
2233 addReply(c,o);
2234 addReply(c,shared.crlf);
2235 }
2236 }
2237}
2238
2239static void getSetCommand(redisClient *c) {
2240 getCommand(c);
2241 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
2242 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2243 } else {
2244 incrRefCount(c->argv[1]);
2245 }
2246 incrRefCount(c->argv[2]);
2247 server.dirty++;
2248 removeExpire(c->db,c->argv[1]);
2249}
2250
2251static void mgetCommand(redisClient *c) {
2252 int j;
2253
2254 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
2255 for (j = 1; j < c->argc; j++) {
2256 robj *o = lookupKeyRead(c->db,c->argv[j]);
2257 if (o == NULL) {
2258 addReply(c,shared.nullbulk);
2259 } else {
2260 if (o->type != REDIS_STRING) {
2261 addReply(c,shared.nullbulk);
2262 } else {
2263 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",(int)sdslen(o->ptr)));
2264 addReply(c,o);
2265 addReply(c,shared.crlf);
2266 }
2267 }
2268 }
2269}
2270
2271static void incrDecrCommand(redisClient *c, long long incr) {
2272 long long value;
2273 int retval;
2274 robj *o;
2275
2276 o = lookupKeyWrite(c->db,c->argv[1]);
2277 if (o == NULL) {
2278 value = 0;
2279 } else {
2280 if (o->type != REDIS_STRING) {
2281 value = 0;
2282 } else {
2283 char *eptr;
2284
2285 value = strtoll(o->ptr, &eptr, 10);
2286 }
2287 }
2288
2289 value += incr;
2290 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
2291 retval = dictAdd(c->db->dict,c->argv[1],o);
2292 if (retval == DICT_ERR) {
2293 dictReplace(c->db->dict,c->argv[1],o);
2294 removeExpire(c->db,c->argv[1]);
2295 } else {
2296 incrRefCount(c->argv[1]);
2297 }
2298 server.dirty++;
2299 addReply(c,shared.colon);
2300 addReply(c,o);
2301 addReply(c,shared.crlf);
2302}
2303
2304static void incrCommand(redisClient *c) {
2305 incrDecrCommand(c,1);
2306}
2307
2308static void decrCommand(redisClient *c) {
2309 incrDecrCommand(c,-1);
2310}
2311
2312static void incrbyCommand(redisClient *c) {
2313 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
2314 incrDecrCommand(c,incr);
2315}
2316
2317static void decrbyCommand(redisClient *c) {
2318 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
2319 incrDecrCommand(c,-incr);
2320}
2321
2322/* ========================= Type agnostic commands ========================= */
2323
2324static void delCommand(redisClient *c) {
2325 int deleted = 0, j;
2326
2327 for (j = 1; j < c->argc; j++) {
2328 if (deleteKey(c->db,c->argv[j])) {
2329 server.dirty++;
2330 deleted++;
2331 }
2332 }
2333 switch(deleted) {
2334 case 0:
2335 addReply(c,shared.czero);
2336 break;
2337 case 1:
2338 addReply(c,shared.cone);
2339 break;
2340 default:
2341 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
2342 break;
2343 }
2344}
2345
2346static void existsCommand(redisClient *c) {
2347 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
2348}
2349
2350static void selectCommand(redisClient *c) {
2351 int id = atoi(c->argv[1]->ptr);
2352
2353 if (selectDb(c,id) == REDIS_ERR) {
2354 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
2355 } else {
2356 addReply(c,shared.ok);
2357 }
2358}
2359
2360static void randomkeyCommand(redisClient *c) {
2361 dictEntry *de;
2362
2363 while(1) {
2364 de = dictGetRandomKey(c->db->dict);
2365 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
2366 }
2367 if (de == NULL) {
2368 addReply(c,shared.plus);
2369 addReply(c,shared.crlf);
2370 } else {
2371 addReply(c,shared.plus);
2372 addReply(c,dictGetEntryKey(de));
2373 addReply(c,shared.crlf);
2374 }
2375}
2376
2377static void keysCommand(redisClient *c) {
2378 dictIterator *di;
2379 dictEntry *de;
2380 sds pattern = c->argv[1]->ptr;
2381 int plen = sdslen(pattern);
2382 int numkeys = 0, keyslen = 0;
2383 robj *lenobj = createObject(REDIS_STRING,NULL);
2384
2385 di = dictGetIterator(c->db->dict);
2386 if (!di) oom("dictGetIterator");
2387 addReply(c,lenobj);
2388 decrRefCount(lenobj);
2389 while((de = dictNext(di)) != NULL) {
2390 robj *keyobj = dictGetEntryKey(de);
2391
2392 sds key = keyobj->ptr;
2393 if ((pattern[0] == '*' && pattern[1] == '\0') ||
2394 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
2395 if (expireIfNeeded(c->db,keyobj) == 0) {
2396 if (numkeys != 0)
2397 addReply(c,shared.space);
2398 addReply(c,keyobj);
2399 numkeys++;
2400 keyslen += sdslen(key);
2401 }
2402 }
2403 }
2404 dictReleaseIterator(di);
2405 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
2406 addReply(c,shared.crlf);
2407}
2408
2409static void dbsizeCommand(redisClient *c) {
2410 addReplySds(c,
2411 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
2412}
2413
2414static void lastsaveCommand(redisClient *c) {
2415 addReplySds(c,
2416 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
2417}
2418
2419static void typeCommand(redisClient *c) {
2420 robj *o;
2421 char *type;
2422
2423 o = lookupKeyRead(c->db,c->argv[1]);
2424 if (o == NULL) {
2425 type = "+none";
2426 } else {
2427 switch(o->type) {
2428 case REDIS_STRING: type = "+string"; break;
2429 case REDIS_LIST: type = "+list"; break;
2430 case REDIS_SET: type = "+set"; break;
2431 default: type = "unknown"; break;
2432 }
2433 }
2434 addReplySds(c,sdsnew(type));
2435 addReply(c,shared.crlf);
2436}
2437
2438static void saveCommand(redisClient *c) {
2439 if (server.bgsaveinprogress) {
2440 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
2441 return;
2442 }
2443 if (rdbSave(server.dbfilename) == REDIS_OK) {
2444 addReply(c,shared.ok);
2445 } else {
2446 addReply(c,shared.err);
2447 }
2448}
2449
2450static void bgsaveCommand(redisClient *c) {
2451 if (server.bgsaveinprogress) {
2452 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
2453 return;
2454 }
2455 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
2456 addReply(c,shared.ok);
2457 } else {
2458 addReply(c,shared.err);
2459 }
2460}
2461
2462static void shutdownCommand(redisClient *c) {
2463 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
2464 /* XXX: TODO kill the child if there is a bgsave in progress */
2465 if (rdbSave(server.dbfilename) == REDIS_OK) {
2466 if (server.daemonize) {
2467 unlink(server.pidfile);
2468 }
2469 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
2470 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
2471 exit(1);
2472 } else {
2473 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
2474 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
2475 }
2476}
2477
2478static void renameGenericCommand(redisClient *c, int nx) {
2479 robj *o;
2480
2481 /* To use the same key as src and dst is probably an error */
2482 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
2483 addReply(c,shared.sameobjecterr);
2484 return;
2485 }
2486
2487 o = lookupKeyWrite(c->db,c->argv[1]);
2488 if (o == NULL) {
2489 addReply(c,shared.nokeyerr);
2490 return;
2491 }
2492 incrRefCount(o);
2493 deleteIfVolatile(c->db,c->argv[2]);
2494 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
2495 if (nx) {
2496 decrRefCount(o);
2497 addReply(c,shared.czero);
2498 return;
2499 }
2500 dictReplace(c->db->dict,c->argv[2],o);
2501 } else {
2502 incrRefCount(c->argv[2]);
2503 }
2504 deleteKey(c->db,c->argv[1]);
2505 server.dirty++;
2506 addReply(c,nx ? shared.cone : shared.ok);
2507}
2508
2509static void renameCommand(redisClient *c) {
2510 renameGenericCommand(c,0);
2511}
2512
2513static void renamenxCommand(redisClient *c) {
2514 renameGenericCommand(c,1);
2515}
2516
2517static void moveCommand(redisClient *c) {
2518 robj *o;
2519 redisDb *src, *dst;
2520 int srcid;
2521
2522 /* Obtain source and target DB pointers */
2523 src = c->db;
2524 srcid = c->db->id;
2525 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
2526 addReply(c,shared.outofrangeerr);
2527 return;
2528 }
2529 dst = c->db;
2530 selectDb(c,srcid); /* Back to the source DB */
2531
2532 /* If the user is moving using as target the same
2533 * DB as the source DB it is probably an error. */
2534 if (src == dst) {
2535 addReply(c,shared.sameobjecterr);
2536 return;
2537 }
2538
2539 /* Check if the element exists and get a reference */
2540 o = lookupKeyWrite(c->db,c->argv[1]);
2541 if (!o) {
2542 addReply(c,shared.czero);
2543 return;
2544 }
2545
2546 /* Try to add the element to the target DB */
2547 deleteIfVolatile(dst,c->argv[1]);
2548 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
2549 addReply(c,shared.czero);
2550 return;
2551 }
2552 incrRefCount(c->argv[1]);
2553 incrRefCount(o);
2554
2555 /* OK! key moved, free the entry in the source DB */
2556 deleteKey(src,c->argv[1]);
2557 server.dirty++;
2558 addReply(c,shared.cone);
2559}
2560
2561/* =================================== Lists ================================ */
2562static void pushGenericCommand(redisClient *c, int where) {
2563 robj *lobj;
2564 list *list;
2565
2566 lobj = lookupKeyWrite(c->db,c->argv[1]);
2567 if (lobj == NULL) {
2568 lobj = createListObject();
2569 list = lobj->ptr;
2570 if (where == REDIS_HEAD) {
2571 if (!listAddNodeHead(list,c->argv[2])) oom("listAddNodeHead");
2572 } else {
2573 if (!listAddNodeTail(list,c->argv[2])) oom("listAddNodeTail");
2574 }
2575 dictAdd(c->db->dict,c->argv[1],lobj);
2576 incrRefCount(c->argv[1]);
2577 incrRefCount(c->argv[2]);
2578 } else {
2579 if (lobj->type != REDIS_LIST) {
2580 addReply(c,shared.wrongtypeerr);
2581 return;
2582 }
2583 list = lobj->ptr;
2584 if (where == REDIS_HEAD) {
2585 if (!listAddNodeHead(list,c->argv[2])) oom("listAddNodeHead");
2586 } else {
2587 if (!listAddNodeTail(list,c->argv[2])) oom("listAddNodeTail");
2588 }
2589 incrRefCount(c->argv[2]);
2590 }
2591 server.dirty++;
2592 addReply(c,shared.ok);
2593}
2594
2595static void lpushCommand(redisClient *c) {
2596 pushGenericCommand(c,REDIS_HEAD);
2597}
2598
2599static void rpushCommand(redisClient *c) {
2600 pushGenericCommand(c,REDIS_TAIL);
2601}
2602
2603static void llenCommand(redisClient *c) {
2604 robj *o;
2605 list *l;
2606
2607 o = lookupKeyRead(c->db,c->argv[1]);
2608 if (o == NULL) {
2609 addReply(c,shared.czero);
2610 return;
2611 } else {
2612 if (o->type != REDIS_LIST) {
2613 addReply(c,shared.wrongtypeerr);
2614 } else {
2615 l = o->ptr;
2616 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
2617 }
2618 }
2619}
2620
2621static void lindexCommand(redisClient *c) {
2622 robj *o;
2623 int index = atoi(c->argv[2]->ptr);
2624
2625 o = lookupKeyRead(c->db,c->argv[1]);
2626 if (o == NULL) {
2627 addReply(c,shared.nullbulk);
2628 } else {
2629 if (o->type != REDIS_LIST) {
2630 addReply(c,shared.wrongtypeerr);
2631 } else {
2632 list *list = o->ptr;
2633 listNode *ln;
2634
2635 ln = listIndex(list, index);
2636 if (ln == NULL) {
2637 addReply(c,shared.nullbulk);
2638 } else {
2639 robj *ele = listNodeValue(ln);
2640 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",(int)sdslen(ele->ptr)));
2641 addReply(c,ele);
2642 addReply(c,shared.crlf);
2643 }
2644 }
2645 }
2646}
2647
2648static void lsetCommand(redisClient *c) {
2649 robj *o;
2650 int index = atoi(c->argv[2]->ptr);
2651
2652 o = lookupKeyWrite(c->db,c->argv[1]);
2653 if (o == NULL) {
2654 addReply(c,shared.nokeyerr);
2655 } else {
2656 if (o->type != REDIS_LIST) {
2657 addReply(c,shared.wrongtypeerr);
2658 } else {
2659 list *list = o->ptr;
2660 listNode *ln;
2661
2662 ln = listIndex(list, index);
2663 if (ln == NULL) {
2664 addReply(c,shared.outofrangeerr);
2665 } else {
2666 robj *ele = listNodeValue(ln);
2667
2668 decrRefCount(ele);
2669 listNodeValue(ln) = c->argv[3];
2670 incrRefCount(c->argv[3]);
2671 addReply(c,shared.ok);
2672 server.dirty++;
2673 }
2674 }
2675 }
2676}
2677
2678static void popGenericCommand(redisClient *c, int where) {
2679 robj *o;
2680
2681 o = lookupKeyWrite(c->db,c->argv[1]);
2682 if (o == NULL) {
2683 addReply(c,shared.nullbulk);
2684 } else {
2685 if (o->type != REDIS_LIST) {
2686 addReply(c,shared.wrongtypeerr);
2687 } else {
2688 list *list = o->ptr;
2689 listNode *ln;
2690
2691 if (where == REDIS_HEAD)
2692 ln = listFirst(list);
2693 else
2694 ln = listLast(list);
2695
2696 if (ln == NULL) {
2697 addReply(c,shared.nullbulk);
2698 } else {
2699 robj *ele = listNodeValue(ln);
2700 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",(int)sdslen(ele->ptr)));
2701 addReply(c,ele);
2702 addReply(c,shared.crlf);
2703 listDelNode(list,ln);
2704 server.dirty++;
2705 }
2706 }
2707 }
2708}
2709
2710static void lpopCommand(redisClient *c) {
2711 popGenericCommand(c,REDIS_HEAD);
2712}
2713
2714static void rpopCommand(redisClient *c) {
2715 popGenericCommand(c,REDIS_TAIL);
2716}
2717
2718static void lrangeCommand(redisClient *c) {
2719 robj *o;
2720 int start = atoi(c->argv[2]->ptr);
2721 int end = atoi(c->argv[3]->ptr);
2722
2723 o = lookupKeyRead(c->db,c->argv[1]);
2724 if (o == NULL) {
2725 addReply(c,shared.nullmultibulk);
2726 } else {
2727 if (o->type != REDIS_LIST) {
2728 addReply(c,shared.wrongtypeerr);
2729 } else {
2730 list *list = o->ptr;
2731 listNode *ln;
2732 int llen = listLength(list);
2733 int rangelen, j;
2734 robj *ele;
2735
2736 /* convert negative indexes */
2737 if (start < 0) start = llen+start;
2738 if (end < 0) end = llen+end;
2739 if (start < 0) start = 0;
2740 if (end < 0) end = 0;
2741
2742 /* indexes sanity checks */
2743 if (start > end || start >= llen) {
2744 /* Out of range start or start > end result in empty list */
2745 addReply(c,shared.emptymultibulk);
2746 return;
2747 }
2748 if (end >= llen) end = llen-1;
2749 rangelen = (end-start)+1;
2750
2751 /* Return the result in form of a multi-bulk reply */
2752 ln = listIndex(list, start);
2753 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
2754 for (j = 0; j < rangelen; j++) {
2755 ele = listNodeValue(ln);
2756 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",(int)sdslen(ele->ptr)));
2757 addReply(c,ele);
2758 addReply(c,shared.crlf);
2759 ln = ln->next;
2760 }
2761 }
2762 }
2763}
2764
2765static void ltrimCommand(redisClient *c) {
2766 robj *o;
2767 int start = atoi(c->argv[2]->ptr);
2768 int end = atoi(c->argv[3]->ptr);
2769
2770 o = lookupKeyWrite(c->db,c->argv[1]);
2771 if (o == NULL) {
2772 addReply(c,shared.nokeyerr);
2773 } else {
2774 if (o->type != REDIS_LIST) {
2775 addReply(c,shared.wrongtypeerr);
2776 } else {
2777 list *list = o->ptr;
2778 listNode *ln;
2779 int llen = listLength(list);
2780 int j, ltrim, rtrim;
2781
2782 /* convert negative indexes */
2783 if (start < 0) start = llen+start;
2784 if (end < 0) end = llen+end;
2785 if (start < 0) start = 0;
2786 if (end < 0) end = 0;
2787
2788 /* indexes sanity checks */
2789 if (start > end || start >= llen) {
2790 /* Out of range start or start > end result in empty list */
2791 ltrim = llen;
2792 rtrim = 0;
2793 } else {
2794 if (end >= llen) end = llen-1;
2795 ltrim = start;
2796 rtrim = llen-end-1;
2797 }
2798
2799 /* Remove list elements to perform the trim */
2800 for (j = 0; j < ltrim; j++) {
2801 ln = listFirst(list);
2802 listDelNode(list,ln);
2803 }
2804 for (j = 0; j < rtrim; j++) {
2805 ln = listLast(list);
2806 listDelNode(list,ln);
2807 }
2808 addReply(c,shared.ok);
2809 server.dirty++;
2810 }
2811 }
2812}
2813
2814static void lremCommand(redisClient *c) {
2815 robj *o;
2816
2817 o = lookupKeyWrite(c->db,c->argv[1]);
2818 if (o == NULL) {
2819 addReply(c,shared.nokeyerr);
2820 } else {
2821 if (o->type != REDIS_LIST) {
2822 addReply(c,shared.wrongtypeerr);
2823 } else {
2824 list *list = o->ptr;
2825 listNode *ln, *next;
2826 int toremove = atoi(c->argv[2]->ptr);
2827 int removed = 0;
2828 int fromtail = 0;
2829
2830 if (toremove < 0) {
2831 toremove = -toremove;
2832 fromtail = 1;
2833 }
2834 ln = fromtail ? list->tail : list->head;
2835 while (ln) {
2836 robj *ele = listNodeValue(ln);
2837
2838 next = fromtail ? ln->prev : ln->next;
2839 if (sdscmp(ele->ptr,c->argv[3]->ptr) == 0) {
2840 listDelNode(list,ln);
2841 server.dirty++;
2842 removed++;
2843 if (toremove && removed == toremove) break;
2844 }
2845 ln = next;
2846 }
2847 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
2848 }
2849 }
2850}
2851
2852/* ==================================== Sets ================================ */
2853
2854static void saddCommand(redisClient *c) {
2855 robj *set;
2856
2857 set = lookupKeyWrite(c->db,c->argv[1]);
2858 if (set == NULL) {
2859 set = createSetObject();
2860 dictAdd(c->db->dict,c->argv[1],set);
2861 incrRefCount(c->argv[1]);
2862 } else {
2863 if (set->type != REDIS_SET) {
2864 addReply(c,shared.wrongtypeerr);
2865 return;
2866 }
2867 }
2868 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
2869 incrRefCount(c->argv[2]);
2870 server.dirty++;
2871 addReply(c,shared.cone);
2872 } else {
2873 addReply(c,shared.czero);
2874 }
2875}
2876
2877static void sremCommand(redisClient *c) {
2878 robj *set;
2879
2880 set = lookupKeyWrite(c->db,c->argv[1]);
2881 if (set == NULL) {
2882 addReply(c,shared.czero);
2883 } else {
2884 if (set->type != REDIS_SET) {
2885 addReply(c,shared.wrongtypeerr);
2886 return;
2887 }
2888 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
2889 server.dirty++;
2890 addReply(c,shared.cone);
2891 } else {
2892 addReply(c,shared.czero);
2893 }
2894 }
2895}
2896
2897static void smoveCommand(redisClient *c) {
2898 robj *srcset, *dstset;
2899
2900 srcset = lookupKeyWrite(c->db,c->argv[1]);
2901 dstset = lookupKeyWrite(c->db,c->argv[2]);
2902
2903 /* If the source key does not exist return 0, if it's of the wrong type
2904 * raise an error */
2905 if (srcset == NULL || srcset->type != REDIS_SET) {
2906 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
2907 return;
2908 }
2909 /* Error if the destination key is not a set as well */
2910 if (dstset && dstset->type != REDIS_SET) {
2911 addReply(c,shared.wrongtypeerr);
2912 return;
2913 }
2914 /* Remove the element from the source set */
2915 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
2916 /* Key not found in the src set! return zero */
2917 addReply(c,shared.czero);
2918 return;
2919 }
2920 server.dirty++;
2921 /* Add the element to the destination set */
2922 if (!dstset) {
2923 dstset = createSetObject();
2924 dictAdd(c->db->dict,c->argv[2],dstset);
2925 incrRefCount(c->argv[2]);
2926 }
2927 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
2928 incrRefCount(c->argv[3]);
2929 addReply(c,shared.cone);
2930}
2931
2932static void sismemberCommand(redisClient *c) {
2933 robj *set;
2934
2935 set = lookupKeyRead(c->db,c->argv[1]);
2936 if (set == NULL) {
2937 addReply(c,shared.czero);
2938 } else {
2939 if (set->type != REDIS_SET) {
2940 addReply(c,shared.wrongtypeerr);
2941 return;
2942 }
2943 if (dictFind(set->ptr,c->argv[2]))
2944 addReply(c,shared.cone);
2945 else
2946 addReply(c,shared.czero);
2947 }
2948}
2949
2950static void scardCommand(redisClient *c) {
2951 robj *o;
2952 dict *s;
2953
2954 o = lookupKeyRead(c->db,c->argv[1]);
2955 if (o == NULL) {
2956 addReply(c,shared.czero);
2957 return;
2958 } else {
2959 if (o->type != REDIS_SET) {
2960 addReply(c,shared.wrongtypeerr);
2961 } else {
2962 s = o->ptr;
2963 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
2964 dictSize(s)));
2965 }
2966 }
2967}
2968
2969static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
2970 dict **d1 = (void*) s1, **d2 = (void*) s2;
2971
2972 return dictSize(*d1)-dictSize(*d2);
2973}
2974
2975static void sinterGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey) {
2976 dict **dv = zmalloc(sizeof(dict*)*setsnum);
2977 dictIterator *di;
2978 dictEntry *de;
2979 robj *lenobj = NULL, *dstset = NULL;
2980 int j, cardinality = 0;
2981
2982 if (!dv) oom("sinterGenericCommand");
2983 for (j = 0; j < setsnum; j++) {
2984 robj *setobj;
2985
2986 setobj = dstkey ?
2987 lookupKeyWrite(c->db,setskeys[j]) :
2988 lookupKeyRead(c->db,setskeys[j]);
2989 if (!setobj) {
2990 zfree(dv);
2991 if (dstkey) {
2992 deleteKey(c->db,dstkey);
2993 addReply(c,shared.ok);
2994 } else {
2995 addReply(c,shared.nullmultibulk);
2996 }
2997 return;
2998 }
2999 if (setobj->type != REDIS_SET) {
3000 zfree(dv);
3001 addReply(c,shared.wrongtypeerr);
3002 return;
3003 }
3004 dv[j] = setobj->ptr;
3005 }
3006 /* Sort sets from the smallest to largest, this will improve our
3007 * algorithm's performace */
3008 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3009
3010 /* The first thing we should output is the total number of elements...
3011 * since this is a multi-bulk write, but at this stage we don't know
3012 * the intersection set size, so we use a trick, append an empty object
3013 * to the output list and save the pointer to later modify it with the
3014 * right length */
3015 if (!dstkey) {
3016 lenobj = createObject(REDIS_STRING,NULL);
3017 addReply(c,lenobj);
3018 decrRefCount(lenobj);
3019 } else {
3020 /* If we have a target key where to store the resulting set
3021 * create this key with an empty set inside */
3022 dstset = createSetObject();
3023 }
3024
3025 /* Iterate all the elements of the first (smallest) set, and test
3026 * the element against all the other sets, if at least one set does
3027 * not include the element it is discarded */
3028 di = dictGetIterator(dv[0]);
3029 if (!di) oom("dictGetIterator");
3030
3031 while((de = dictNext(di)) != NULL) {
3032 robj *ele;
3033
3034 for (j = 1; j < setsnum; j++)
3035 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
3036 if (j != setsnum)
3037 continue; /* at least one set does not contain the member */
3038 ele = dictGetEntryKey(de);
3039 if (!dstkey) {
3040 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(ele->ptr)));
3041 addReply(c,ele);
3042 addReply(c,shared.crlf);
3043 cardinality++;
3044 } else {
3045 dictAdd(dstset->ptr,ele,NULL);
3046 incrRefCount(ele);
3047 }
3048 }
3049 dictReleaseIterator(di);
3050
3051 if (dstkey) {
3052 /* Store the resulting set into the target */
3053 deleteKey(c->db,dstkey);
3054 dictAdd(c->db->dict,dstkey,dstset);
3055 incrRefCount(dstkey);
3056 }
3057
3058 if (!dstkey) {
3059 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",cardinality);
3060 } else {
3061 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3062 dictSize((dict*)dstset->ptr)));
3063 server.dirty++;
3064 }
3065 zfree(dv);
3066}
3067
3068static void sinterCommand(redisClient *c) {
3069 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
3070}
3071
3072static void sinterstoreCommand(redisClient *c) {
3073 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
3074}
3075
3076#define REDIS_OP_UNION 0
3077#define REDIS_OP_DIFF 1
3078
3079static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
3080 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3081 dictIterator *di;
3082 dictEntry *de;
3083 robj *dstset = NULL;
3084 int j, cardinality = 0;
3085
3086 if (!dv) oom("sunionDiffGenericCommand");
3087 for (j = 0; j < setsnum; j++) {
3088 robj *setobj;
3089
3090 setobj = dstkey ?
3091 lookupKeyWrite(c->db,setskeys[j]) :
3092 lookupKeyRead(c->db,setskeys[j]);
3093 if (!setobj) {
3094 dv[j] = NULL;
3095 continue;
3096 }
3097 if (setobj->type != REDIS_SET) {
3098 zfree(dv);
3099 addReply(c,shared.wrongtypeerr);
3100 return;
3101 }
3102 dv[j] = setobj->ptr;
3103 }
3104
3105 /* We need a temp set object to store our union. If the dstkey
3106 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
3107 * this set object will be the resulting object to set into the target key*/
3108 dstset = createSetObject();
3109
3110 /* Iterate all the elements of all the sets, add every element a single
3111 * time to the result set */
3112 for (j = 0; j < setsnum; j++) {
3113 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
3114 if (!dv[j]) continue; /* non existing keys are like empty sets */
3115
3116 di = dictGetIterator(dv[j]);
3117 if (!di) oom("dictGetIterator");
3118
3119 while((de = dictNext(di)) != NULL) {
3120 robj *ele;
3121
3122 /* dictAdd will not add the same element multiple times */
3123 ele = dictGetEntryKey(de);
3124 if (op == REDIS_OP_UNION || j == 0) {
3125 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
3126 incrRefCount(ele);
3127 cardinality++;
3128 }
3129 } else if (op == REDIS_OP_DIFF) {
3130 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
3131 cardinality--;
3132 }
3133 }
3134 }
3135 dictReleaseIterator(di);
3136
3137 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
3138 }
3139
3140 /* Output the content of the resulting set, if not in STORE mode */
3141 if (!dstkey) {
3142 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
3143 di = dictGetIterator(dstset->ptr);
3144 if (!di) oom("dictGetIterator");
3145 while((de = dictNext(di)) != NULL) {
3146 robj *ele;
3147
3148 ele = dictGetEntryKey(de);
3149 addReplySds(c,sdscatprintf(sdsempty(),
3150 "$%d\r\n",sdslen(ele->ptr)));
3151 addReply(c,ele);
3152 addReply(c,shared.crlf);
3153 }
3154 dictReleaseIterator(di);
3155 } else {
3156 /* If we have a target key where to store the resulting set
3157 * create this key with the result set inside */
3158 deleteKey(c->db,dstkey);
3159 dictAdd(c->db->dict,dstkey,dstset);
3160 incrRefCount(dstkey);
3161 }
3162
3163 /* Cleanup */
3164 if (!dstkey) {
3165 decrRefCount(dstset);
3166 } else {
3167 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3168 dictSize((dict*)dstset->ptr)));
3169 server.dirty++;
3170 }
3171 zfree(dv);
3172}
3173
3174static void sunionCommand(redisClient *c) {
3175 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
3176}
3177
3178static void sunionstoreCommand(redisClient *c) {
3179 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
3180}
3181
3182static void sdiffCommand(redisClient *c) {
3183 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
3184}
3185
3186static void sdiffstoreCommand(redisClient *c) {
3187 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
3188}
3189
3190static void flushdbCommand(redisClient *c) {
3191 server.dirty += dictSize(c->db->dict);
3192 dictEmpty(c->db->dict);
3193 dictEmpty(c->db->expires);
3194 addReply(c,shared.ok);
3195}
3196
/* FLUSHALL: empty the dataset through emptyDb(), reply +OK and then save
 * the (now empty) dataset on disk synchronously with rdbSave(). */
static void flushallCommand(redisClient *c) {
    /* emptyDb() return value is accounted as changes */
    server.dirty += emptyDb();
    addReply(c,shared.ok);
    rdbSave(server.dbfilename);
    /* NOTE(review): presumably rdbSave() resets server.dirty, so the
     * increment is done afterwards to still record this command as a
     * change - confirm against rdbSave(). */
    server.dirty++;
}
3203
3204redisSortOperation *createSortOperation(int type, robj *pattern) {
3205 redisSortOperation *so = zmalloc(sizeof(*so));
3206 if (!so) oom("createSortOperation");
3207 so->type = type;
3208 so->pattern = pattern;
3209 return so;
3210}
3211
3212/* Return the value associated to the key with a name obtained
3213 * substituting the first occurence of '*' in 'pattern' with 'subst' */
3214robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
3215 char *p;
3216 sds spat, ssub;
3217 robj keyobj;
3218 int prefixlen, sublen, postfixlen;
3219 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
3220 struct {
3221 long len;
3222 long free;
3223 char buf[REDIS_SORTKEY_MAX+1];
3224 } keyname;
3225
3226 spat = pattern->ptr;
3227 ssub = subst->ptr;
3228 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
3229 p = strchr(spat,'*');
3230 if (!p) return NULL;
3231
3232 prefixlen = p-spat;
3233 sublen = sdslen(ssub);
3234 postfixlen = sdslen(spat)-(prefixlen+1);
3235 memcpy(keyname.buf,spat,prefixlen);
3236 memcpy(keyname.buf+prefixlen,ssub,sublen);
3237 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
3238 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
3239 keyname.len = prefixlen+sublen+postfixlen;
3240
3241 keyobj.refcount = 1;
3242 keyobj.type = REDIS_STRING;
3243 keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
3244
3245 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
3246 return lookupKeyRead(db,&keyobj);
3247}
3248
3249/* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
3250 * the additional parameter is not standard but a BSD-specific we have to
3251 * pass sorting parameters via the global 'server' structure */
3252static int sortCompare(const void *s1, const void *s2) {
3253 const redisSortObject *so1 = s1, *so2 = s2;
3254 int cmp;
3255
3256 if (!server.sort_alpha) {
3257 /* Numeric sorting. Here it's trivial as we precomputed scores */
3258 if (so1->u.score > so2->u.score) {
3259 cmp = 1;
3260 } else if (so1->u.score < so2->u.score) {
3261 cmp = -1;
3262 } else {
3263 cmp = 0;
3264 }
3265 } else {
3266 /* Alphanumeric sorting */
3267 if (server.sort_bypattern) {
3268 if (!so1->u.cmpobj || !so2->u.cmpobj) {
3269 /* At least one compare object is NULL */
3270 if (so1->u.cmpobj == so2->u.cmpobj)
3271 cmp = 0;
3272 else if (so1->u.cmpobj == NULL)
3273 cmp = -1;
3274 else
3275 cmp = 1;
3276 } else {
3277 /* We have both the objects, use strcoll */
3278 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
3279 }
3280 } else {
3281 /* Compare elements directly */
3282 cmp = strcoll(so1->obj->ptr,so2->obj->ptr);
3283 }
3284 }
3285 return server.sort_desc ? -cmp : cmp;
3286}
3287
3288/* The SORT command is the most complex command in Redis. Warning: this code
3289 * is optimized for speed and a bit less for readability */
3290static void sortCommand(redisClient *c) {
3291 list *operations;
3292 int outputlen = 0;
3293 int desc = 0, alpha = 0;
3294 int limit_start = 0, limit_count = -1, start, end;
3295 int j, dontsort = 0, vectorlen;
3296 int getop = 0; /* GET operation counter */
3297 robj *sortval, *sortby = NULL;
3298 redisSortObject *vector; /* Resulting vector to sort */
3299
3300 /* Lookup the key to sort. It must be of the right types */
3301 sortval = lookupKeyRead(c->db,c->argv[1]);
3302 if (sortval == NULL) {
3303 addReply(c,shared.nokeyerr);
3304 return;
3305 }
3306 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
3307 addReply(c,shared.wrongtypeerr);
3308 return;
3309 }
3310
3311 /* Create a list of operations to perform for every sorted element.
3312 * Operations can be GET/DEL/INCR/DECR */
3313 operations = listCreate();
3314 listSetFreeMethod(operations,zfree);
3315 j = 2;
3316
3317 /* Now we need to protect sortval incrementing its count, in the future
3318 * SORT may have options able to overwrite/delete keys during the sorting
3319 * and the sorted key itself may get destroied */
3320 incrRefCount(sortval);
3321
3322 /* The SORT command has an SQL-alike syntax, parse it */
3323 while(j < c->argc) {
3324 int leftargs = c->argc-j-1;
3325 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
3326 desc = 0;
3327 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
3328 desc = 1;
3329 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
3330 alpha = 1;
3331 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
3332 limit_start = atoi(c->argv[j+1]->ptr);
3333 limit_count = atoi(c->argv[j+2]->ptr);
3334 j+=2;
3335 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
3336 sortby = c->argv[j+1];
3337 /* If the BY pattern does not contain '*', i.e. it is constant,
3338 * we don't need to sort nor to lookup the weight keys. */
3339 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
3340 j++;
3341 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
3342 listAddNodeTail(operations,createSortOperation(
3343 REDIS_SORT_GET,c->argv[j+1]));
3344 getop++;
3345 j++;
3346 } else if (!strcasecmp(c->argv[j]->ptr,"del") && leftargs >= 1) {
3347 listAddNodeTail(operations,createSortOperation(
3348 REDIS_SORT_DEL,c->argv[j+1]));
3349 j++;
3350 } else if (!strcasecmp(c->argv[j]->ptr,"incr") && leftargs >= 1) {
3351 listAddNodeTail(operations,createSortOperation(
3352 REDIS_SORT_INCR,c->argv[j+1]));
3353 j++;
3354 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
3355 listAddNodeTail(operations,createSortOperation(
3356 REDIS_SORT_DECR,c->argv[j+1]));
3357 j++;
3358 } else {
3359 decrRefCount(sortval);
3360 listRelease(operations);
3361 addReply(c,shared.syntaxerr);
3362 return;
3363 }
3364 j++;
3365 }
3366
3367 /* Load the sorting vector with all the objects to sort */
3368 vectorlen = (sortval->type == REDIS_LIST) ?
3369 listLength((list*)sortval->ptr) :
3370 dictSize((dict*)sortval->ptr);
3371 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
3372 if (!vector) oom("allocating objects vector for SORT");
3373 j = 0;
3374 if (sortval->type == REDIS_LIST) {
3375 list *list = sortval->ptr;
3376 listNode *ln;
3377
3378 listRewind(list);
3379 while((ln = listYield(list))) {
3380 robj *ele = ln->value;
3381 vector[j].obj = ele;
3382 vector[j].u.score = 0;
3383 vector[j].u.cmpobj = NULL;
3384 j++;
3385 }
3386 } else {
3387 dict *set = sortval->ptr;
3388 dictIterator *di;
3389 dictEntry *setele;
3390
3391 di = dictGetIterator(set);
3392 if (!di) oom("dictGetIterator");
3393 while((setele = dictNext(di)) != NULL) {
3394 vector[j].obj = dictGetEntryKey(setele);
3395 vector[j].u.score = 0;
3396 vector[j].u.cmpobj = NULL;
3397 j++;
3398 }
3399 dictReleaseIterator(di);
3400 }
3401 assert(j == vectorlen);
3402
3403 /* Now it's time to load the right scores in the sorting vector */
3404 if (dontsort == 0) {
3405 for (j = 0; j < vectorlen; j++) {
3406 if (sortby) {
3407 robj *byval;
3408
3409 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
3410 if (!byval || byval->type != REDIS_STRING) continue;
3411 if (alpha) {
3412 vector[j].u.cmpobj = byval;
3413 incrRefCount(byval);
3414 } else {
3415 vector[j].u.score = strtod(byval->ptr,NULL);
3416 }
3417 } else {
3418 if (!alpha) vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
3419 }
3420 }
3421 }
3422
3423 /* We are ready to sort the vector... perform a bit of sanity check
3424 * on the LIMIT option too. We'll use a partial version of quicksort. */
3425 start = (limit_start < 0) ? 0 : limit_start;
3426 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
3427 if (start >= vectorlen) {
3428 start = vectorlen-1;
3429 end = vectorlen-2;
3430 }
3431 if (end >= vectorlen) end = vectorlen-1;
3432
3433 if (dontsort == 0) {
3434 server.sort_desc = desc;
3435 server.sort_alpha = alpha;
3436 server.sort_bypattern = sortby ? 1 : 0;
3437 if (sortby && (start != 0 || end != vectorlen-1))
3438 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
3439 else
3440 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
3441 }
3442
3443 /* Send command output to the output buffer, performing the specified
3444 * GET/DEL/INCR/DECR operations if any. */
3445 outputlen = getop ? getop*(end-start+1) : end-start+1;
3446 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
3447 for (j = start; j <= end; j++) {
3448 listNode *ln;
3449 if (!getop) {
3450 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",
3451 sdslen(vector[j].obj->ptr)));
3452 addReply(c,vector[j].obj);
3453 addReply(c,shared.crlf);
3454 }
3455 listRewind(operations);
3456 while((ln = listYield(operations))) {
3457 redisSortOperation *sop = ln->value;
3458 robj *val = lookupKeyByPattern(c->db,sop->pattern,
3459 vector[j].obj);
3460
3461 if (sop->type == REDIS_SORT_GET) {
3462 if (!val || val->type != REDIS_STRING) {
3463 addReply(c,shared.nullbulk);
3464 } else {
3465 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",
3466 sdslen(val->ptr)));
3467 addReply(c,val);
3468 addReply(c,shared.crlf);
3469 }
3470 } else if (sop->type == REDIS_SORT_DEL) {
3471 /* TODO */
3472 }
3473 }
3474 }
3475
3476 /* Cleanup */
3477 decrRefCount(sortval);
3478 listRelease(operations);
3479 for (j = 0; j < vectorlen; j++) {
3480 if (sortby && alpha && vector[j].u.cmpobj)
3481 decrRefCount(vector[j].u.cmpobj);
3482 }
3483 zfree(vector);
3484}
3485
3486static void infoCommand(redisClient *c) {
3487 sds info;
3488 time_t uptime = time(NULL)-server.stat_starttime;
3489
3490 info = sdscatprintf(sdsempty(),
3491 "redis_version:%s\r\n"
3492 "connected_clients:%d\r\n"
3493 "connected_slaves:%d\r\n"
3494 "used_memory:%zu\r\n"
3495 "changes_since_last_save:%lld\r\n"
3496 "bgsave_in_progress:%d\r\n"
3497 "last_save_time:%d\r\n"
3498 "total_connections_received:%lld\r\n"
3499 "total_commands_processed:%lld\r\n"
3500 "uptime_in_seconds:%d\r\n"
3501 "uptime_in_days:%d\r\n"
3502 ,REDIS_VERSION,
3503 listLength(server.clients)-listLength(server.slaves),
3504 listLength(server.slaves),
3505 server.usedmemory,
3506 server.dirty,
3507 server.bgsaveinprogress,
3508 server.lastsave,
3509 server.stat_numconnections,
3510 server.stat_numcommands,
3511 uptime,
3512 uptime/(3600*24)
3513 );
3514 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
3515 addReplySds(c,info);
3516 addReply(c,shared.crlf);
3517}
3518
3519static void monitorCommand(redisClient *c) {
3520 /* ignore MONITOR if aleady slave or in monitor mode */
3521 if (c->flags & REDIS_SLAVE) return;
3522
3523 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
3524 c->slaveseldb = 0;
3525 if (!listAddNodeTail(server.monitors,c)) oom("listAddNodeTail");
3526 addReply(c,shared.ok);
3527}
3528
3529/* ================================= Expire ================================= */
3530static int removeExpire(redisDb *db, robj *key) {
3531 if (dictDelete(db->expires,key) == DICT_OK) {
3532 return 1;
3533 } else {
3534 return 0;
3535 }
3536}
3537
3538static int setExpire(redisDb *db, robj *key, time_t when) {
3539 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
3540 return 0;
3541 } else {
3542 incrRefCount(key);
3543 return 1;
3544 }
3545}
3546
3547/* Return the expire time of the specified key, or -1 if no expire
3548 * is associated with this key (i.e. the key is non volatile) */
3549static time_t getExpire(redisDb *db, robj *key) {
3550 dictEntry *de;
3551
3552 /* No expire? return ASAP */
3553 if (dictSize(db->expires) == 0 ||
3554 (de = dictFind(db->expires,key)) == NULL) return -1;
3555
3556 return (time_t) dictGetEntryVal(de);
3557}
3558
3559static int expireIfNeeded(redisDb *db, robj *key) {
3560 time_t when;
3561 dictEntry *de;
3562
3563 /* No expire? return ASAP */
3564 if (dictSize(db->expires) == 0 ||
3565 (de = dictFind(db->expires,key)) == NULL) return 0;
3566
3567 /* Lookup the expire */
3568 when = (time_t) dictGetEntryVal(de);
3569 if (time(NULL) <= when) return 0;
3570
3571 /* Delete the key */
3572 dictDelete(db->expires,key);
3573 return dictDelete(db->dict,key) == DICT_OK;
3574}
3575
3576static int deleteIfVolatile(redisDb *db, robj *key) {
3577 dictEntry *de;
3578
3579 /* No expire? return ASAP */
3580 if (dictSize(db->expires) == 0 ||
3581 (de = dictFind(db->expires,key)) == NULL) return 0;
3582
3583 /* Delete the key */
3584 server.dirty++;
3585 dictDelete(db->expires,key);
3586 return dictDelete(db->dict,key) == DICT_OK;
3587}
3588
3589static void expireCommand(redisClient *c) {
3590 dictEntry *de;
3591 int seconds = atoi(c->argv[2]->ptr);
3592
3593 de = dictFind(c->db->dict,c->argv[1]);
3594 if (de == NULL) {
3595 addReply(c,shared.czero);
3596 return;
3597 }
3598 if (seconds <= 0) {
3599 addReply(c, shared.czero);
3600 return;
3601 } else {
3602 time_t when = time(NULL)+seconds;
3603 if (setExpire(c->db,c->argv[1],when))
3604 addReply(c,shared.cone);
3605 else
3606 addReply(c,shared.czero);
3607 return;
3608 }
3609}
3610
3611static void ttlCommand(redisClient *c) {
3612 time_t expire;
3613 int ttl = -1;
3614
3615 expire = getExpire(c->db,c->argv[1]);
3616 if (expire != -1) {
3617 ttl = (int) (expire-time(NULL));
3618 if (ttl < 0) ttl = -1;
3619 }
3620 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
3621}
3622
3623/* =============================== Replication ============================= */
3624
3625static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
3626 ssize_t nwritten, ret = size;
3627 time_t start = time(NULL);
3628
3629 timeout++;
3630 while(size) {
3631 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
3632 nwritten = write(fd,ptr,size);
3633 if (nwritten == -1) return -1;
3634 ptr += nwritten;
3635 size -= nwritten;
3636 }
3637 if ((time(NULL)-start) > timeout) {
3638 errno = ETIMEDOUT;
3639 return -1;
3640 }
3641 }
3642 return ret;
3643}
3644
3645static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
3646 ssize_t nread, totread = 0;
3647 time_t start = time(NULL);
3648
3649 timeout++;
3650 while(size) {
3651 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
3652 nread = read(fd,ptr,size);
3653 if (nread == -1) return -1;
3654 ptr += nread;
3655 size -= nread;
3656 totread += nread;
3657 }
3658 if ((time(NULL)-start) > timeout) {
3659 errno = ETIMEDOUT;
3660 return -1;
3661 }
3662 }
3663 return totread;
3664}
3665
/* Read a line from 'fd' one byte at a time, storing at most 'size'-1
 * characters plus a nul terminator into 'ptr'.
 *
 * Reading stops at the first '\n', which is not stored; a '\r' just before
 * it is replaced with the terminator. Reading also stops when the buffer
 * is full, in which case the truncated line is returned.
 *
 * Returns the number of characters stored before the newline was found
 * (a trailing '\r' is counted), 0 if 'size' leaves no room for any
 * character, or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    if (size <= 0) return 0;
    *ptr = '\0';
    size--; /* room reserved for the nul terminator */
    while(size > 0) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        }
        *ptr++ = c;
        *ptr = '\0';
        nread++;
        /* Account for the stored byte: the counter was never updated, so
         * a line longer than the buffer was written past its end. */
        size--;
    }
    return nread;
}
3686
3687static void syncCommand(redisClient *c) {
3688 /* ignore SYNC if aleady slave or in monitor mode */
3689 if (c->flags & REDIS_SLAVE) return;
3690
3691 /* SYNC can't be issued when the server has pending data to send to
3692 * the client about already issued commands. We need a fresh reply
3693 * buffer registering the differences between the BGSAVE and the current
3694 * dataset, so that we can copy to other slaves if needed. */
3695 if (listLength(c->reply) != 0) {
3696 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
3697 return;
3698 }
3699
3700 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
3701 /* Here we need to check if there is a background saving operation
3702 * in progress, or if it is required to start one */
3703 if (server.bgsaveinprogress) {
3704 /* Ok a background save is in progress. Let's check if it is a good
3705 * one for replication, i.e. if there is another slave that is
3706 * registering differences since the server forked to save */
3707 redisClient *slave;
3708 listNode *ln;
3709
3710 listRewind(server.slaves);
3711 while((ln = listYield(server.slaves))) {
3712 slave = ln->value;
3713 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
3714 }
3715 if (ln) {
3716 /* Perfect, the server is already registering differences for
3717 * another slave. Set the right state, and copy the buffer. */
3718 listRelease(c->reply);
3719 c->reply = listDup(slave->reply);
3720 if (!c->reply) oom("listDup copying slave reply list");
3721 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
3722 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
3723 } else {
3724 /* No way, we need to wait for the next BGSAVE in order to
3725 * register differences */
3726 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
3727 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
3728 }
3729 } else {
3730 /* Ok we don't have a BGSAVE in progress, let's start one */
3731 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
3732 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
3733 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
3734 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
3735 return;
3736 }
3737 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
3738 }
3739 c->repldbfd = -1;
3740 c->flags |= REDIS_SLAVE;
3741 c->slaveseldb = 0;
3742 if (!listAddNodeTail(server.slaves,c)) oom("listAddNodeTail");
3743 return;
3744}
3745
3746static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
3747 redisClient *slave = privdata;
3748 REDIS_NOTUSED(el);
3749 REDIS_NOTUSED(mask);
3750 char buf[REDIS_IOBUF_LEN];
3751 ssize_t nwritten, buflen;
3752
3753 if (slave->repldboff == 0) {
3754 /* Write the bulk write count before to transfer the DB. In theory here
3755 * we don't know how much room there is in the output buffer of the
3756 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
3757 * operations) will never be smaller than the few bytes we need. */
3758 sds bulkcount;
3759
3760 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
3761 slave->repldbsize);
3762 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
3763 {
3764 sdsfree(bulkcount);
3765 freeClient(slave);
3766 return;
3767 }
3768 sdsfree(bulkcount);
3769 }
3770 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
3771 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
3772 if (buflen <= 0) {
3773 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
3774 (buflen == 0) ? "premature EOF" : strerror(errno));
3775 freeClient(slave);
3776 return;
3777 }
3778 if ((nwritten = write(fd,buf,buflen)) == -1) {
3779 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
3780 strerror(errno));
3781 freeClient(slave);
3782 return;
3783 }
3784 slave->repldboff += nwritten;
3785 if (slave->repldboff == slave->repldbsize) {
3786 close(slave->repldbfd);
3787 slave->repldbfd = -1;
3788 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
3789 slave->replstate = REDIS_REPL_ONLINE;
3790 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
3791 sendReplyToClient, slave, NULL) == AE_ERR) {
3792 freeClient(slave);
3793 return;
3794 }
3795 addReplySds(slave,sdsempty());
3796 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
3797 }
3798}
3799
3800static void updateSalvesWaitingBgsave(int bgsaveerr) {
3801 listNode *ln;
3802 int startbgsave = 0;
3803
3804 listRewind(server.slaves);
3805 while((ln = listYield(server.slaves))) {
3806 redisClient *slave = ln->value;
3807
3808 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
3809 startbgsave = 1;
3810 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
3811 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
3812 struct stat buf;
3813
3814 if (bgsaveerr != REDIS_OK) {
3815 freeClient(slave);
3816 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
3817 continue;
3818 }
3819 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
3820 fstat(slave->repldbfd,&buf) == -1) {
3821 freeClient(slave);
3822 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
3823 continue;
3824 }
3825 slave->repldboff = 0;
3826 slave->repldbsize = buf.st_size;
3827 slave->replstate = REDIS_REPL_SEND_BULK;
3828 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
3829 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave, NULL) == AE_ERR) {
3830 freeClient(slave);
3831 continue;
3832 }
3833 }
3834 }
3835 if (startbgsave) {
3836 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
3837 listRewind(server.slaves);
3838 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
3839 while((ln = listYield(server.slaves))) {
3840 redisClient *slave = ln->value;
3841
3842 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
3843 freeClient(slave);
3844 }
3845 }
3846 }
3847}
3848
3849static int syncWithMaster(void) {
3850 char buf[1024], tmpfile[256];
3851 int dumpsize;
3852 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
3853 int dfd;
3854
3855 if (fd == -1) {
3856 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
3857 strerror(errno));
3858 return REDIS_ERR;
3859 }
3860 /* Issue the SYNC command */
3861 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
3862 close(fd);
3863 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
3864 strerror(errno));
3865 return REDIS_ERR;
3866 }
3867 /* Read the bulk write count */
3868 if (syncReadLine(fd,buf,1024,3600) == -1) {
3869 close(fd);
3870 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
3871 strerror(errno));
3872 return REDIS_ERR;
3873 }
3874 dumpsize = atoi(buf+1);
3875 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
3876 /* Read the bulk write data on a temp file */
3877 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
3878 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
3879 if (dfd == -1) {
3880 close(fd);
3881 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
3882 return REDIS_ERR;
3883 }
3884 while(dumpsize) {
3885 int nread, nwritten;
3886
3887 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
3888 if (nread == -1) {
3889 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
3890 strerror(errno));
3891 close(fd);
3892 close(dfd);
3893 return REDIS_ERR;
3894 }
3895 nwritten = write(dfd,buf,nread);
3896 if (nwritten == -1) {
3897 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
3898 close(fd);
3899 close(dfd);
3900 return REDIS_ERR;
3901 }
3902 dumpsize -= nread;
3903 }
3904 close(dfd);
3905 if (rename(tmpfile,server.dbfilename) == -1) {
3906 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
3907 unlink(tmpfile);
3908 close(fd);
3909 return REDIS_ERR;
3910 }
3911 emptyDb();
3912 if (rdbLoad(server.dbfilename) != REDIS_OK) {
3913 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
3914 close(fd);
3915 return REDIS_ERR;
3916 }
3917 server.master = createClient(fd);
3918 server.master->flags |= REDIS_MASTER;
3919 server.replstate = REDIS_REPL_CONNECTED;
3920 return REDIS_OK;
3921}
3922
3923static void slaveofCommand(redisClient *c) {
3924 if (!strcasecmp(c->argv[1]->ptr,"no") &&
3925 !strcasecmp(c->argv[2]->ptr,"one")) {
3926 if (server.masterhost) {
3927 sdsfree(server.masterhost);
3928 server.masterhost = NULL;
3929 if (server.master) freeClient(server.master);
3930 server.replstate = REDIS_REPL_NONE;
3931 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
3932 }
3933 } else {
3934 sdsfree(server.masterhost);
3935 server.masterhost = sdsdup(c->argv[1]->ptr);
3936 server.masterport = atoi(c->argv[2]->ptr);
3937 if (server.master) freeClient(server.master);
3938 server.replstate = REDIS_REPL_CONNECT;
3939 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
3940 server.masterhost, server.masterport);
3941 }
3942 addReply(c,shared.ok);
3943}
3944
3945/* =================================== Main! ================================ */
3946
3947#ifdef __linux__
/* Return the integer found in /proc/sys/vm/overcommit_memory, or -1 if the
 * file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,64,procfile) != NULL) value = atoi(line);
    fclose(procfile);

    return value;
}
3961
3962void linuxOvercommitMemoryWarning(void) {
3963 if (linuxOvercommitMemoryValue() == 0) {
3964 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'echo 1 > /proc/sys/vm/overcommit_memory' in your init scripts.");
3965 }
3966}
3967#endif /* __linux__ */
3968
3969static void daemonize(void) {
3970 int fd;
3971 FILE *fp;
3972
3973 if (fork() != 0) exit(0); /* parent exits */
3974 setsid(); /* create a new session */
3975
3976 /* Every output goes to /dev/null. If Redis is daemonized but
3977 * the 'logfile' is set to 'stdout' in the configuration file
3978 * it will not log at all. */
3979 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
3980 dup2(fd, STDIN_FILENO);
3981 dup2(fd, STDOUT_FILENO);
3982 dup2(fd, STDERR_FILENO);
3983 if (fd > STDERR_FILENO) close(fd);
3984 }
3985 /* Try to write the pid file */
3986 fp = fopen(server.pidfile,"w");
3987 if (fp) {
3988 fprintf(fp,"%d\n",getpid());
3989 fclose(fp);
3990 }
3991}
3992
3993int main(int argc, char **argv) {
3994#ifdef __linux__
3995 linuxOvercommitMemoryWarning();
3996#endif
3997
3998 initServerConfig();
3999 if (argc == 2) {
4000 ResetServerSaveParams();
4001 loadServerConfig(argv[1]);
4002 } else if (argc > 2) {
4003 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
4004 exit(1);
4005 }
4006 initServer();
4007 if (server.daemonize) daemonize();
4008 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
4009 if (rdbLoad(server.dbfilename) == REDIS_OK)
4010 redisLog(REDIS_NOTICE,"DB loaded from disk");
4011 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
4012 acceptHandler, NULL, NULL) == AE_ERR) oom("creating file event");
4013 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
4014 aeMain(server.el);
4015 aeDeleteEventLoop(server.el);
4016 return 0;
4017}