]> git.saurik.com Git - redis.git/blob - redis.c
f5fb5e45586e5ea8aa40a9cab9a20810ff5bf2ed
[redis.git] / redis.c
1 /*
2 * Copyright (c) 2006-2009, Salvatore Sanfilippo <antirez at gmail dot com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Redis nor the names of its contributors may be used
14 * to endorse or promote products derived from this software without
15 * specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 #define REDIS_VERSION "1.050"
31
32 #include "fmacros.h"
33 #include "config.h"
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <time.h>
39 #include <unistd.h>
40 #define __USE_POSIX199309
41 #include <signal.h>
42
43 #ifdef HAVE_BACKTRACE
44 #include <execinfo.h>
45 #include <ucontext.h>
46 #endif /* HAVE_BACKTRACE */
47
48 #include <sys/wait.h>
49 #include <errno.h>
50 #include <assert.h>
51 #include <ctype.h>
52 #include <stdarg.h>
53 #include <inttypes.h>
54 #include <arpa/inet.h>
55 #include <sys/stat.h>
56 #include <fcntl.h>
57 #include <sys/time.h>
58 #include <sys/resource.h>
59 #include <limits.h>
60 #include <math.h>
61
62 #if defined(__sun)
63 #include "solarisfixes.h"
64 #endif
65
66 #include "redis.h"
67 #include "ae.h" /* Event driven programming library */
68 #include "sds.h" /* Dynamic safe strings */
69 #include "anet.h" /* Networking the easy way */
70 #include "dict.h" /* Hash tables */
71 #include "adlist.h" /* Linked lists */
72 #include "zmalloc.h" /* total memory usage aware version of malloc/free */
73 #include "lzf.h" /* LZF compression library */
74 #include "pqsort.h" /* Partial qsort for SORT+LIMIT */
75
76 /* Error codes */
77 #define REDIS_OK 0
78 #define REDIS_ERR -1
79
80 /* Static server configuration */
81 #define REDIS_SERVERPORT 6379 /* TCP port */
82 #define REDIS_MAXIDLETIME (60*5) /* default client timeout */
83 #define REDIS_IOBUF_LEN 1024
84 #define REDIS_LOADBUF_LEN 1024
85 #define REDIS_STATIC_ARGS 4
86 #define REDIS_DEFAULT_DBNUM 16
87 #define REDIS_CONFIGLINE_MAX 1024
88 #define REDIS_OBJFREELIST_MAX 1000000 /* Max number of objects to cache */
89 #define REDIS_MAX_SYNC_TIME 60 /* Slave can't take more to sync */
90 #define REDIS_EXPIRELOOKUPS_PER_CRON 100 /* try to expire 100 keys/second */
91 #define REDIS_MAX_WRITE_PER_EVENT (1024*64)
92 #define REDIS_REQUEST_MAX_SIZE (1024*1024*256) /* max bytes in inline command */
93
94 /* Hash table parameters */
95 #define REDIS_HT_MINFILL 10 /* Minimal hash table fill 10% */
96
97 /* Command flags */
98 #define REDIS_CMD_BULK 1 /* Bulk write command */
99 #define REDIS_CMD_INLINE 2 /* Inline command */
100 /* REDIS_CMD_DENYOOM deserves a longer comment: all the commands marked with
101 this flag will return an error when the 'maxmemory' option is set in the
102 config file and the server is using more than maxmemory bytes of memory.
103 In short these commands are denied on low memory conditions. */
104 #define REDIS_CMD_DENYOOM 4
105
106 /* Object types */
107 #define REDIS_STRING 0
108 #define REDIS_LIST 1
109 #define REDIS_SET 2
110 #define REDIS_ZSET 3
111 #define REDIS_HASH 4
112
113 /* Objects encoding */
114 #define REDIS_ENCODING_RAW 0 /* Raw representation */
115 #define REDIS_ENCODING_INT 1 /* Encoded as integer */
116
117 /* Object types only used for dumping to disk */
118 #define REDIS_EXPIRETIME 253
119 #define REDIS_SELECTDB 254
120 #define REDIS_EOF 255
121
122 /* Defines related to the dump file format. To store 32 bits lengths for short
123 * keys requires a lot of space, so we check the most significant 2 bits of
124 * the first byte to interpret the length:
125 *
126 * 00|000000 => if the two MSB are 00 the len is the 6 bits of this byte
127 * 01|000000 00000000 => 01, the len is 14 bits, 6 bits + 8 bits of next byte
128 * 10|000000 [32 bit integer] => if it's 10, a full 32 bit len will follow
129 * 11|000000 this means: specially encoded object will follow. The six bits
130 * number specify the kind of object that follows.
131 * See the REDIS_RDB_ENC_* defines.
132 *
133 * Lengths up to 63 are stored using a single byte, most DB keys, and many
134 * values, will fit inside. */
135 #define REDIS_RDB_6BITLEN 0
136 #define REDIS_RDB_14BITLEN 1
137 #define REDIS_RDB_32BITLEN 2
138 #define REDIS_RDB_ENCVAL 3
139 #define REDIS_RDB_LENERR UINT_MAX
140
141 /* When a length of a string object stored on disk has the first two bits
142 * set, the remaining six bits specify a special encoding for the object
143 * according to the following defines: */
144 #define REDIS_RDB_ENC_INT8 0 /* 8 bit signed integer */
145 #define REDIS_RDB_ENC_INT16 1 /* 16 bit signed integer */
146 #define REDIS_RDB_ENC_INT32 2 /* 32 bit signed integer */
147 #define REDIS_RDB_ENC_LZF 3 /* string compressed with FASTLZ */
148
149 /* Client flags */
150 #define REDIS_CLOSE 1 /* This client connection should be closed ASAP */
151 #define REDIS_SLAVE 2 /* This client is a slave server */
152 #define REDIS_MASTER 4 /* This client is a master server */
153 #define REDIS_MONITOR 8 /* This client is a slave monitor, see MONITOR */
154
155 /* Slave replication state - slave side */
156 #define REDIS_REPL_NONE 0 /* No active replication */
157 #define REDIS_REPL_CONNECT 1 /* Must connect to master */
158 #define REDIS_REPL_CONNECTED 2 /* Connected to master */
159
160 /* Slave replication state - from the point of view of master
161 * Note that in SEND_BULK and ONLINE state the slave receives new updates
162 * in its output queue. In the WAIT_BGSAVE state instead the server is waiting
163 * to start the next background saving in order to send updates to it. */
164 #define REDIS_REPL_WAIT_BGSAVE_START 3 /* master waits bgsave to start feeding it */
165 #define REDIS_REPL_WAIT_BGSAVE_END 4 /* master waits bgsave to start bulk DB transmission */
166 #define REDIS_REPL_SEND_BULK 5 /* master is sending the bulk DB */
167 #define REDIS_REPL_ONLINE 6 /* bulk DB already transmitted, receive updates */
168
169 /* List related stuff */
170 #define REDIS_HEAD 0
171 #define REDIS_TAIL 1
172
173 /* Sort operations */
174 #define REDIS_SORT_GET 0
175 #define REDIS_SORT_ASC 1
176 #define REDIS_SORT_DESC 2
177 #define REDIS_SORTKEY_MAX 1024
178
179 /* Log levels */
180 #define REDIS_DEBUG 0
181 #define REDIS_NOTICE 1
182 #define REDIS_WARNING 2
183
184 /* Anti-warning macro... */
185 #define REDIS_NOTUSED(V) ((void) V)
186
187 #define ZSKIPLIST_MAXLEVEL 32 /* Should be enough for 2^32 elements */
188 #define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
189
190 /* Append only defines */
191 #define APPENDFSYNC_NO 0
192 #define APPENDFSYNC_ALWAYS 1
193 #define APPENDFSYNC_EVERYSEC 2
194
195 /*================================= Data types ============================== */
196
/* A redis object, that is a type able to hold a string / list / set.
 * Objects are reference counted (see the incrRefCount() / decrRefCount()
 * prototypes in this file). */
typedef struct redisObject {
    void *ptr;                  /* payload; for RAW-encoded strings this is an sds
                                 * (the dict hash/compare helpers call sdslen() on it) */
    unsigned char type;         /* presumably one of the REDIS_STRING / REDIS_LIST /
                                 * REDIS_SET / REDIS_ZSET / REDIS_HASH object types */
    unsigned char encoding;     /* REDIS_ENCODING_RAW or REDIS_ENCODING_INT */
    unsigned char notused[2];   /* unused, as the name says */
    int refcount;               /* reference count */
} robj;
205
/* One logical database. The server keeps an array of these (server.db),
 * indexed 0 .. server.dbnum-1. */
typedef struct redisDb {
    dict *dict;     /* main hash table of this database */
    dict *expires;  /* keys with a timeout; serverCron() reads each entry's value
                     * as a time_t and deletes the key once that time has passed */
    int id;         /* NOTE(review): presumably the index of this DB in server.db */
} redisDb;
211
/* With multiplexing we need to keep per-client state.
 * Clients are kept in a linked list (server.clients). */
typedef struct redisClient {
    int fd;
    redisDb *db;
    int dictid;
    sds querybuf;
    robj **argv, **mbargv;
    int argc, mbargc;
    int bulklen; /* bulk read len. -1 if not in bulk read mode */
    int multibulk; /* multi bulk command format active */
    list *reply;
    int sentlen;
    time_t lastinteraction; /* time of the last interaction, used for timeout */
    int flags; /* REDIS_CLOSE | REDIS_SLAVE | REDIS_MONITOR */
    int slaveseldb; /* slave selected db, if this client is a slave */
    int authenticated; /* when requirepass is non-NULL */
    int replstate; /* replication state if this is a slave */
    int repldbfd; /* replication DB file descriptor */
    long repldboff; /* replication DB file offset */
    off_t repldbsize; /* replication DB file size */
} redisClient;
234
/* One automatic-save rule. serverCron() starts a background save when at
 * least 'changes' modifications are pending (server.dirty) and more than
 * 'seconds' seconds have elapsed since server.lastsave. */
struct saveparam {
    time_t seconds;
    int changes;
};
239
/* Global server state structure */
struct redisServer {
    int port;
    int fd;
    redisDb *db; /* array of 'dbnum' databases */
    dict *sharingpool;
    unsigned int sharingpoolsize;
    long long dirty; /* changes to DB from the last save */
    list *clients;
    list *slaves, *monitors;
    char neterr[ANET_ERR_LEN];
    aeEventLoop *el;
    int cronloops; /* number of times the cron function run */
    list *objfreelist; /* A list of freed objects to avoid malloc() */
    time_t lastsave; /* Unix time of last successful save */
    size_t usedmemory; /* Used memory in bytes, refreshed by serverCron()
                        * from zmalloc_used_memory() */
    /* Fields used only for stats */
    time_t stat_starttime; /* server start time */
    long long stat_numcommands; /* number of processed commands */
    long long stat_numconnections; /* number of connections received */
    /* Configuration */
    int verbosity; /* messages with a level below this are not logged */
    int glueoutputbuf;
    int maxidletime; /* idle seconds before a client is closed, 0 disables
                      * the check in serverCron() */
    int dbnum;
    int daemonize;
    int appendonly;
    int appendfsync;
    time_t lastfsync;
    int appendfd;
    int appendseldb;
    char *pidfile;
    int bgsaveinprogress; /* non-zero while a background save child is running */
    pid_t bgsavechildpid; /* reset to -1 by serverCron() once the child is reaped */
    struct saveparam *saveparams; /* array of 'saveparamslen' save rules */
    int saveparamslen;
    char *logfile; /* NULL means log to stdout */
    char *bindaddr;
    char *dbfilename;
    char *appendfilename;
    char *requirepass;
    int shareobjects;
    /* Replication related */
    int isslave;
    char *masterhost;
    int masterport;
    redisClient *master; /* client that is master for this slave */
    int replstate; /* REDIS_REPL_CONNECT makes serverCron() call syncWithMaster() */
    unsigned int maxclients;
    unsigned long maxmemory;
    /* Sort parameters - qsort_r() is only available under BSD so we
     * have to take this state global, in order to pass it to sortCompare() */
    int sort_desc;
    int sort_alpha;
    int sort_bypattern;
};
296
/* Signature shared by every command implementation. */
typedef void redisCommandProc(redisClient *c);

/* One entry of the command table (see cmdTable). */
struct redisCommand {
    char *name;             /* command name as typed by the client */
    redisCommandProc *proc; /* function implementing the command */
    int arity;              /* argument count; NOTE(review): negative values
                             * presumably mean "at least -arity" - confirm in
                             * processCommand() */
    int flags;              /* bitwise OR of REDIS_CMD_* flags */
};
304
/* Pairs a function name with an address stored as an integer.
 * NOTE(review): presumably used to resolve addresses to symbol names in
 * the crash/backtrace reporting code - confirm against its users. */
struct redisFunctionSym {
    char *name;
    unsigned long pointer;
};
309
/* Element of the vector sorted by SORT: the object itself plus the value
 * it is compared by, either a numeric score or another object.
 * NOTE(review): which union member is active presumably depends on the
 * server.sort_alpha / sort_bypattern settings - confirm in sortCompare(). */
typedef struct _redisSortObject {
    robj *obj;
    union {
        double score;
        robj *cmpobj;
    } u;
} redisSortObject;
317
/* A SORT sub-operation: an operation type together with its pattern. */
typedef struct _redisSortOperation {
    int type;       /* presumably one of the REDIS_SORT_* constants */
    robj *pattern;
} redisSortOperation;
322
/* ZSETs use a specialized version of Skiplists */

/* A skiplist node: one (score, object) pair with an array of forward
 * pointers and a single backward pointer. */
typedef struct zskiplistNode {
    struct zskiplistNode **forward;  /* array of forward pointers
                                      * (presumably one per level) */
    struct zskiplistNode *backward;
    double score;
    robj *obj;
} zskiplistNode;
331
/* The skiplist itself: end points plus bookkeeping. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail;
    unsigned long length;   /* presumably the number of elements stored */
    int level;              /* presumably the highest level currently in use
                             * (bounded by ZSKIPLIST_MAXLEVEL) */
} zskiplist;
337
/* A sorted set pairs a hash table with a skiplist.
 * NOTE(review): presumably the dict maps member -> score and the skiplist
 * keeps members ordered by score - confirm in the zadd implementation. */
typedef struct zset {
    dict *dict;
    zskiplist *zsl;
} zset;
342
/* Our shared "common" objects: pre-built reply objects created once by
 * createSharedObjects() (e.g. crlf is "\r\n", ok is "+OK\r\n") and kept in
 * the global 'shared' instance. */

struct sharedObjectsStruct {
    robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
    *colon, *nullbulk, *nullmultibulk,
    *emptymultibulk, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
    *outofrangeerr, *plus,
    *select0, *select1, *select2, *select3, *select4,
    *select5, *select6, *select7, *select8, *select9;
} shared;
353
354 /* Global vars that are actually used as constants. The following double
355 * values are used for double on-disk serialization, and are initialized
356 * at runtime to avoid strange compiler optimizations. */
357
358 static double R_Zero, R_PosInf, R_NegInf, R_Nan;
359
360 /*================================ Prototypes =============================== */
361
362 static void freeStringObject(robj *o);
363 static void freeListObject(robj *o);
364 static void freeSetObject(robj *o);
365 static void decrRefCount(void *o);
366 static robj *createObject(int type, void *ptr);
367 static void freeClient(redisClient *c);
368 static int rdbLoad(char *filename);
369 static void addReply(redisClient *c, robj *obj);
370 static void addReplySds(redisClient *c, sds s);
371 static void incrRefCount(robj *o);
372 static int rdbSaveBackground(char *filename);
373 static robj *createStringObject(char *ptr, size_t len);
374 static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc);
375 static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
376 static int syncWithMaster(void);
377 static robj *tryObjectSharing(robj *o);
378 static int tryObjectEncoding(robj *o);
379 static robj *getDecodedObject(const robj *o);
380 static int removeExpire(redisDb *db, robj *key);
381 static int expireIfNeeded(redisDb *db, robj *key);
382 static int deleteIfVolatile(redisDb *db, robj *key);
383 static int deleteKey(redisDb *db, robj *key);
384 static time_t getExpire(redisDb *db, robj *key);
385 static int setExpire(redisDb *db, robj *key, time_t when);
386 static void updateSlavesWaitingBgsave(int bgsaveerr);
387 static void freeMemoryIfNeeded(void);
388 static int processCommand(redisClient *c);
389 static void setupSigSegvAction(void);
390 static void rdbRemoveTempFile(pid_t childpid);
391 static size_t stringObjectLen(robj *o);
392 static void processInputBuffer(redisClient *c);
393 static zskiplist *zslCreate(void);
394 static void zslFree(zskiplist *zsl);
395 static void zslInsert(zskiplist *zsl, double score, robj *obj);
396
397 static void authCommand(redisClient *c);
398 static void pingCommand(redisClient *c);
399 static void echoCommand(redisClient *c);
400 static void setCommand(redisClient *c);
401 static void setnxCommand(redisClient *c);
402 static void getCommand(redisClient *c);
403 static void delCommand(redisClient *c);
404 static void existsCommand(redisClient *c);
405 static void incrCommand(redisClient *c);
406 static void decrCommand(redisClient *c);
407 static void incrbyCommand(redisClient *c);
408 static void decrbyCommand(redisClient *c);
409 static void selectCommand(redisClient *c);
410 static void randomkeyCommand(redisClient *c);
411 static void keysCommand(redisClient *c);
412 static void dbsizeCommand(redisClient *c);
413 static void lastsaveCommand(redisClient *c);
414 static void saveCommand(redisClient *c);
415 static void bgsaveCommand(redisClient *c);
416 static void shutdownCommand(redisClient *c);
417 static void moveCommand(redisClient *c);
418 static void renameCommand(redisClient *c);
419 static void renamenxCommand(redisClient *c);
420 static void lpushCommand(redisClient *c);
421 static void rpushCommand(redisClient *c);
422 static void lpopCommand(redisClient *c);
423 static void rpopCommand(redisClient *c);
424 static void llenCommand(redisClient *c);
425 static void lindexCommand(redisClient *c);
426 static void lrangeCommand(redisClient *c);
427 static void ltrimCommand(redisClient *c);
428 static void typeCommand(redisClient *c);
429 static void lsetCommand(redisClient *c);
430 static void saddCommand(redisClient *c);
431 static void sremCommand(redisClient *c);
432 static void smoveCommand(redisClient *c);
433 static void sismemberCommand(redisClient *c);
434 static void scardCommand(redisClient *c);
435 static void spopCommand(redisClient *c);
436 static void srandmemberCommand(redisClient *c);
437 static void sinterCommand(redisClient *c);
438 static void sinterstoreCommand(redisClient *c);
439 static void sunionCommand(redisClient *c);
440 static void sunionstoreCommand(redisClient *c);
441 static void sdiffCommand(redisClient *c);
442 static void sdiffstoreCommand(redisClient *c);
443 static void syncCommand(redisClient *c);
444 static void flushdbCommand(redisClient *c);
445 static void flushallCommand(redisClient *c);
446 static void sortCommand(redisClient *c);
447 static void lremCommand(redisClient *c);
448 static void infoCommand(redisClient *c);
449 static void mgetCommand(redisClient *c);
450 static void monitorCommand(redisClient *c);
451 static void expireCommand(redisClient *c);
452 static void expireatCommand(redisClient *c);
453 static void getsetCommand(redisClient *c);
454 static void ttlCommand(redisClient *c);
455 static void slaveofCommand(redisClient *c);
456 static void debugCommand(redisClient *c);
457 static void msetCommand(redisClient *c);
458 static void msetnxCommand(redisClient *c);
459 static void zaddCommand(redisClient *c);
460 static void zrangeCommand(redisClient *c);
461 static void zrangebyscoreCommand(redisClient *c);
462 static void zrevrangeCommand(redisClient *c);
463 static void zcardCommand(redisClient *c);
464 static void zremCommand(redisClient *c);
465 static void zscoreCommand(redisClient *c);
466 static void zremrangebyscoreCommand(redisClient *c);
467
468 /*================================= Globals ================================= */
469
470 /* Global vars */
471 static struct redisServer server; /* server global state */
/* Table of all the commands the server understands. Each entry is
 * {name, implementation, arity, flags} as declared by struct redisCommand.
 * Flags are REDIS_CMD_INLINE or REDIS_CMD_BULK, optionally OR-ed with
 * REDIS_CMD_DENYOOM. The table is terminated by an all-NULL/zero entry.
 * NOTE(review): a negative arity presumably means "at least -arity
 * arguments" - confirm in processCommand(). */
static struct redisCommand cmdTable[] = {
    {"get",getCommand,2,REDIS_CMD_INLINE},
    {"set",setCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"setnx",setnxCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"del",delCommand,-2,REDIS_CMD_INLINE},
    {"exists",existsCommand,2,REDIS_CMD_INLINE},
    {"incr",incrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"decr",decrCommand,2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"mget",mgetCommand,-2,REDIS_CMD_INLINE},
    {"rpush",rpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"lpush",lpushCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"rpop",rpopCommand,2,REDIS_CMD_INLINE},
    {"lpop",lpopCommand,2,REDIS_CMD_INLINE},
    {"llen",llenCommand,2,REDIS_CMD_INLINE},
    {"lindex",lindexCommand,3,REDIS_CMD_INLINE},
    {"lset",lsetCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"lrange",lrangeCommand,4,REDIS_CMD_INLINE},
    {"ltrim",ltrimCommand,4,REDIS_CMD_INLINE},
    {"lrem",lremCommand,4,REDIS_CMD_BULK},
    {"sadd",saddCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"srem",sremCommand,3,REDIS_CMD_BULK},
    {"smove",smoveCommand,4,REDIS_CMD_BULK},
    {"sismember",sismemberCommand,3,REDIS_CMD_BULK},
    {"scard",scardCommand,2,REDIS_CMD_INLINE},
    {"spop",spopCommand,2,REDIS_CMD_INLINE},
    {"srandmember",srandmemberCommand,2,REDIS_CMD_INLINE},
    {"sinter",sinterCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"sinterstore",sinterstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"sunion",sunionCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"sunionstore",sunionstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"sdiff",sdiffCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"sdiffstore",sdiffstoreCommand,-3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    /* SMEMBERS reuses the SINTER implementation with a single key */
    {"smembers",sinterCommand,2,REDIS_CMD_INLINE},
    {"zadd",zaddCommand,4,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"zrem",zremCommand,3,REDIS_CMD_BULK},
    {"zremrangebyscore",zremrangebyscoreCommand,4,REDIS_CMD_INLINE},
    {"zrange",zrangeCommand,4,REDIS_CMD_INLINE},
    {"zrangebyscore",zrangebyscoreCommand,4,REDIS_CMD_INLINE},
    {"zrevrange",zrevrangeCommand,4,REDIS_CMD_INLINE},
    {"zcard",zcardCommand,2,REDIS_CMD_INLINE},
    {"zscore",zscoreCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"incrby",incrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"decrby",decrbyCommand,3,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"getset",getsetCommand,3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"mset",msetCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"msetnx",msetnxCommand,-3,REDIS_CMD_BULK|REDIS_CMD_DENYOOM},
    {"randomkey",randomkeyCommand,1,REDIS_CMD_INLINE},
    {"select",selectCommand,2,REDIS_CMD_INLINE},
    {"move",moveCommand,3,REDIS_CMD_INLINE},
    {"rename",renameCommand,3,REDIS_CMD_INLINE},
    {"renamenx",renamenxCommand,3,REDIS_CMD_INLINE},
    {"expire",expireCommand,3,REDIS_CMD_INLINE},
    {"expireat",expireatCommand,3,REDIS_CMD_INLINE},
    {"keys",keysCommand,2,REDIS_CMD_INLINE},
    {"dbsize",dbsizeCommand,1,REDIS_CMD_INLINE},
    {"auth",authCommand,2,REDIS_CMD_INLINE},
    {"ping",pingCommand,1,REDIS_CMD_INLINE},
    {"echo",echoCommand,2,REDIS_CMD_BULK},
    {"save",saveCommand,1,REDIS_CMD_INLINE},
    {"bgsave",bgsaveCommand,1,REDIS_CMD_INLINE},
    {"shutdown",shutdownCommand,1,REDIS_CMD_INLINE},
    {"lastsave",lastsaveCommand,1,REDIS_CMD_INLINE},
    {"type",typeCommand,2,REDIS_CMD_INLINE},
    {"sync",syncCommand,1,REDIS_CMD_INLINE},
    {"flushdb",flushdbCommand,1,REDIS_CMD_INLINE},
    {"flushall",flushallCommand,1,REDIS_CMD_INLINE},
    {"sort",sortCommand,-2,REDIS_CMD_INLINE|REDIS_CMD_DENYOOM},
    {"info",infoCommand,1,REDIS_CMD_INLINE},
    {"monitor",monitorCommand,1,REDIS_CMD_INLINE},
    {"ttl",ttlCommand,2,REDIS_CMD_INLINE},
    {"slaveof",slaveofCommand,3,REDIS_CMD_INLINE},
    {"debug",debugCommand,-2,REDIS_CMD_INLINE},
    {NULL,NULL,0,0}
};
546 /*============================ Utility functions ============================ */
547
/* Glob-style pattern matching.
 *
 * Matches the first 'stringLen' bytes of 'string' against the first
 * 'patternLen' bytes of 'pattern'. Supported syntax:
 *
 *   *       any sequence of bytes, including the empty one
 *   ?       exactly one byte
 *   [abc]   one byte out of a set; a leading '^' negates the set, 'a-z'
 *           is a range and '\x' inside the set is the literal byte x
 *           (always compared case-sensitively)
 *   \x      the literal byte x
 *
 * If 'nocase' is non-zero, literal bytes and ranges are compared after
 * tolower(). Both lengths are expected to be non-negative.
 *
 * Returns 1 if the whole string matches the whole pattern, 0 otherwise.
 * No byte at or beyond the given lengths is ever read, so neither buffer
 * needs to be NUL-terminated. */
int stringmatchlen(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while (patternLen) {
        switch (pattern[0]) {
        case '*':
            /* Collapse runs of '*' without peeking past the pattern end. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* trailing '*' matches whatever is left */
            /* Try to match the rest of the pattern at every suffix. */
            while (stringLen) {
                if (stringmatchlen(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            /* A set always consumes one byte: nothing to consume, no match. */
            if (stringLen == 0)
                return 0;

            pattern++;
            patternLen--;
            not = patternLen && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while (1) {
                if (patternLen == 0) {
                    /* Unterminated set: step back one byte so that the
                     * shared advance after the switch lands exactly on
                     * the end of the pattern. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = pattern[0];
                    int end = pattern[2];
                    int c = string[0];

                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        /* tolower() requires an unsigned char value */
                        start = tolower((unsigned char)start);
                        end = tolower((unsigned char)end);
                        c = tolower((unsigned char)c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            /* A trailing backslash is taken literally. */
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* String exhausted: only trailing stars may still match. */
            while (patternLen && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
670
671 static void redisLog(int level, const char *fmt, ...) {
672 va_list ap;
673 FILE *fp;
674
675 fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a");
676 if (!fp) return;
677
678 va_start(ap, fmt);
679 if (level >= server.verbosity) {
680 char *c = ".-*";
681 char buf[64];
682 time_t now;
683
684 now = time(NULL);
685 strftime(buf,64,"%d %b %H:%M:%S",localtime(&now));
686 fprintf(fp,"%s %c ",buf,c[level]);
687 vfprintf(fp, fmt, ap);
688 fprintf(fp,"\n");
689 fflush(fp);
690 }
691 va_end(ap);
692
693 if (server.logfile) fclose(fp);
694 }
695
696 /*====================== Hash table type implementation ==================== */
697
698 /* This is a hash table type that uses the SDS dynamic strings library as
699 * keys and redis objects as values (objects can hold SDS strings,
700 * lists, sets). */
701
/* Dict destructor callback that releases 'val' with a plain zfree().
 * The private data pointer is not used. */
static void dictVanillaFree(void *privdata, void *val)
{
    zfree(val);
    DICT_NOTUSED(privdata);
}
707
708 static int sdsDictKeyCompare(void *privdata, const void *key1,
709 const void *key2)
710 {
711 int l1,l2;
712 DICT_NOTUSED(privdata);
713
714 l1 = sdslen((sds)key1);
715 l2 = sdslen((sds)key2);
716 if (l1 != l2) return 0;
717 return memcmp(key1, key2, l1) == 0;
718 }
719
/* Dict destructor callback for redis objects: drops one reference to
 * 'val' through decrRefCount(). The private data pointer is not used. */
static void dictRedisObjectDestructor(void *privdata, void *val)
{
    decrRefCount(val);
    DICT_NOTUSED(privdata);
}
726
727 static int dictObjKeyCompare(void *privdata, const void *key1,
728 const void *key2)
729 {
730 const robj *o1 = key1, *o2 = key2;
731 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
732 }
733
734 static unsigned int dictObjHash(const void *key) {
735 const robj *o = key;
736 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
737 }
738
739 static int dictEncObjKeyCompare(void *privdata, const void *key1,
740 const void *key2)
741 {
742 const robj *o1 = key1, *o2 = key2;
743
744 if (o1->encoding == REDIS_ENCODING_RAW &&
745 o2->encoding == REDIS_ENCODING_RAW)
746 return sdsDictKeyCompare(privdata,o1->ptr,o2->ptr);
747 else {
748 robj *dec1, *dec2;
749 int cmp;
750
751 dec1 = o1->encoding != REDIS_ENCODING_RAW ?
752 getDecodedObject(o1) : (robj*)o1;
753 dec2 = o2->encoding != REDIS_ENCODING_RAW ?
754 getDecodedObject(o2) : (robj*)o2;
755 cmp = sdsDictKeyCompare(privdata,dec1->ptr,dec2->ptr);
756 if (dec1 != o1) decrRefCount(dec1);
757 if (dec2 != o2) decrRefCount(dec2);
758 return cmp;
759 }
760 }
761
762 static unsigned int dictEncObjHash(const void *key) {
763 const robj *o = key;
764
765 if (o->encoding == REDIS_ENCODING_RAW)
766 return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
767 else {
768 robj *dec = getDecodedObject(o);
769 unsigned int hash = dictGenHashFunction(dec->ptr, sdslen((sds)dec->ptr));
770 decrRefCount(dec);
771 return hash;
772 }
773 }
774
/* Dict type with robj keys that may be integer-encoded (hashing and
 * comparison decode non-RAW keys) and no managed values: only the key is
 * released, through decrRefCount(). Presumably used for SET objects. */
static dictType setDictType = {
    dictEncObjHash,             /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictEncObjKeyCompare,       /* key compare */
    dictRedisObjectDestructor,  /* key destructor */
    NULL                        /* val destructor */
};
783
/* Same key handling as setDictType, but values are heap blocks released
 * with zfree() (dictVanillaFree). Presumably used for the dict half of a
 * zset, with the value holding the member's score - confirm in zadd. */
static dictType zsetDictType = {
    dictEncObjHash,             /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictEncObjKeyCompare,       /* key compare */
    dictRedisObjectDestructor,  /* key destructor */
    dictVanillaFree             /* val destructor */
};
792
/* Dict type where both keys and values are redis objects released through
 * decrRefCount(). Keys are hashed and compared by reading o->ptr as an sds
 * directly, with no decoding step, so keys must be RAW-encoded strings. */
static dictType hashDictType = {
    dictObjHash,                /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictObjKeyCompare,          /* key compare */
    dictRedisObjectDestructor,  /* key destructor */
    dictRedisObjectDestructor   /* val destructor */
};
801
802 /* ========================= Random utility functions ======================= */
803
/* Out of memory handler.
 *
 * Redis makes no attempt to recover when an object or string allocation
 * fails: even reporting the problem to the client may be impossible, as
 * the networking layer allocates its send buffers on the heap too. So the
 * policy is simply: print "<msg>: Out of memory" on stderr, wait a second
 * and abort(). This function never returns. */
static void oom(const char *msg) {
    FILE *out = stderr;

    fprintf(out, "%s: Out of memory\n",msg);
    fflush(out);
    sleep(1);
    abort();
}
815
816 /* ====================== Redis server networking stuff ===================== */
817 static void closeTimedoutClients(void) {
818 redisClient *c;
819 listNode *ln;
820 time_t now = time(NULL);
821
822 listRewind(server.clients);
823 while ((ln = listYield(server.clients)) != NULL) {
824 c = listNodeValue(ln);
825 if (!(c->flags & REDIS_SLAVE) && /* no timeout for slaves */
826 !(c->flags & REDIS_MASTER) && /* no timeout for masters */
827 (now - c->lastinteraction > server.maxidletime)) {
828 redisLog(REDIS_DEBUG,"Closing idle client");
829 freeClient(c);
830 }
831 }
832 }
833
834 static int htNeedsResize(dict *dict) {
835 long long size, used;
836
837 size = dictSlots(dict);
838 used = dictSize(dict);
839 return (size && used && size > DICT_HT_INITIAL_SIZE &&
840 (used*100/size < REDIS_HT_MINFILL));
841 }
842
843 /* If the percentage of used slots in the HT reaches REDIS_HT_MINFILL
844 * we resize the hash table to save memory */
845 static void tryResizeHashTables(void) {
846 int j;
847
848 for (j = 0; j < server.dbnum; j++) {
849 if (htNeedsResize(server.db[j].dict)) {
850 redisLog(REDIS_DEBUG,"The hash table %d is too sparse, resize it...",j);
851 dictResize(server.db[j].dict);
852 redisLog(REDIS_DEBUG,"Hash table %d resized.",j);
853 }
854 if (htNeedsResize(server.db[j].expires))
855 dictResize(server.db[j].expires);
856 }
857 }
858
859 static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
860 int j, loops = server.cronloops++;
861 REDIS_NOTUSED(eventLoop);
862 REDIS_NOTUSED(id);
863 REDIS_NOTUSED(clientData);
864
865 /* Update the global state with the amount of used memory */
866 server.usedmemory = zmalloc_used_memory();
867
868 /* Show some info about non-empty databases */
869 for (j = 0; j < server.dbnum; j++) {
870 long long size, used, vkeys;
871
872 size = dictSlots(server.db[j].dict);
873 used = dictSize(server.db[j].dict);
874 vkeys = dictSize(server.db[j].expires);
875 if (!(loops % 5) && (used || vkeys)) {
876 redisLog(REDIS_DEBUG,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
877 /* dictPrintStats(server.dict); */
878 }
879 }
880
881 /* We don't want to resize the hash tables while a bacground saving
882 * is in progress: the saving child is created using fork() that is
883 * implemented with a copy-on-write semantic in most modern systems, so
884 * if we resize the HT while there is the saving child at work actually
885 * a lot of memory movements in the parent will cause a lot of pages
886 * copied. */
887 if (!server.bgsaveinprogress) tryResizeHashTables();
888
889 /* Show information about connected clients */
890 if (!(loops % 5)) {
891 redisLog(REDIS_DEBUG,"%d clients connected (%d slaves), %zu bytes in use, %d shared objects",
892 listLength(server.clients)-listLength(server.slaves),
893 listLength(server.slaves),
894 server.usedmemory,
895 dictSize(server.sharingpool));
896 }
897
898 /* Close connections of timedout clients */
899 if (server.maxidletime && !(loops % 10))
900 closeTimedoutClients();
901
902 /* Check if a background saving in progress terminated */
903 if (server.bgsaveinprogress) {
904 int statloc;
905 if (wait4(-1,&statloc,WNOHANG,NULL)) {
906 int exitcode = WEXITSTATUS(statloc);
907 int bysignal = WIFSIGNALED(statloc);
908
909 if (!bysignal && exitcode == 0) {
910 redisLog(REDIS_NOTICE,
911 "Background saving terminated with success");
912 server.dirty = 0;
913 server.lastsave = time(NULL);
914 } else if (!bysignal && exitcode != 0) {
915 redisLog(REDIS_WARNING, "Background saving error");
916 } else {
917 redisLog(REDIS_WARNING,
918 "Background saving terminated by signal");
919 rdbRemoveTempFile(server.bgsavechildpid);
920 }
921 server.bgsaveinprogress = 0;
922 server.bgsavechildpid = -1;
923 updateSlavesWaitingBgsave(exitcode == 0 ? REDIS_OK : REDIS_ERR);
924 }
925 } else {
926 /* If there is not a background saving in progress check if
927 * we have to save now */
928 time_t now = time(NULL);
929 for (j = 0; j < server.saveparamslen; j++) {
930 struct saveparam *sp = server.saveparams+j;
931
932 if (server.dirty >= sp->changes &&
933 now-server.lastsave > sp->seconds) {
934 redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...",
935 sp->changes, sp->seconds);
936 rdbSaveBackground(server.dbfilename);
937 break;
938 }
939 }
940 }
941
942 /* Try to expire a few timed out keys */
943 for (j = 0; j < server.dbnum; j++) {
944 redisDb *db = server.db+j;
945 int num = dictSize(db->expires);
946
947 if (num) {
948 time_t now = time(NULL);
949
950 if (num > REDIS_EXPIRELOOKUPS_PER_CRON)
951 num = REDIS_EXPIRELOOKUPS_PER_CRON;
952 while (num--) {
953 dictEntry *de;
954 time_t t;
955
956 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
957 t = (time_t) dictGetEntryVal(de);
958 if (now > t) {
959 deleteKey(db,dictGetEntryKey(de));
960 }
961 }
962 }
963 }
964
965 /* Check if we should connect to a MASTER */
966 if (server.replstate == REDIS_REPL_CONNECT) {
967 redisLog(REDIS_NOTICE,"Connecting to MASTER...");
968 if (syncWithMaster() == REDIS_OK) {
969 redisLog(REDIS_NOTICE,"MASTER <-> SLAVE sync succeeded");
970 }
971 }
972 return 1000;
973 }
974
975 static void createSharedObjects(void) {
976 shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n"));
977 shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n"));
978 shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n"));
979 shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n"));
980 shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n"));
981 shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n"));
982 shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n"));
983 shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n"));
984 shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n"));
985 /* no such key */
986 shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
987 shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew(
988 "-ERR Operation against a key holding the wrong kind of value\r\n"));
989 shared.nokeyerr = createObject(REDIS_STRING,sdsnew(
990 "-ERR no such key\r\n"));
991 shared.syntaxerr = createObject(REDIS_STRING,sdsnew(
992 "-ERR syntax error\r\n"));
993 shared.sameobjecterr = createObject(REDIS_STRING,sdsnew(
994 "-ERR source and destination objects are the same\r\n"));
995 shared.outofrangeerr = createObject(REDIS_STRING,sdsnew(
996 "-ERR index out of range\r\n"));
997 shared.space = createObject(REDIS_STRING,sdsnew(" "));
998 shared.colon = createObject(REDIS_STRING,sdsnew(":"));
999 shared.plus = createObject(REDIS_STRING,sdsnew("+"));
1000 shared.select0 = createStringObject("select 0\r\n",10);
1001 shared.select1 = createStringObject("select 1\r\n",10);
1002 shared.select2 = createStringObject("select 2\r\n",10);
1003 shared.select3 = createStringObject("select 3\r\n",10);
1004 shared.select4 = createStringObject("select 4\r\n",10);
1005 shared.select5 = createStringObject("select 5\r\n",10);
1006 shared.select6 = createStringObject("select 6\r\n",10);
1007 shared.select7 = createStringObject("select 7\r\n",10);
1008 shared.select8 = createStringObject("select 8\r\n",10);
1009 shared.select9 = createStringObject("select 9\r\n",10);
1010 }
1011
1012 static void appendServerSaveParams(time_t seconds, int changes) {
1013 server.saveparams = zrealloc(server.saveparams,sizeof(struct saveparam)*(server.saveparamslen+1));
1014 server.saveparams[server.saveparamslen].seconds = seconds;
1015 server.saveparams[server.saveparamslen].changes = changes;
1016 server.saveparamslen++;
1017 }
1018
1019 static void ResetServerSaveParams() {
1020 zfree(server.saveparams);
1021 server.saveparams = NULL;
1022 server.saveparamslen = 0;
1023 }
1024
1025 static void initServerConfig() {
1026 server.dbnum = REDIS_DEFAULT_DBNUM;
1027 server.port = REDIS_SERVERPORT;
1028 server.verbosity = REDIS_DEBUG;
1029 server.maxidletime = REDIS_MAXIDLETIME;
1030 server.saveparams = NULL;
1031 server.logfile = NULL; /* NULL = log on standard output */
1032 server.bindaddr = NULL;
1033 server.glueoutputbuf = 1;
1034 server.daemonize = 0;
1035 server.appendonly = 0;
1036 server.appendfsync = APPENDFSYNC_ALWAYS;
1037 server.lastfsync = time(NULL);
1038 server.appendfd = -1;
1039 server.appendseldb = -1; /* Make sure the first time will not match */
1040 server.pidfile = "/var/run/redis.pid";
1041 server.dbfilename = "dump.rdb";
1042 server.appendfilename = "appendonly.log";
1043 server.requirepass = NULL;
1044 server.shareobjects = 0;
1045 server.sharingpoolsize = 1024;
1046 server.maxclients = 0;
1047 server.maxmemory = 0;
1048 ResetServerSaveParams();
1049
1050 appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */
1051 appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */
1052 appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1053 /* Replication related */
1054 server.isslave = 0;
1055 server.masterhost = NULL;
1056 server.masterport = 6379;
1057 server.master = NULL;
1058 server.replstate = REDIS_REPL_NONE;
1059
1060 /* Double constants initialization */
1061 R_Zero = 0.0;
1062 R_PosInf = 1.0/R_Zero;
1063 R_NegInf = -1.0/R_Zero;
1064 R_Nan = R_Zero/R_Zero;
1065 }
1066
1067 static void initServer() {
1068 int j;
1069
1070 signal(SIGHUP, SIG_IGN);
1071 signal(SIGPIPE, SIG_IGN);
1072 setupSigSegvAction();
1073
1074 server.clients = listCreate();
1075 server.slaves = listCreate();
1076 server.monitors = listCreate();
1077 server.objfreelist = listCreate();
1078 createSharedObjects();
1079 server.el = aeCreateEventLoop();
1080 server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1081 server.sharingpool = dictCreate(&setDictType,NULL);
1082 server.fd = anetTcpServer(server.neterr, server.port, server.bindaddr);
1083 if (server.fd == -1) {
1084 redisLog(REDIS_WARNING, "Opening TCP port: %s", server.neterr);
1085 exit(1);
1086 }
1087 for (j = 0; j < server.dbnum; j++) {
1088 server.db[j].dict = dictCreate(&hashDictType,NULL);
1089 server.db[j].expires = dictCreate(&setDictType,NULL);
1090 server.db[j].id = j;
1091 }
1092 server.cronloops = 0;
1093 server.bgsaveinprogress = 0;
1094 server.bgsavechildpid = -1;
1095 server.lastsave = time(NULL);
1096 server.dirty = 0;
1097 server.usedmemory = 0;
1098 server.stat_numcommands = 0;
1099 server.stat_numconnections = 0;
1100 server.stat_starttime = time(NULL);
1101 aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL);
1102
1103 if (server.appendonly) {
1104 server.appendfd = open(server.appendfilename,O_WRONLY|O_APPEND|O_CREAT,0644);
1105 if (server.appendfd == -1) {
1106 redisLog(REDIS_WARNING, "Can't open the append-only file: %s",
1107 strerror(errno));
1108 exit(1);
1109 }
1110 }
1111 }
1112
1113 /* Empty the whole database */
1114 static long long emptyDb() {
1115 int j;
1116 long long removed = 0;
1117
1118 for (j = 0; j < server.dbnum; j++) {
1119 removed += dictSize(server.db[j].dict);
1120 dictEmpty(server.db[j].dict);
1121 dictEmpty(server.db[j].expires);
1122 }
1123 return removed;
1124 }
1125
/* Map a yes/no string to an integer, ignoring case:
 * "yes" -> 1, "no" -> 0, anything else -> -1. */
static int yesnotoi(char *s) {
    if (strcasecmp(s,"yes") == 0) return 1;
    if (strcasecmp(s,"no") == 0) return 0;
    return -1;
}
1131
1132 /* I agree, this is a very rudimental way to load a configuration...
1133 will improve later if the config gets more complex */
1134 static void loadServerConfig(char *filename) {
1135 FILE *fp;
1136 char buf[REDIS_CONFIGLINE_MAX+1], *err = NULL;
1137 int linenum = 0;
1138 sds line = NULL;
1139
1140 if (filename[0] == '-' && filename[1] == '\0')
1141 fp = stdin;
1142 else {
1143 if ((fp = fopen(filename,"r")) == NULL) {
1144 redisLog(REDIS_WARNING,"Fatal error, can't open config file");
1145 exit(1);
1146 }
1147 }
1148
1149 while(fgets(buf,REDIS_CONFIGLINE_MAX+1,fp) != NULL) {
1150 sds *argv;
1151 int argc, j;
1152
1153 linenum++;
1154 line = sdsnew(buf);
1155 line = sdstrim(line," \t\r\n");
1156
1157 /* Skip comments and blank lines*/
1158 if (line[0] == '#' || line[0] == '\0') {
1159 sdsfree(line);
1160 continue;
1161 }
1162
1163 /* Split into arguments */
1164 argv = sdssplitlen(line,sdslen(line)," ",1,&argc);
1165 sdstolower(argv[0]);
1166
1167 /* Execute config directives */
1168 if (!strcasecmp(argv[0],"timeout") && argc == 2) {
1169 server.maxidletime = atoi(argv[1]);
1170 if (server.maxidletime < 0) {
1171 err = "Invalid timeout value"; goto loaderr;
1172 }
1173 } else if (!strcasecmp(argv[0],"port") && argc == 2) {
1174 server.port = atoi(argv[1]);
1175 if (server.port < 1 || server.port > 65535) {
1176 err = "Invalid port"; goto loaderr;
1177 }
1178 } else if (!strcasecmp(argv[0],"bind") && argc == 2) {
1179 server.bindaddr = zstrdup(argv[1]);
1180 } else if (!strcasecmp(argv[0],"save") && argc == 3) {
1181 int seconds = atoi(argv[1]);
1182 int changes = atoi(argv[2]);
1183 if (seconds < 1 || changes < 0) {
1184 err = "Invalid save parameters"; goto loaderr;
1185 }
1186 appendServerSaveParams(seconds,changes);
1187 } else if (!strcasecmp(argv[0],"dir") && argc == 2) {
1188 if (chdir(argv[1]) == -1) {
1189 redisLog(REDIS_WARNING,"Can't chdir to '%s': %s",
1190 argv[1], strerror(errno));
1191 exit(1);
1192 }
1193 } else if (!strcasecmp(argv[0],"loglevel") && argc == 2) {
1194 if (!strcasecmp(argv[1],"debug")) server.verbosity = REDIS_DEBUG;
1195 else if (!strcasecmp(argv[1],"notice")) server.verbosity = REDIS_NOTICE;
1196 else if (!strcasecmp(argv[1],"warning")) server.verbosity = REDIS_WARNING;
1197 else {
1198 err = "Invalid log level. Must be one of debug, notice, warning";
1199 goto loaderr;
1200 }
1201 } else if (!strcasecmp(argv[0],"logfile") && argc == 2) {
1202 FILE *logfp;
1203
1204 server.logfile = zstrdup(argv[1]);
1205 if (!strcasecmp(server.logfile,"stdout")) {
1206 zfree(server.logfile);
1207 server.logfile = NULL;
1208 }
1209 if (server.logfile) {
1210 /* Test if we are able to open the file. The server will not
1211 * be able to abort just for this problem later... */
1212 logfp = fopen(server.logfile,"a");
1213 if (logfp == NULL) {
1214 err = sdscatprintf(sdsempty(),
1215 "Can't open the log file: %s", strerror(errno));
1216 goto loaderr;
1217 }
1218 fclose(logfp);
1219 }
1220 } else if (!strcasecmp(argv[0],"databases") && argc == 2) {
1221 server.dbnum = atoi(argv[1]);
1222 if (server.dbnum < 1) {
1223 err = "Invalid number of databases"; goto loaderr;
1224 }
1225 } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) {
1226 server.maxclients = atoi(argv[1]);
1227 } else if (!strcasecmp(argv[0],"maxmemory") && argc == 2) {
1228 server.maxmemory = strtoll(argv[1], NULL, 10);
1229 } else if (!strcasecmp(argv[0],"slaveof") && argc == 3) {
1230 server.masterhost = sdsnew(argv[1]);
1231 server.masterport = atoi(argv[2]);
1232 server.replstate = REDIS_REPL_CONNECT;
1233 } else if (!strcasecmp(argv[0],"glueoutputbuf") && argc == 2) {
1234 if ((server.glueoutputbuf = yesnotoi(argv[1])) == -1) {
1235 err = "argument must be 'yes' or 'no'"; goto loaderr;
1236 }
1237 } else if (!strcasecmp(argv[0],"shareobjects") && argc == 2) {
1238 if ((server.shareobjects = yesnotoi(argv[1])) == -1) {
1239 err = "argument must be 'yes' or 'no'"; goto loaderr;
1240 }
1241 } else if (!strcasecmp(argv[0],"shareobjectspoolsize") && argc == 2) {
1242 server.sharingpoolsize = atoi(argv[1]);
1243 if (server.sharingpoolsize < 1) {
1244 err = "invalid object sharing pool size"; goto loaderr;
1245 }
1246 } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) {
1247 if ((server.daemonize = yesnotoi(argv[1])) == -1) {
1248 err = "argument must be 'yes' or 'no'"; goto loaderr;
1249 }
1250 } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) {
1251 if ((server.appendonly = yesnotoi(argv[1])) == -1) {
1252 err = "argument must be 'yes' or 'no'"; goto loaderr;
1253 }
1254 } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) {
1255 if (!strcasecmp(argv[1],"no")) {
1256 server.appendfsync = APPENDFSYNC_NO;
1257 } else if (!strcasecmp(argv[1],"always")) {
1258 server.appendfsync = APPENDFSYNC_ALWAYS;
1259 } else if (!strcasecmp(argv[1],"everysec")) {
1260 server.appendfsync = APPENDFSYNC_EVERYSEC;
1261 } else {
1262 err = "argument must be 'no', 'always' or 'everysec'";
1263 goto loaderr;
1264 }
1265 } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) {
1266 server.requirepass = zstrdup(argv[1]);
1267 } else if (!strcasecmp(argv[0],"pidfile") && argc == 2) {
1268 server.pidfile = zstrdup(argv[1]);
1269 } else if (!strcasecmp(argv[0],"dbfilename") && argc == 2) {
1270 server.dbfilename = zstrdup(argv[1]);
1271 } else {
1272 err = "Bad directive or wrong number of arguments"; goto loaderr;
1273 }
1274 for (j = 0; j < argc; j++)
1275 sdsfree(argv[j]);
1276 zfree(argv);
1277 sdsfree(line);
1278 }
1279 if (fp != stdin) fclose(fp);
1280 return;
1281
1282 loaderr:
1283 fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR ***\n");
1284 fprintf(stderr, "Reading the configuration file, at line %d\n", linenum);
1285 fprintf(stderr, ">>> '%s'\n", line);
1286 fprintf(stderr, "%s\n", err);
1287 exit(1);
1288 }
1289
1290 static void freeClientArgv(redisClient *c) {
1291 int j;
1292
1293 for (j = 0; j < c->argc; j++)
1294 decrRefCount(c->argv[j]);
1295 for (j = 0; j < c->mbargc; j++)
1296 decrRefCount(c->mbargv[j]);
1297 c->argc = 0;
1298 c->mbargc = 0;
1299 }
1300
1301 static void freeClient(redisClient *c) {
1302 listNode *ln;
1303
1304 aeDeleteFileEvent(server.el,c->fd,AE_READABLE);
1305 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1306 sdsfree(c->querybuf);
1307 listRelease(c->reply);
1308 freeClientArgv(c);
1309 close(c->fd);
1310 ln = listSearchKey(server.clients,c);
1311 assert(ln != NULL);
1312 listDelNode(server.clients,ln);
1313 if (c->flags & REDIS_SLAVE) {
1314 if (c->replstate == REDIS_REPL_SEND_BULK && c->repldbfd != -1)
1315 close(c->repldbfd);
1316 list *l = (c->flags & REDIS_MONITOR) ? server.monitors : server.slaves;
1317 ln = listSearchKey(l,c);
1318 assert(ln != NULL);
1319 listDelNode(l,ln);
1320 }
1321 if (c->flags & REDIS_MASTER) {
1322 server.master = NULL;
1323 server.replstate = REDIS_REPL_CONNECT;
1324 }
1325 zfree(c->argv);
1326 zfree(c->mbargv);
1327 zfree(c);
1328 }
1329
1330 static void glueReplyBuffersIfNeeded(redisClient *c) {
1331 int totlen = 0;
1332 listNode *ln;
1333 robj *o;
1334
1335 listRewind(c->reply);
1336 while((ln = listYield(c->reply))) {
1337 o = ln->value;
1338 totlen += sdslen(o->ptr);
1339 /* This optimization makes more sense if we don't have to copy
1340 * too much data */
1341 if (totlen > 1024) return;
1342 }
1343 if (totlen > 0) {
1344 char buf[1024];
1345 int copylen = 0;
1346
1347 listRewind(c->reply);
1348 while((ln = listYield(c->reply))) {
1349 o = ln->value;
1350 memcpy(buf+copylen,o->ptr,sdslen(o->ptr));
1351 copylen += sdslen(o->ptr);
1352 listDelNode(c->reply,ln);
1353 }
1354 /* Now the output buffer is empty, add the new single element */
1355 o = createObject(REDIS_STRING,sdsnewlen(buf,totlen));
1356 listAddNodeTail(c->reply,o);
1357 }
1358 }
1359
1360 static void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1361 redisClient *c = privdata;
1362 int nwritten = 0, totwritten = 0, objlen;
1363 robj *o;
1364 REDIS_NOTUSED(el);
1365 REDIS_NOTUSED(mask);
1366
1367 if (server.glueoutputbuf && listLength(c->reply) > 1)
1368 glueReplyBuffersIfNeeded(c);
1369 while(listLength(c->reply)) {
1370 o = listNodeValue(listFirst(c->reply));
1371 objlen = sdslen(o->ptr);
1372
1373 if (objlen == 0) {
1374 listDelNode(c->reply,listFirst(c->reply));
1375 continue;
1376 }
1377
1378 if (c->flags & REDIS_MASTER) {
1379 /* Don't reply to a master */
1380 nwritten = objlen - c->sentlen;
1381 } else {
1382 nwritten = write(fd, ((char*)o->ptr)+c->sentlen, objlen - c->sentlen);
1383 if (nwritten <= 0) break;
1384 }
1385 c->sentlen += nwritten;
1386 totwritten += nwritten;
1387 /* If we fully sent the object on head go to the next one */
1388 if (c->sentlen == objlen) {
1389 listDelNode(c->reply,listFirst(c->reply));
1390 c->sentlen = 0;
1391 }
1392 /* Note that we avoid to send more thank REDIS_MAX_WRITE_PER_EVENT
1393 * bytes, in a single threaded server it's a good idea to server
1394 * other clients as well, even if a very large request comes from
1395 * super fast link that is always able to accept data (in real world
1396 * terms think to 'KEYS *' against the loopback interfae) */
1397 if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
1398 }
1399 if (nwritten == -1) {
1400 if (errno == EAGAIN) {
1401 nwritten = 0;
1402 } else {
1403 redisLog(REDIS_DEBUG,
1404 "Error writing to client: %s", strerror(errno));
1405 freeClient(c);
1406 return;
1407 }
1408 }
1409 if (totwritten > 0) c->lastinteraction = time(NULL);
1410 if (listLength(c->reply) == 0) {
1411 c->sentlen = 0;
1412 aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);
1413 }
1414 }
1415
1416 static struct redisCommand *lookupCommand(char *name) {
1417 int j = 0;
1418 while(cmdTable[j].name != NULL) {
1419 if (!strcasecmp(name,cmdTable[j].name)) return &cmdTable[j];
1420 j++;
1421 }
1422 return NULL;
1423 }
1424
1425 /* resetClient prepare the client to process the next command */
1426 static void resetClient(redisClient *c) {
1427 freeClientArgv(c);
1428 c->bulklen = -1;
1429 c->multibulk = 0;
1430 }
1431
1432 /* If this function gets called we already read a whole
1433 * command, argments are in the client argv/argc fields.
1434 * processCommand() execute the command or prepare the
1435 * server for a bulk read from the client.
1436 *
1437 * If 1 is returned the client is still alive and valid and
1438 * and other operations can be performed by the caller. Otherwise
1439 * if 0 is returned the client was destroied (i.e. after QUIT). */
1440 static int processCommand(redisClient *c) {
1441 struct redisCommand *cmd;
1442 long long dirty;
1443
1444 /* Free some memory if needed (maxmemory setting) */
1445 if (server.maxmemory) freeMemoryIfNeeded();
1446
1447 /* Handle the multi bulk command type. This is an alternative protocol
1448 * supported by Redis in order to receive commands that are composed of
1449 * multiple binary-safe "bulk" arguments. The latency of processing is
1450 * a bit higher but this allows things like multi-sets, so if this
1451 * protocol is used only for MSET and similar commands this is a big win. */
1452 if (c->multibulk == 0 && c->argc == 1 && ((char*)(c->argv[0]->ptr))[0] == '*') {
1453 c->multibulk = atoi(((char*)c->argv[0]->ptr)+1);
1454 if (c->multibulk <= 0) {
1455 resetClient(c);
1456 return 1;
1457 } else {
1458 decrRefCount(c->argv[c->argc-1]);
1459 c->argc--;
1460 return 1;
1461 }
1462 } else if (c->multibulk) {
1463 if (c->bulklen == -1) {
1464 if (((char*)c->argv[0]->ptr)[0] != '$') {
1465 addReplySds(c,sdsnew("-ERR multi bulk protocol error\r\n"));
1466 resetClient(c);
1467 return 1;
1468 } else {
1469 int bulklen = atoi(((char*)c->argv[0]->ptr)+1);
1470 decrRefCount(c->argv[0]);
1471 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1472 c->argc--;
1473 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1474 resetClient(c);
1475 return 1;
1476 }
1477 c->argc--;
1478 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1479 return 1;
1480 }
1481 } else {
1482 c->mbargv = zrealloc(c->mbargv,(sizeof(robj*))*(c->mbargc+1));
1483 c->mbargv[c->mbargc] = c->argv[0];
1484 c->mbargc++;
1485 c->argc--;
1486 c->multibulk--;
1487 if (c->multibulk == 0) {
1488 robj **auxargv;
1489 int auxargc;
1490
1491 /* Here we need to swap the multi-bulk argc/argv with the
1492 * normal argc/argv of the client structure. */
1493 auxargv = c->argv;
1494 c->argv = c->mbargv;
1495 c->mbargv = auxargv;
1496
1497 auxargc = c->argc;
1498 c->argc = c->mbargc;
1499 c->mbargc = auxargc;
1500
1501 /* We need to set bulklen to something different than -1
1502 * in order for the code below to process the command without
1503 * to try to read the last argument of a bulk command as
1504 * a special argument. */
1505 c->bulklen = 0;
1506 /* continue below and process the command */
1507 } else {
1508 c->bulklen = -1;
1509 return 1;
1510 }
1511 }
1512 }
1513 /* -- end of multi bulk commands processing -- */
1514
1515 /* The QUIT command is handled as a special case. Normal command
1516 * procs are unable to close the client connection safely */
1517 if (!strcasecmp(c->argv[0]->ptr,"quit")) {
1518 freeClient(c);
1519 return 0;
1520 }
1521 cmd = lookupCommand(c->argv[0]->ptr);
1522 if (!cmd) {
1523 addReplySds(c,sdsnew("-ERR unknown command\r\n"));
1524 resetClient(c);
1525 return 1;
1526 } else if ((cmd->arity > 0 && cmd->arity != c->argc) ||
1527 (c->argc < -cmd->arity)) {
1528 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
1529 resetClient(c);
1530 return 1;
1531 } else if (server.maxmemory && cmd->flags & REDIS_CMD_DENYOOM && zmalloc_used_memory() > server.maxmemory) {
1532 addReplySds(c,sdsnew("-ERR command not allowed when used memory > 'maxmemory'\r\n"));
1533 resetClient(c);
1534 return 1;
1535 } else if (cmd->flags & REDIS_CMD_BULK && c->bulklen == -1) {
1536 int bulklen = atoi(c->argv[c->argc-1]->ptr);
1537
1538 decrRefCount(c->argv[c->argc-1]);
1539 if (bulklen < 0 || bulklen > 1024*1024*1024) {
1540 c->argc--;
1541 addReplySds(c,sdsnew("-ERR invalid bulk write count\r\n"));
1542 resetClient(c);
1543 return 1;
1544 }
1545 c->argc--;
1546 c->bulklen = bulklen+2; /* add two bytes for CR+LF */
1547 /* It is possible that the bulk read is already in the
1548 * buffer. Check this condition and handle it accordingly.
1549 * This is just a fast path, alternative to call processInputBuffer().
1550 * It's a good idea since the code is small and this condition
1551 * happens most of the times. */
1552 if ((signed)sdslen(c->querybuf) >= c->bulklen) {
1553 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1554 c->argc++;
1555 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1556 } else {
1557 return 1;
1558 }
1559 }
1560 /* Let's try to share objects on the command arguments vector */
1561 if (server.shareobjects) {
1562 int j;
1563 for(j = 1; j < c->argc; j++)
1564 c->argv[j] = tryObjectSharing(c->argv[j]);
1565 }
1566 /* Let's try to encode the bulk object to save space. */
1567 if (cmd->flags & REDIS_CMD_BULK)
1568 tryObjectEncoding(c->argv[c->argc-1]);
1569
1570 /* Check if the user is authenticated */
1571 if (server.requirepass && !c->authenticated && cmd->proc != authCommand) {
1572 addReplySds(c,sdsnew("-ERR operation not permitted\r\n"));
1573 resetClient(c);
1574 return 1;
1575 }
1576
1577 /* Exec the command */
1578 dirty = server.dirty;
1579 cmd->proc(c);
1580 if (server.appendonly && server.dirty-dirty)
1581 feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
1582 if (server.dirty-dirty && listLength(server.slaves))
1583 replicationFeedSlaves(server.slaves,cmd,c->db->id,c->argv,c->argc);
1584 if (listLength(server.monitors))
1585 replicationFeedSlaves(server.monitors,cmd,c->db->id,c->argv,c->argc);
1586 server.stat_numcommands++;
1587
1588 /* Prepare the client for the next command */
1589 if (c->flags & REDIS_CLOSE) {
1590 freeClient(c);
1591 return 0;
1592 }
1593 resetClient(c);
1594 return 1;
1595 }
1596
1597 static void replicationFeedSlaves(list *slaves, struct redisCommand *cmd, int dictid, robj **argv, int argc) {
1598 listNode *ln;
1599 int outc = 0, j;
1600 robj **outv;
1601 /* (args*2)+1 is enough room for args, spaces, newlines */
1602 robj *static_outv[REDIS_STATIC_ARGS*2+1];
1603
1604 if (argc <= REDIS_STATIC_ARGS) {
1605 outv = static_outv;
1606 } else {
1607 outv = zmalloc(sizeof(robj*)*(argc*2+1));
1608 }
1609
1610 for (j = 0; j < argc; j++) {
1611 if (j != 0) outv[outc++] = shared.space;
1612 if ((cmd->flags & REDIS_CMD_BULK) && j == argc-1) {
1613 robj *lenobj;
1614
1615 lenobj = createObject(REDIS_STRING,
1616 sdscatprintf(sdsempty(),"%d\r\n",
1617 stringObjectLen(argv[j])));
1618 lenobj->refcount = 0;
1619 outv[outc++] = lenobj;
1620 }
1621 outv[outc++] = argv[j];
1622 }
1623 outv[outc++] = shared.crlf;
1624
1625 /* Increment all the refcounts at start and decrement at end in order to
1626 * be sure to free objects if there is no slave in a replication state
1627 * able to be feed with commands */
1628 for (j = 0; j < outc; j++) incrRefCount(outv[j]);
1629 listRewind(slaves);
1630 while((ln = listYield(slaves))) {
1631 redisClient *slave = ln->value;
1632
1633 /* Don't feed slaves that are still waiting for BGSAVE to start */
1634 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) continue;
1635
1636 /* Feed all the other slaves, MONITORs and so on */
1637 if (slave->slaveseldb != dictid) {
1638 robj *selectcmd;
1639
1640 switch(dictid) {
1641 case 0: selectcmd = shared.select0; break;
1642 case 1: selectcmd = shared.select1; break;
1643 case 2: selectcmd = shared.select2; break;
1644 case 3: selectcmd = shared.select3; break;
1645 case 4: selectcmd = shared.select4; break;
1646 case 5: selectcmd = shared.select5; break;
1647 case 6: selectcmd = shared.select6; break;
1648 case 7: selectcmd = shared.select7; break;
1649 case 8: selectcmd = shared.select8; break;
1650 case 9: selectcmd = shared.select9; break;
1651 default:
1652 selectcmd = createObject(REDIS_STRING,
1653 sdscatprintf(sdsempty(),"select %d\r\n",dictid));
1654 selectcmd->refcount = 0;
1655 break;
1656 }
1657 addReply(slave,selectcmd);
1658 slave->slaveseldb = dictid;
1659 }
1660 for (j = 0; j < outc; j++) addReply(slave,outv[j]);
1661 }
1662 for (j = 0; j < outc; j++) decrRefCount(outv[j]);
1663 if (outv != static_outv) zfree(outv);
1664 }
1665
1666 static void processInputBuffer(redisClient *c) {
1667 again:
1668 if (c->bulklen == -1) {
1669 /* Read the first line of the query */
1670 char *p = strchr(c->querybuf,'\n');
1671 size_t querylen;
1672
1673 if (p) {
1674 sds query, *argv;
1675 int argc, j;
1676
1677 query = c->querybuf;
1678 c->querybuf = sdsempty();
1679 querylen = 1+(p-(query));
1680 if (sdslen(query) > querylen) {
1681 /* leave data after the first line of the query in the buffer */
1682 c->querybuf = sdscatlen(c->querybuf,query+querylen,sdslen(query)-querylen);
1683 }
1684 *p = '\0'; /* remove "\n" */
1685 if (*(p-1) == '\r') *(p-1) = '\0'; /* and "\r" if any */
1686 sdsupdatelen(query);
1687
1688 /* Now we can split the query in arguments */
1689 if (sdslen(query) == 0) {
1690 /* Ignore empty query */
1691 sdsfree(query);
1692 return;
1693 }
1694 argv = sdssplitlen(query,sdslen(query)," ",1,&argc);
1695 sdsfree(query);
1696
1697 if (c->argv) zfree(c->argv);
1698 c->argv = zmalloc(sizeof(robj*)*argc);
1699
1700 for (j = 0; j < argc; j++) {
1701 if (sdslen(argv[j])) {
1702 c->argv[c->argc] = createObject(REDIS_STRING,argv[j]);
1703 c->argc++;
1704 } else {
1705 sdsfree(argv[j]);
1706 }
1707 }
1708 zfree(argv);
1709 /* Execute the command. If the client is still valid
1710 * after processCommand() return and there is something
1711 * on the query buffer try to process the next command. */
1712 if (c->argc && processCommand(c) && sdslen(c->querybuf)) goto again;
1713 return;
1714 } else if (sdslen(c->querybuf) >= REDIS_REQUEST_MAX_SIZE) {
1715 redisLog(REDIS_DEBUG, "Client protocol error");
1716 freeClient(c);
1717 return;
1718 }
1719 } else {
1720 /* Bulk read handling. Note that if we are at this point
1721 the client already sent a command terminated with a newline,
1722 we are reading the bulk data that is actually the last
1723 argument of the command. */
1724 int qbl = sdslen(c->querybuf);
1725
1726 if (c->bulklen <= qbl) {
1727 /* Copy everything but the final CRLF as final argument */
1728 c->argv[c->argc] = createStringObject(c->querybuf,c->bulklen-2);
1729 c->argc++;
1730 c->querybuf = sdsrange(c->querybuf,c->bulklen,-1);
1731 /* Process the command. If the client is still valid after
1732 * the processing and there is more data in the buffer
1733 * try to parse it. */
1734 if (processCommand(c) && sdslen(c->querybuf)) goto again;
1735 return;
1736 }
1737 }
1738 }
1739
1740 static void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
1741 redisClient *c = (redisClient*) privdata;
1742 char buf[REDIS_IOBUF_LEN];
1743 int nread;
1744 REDIS_NOTUSED(el);
1745 REDIS_NOTUSED(mask);
1746
1747 nread = read(fd, buf, REDIS_IOBUF_LEN);
1748 if (nread == -1) {
1749 if (errno == EAGAIN) {
1750 nread = 0;
1751 } else {
1752 redisLog(REDIS_DEBUG, "Reading from client: %s",strerror(errno));
1753 freeClient(c);
1754 return;
1755 }
1756 } else if (nread == 0) {
1757 redisLog(REDIS_DEBUG, "Client closed connection");
1758 freeClient(c);
1759 return;
1760 }
1761 if (nread) {
1762 c->querybuf = sdscatlen(c->querybuf, buf, nread);
1763 c->lastinteraction = time(NULL);
1764 } else {
1765 return;
1766 }
1767 processInputBuffer(c);
1768 }
1769
1770 static int selectDb(redisClient *c, int id) {
1771 if (id < 0 || id >= server.dbnum)
1772 return REDIS_ERR;
1773 c->db = &server.db[id];
1774 return REDIS_OK;
1775 }
1776
1777 static void *dupClientReplyValue(void *o) {
1778 incrRefCount((robj*)o);
1779 return 0;
1780 }
1781
1782 static redisClient *createClient(int fd) {
1783 redisClient *c = zmalloc(sizeof(*c));
1784
1785 anetNonBlock(NULL,fd);
1786 anetTcpNoDelay(NULL,fd);
1787 if (!c) return NULL;
1788 selectDb(c,0);
1789 c->fd = fd;
1790 c->querybuf = sdsempty();
1791 c->argc = 0;
1792 c->argv = NULL;
1793 c->bulklen = -1;
1794 c->multibulk = 0;
1795 c->mbargc = 0;
1796 c->mbargv = NULL;
1797 c->sentlen = 0;
1798 c->flags = 0;
1799 c->lastinteraction = time(NULL);
1800 c->authenticated = 0;
1801 c->replstate = REDIS_REPL_NONE;
1802 c->reply = listCreate();
1803 listSetFreeMethod(c->reply,decrRefCount);
1804 listSetDupMethod(c->reply,dupClientReplyValue);
1805 if (aeCreateFileEvent(server.el, c->fd, AE_READABLE,
1806 readQueryFromClient, c, NULL) == AE_ERR) {
1807 freeClient(c);
1808 return NULL;
1809 }
1810 listAddNodeTail(server.clients,c);
1811 return c;
1812 }
1813
1814 static void addReply(redisClient *c, robj *obj) {
1815 if (listLength(c->reply) == 0 &&
1816 (c->replstate == REDIS_REPL_NONE ||
1817 c->replstate == REDIS_REPL_ONLINE) &&
1818 aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
1819 sendReplyToClient, c, NULL) == AE_ERR) return;
1820 if (obj->encoding != REDIS_ENCODING_RAW) {
1821 obj = getDecodedObject(obj);
1822 } else {
1823 incrRefCount(obj);
1824 }
1825 listAddNodeTail(c->reply,obj);
1826 }
1827
1828 static void addReplySds(redisClient *c, sds s) {
1829 robj *o = createObject(REDIS_STRING,s);
1830 addReply(c,o);
1831 decrRefCount(o);
1832 }
1833
1834 static void addReplyBulkLen(redisClient *c, robj *obj) {
1835 size_t len;
1836
1837 if (obj->encoding == REDIS_ENCODING_RAW) {
1838 len = sdslen(obj->ptr);
1839 } else {
1840 long n = (long)obj->ptr;
1841
1842 len = 1;
1843 if (n < 0) {
1844 len++;
1845 n = -n;
1846 }
1847 while((n = n/10) != 0) {
1848 len++;
1849 }
1850 }
1851 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",len));
1852 }
1853
1854 static void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
1855 int cport, cfd;
1856 char cip[128];
1857 redisClient *c;
1858 REDIS_NOTUSED(el);
1859 REDIS_NOTUSED(mask);
1860 REDIS_NOTUSED(privdata);
1861
1862 cfd = anetAccept(server.neterr, fd, cip, &cport);
1863 if (cfd == AE_ERR) {
1864 redisLog(REDIS_DEBUG,"Accepting client connection: %s", server.neterr);
1865 return;
1866 }
1867 redisLog(REDIS_DEBUG,"Accepted %s:%d", cip, cport);
1868 if ((c = createClient(cfd)) == NULL) {
1869 redisLog(REDIS_WARNING,"Error allocating resoures for the client");
1870 close(cfd); /* May be already closed, just ingore errors */
1871 return;
1872 }
1873 /* If maxclient directive is set and this is one client more... close the
1874 * connection. Note that we create the client instead to check before
1875 * for this condition, since now the socket is already set in nonblocking
1876 * mode and we can send an error for free using the Kernel I/O */
1877 if (server.maxclients && listLength(server.clients) > server.maxclients) {
1878 char *err = "-ERR max number of clients reached\r\n";
1879
1880 /* That's a best effort error message, don't check write errors */
1881 (void) write(c->fd,err,strlen(err));
1882 freeClient(c);
1883 return;
1884 }
1885 server.stat_numconnections++;
1886 }
1887
1888 /* ======================= Redis objects implementation ===================== */
1889
1890 static robj *createObject(int type, void *ptr) {
1891 robj *o;
1892
1893 if (listLength(server.objfreelist)) {
1894 listNode *head = listFirst(server.objfreelist);
1895 o = listNodeValue(head);
1896 listDelNode(server.objfreelist,head);
1897 } else {
1898 o = zmalloc(sizeof(*o));
1899 }
1900 o->type = type;
1901 o->encoding = REDIS_ENCODING_RAW;
1902 o->ptr = ptr;
1903 o->refcount = 1;
1904 return o;
1905 }
1906
1907 static robj *createStringObject(char *ptr, size_t len) {
1908 return createObject(REDIS_STRING,sdsnewlen(ptr,len));
1909 }
1910
1911 static robj *createListObject(void) {
1912 list *l = listCreate();
1913
1914 listSetFreeMethod(l,decrRefCount);
1915 return createObject(REDIS_LIST,l);
1916 }
1917
1918 static robj *createSetObject(void) {
1919 dict *d = dictCreate(&setDictType,NULL);
1920 return createObject(REDIS_SET,d);
1921 }
1922
1923 static robj *createZsetObject(void) {
1924 zset *zs = zmalloc(sizeof(*zs));
1925
1926 zs->dict = dictCreate(&zsetDictType,NULL);
1927 zs->zsl = zslCreate();
1928 return createObject(REDIS_ZSET,zs);
1929 }
1930
1931 static void freeStringObject(robj *o) {
1932 if (o->encoding == REDIS_ENCODING_RAW) {
1933 sdsfree(o->ptr);
1934 }
1935 }
1936
1937 static void freeListObject(robj *o) {
1938 listRelease((list*) o->ptr);
1939 }
1940
1941 static void freeSetObject(robj *o) {
1942 dictRelease((dict*) o->ptr);
1943 }
1944
1945 static void freeZsetObject(robj *o) {
1946 zset *zs = o->ptr;
1947
1948 dictRelease(zs->dict);
1949 zslFree(zs->zsl);
1950 zfree(zs);
1951 }
1952
1953 static void freeHashObject(robj *o) {
1954 dictRelease((dict*) o->ptr);
1955 }
1956
1957 static void incrRefCount(robj *o) {
1958 o->refcount++;
1959 #ifdef DEBUG_REFCOUNT
1960 if (o->type == REDIS_STRING)
1961 printf("Increment '%s'(%p), now is: %d\n",o->ptr,o,o->refcount);
1962 #endif
1963 }
1964
1965 static void decrRefCount(void *obj) {
1966 robj *o = obj;
1967
1968 #ifdef DEBUG_REFCOUNT
1969 if (o->type == REDIS_STRING)
1970 printf("Decrement '%s'(%p), now is: %d\n",o->ptr,o,o->refcount-1);
1971 #endif
1972 if (--(o->refcount) == 0) {
1973 switch(o->type) {
1974 case REDIS_STRING: freeStringObject(o); break;
1975 case REDIS_LIST: freeListObject(o); break;
1976 case REDIS_SET: freeSetObject(o); break;
1977 case REDIS_ZSET: freeZsetObject(o); break;
1978 case REDIS_HASH: freeHashObject(o); break;
1979 default: assert(0 != 0); break;
1980 }
1981 if (listLength(server.objfreelist) > REDIS_OBJFREELIST_MAX ||
1982 !listAddNodeHead(server.objfreelist,o))
1983 zfree(o);
1984 }
1985 }
1986
1987 static robj *lookupKey(redisDb *db, robj *key) {
1988 dictEntry *de = dictFind(db->dict,key);
1989 return de ? dictGetEntryVal(de) : NULL;
1990 }
1991
1992 static robj *lookupKeyRead(redisDb *db, robj *key) {
1993 expireIfNeeded(db,key);
1994 return lookupKey(db,key);
1995 }
1996
1997 static robj *lookupKeyWrite(redisDb *db, robj *key) {
1998 deleteIfVolatile(db,key);
1999 return lookupKey(db,key);
2000 }
2001
2002 static int deleteKey(redisDb *db, robj *key) {
2003 int retval;
2004
2005 /* We need to protect key from destruction: after the first dictDelete()
2006 * it may happen that 'key' is no longer valid if we don't increment
2007 * it's count. This may happen when we get the object reference directly
2008 * from the hash table with dictRandomKey() or dict iterators */
2009 incrRefCount(key);
2010 if (dictSize(db->expires)) dictDelete(db->expires,key);
2011 retval = dictDelete(db->dict,key);
2012 decrRefCount(key);
2013
2014 return retval == DICT_OK;
2015 }
2016
2017 /* Try to share an object against the shared objects pool */
2018 static robj *tryObjectSharing(robj *o) {
2019 struct dictEntry *de;
2020 unsigned long c;
2021
2022 if (o == NULL || server.shareobjects == 0) return o;
2023
2024 assert(o->type == REDIS_STRING);
2025 de = dictFind(server.sharingpool,o);
2026 if (de) {
2027 robj *shared = dictGetEntryKey(de);
2028
2029 c = ((unsigned long) dictGetEntryVal(de))+1;
2030 dictGetEntryVal(de) = (void*) c;
2031 incrRefCount(shared);
2032 decrRefCount(o);
2033 return shared;
2034 } else {
2035 /* Here we are using a stream algorihtm: Every time an object is
2036 * shared we increment its count, everytime there is a miss we
2037 * recrement the counter of a random object. If this object reaches
2038 * zero we remove the object and put the current object instead. */
2039 if (dictSize(server.sharingpool) >=
2040 server.sharingpoolsize) {
2041 de = dictGetRandomKey(server.sharingpool);
2042 assert(de != NULL);
2043 c = ((unsigned long) dictGetEntryVal(de))-1;
2044 dictGetEntryVal(de) = (void*) c;
2045 if (c == 0) {
2046 dictDelete(server.sharingpool,de->key);
2047 }
2048 } else {
2049 c = 0; /* If the pool is empty we want to add this object */
2050 }
2051 if (c == 0) {
2052 int retval;
2053
2054 retval = dictAdd(server.sharingpool,o,(void*)1);
2055 assert(retval == DICT_OK);
2056 incrRefCount(o);
2057 }
2058 return o;
2059 }
2060 }
2061
2062 /* Check if the nul-terminated string 's' can be represented by a long
2063 * (that is, is a number that fits into long without any other space or
2064 * character before or after the digits).
2065 *
2066 * If so, the function returns REDIS_OK and *longval is set to the value
2067 * of the number. Otherwise REDIS_ERR is returned */
2068 static int isStringRepresentableAsLong(sds s, long *longval) {
2069 char buf[32], *endptr;
2070 long value;
2071 int slen;
2072
2073 value = strtol(s, &endptr, 10);
2074 if (endptr[0] != '\0') return REDIS_ERR;
2075 slen = snprintf(buf,32,"%ld",value);
2076
2077 /* If the number converted back into a string is not identical
2078 * then it's not possible to encode the string as integer */
2079 if (sdslen(s) != (unsigned)slen || memcmp(buf,s,slen)) return REDIS_ERR;
2080 if (longval) *longval = value;
2081 return REDIS_OK;
2082 }
2083
2084 /* Try to encode a string object in order to save space */
2085 static int tryObjectEncoding(robj *o) {
2086 long value;
2087 sds s = o->ptr;
2088
2089 if (o->encoding != REDIS_ENCODING_RAW)
2090 return REDIS_ERR; /* Already encoded */
2091
2092 /* It's not save to encode shared objects: shared objects can be shared
2093 * everywhere in the "object space" of Redis. Encoded objects can only
2094 * appear as "values" (and not, for instance, as keys) */
2095 if (o->refcount > 1) return REDIS_ERR;
2096
2097 /* Currently we try to encode only strings */
2098 assert(o->type == REDIS_STRING);
2099
2100 /* Check if we can represent this string as a long integer */
2101 if (isStringRepresentableAsLong(s,&value) == REDIS_ERR) return REDIS_ERR;
2102
2103 /* Ok, this object can be encoded */
2104 o->encoding = REDIS_ENCODING_INT;
2105 sdsfree(o->ptr);
2106 o->ptr = (void*) value;
2107 return REDIS_OK;
2108 }
2109
2110 /* Get a decoded version of an encoded object (returned as a new object) */
2111 static robj *getDecodedObject(const robj *o) {
2112 robj *dec;
2113
2114 assert(o->encoding != REDIS_ENCODING_RAW);
2115 if (o->type == REDIS_STRING && o->encoding == REDIS_ENCODING_INT) {
2116 char buf[32];
2117
2118 snprintf(buf,32,"%ld",(long)o->ptr);
2119 dec = createStringObject(buf,strlen(buf));
2120 return dec;
2121 } else {
2122 assert(1 != 1);
2123 }
2124 }
2125
2126 /* Compare two string objects via strcmp() or alike.
2127 * Note that the objects may be integer-encoded. In such a case we
2128 * use snprintf() to get a string representation of the numbers on the stack
2129 * and compare the strings, it's much faster than calling getDecodedObject(). */
2130 static int compareStringObjects(robj *a, robj *b) {
2131 assert(a->type == REDIS_STRING && b->type == REDIS_STRING);
2132 char bufa[128], bufb[128], *astr, *bstr;
2133 int bothsds = 1;
2134
2135 if (a == b) return 0;
2136 if (a->encoding != REDIS_ENCODING_RAW) {
2137 snprintf(bufa,sizeof(bufa),"%ld",(long) a->ptr);
2138 astr = bufa;
2139 bothsds = 0;
2140 } else {
2141 astr = a->ptr;
2142 }
2143 if (b->encoding != REDIS_ENCODING_RAW) {
2144 snprintf(bufb,sizeof(bufb),"%ld",(long) b->ptr);
2145 bstr = bufb;
2146 bothsds = 0;
2147 } else {
2148 bstr = b->ptr;
2149 }
2150 return bothsds ? sdscmp(astr,bstr) : strcmp(astr,bstr);
2151 }
2152
2153 static size_t stringObjectLen(robj *o) {
2154 assert(o->type == REDIS_STRING);
2155 if (o->encoding == REDIS_ENCODING_RAW) {
2156 return sdslen(o->ptr);
2157 } else {
2158 char buf[32];
2159
2160 return snprintf(buf,32,"%ld",(long)o->ptr);
2161 }
2162 }
2163
2164 /*============================ DB saving/loading ============================ */
2165
/* Write the single byte 'type' to 'fp'. Returns 0 on success, -1 if the
 * byte could not be written. */
static int rdbSaveType(FILE *fp, unsigned char type) {
    return (fwrite(&type,1,1,fp) == 0) ? -1 : 0;
}
2170
/* Write 't' to 'fp' as a 4 byte signed integer in host byte order (the
 * value is narrowed to int32_t first). Returns 0 on success, -1 on write
 * error. */
static int rdbSaveTime(FILE *fp, time_t t) {
    int32_t stamp = (int32_t) t;

    return (fwrite(&stamp,4,1,fp) == 0) ? -1 : 0;
}
2176
2177 /* check rdbLoadLen() comments for more info */
2178 static int rdbSaveLen(FILE *fp, uint32_t len) {
2179 unsigned char buf[2];
2180
2181 if (len < (1<<6)) {
2182 /* Save a 6 bit len */
2183 buf[0] = (len&0xFF)|(REDIS_RDB_6BITLEN<<6);
2184 if (fwrite(buf,1,1,fp) == 0) return -1;
2185 } else if (len < (1<<14)) {
2186 /* Save a 14 bit len */
2187 buf[0] = ((len>>8)&0xFF)|(REDIS_RDB_14BITLEN<<6);
2188 buf[1] = len&0xFF;
2189 if (fwrite(buf,2,1,fp) == 0) return -1;
2190 } else {
2191 /* Save a 32 bit len */
2192 buf[0] = (REDIS_RDB_32BITLEN<<6);
2193 if (fwrite(buf,1,1,fp) == 0) return -1;
2194 len = htonl(len);
2195 if (fwrite(&len,4,1,fp) == 0) return -1;
2196 }
2197 return 0;
2198 }
2199
2200 /* String objects in the form "2391" "-100" without any space and with a
2201 * range of values that can fit in an 8, 16 or 32 bit signed value can be
2202 * encoded as integers to save space */
2203 static int rdbTryIntegerEncoding(sds s, unsigned char *enc) {
2204 long long value;
2205 char *endptr, buf[32];
2206
2207 /* Check if it's possible to encode this value as a number */
2208 value = strtoll(s, &endptr, 10);
2209 if (endptr[0] != '\0') return 0;
2210 snprintf(buf,32,"%lld",value);
2211
2212 /* If the number converted back into a string is not identical
2213 * then it's not possible to encode the string as integer */
2214 if (strlen(buf) != sdslen(s) || memcmp(buf,s,sdslen(s))) return 0;
2215
2216 /* Finally check if it fits in our ranges */
2217 if (value >= -(1<<7) && value <= (1<<7)-1) {
2218 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT8;
2219 enc[1] = value&0xFF;
2220 return 2;
2221 } else if (value >= -(1<<15) && value <= (1<<15)-1) {
2222 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT16;
2223 enc[1] = value&0xFF;
2224 enc[2] = (value>>8)&0xFF;
2225 return 3;
2226 } else if (value >= -((long long)1<<31) && value <= ((long long)1<<31)-1) {
2227 enc[0] = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_INT32;
2228 enc[1] = value&0xFF;
2229 enc[2] = (value>>8)&0xFF;
2230 enc[3] = (value>>16)&0xFF;
2231 enc[4] = (value>>24)&0xFF;
2232 return 5;
2233 } else {
2234 return 0;
2235 }
2236 }
2237
2238 static int rdbSaveLzfStringObject(FILE *fp, robj *obj) {
2239 unsigned int comprlen, outlen;
2240 unsigned char byte;
2241 void *out;
2242
2243 /* We require at least four bytes compression for this to be worth it */
2244 outlen = sdslen(obj->ptr)-4;
2245 if (outlen <= 0) return 0;
2246 if ((out = zmalloc(outlen+1)) == NULL) return 0;
2247 comprlen = lzf_compress(obj->ptr, sdslen(obj->ptr), out, outlen);
2248 if (comprlen == 0) {
2249 zfree(out);
2250 return 0;
2251 }
2252 /* Data compressed! Let's save it on disk */
2253 byte = (REDIS_RDB_ENCVAL<<6)|REDIS_RDB_ENC_LZF;
2254 if (fwrite(&byte,1,1,fp) == 0) goto writeerr;
2255 if (rdbSaveLen(fp,comprlen) == -1) goto writeerr;
2256 if (rdbSaveLen(fp,sdslen(obj->ptr)) == -1) goto writeerr;
2257 if (fwrite(out,comprlen,1,fp) == 0) goto writeerr;
2258 zfree(out);
2259 return comprlen;
2260
2261 writeerr:
2262 zfree(out);
2263 return -1;
2264 }
2265
2266 /* Save a string objet as [len][data] on disk. If the object is a string
2267 * representation of an integer value we try to safe it in a special form */
2268 static int rdbSaveStringObjectRaw(FILE *fp, robj *obj) {
2269 size_t len;
2270 int enclen;
2271
2272 len = sdslen(obj->ptr);
2273
2274 /* Try integer encoding */
2275 if (len <= 11) {
2276 unsigned char buf[5];
2277 if ((enclen = rdbTryIntegerEncoding(obj->ptr,buf)) > 0) {
2278 if (fwrite(buf,enclen,1,fp) == 0) return -1;
2279 return 0;
2280 }
2281 }
2282
2283 /* Try LZF compression - under 20 bytes it's unable to compress even
2284 * aaaaaaaaaaaaaaaaaa so skip it */
2285 if (len > 20) {
2286 int retval;
2287
2288 retval = rdbSaveLzfStringObject(fp,obj);
2289 if (retval == -1) return -1;
2290 if (retval > 0) return 0;
2291 /* retval == 0 means data can't be compressed, save the old way */
2292 }
2293
2294 /* Store verbatim */
2295 if (rdbSaveLen(fp,len) == -1) return -1;
2296 if (len && fwrite(obj->ptr,len,1,fp) == 0) return -1;
2297 return 0;
2298 }
2299
2300 /* Like rdbSaveStringObjectRaw() but handle encoded objects */
2301 static int rdbSaveStringObject(FILE *fp, robj *obj) {
2302 int retval;
2303 robj *dec;
2304
2305 if (obj->encoding != REDIS_ENCODING_RAW) {
2306 dec = getDecodedObject(obj);
2307 retval = rdbSaveStringObjectRaw(fp,dec);
2308 decrRefCount(dec);
2309 return retval;
2310 } else {
2311 return rdbSaveStringObjectRaw(fp,obj);
2312 }
2313 }
2314
/* Save a double value. Doubles are saved as strings prefixed by an unsigned
 * 8 bit integer specifying the number of bytes that follow.
 * This 8 bit integer has special values in order to specify the following
 * conditions:
 * 253: not a number
 * 254: + inf
 * 255: - inf
 *
 * For finite values the bytes that follow are the "%.16g" representation
 * plus its terminating nul byte, and the length prefix counts that nul byte
 * too. The terminator is part of the record on purpose: rdbLoadDoubleValue()
 * parses the bytes it reads as a C string.
 *
 * Returns 0 on success, -1 on write error. */
static int rdbSaveDoubleValue(FILE *fp, double val) {
    unsigned char buf[128];
    int len;

    if (isnan(val)) {
        buf[0] = 253;
        len = 1;
    } else if (!isfinite(val)) {
        len = 1;
        buf[0] = (val < 0) ? 255 : 254;
    } else {
        size_t slen;

        snprintf((char*)buf+1,sizeof(buf)-1,"%.16g",val);
        /* Measure the text starting at buf+1. Measuring from buf would read
         * buf[0] before it has been given a value, making the record length
         * depend on whatever byte happened to be on the stack. */
        slen = strlen((char*)buf+1);
        buf[0] = slen+1;    /* text + nul terminator */
        len = slen+2;       /* length byte + text + nul terminator */
    }
    if (fwrite(buf,len,1,fp) == 0) return -1;
    return 0;
}
2341
2342 /* Save the DB on disk. Return REDIS_ERR on error, REDIS_OK on success */
2343 static int rdbSave(char *filename) {
2344 dictIterator *di = NULL;
2345 dictEntry *de;
2346 FILE *fp;
2347 char tmpfile[256];
2348 int j;
2349 time_t now = time(NULL);
2350
2351 snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
2352 fp = fopen(tmpfile,"w");
2353 if (!fp) {
2354 redisLog(REDIS_WARNING, "Failed saving the DB: %s", strerror(errno));
2355 return REDIS_ERR;
2356 }
2357 if (fwrite("REDIS0001",9,1,fp) == 0) goto werr;
2358 for (j = 0; j < server.dbnum; j++) {
2359 redisDb *db = server.db+j;
2360 dict *d = db->dict;
2361 if (dictSize(d) == 0) continue;
2362 di = dictGetIterator(d);
2363 if (!di) {
2364 fclose(fp);
2365 return REDIS_ERR;
2366 }
2367
2368 /* Write the SELECT DB opcode */
2369 if (rdbSaveType(fp,REDIS_SELECTDB) == -1) goto werr;
2370 if (rdbSaveLen(fp,j) == -1) goto werr;
2371
2372 /* Iterate this DB writing every entry */
2373 while((de = dictNext(di)) != NULL) {
2374 robj *key = dictGetEntryKey(de);
2375 robj *o = dictGetEntryVal(de);
2376 time_t expiretime = getExpire(db,key);
2377
2378 /* Save the expire time */
2379 if (expiretime != -1) {
2380 /* If this key is already expired skip it */
2381 if (expiretime < now) continue;
2382 if (rdbSaveType(fp,REDIS_EXPIRETIME) == -1) goto werr;
2383 if (rdbSaveTime(fp,expiretime) == -1) goto werr;
2384 }
2385 /* Save the key and associated value */
2386 if (rdbSaveType(fp,o->type) == -1) goto werr;
2387 if (rdbSaveStringObject(fp,key) == -1) goto werr;
2388 if (o->type == REDIS_STRING) {
2389 /* Save a string value */
2390 if (rdbSaveStringObject(fp,o) == -1) goto werr;
2391 } else if (o->type == REDIS_LIST) {
2392 /* Save a list value */
2393 list *list = o->ptr;
2394 listNode *ln;
2395
2396 listRewind(list);
2397 if (rdbSaveLen(fp,listLength(list)) == -1) goto werr;
2398 while((ln = listYield(list))) {
2399 robj *eleobj = listNodeValue(ln);
2400
2401 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2402 }
2403 } else if (o->type == REDIS_SET) {
2404 /* Save a set value */
2405 dict *set = o->ptr;
2406 dictIterator *di = dictGetIterator(set);
2407 dictEntry *de;
2408
2409 if (rdbSaveLen(fp,dictSize(set)) == -1) goto werr;
2410 while((de = dictNext(di)) != NULL) {
2411 robj *eleobj = dictGetEntryKey(de);
2412
2413 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2414 }
2415 dictReleaseIterator(di);
2416 } else if (o->type == REDIS_ZSET) {
2417 /* Save a set value */
2418 zset *zs = o->ptr;
2419 dictIterator *di = dictGetIterator(zs->dict);
2420 dictEntry *de;
2421
2422 if (rdbSaveLen(fp,dictSize(zs->dict)) == -1) goto werr;
2423 while((de = dictNext(di)) != NULL) {
2424 robj *eleobj = dictGetEntryKey(de);
2425 double *score = dictGetEntryVal(de);
2426
2427 if (rdbSaveStringObject(fp,eleobj) == -1) goto werr;
2428 if (rdbSaveDoubleValue(fp,*score) == -1) goto werr;
2429 }
2430 dictReleaseIterator(di);
2431 } else {
2432 assert(0 != 0);
2433 }
2434 }
2435 dictReleaseIterator(di);
2436 }
2437 /* EOF opcode */
2438 if (rdbSaveType(fp,REDIS_EOF) == -1) goto werr;
2439
2440 /* Make sure data will not remain on the OS's output buffers */
2441 fflush(fp);
2442 fsync(fileno(fp));
2443 fclose(fp);
2444
2445 /* Use RENAME to make sure the DB file is changed atomically only
2446 * if the generate DB file is ok. */
2447 if (rename(tmpfile,filename) == -1) {
2448 redisLog(REDIS_WARNING,"Error moving temp DB file on the final destination: %s", strerror(errno));
2449 unlink(tmpfile);
2450 return REDIS_ERR;
2451 }
2452 redisLog(REDIS_NOTICE,"DB saved on disk");
2453 server.dirty = 0;
2454 server.lastsave = time(NULL);
2455 return REDIS_OK;
2456
2457 werr:
2458 fclose(fp);
2459 unlink(tmpfile);
2460 redisLog(REDIS_WARNING,"Write error saving DB on disk: %s", strerror(errno));
2461 if (di) dictReleaseIterator(di);
2462 return REDIS_ERR;
2463 }
2464
2465 static int rdbSaveBackground(char *filename) {
2466 pid_t childpid;
2467
2468 if (server.bgsaveinprogress) return REDIS_ERR;
2469 if ((childpid = fork()) == 0) {
2470 /* Child */
2471 close(server.fd);
2472 if (rdbSave(filename) == REDIS_OK) {
2473 exit(0);
2474 } else {
2475 exit(1);
2476 }
2477 } else {
2478 /* Parent */
2479 if (childpid == -1) {
2480 redisLog(REDIS_WARNING,"Can't save in background: fork: %s",
2481 strerror(errno));
2482 return REDIS_ERR;
2483 }
2484 redisLog(REDIS_NOTICE,"Background saving started by pid %d",childpid);
2485 server.bgsaveinprogress = 1;
2486 server.bgsavechildpid = childpid;
2487 return REDIS_OK;
2488 }
2489 return REDIS_OK; /* unreached */
2490 }
2491
/* Remove the temp file "temp-<childpid>.rdb" from the current working
 * directory. The result of unlink() is not checked. */
static void rdbRemoveTempFile(pid_t childpid) {
    char path[256];

    snprintf(path,sizeof(path),"temp-%d.rdb", (int) childpid);
    unlink(path);
}
2498
/* Read one byte from 'fp' and return it as a non negative int, or -1 if
 * the byte could not be read. */
static int rdbLoadType(FILE *fp) {
    unsigned char type;

    if (fread(&type,1,1,fp) != 1) return -1;
    return type;
}
2504
/* Read a 4 byte signed integer in host byte order from 'fp' and return it
 * as a time_t, or -1 if it could not be read. */
static time_t rdbLoadTime(FILE *fp) {
    int32_t stamp;

    if (fread(&stamp,4,1,fp) != 1) return -1;
    return (time_t) stamp;
}
2510
2511 /* Load an encoded length from the DB, see the REDIS_RDB_* defines on the top
2512 * of this file for a description of how this are stored on disk.
2513 *
2514 * isencoded is set to 1 if the readed length is not actually a length but
2515 * an "encoding type", check the above comments for more info */
2516 static uint32_t rdbLoadLen(FILE *fp, int rdbver, int *isencoded) {
2517 unsigned char buf[2];
2518 uint32_t len;
2519
2520 if (isencoded) *isencoded = 0;
2521 if (rdbver == 0) {
2522 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2523 return ntohl(len);
2524 } else {
2525 int type;
2526
2527 if (fread(buf,1,1,fp) == 0) return REDIS_RDB_LENERR;
2528 type = (buf[0]&0xC0)>>6;
2529 if (type == REDIS_RDB_6BITLEN) {
2530 /* Read a 6 bit len */
2531 return buf[0]&0x3F;
2532 } else if (type == REDIS_RDB_ENCVAL) {
2533 /* Read a 6 bit len encoding type */
2534 if (isencoded) *isencoded = 1;
2535 return buf[0]&0x3F;
2536 } else if (type == REDIS_RDB_14BITLEN) {
2537 /* Read a 14 bit len */
2538 if (fread(buf+1,1,1,fp) == 0) return REDIS_RDB_LENERR;
2539 return ((buf[0]&0x3F)<<8)|buf[1];
2540 } else {
2541 /* Read a 32 bit len */
2542 if (fread(&len,4,1,fp) == 0) return REDIS_RDB_LENERR;
2543 return ntohl(len);
2544 }
2545 }
2546 }
2547
2548 static robj *rdbLoadIntegerObject(FILE *fp, int enctype) {
2549 unsigned char enc[4];
2550 long long val;
2551
2552 if (enctype == REDIS_RDB_ENC_INT8) {
2553 if (fread(enc,1,1,fp) == 0) return NULL;
2554 val = (signed char)enc[0];
2555 } else if (enctype == REDIS_RDB_ENC_INT16) {
2556 uint16_t v;
2557 if (fread(enc,2,1,fp) == 0) return NULL;
2558 v = enc[0]|(enc[1]<<8);
2559 val = (int16_t)v;
2560 } else if (enctype == REDIS_RDB_ENC_INT32) {
2561 uint32_t v;
2562 if (fread(enc,4,1,fp) == 0) return NULL;
2563 v = enc[0]|(enc[1]<<8)|(enc[2]<<16)|(enc[3]<<24);
2564 val = (int32_t)v;
2565 } else {
2566 val = 0; /* anti-warning */
2567 assert(0!=0);
2568 }
2569 return createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",val));
2570 }
2571
2572 static robj *rdbLoadLzfStringObject(FILE*fp, int rdbver) {
2573 unsigned int len, clen;
2574 unsigned char *c = NULL;
2575 sds val = NULL;
2576
2577 if ((clen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2578 if ((len = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR) return NULL;
2579 if ((c = zmalloc(clen)) == NULL) goto err;
2580 if ((val = sdsnewlen(NULL,len)) == NULL) goto err;
2581 if (fread(c,clen,1,fp) == 0) goto err;
2582 if (lzf_decompress(c,clen,val,len) == 0) goto err;
2583 zfree(c);
2584 return createObject(REDIS_STRING,val);
2585 err:
2586 zfree(c);
2587 sdsfree(val);
2588 return NULL;
2589 }
2590
2591 static robj *rdbLoadStringObject(FILE*fp, int rdbver) {
2592 int isencoded;
2593 uint32_t len;
2594 sds val;
2595
2596 len = rdbLoadLen(fp,rdbver,&isencoded);
2597 if (isencoded) {
2598 switch(len) {
2599 case REDIS_RDB_ENC_INT8:
2600 case REDIS_RDB_ENC_INT16:
2601 case REDIS_RDB_ENC_INT32:
2602 return tryObjectSharing(rdbLoadIntegerObject(fp,len));
2603 case REDIS_RDB_ENC_LZF:
2604 return tryObjectSharing(rdbLoadLzfStringObject(fp,rdbver));
2605 default:
2606 assert(0!=0);
2607 }
2608 }
2609
2610 if (len == REDIS_RDB_LENERR) return NULL;
2611 val = sdsnewlen(NULL,len);
2612 if (len && fread(val,len,1,fp) == 0) {
2613 sdsfree(val);
2614 return NULL;
2615 }
2616 return tryObjectSharing(createObject(REDIS_STRING,val));
2617 }
2618
2619 /* For information about double serialization check rdbSaveDoubleValue() */
2620 static int rdbLoadDoubleValue(FILE *fp, double *val) {
2621 char buf[128];
2622 unsigned char len;
2623
2624 if (fread(&len,1,1,fp) == 0) return -1;
2625 switch(len) {
2626 case 255: *val = R_NegInf; return 0;
2627 case 254: *val = R_PosInf; return 0;
2628 case 253: *val = R_Nan; return 0;
2629 default:
2630 if (fread(buf,len,1,fp) == 0) return -1;
2631 sscanf(buf, "%lg", val);
2632 return 0;
2633 }
2634 }
2635
2636 static int rdbLoad(char *filename) {
2637 FILE *fp;
2638 robj *keyobj = NULL;
2639 uint32_t dbid;
2640 int type, retval, rdbver;
2641 dict *d = server.db[0].dict;
2642 redisDb *db = server.db+0;
2643 char buf[1024];
2644 time_t expiretime = -1, now = time(NULL);
2645
2646 fp = fopen(filename,"r");
2647 if (!fp) return REDIS_ERR;
2648 if (fread(buf,9,1,fp) == 0) goto eoferr;
2649 buf[9] = '\0';
2650 if (memcmp(buf,"REDIS",5) != 0) {
2651 fclose(fp);
2652 redisLog(REDIS_WARNING,"Wrong signature trying to load DB from file");
2653 return REDIS_ERR;
2654 }
2655 rdbver = atoi(buf+5);
2656 if (rdbver > 1) {
2657 fclose(fp);
2658 redisLog(REDIS_WARNING,"Can't handle RDB format version %d",rdbver);
2659 return REDIS_ERR;
2660 }
2661 while(1) {
2662 robj *o;
2663
2664 /* Read type. */
2665 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2666 if (type == REDIS_EXPIRETIME) {
2667 if ((expiretime = rdbLoadTime(fp)) == -1) goto eoferr;
2668 /* We read the time so we need to read the object type again */
2669 if ((type = rdbLoadType(fp)) == -1) goto eoferr;
2670 }
2671 if (type == REDIS_EOF) break;
2672 /* Handle SELECT DB opcode as a special case */
2673 if (type == REDIS_SELECTDB) {
2674 if ((dbid = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2675 goto eoferr;
2676 if (dbid >= (unsigned)server.dbnum) {
2677 redisLog(REDIS_WARNING,"FATAL: Data file was created with a Redis server configured to handle more than %d databases. Exiting\n", server.dbnum);
2678 exit(1);
2679 }
2680 db = server.db+dbid;
2681 d = db->dict;
2682 continue;
2683 }
2684 /* Read key */
2685 if ((keyobj = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2686
2687 if (type == REDIS_STRING) {
2688 /* Read string value */
2689 if ((o = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2690 tryObjectEncoding(o);
2691 } else if (type == REDIS_LIST || type == REDIS_SET) {
2692 /* Read list/set value */
2693 uint32_t listlen;
2694
2695 if ((listlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2696 goto eoferr;
2697 o = (type == REDIS_LIST) ? createListObject() : createSetObject();
2698 /* Load every single element of the list/set */
2699 while(listlen--) {
2700 robj *ele;
2701
2702 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2703 tryObjectEncoding(ele);
2704 if (type == REDIS_LIST) {
2705 listAddNodeTail((list*)o->ptr,ele);
2706 } else {
2707 dictAdd((dict*)o->ptr,ele,NULL);
2708 }
2709 }
2710 } else if (type == REDIS_ZSET) {
2711 /* Read list/set value */
2712 uint32_t zsetlen;
2713 zset *zs;
2714
2715 if ((zsetlen = rdbLoadLen(fp,rdbver,NULL)) == REDIS_RDB_LENERR)
2716 goto eoferr;
2717 o = createZsetObject();
2718 zs = o->ptr;
2719 /* Load every single element of the list/set */
2720 while(zsetlen--) {
2721 robj *ele;
2722 double *score = zmalloc(sizeof(double));
2723
2724 if ((ele = rdbLoadStringObject(fp,rdbver)) == NULL) goto eoferr;
2725 tryObjectEncoding(ele);
2726 if (rdbLoadDoubleValue(fp,score) == -1) goto eoferr;
2727 dictAdd(zs->dict,ele,score);
2728 zslInsert(zs->zsl,*score,ele);
2729 incrRefCount(ele); /* added to skiplist */
2730 }
2731 } else {
2732 assert(0 != 0);
2733 }
2734 /* Add the new object in the hash table */
2735 retval = dictAdd(d,keyobj,o);
2736 if (retval == DICT_ERR) {
2737 redisLog(REDIS_WARNING,"Loading DB, duplicated key (%s) found! Unrecoverable error, exiting now.", keyobj->ptr);
2738 exit(1);
2739 }
2740 /* Set the expire time if needed */
2741 if (expiretime != -1) {
2742 setExpire(db,keyobj,expiretime);
2743 /* Delete this key if already expired */
2744 if (expiretime < now) deleteKey(db,keyobj);
2745 expiretime = -1;
2746 }
2747 keyobj = o = NULL;
2748 }
2749 fclose(fp);
2750 return REDIS_OK;
2751
2752 eoferr: /* unexpected end of file is handled here with a fatal exit */
2753 if (keyobj) decrRefCount(keyobj);
2754 redisLog(REDIS_WARNING,"Short read or OOM loading DB. Unrecoverable error, aborting now.");
2755 exit(1);
2756 return REDIS_ERR; /* Just to avoid warning */
2757 }
2758
2759 /*================================== Commands =============================== */
2760
2761 static void authCommand(redisClient *c) {
2762 if (!server.requirepass || !strcmp(c->argv[1]->ptr, server.requirepass)) {
2763 c->authenticated = 1;
2764 addReply(c,shared.ok);
2765 } else {
2766 c->authenticated = 0;
2767 addReplySds(c,sdscatprintf(sdsempty(),"-ERR invalid password\r\n"));
2768 }
2769 }
2770
2771 static void pingCommand(redisClient *c) {
2772 addReply(c,shared.pong);
2773 }
2774
2775 static void echoCommand(redisClient *c) {
2776 addReplyBulkLen(c,c->argv[1]);
2777 addReply(c,c->argv[1]);
2778 addReply(c,shared.crlf);
2779 }
2780
2781 /*=================================== Strings =============================== */
2782
2783 static void setGenericCommand(redisClient *c, int nx) {
2784 int retval;
2785
2786 retval = dictAdd(c->db->dict,c->argv[1],c->argv[2]);
2787 if (retval == DICT_ERR) {
2788 if (!nx) {
2789 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2790 incrRefCount(c->argv[2]);
2791 } else {
2792 addReply(c,shared.czero);
2793 return;
2794 }
2795 } else {
2796 incrRefCount(c->argv[1]);
2797 incrRefCount(c->argv[2]);
2798 }
2799 server.dirty++;
2800 removeExpire(c->db,c->argv[1]);
2801 addReply(c, nx ? shared.cone : shared.ok);
2802 }
2803
2804 static void setCommand(redisClient *c) {
2805 setGenericCommand(c,0);
2806 }
2807
2808 static void setnxCommand(redisClient *c) {
2809 setGenericCommand(c,1);
2810 }
2811
2812 static void getCommand(redisClient *c) {
2813 robj *o = lookupKeyRead(c->db,c->argv[1]);
2814
2815 if (o == NULL) {
2816 addReply(c,shared.nullbulk);
2817 } else {
2818 if (o->type != REDIS_STRING) {
2819 addReply(c,shared.wrongtypeerr);
2820 } else {
2821 addReplyBulkLen(c,o);
2822 addReply(c,o);
2823 addReply(c,shared.crlf);
2824 }
2825 }
2826 }
2827
2828 static void getsetCommand(redisClient *c) {
2829 getCommand(c);
2830 if (dictAdd(c->db->dict,c->argv[1],c->argv[2]) == DICT_ERR) {
2831 dictReplace(c->db->dict,c->argv[1],c->argv[2]);
2832 } else {
2833 incrRefCount(c->argv[1]);
2834 }
2835 incrRefCount(c->argv[2]);
2836 server.dirty++;
2837 removeExpire(c->db,c->argv[1]);
2838 }
2839
2840 static void mgetCommand(redisClient *c) {
2841 int j;
2842
2843 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",c->argc-1));
2844 for (j = 1; j < c->argc; j++) {
2845 robj *o = lookupKeyRead(c->db,c->argv[j]);
2846 if (o == NULL) {
2847 addReply(c,shared.nullbulk);
2848 } else {
2849 if (o->type != REDIS_STRING) {
2850 addReply(c,shared.nullbulk);
2851 } else {
2852 addReplyBulkLen(c,o);
2853 addReply(c,o);
2854 addReply(c,shared.crlf);
2855 }
2856 }
2857 }
2858 }
2859
2860 static void incrDecrCommand(redisClient *c, long long incr) {
2861 long long value;
2862 int retval;
2863 robj *o;
2864
2865 o = lookupKeyWrite(c->db,c->argv[1]);
2866 if (o == NULL) {
2867 value = 0;
2868 } else {
2869 if (o->type != REDIS_STRING) {
2870 value = 0;
2871 } else {
2872 char *eptr;
2873
2874 if (o->encoding == REDIS_ENCODING_RAW)
2875 value = strtoll(o->ptr, &eptr, 10);
2876 else if (o->encoding == REDIS_ENCODING_INT)
2877 value = (long)o->ptr;
2878 else
2879 assert(1 != 1);
2880 }
2881 }
2882
2883 value += incr;
2884 o = createObject(REDIS_STRING,sdscatprintf(sdsempty(),"%lld",value));
2885 tryObjectEncoding(o);
2886 retval = dictAdd(c->db->dict,c->argv[1],o);
2887 if (retval == DICT_ERR) {
2888 dictReplace(c->db->dict,c->argv[1],o);
2889 removeExpire(c->db,c->argv[1]);
2890 } else {
2891 incrRefCount(c->argv[1]);
2892 }
2893 server.dirty++;
2894 addReply(c,shared.colon);
2895 addReply(c,o);
2896 addReply(c,shared.crlf);
2897 }
2898
2899 static void incrCommand(redisClient *c) {
2900 incrDecrCommand(c,1);
2901 }
2902
2903 static void decrCommand(redisClient *c) {
2904 incrDecrCommand(c,-1);
2905 }
2906
2907 static void incrbyCommand(redisClient *c) {
2908 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
2909 incrDecrCommand(c,incr);
2910 }
2911
2912 static void decrbyCommand(redisClient *c) {
2913 long long incr = strtoll(c->argv[2]->ptr, NULL, 10);
2914 incrDecrCommand(c,-incr);
2915 }
2916
2917 /* ========================= Type agnostic commands ========================= */
2918
2919 static void delCommand(redisClient *c) {
2920 int deleted = 0, j;
2921
2922 for (j = 1; j < c->argc; j++) {
2923 if (deleteKey(c->db,c->argv[j])) {
2924 server.dirty++;
2925 deleted++;
2926 }
2927 }
2928 switch(deleted) {
2929 case 0:
2930 addReply(c,shared.czero);
2931 break;
2932 case 1:
2933 addReply(c,shared.cone);
2934 break;
2935 default:
2936 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",deleted));
2937 break;
2938 }
2939 }
2940
2941 static void existsCommand(redisClient *c) {
2942 addReply(c,lookupKeyRead(c->db,c->argv[1]) ? shared.cone : shared.czero);
2943 }
2944
2945 static void selectCommand(redisClient *c) {
2946 int id = atoi(c->argv[1]->ptr);
2947
2948 if (selectDb(c,id) == REDIS_ERR) {
2949 addReplySds(c,sdsnew("-ERR invalid DB index\r\n"));
2950 } else {
2951 addReply(c,shared.ok);
2952 }
2953 }
2954
2955 static void randomkeyCommand(redisClient *c) {
2956 dictEntry *de;
2957
2958 while(1) {
2959 de = dictGetRandomKey(c->db->dict);
2960 if (!de || expireIfNeeded(c->db,dictGetEntryKey(de)) == 0) break;
2961 }
2962 if (de == NULL) {
2963 addReply(c,shared.plus);
2964 addReply(c,shared.crlf);
2965 } else {
2966 addReply(c,shared.plus);
2967 addReply(c,dictGetEntryKey(de));
2968 addReply(c,shared.crlf);
2969 }
2970 }
2971
2972 static void keysCommand(redisClient *c) {
2973 dictIterator *di;
2974 dictEntry *de;
2975 sds pattern = c->argv[1]->ptr;
2976 int plen = sdslen(pattern);
2977 int numkeys = 0, keyslen = 0;
2978 robj *lenobj = createObject(REDIS_STRING,NULL);
2979
2980 di = dictGetIterator(c->db->dict);
2981 addReply(c,lenobj);
2982 decrRefCount(lenobj);
2983 while((de = dictNext(di)) != NULL) {
2984 robj *keyobj = dictGetEntryKey(de);
2985
2986 sds key = keyobj->ptr;
2987 if ((pattern[0] == '*' && pattern[1] == '\0') ||
2988 stringmatchlen(pattern,plen,key,sdslen(key),0)) {
2989 if (expireIfNeeded(c->db,keyobj) == 0) {
2990 if (numkeys != 0)
2991 addReply(c,shared.space);
2992 addReply(c,keyobj);
2993 numkeys++;
2994 keyslen += sdslen(key);
2995 }
2996 }
2997 }
2998 dictReleaseIterator(di);
2999 lenobj->ptr = sdscatprintf(sdsempty(),"$%lu\r\n",keyslen+(numkeys ? (numkeys-1) : 0));
3000 addReply(c,shared.crlf);
3001 }
3002
3003 static void dbsizeCommand(redisClient *c) {
3004 addReplySds(c,
3005 sdscatprintf(sdsempty(),":%lu\r\n",dictSize(c->db->dict)));
3006 }
3007
3008 static void lastsaveCommand(redisClient *c) {
3009 addReplySds(c,
3010 sdscatprintf(sdsempty(),":%lu\r\n",server.lastsave));
3011 }
3012
3013 static void typeCommand(redisClient *c) {
3014 robj *o;
3015 char *type;
3016
3017 o = lookupKeyRead(c->db,c->argv[1]);
3018 if (o == NULL) {
3019 type = "+none";
3020 } else {
3021 switch(o->type) {
3022 case REDIS_STRING: type = "+string"; break;
3023 case REDIS_LIST: type = "+list"; break;
3024 case REDIS_SET: type = "+set"; break;
3025 case REDIS_ZSET: type = "+zset"; break;
3026 default: type = "unknown"; break;
3027 }
3028 }
3029 addReplySds(c,sdsnew(type));
3030 addReply(c,shared.crlf);
3031 }
3032
3033 static void saveCommand(redisClient *c) {
3034 if (server.bgsaveinprogress) {
3035 addReplySds(c,sdsnew("-ERR background save in progress\r\n"));
3036 return;
3037 }
3038 if (rdbSave(server.dbfilename) == REDIS_OK) {
3039 addReply(c,shared.ok);
3040 } else {
3041 addReply(c,shared.err);
3042 }
3043 }
3044
3045 static void bgsaveCommand(redisClient *c) {
3046 if (server.bgsaveinprogress) {
3047 addReplySds(c,sdsnew("-ERR background save already in progress\r\n"));
3048 return;
3049 }
3050 if (rdbSaveBackground(server.dbfilename) == REDIS_OK) {
3051 addReply(c,shared.ok);
3052 } else {
3053 addReply(c,shared.err);
3054 }
3055 }
3056
3057 static void shutdownCommand(redisClient *c) {
3058 redisLog(REDIS_WARNING,"User requested shutdown, saving DB...");
3059 /* Kill the saving child if there is a background saving in progress.
3060 We want to avoid race conditions, for instance our saving child may
3061 overwrite the synchronous saving did by SHUTDOWN. */
3062 if (server.bgsaveinprogress) {
3063 redisLog(REDIS_WARNING,"There is a live saving child. Killing it!");
3064 kill(server.bgsavechildpid,SIGKILL);
3065 rdbRemoveTempFile(server.bgsavechildpid);
3066 }
3067 /* SYNC SAVE */
3068 if (rdbSave(server.dbfilename) == REDIS_OK) {
3069 if (server.daemonize)
3070 unlink(server.pidfile);
3071 redisLog(REDIS_WARNING,"%zu bytes used at exit",zmalloc_used_memory());
3072 redisLog(REDIS_WARNING,"Server exit now, bye bye...");
3073 exit(1);
3074 } else {
3075 /* Ooops.. error saving! The best we can do is to continue operating.
3076 * Note that if there was a background saving process, in the next
3077 * cron() Redis will be notified that the background saving aborted,
3078 * handling special stuff like slaves pending for synchronization... */
3079 redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit");
3080 addReplySds(c,sdsnew("-ERR can't quit, problems saving the DB\r\n"));
3081 }
3082 }
3083
3084 static void renameGenericCommand(redisClient *c, int nx) {
3085 robj *o;
3086
3087 /* To use the same key as src and dst is probably an error */
3088 if (sdscmp(c->argv[1]->ptr,c->argv[2]->ptr) == 0) {
3089 addReply(c,shared.sameobjecterr);
3090 return;
3091 }
3092
3093 o = lookupKeyWrite(c->db,c->argv[1]);
3094 if (o == NULL) {
3095 addReply(c,shared.nokeyerr);
3096 return;
3097 }
3098 incrRefCount(o);
3099 deleteIfVolatile(c->db,c->argv[2]);
3100 if (dictAdd(c->db->dict,c->argv[2],o) == DICT_ERR) {
3101 if (nx) {
3102 decrRefCount(o);
3103 addReply(c,shared.czero);
3104 return;
3105 }
3106 dictReplace(c->db->dict,c->argv[2],o);
3107 } else {
3108 incrRefCount(c->argv[2]);
3109 }
3110 deleteKey(c->db,c->argv[1]);
3111 server.dirty++;
3112 addReply(c,nx ? shared.cone : shared.ok);
3113 }
3114
3115 static void renameCommand(redisClient *c) {
3116 renameGenericCommand(c,0);
3117 }
3118
3119 static void renamenxCommand(redisClient *c) {
3120 renameGenericCommand(c,1);
3121 }
3122
3123 static void moveCommand(redisClient *c) {
3124 robj *o;
3125 redisDb *src, *dst;
3126 int srcid;
3127
3128 /* Obtain source and target DB pointers */
3129 src = c->db;
3130 srcid = c->db->id;
3131 if (selectDb(c,atoi(c->argv[2]->ptr)) == REDIS_ERR) {
3132 addReply(c,shared.outofrangeerr);
3133 return;
3134 }
3135 dst = c->db;
3136 selectDb(c,srcid); /* Back to the source DB */
3137
3138 /* If the user is moving using as target the same
3139 * DB as the source DB it is probably an error. */
3140 if (src == dst) {
3141 addReply(c,shared.sameobjecterr);
3142 return;
3143 }
3144
3145 /* Check if the element exists and get a reference */
3146 o = lookupKeyWrite(c->db,c->argv[1]);
3147 if (!o) {
3148 addReply(c,shared.czero);
3149 return;
3150 }
3151
3152 /* Try to add the element to the target DB */
3153 deleteIfVolatile(dst,c->argv[1]);
3154 if (dictAdd(dst->dict,c->argv[1],o) == DICT_ERR) {
3155 addReply(c,shared.czero);
3156 return;
3157 }
3158 incrRefCount(c->argv[1]);
3159 incrRefCount(o);
3160
3161 /* OK! key moved, free the entry in the source DB */
3162 deleteKey(src,c->argv[1]);
3163 server.dirty++;
3164 addReply(c,shared.cone);
3165 }
3166
3167 /* =================================== Lists ================================ */
3168 static void pushGenericCommand(redisClient *c, int where) {
3169 robj *lobj;
3170 list *list;
3171
3172 lobj = lookupKeyWrite(c->db,c->argv[1]);
3173 if (lobj == NULL) {
3174 lobj = createListObject();
3175 list = lobj->ptr;
3176 if (where == REDIS_HEAD) {
3177 listAddNodeHead(list,c->argv[2]);
3178 } else {
3179 listAddNodeTail(list,c->argv[2]);
3180 }
3181 dictAdd(c->db->dict,c->argv[1],lobj);
3182 incrRefCount(c->argv[1]);
3183 incrRefCount(c->argv[2]);
3184 } else {
3185 if (lobj->type != REDIS_LIST) {
3186 addReply(c,shared.wrongtypeerr);
3187 return;
3188 }
3189 list = lobj->ptr;
3190 if (where == REDIS_HEAD) {
3191 listAddNodeHead(list,c->argv[2]);
3192 } else {
3193 listAddNodeTail(list,c->argv[2]);
3194 }
3195 incrRefCount(c->argv[2]);
3196 }
3197 server.dirty++;
3198 addReply(c,shared.ok);
3199 }
3200
3201 static void lpushCommand(redisClient *c) {
3202 pushGenericCommand(c,REDIS_HEAD);
3203 }
3204
3205 static void rpushCommand(redisClient *c) {
3206 pushGenericCommand(c,REDIS_TAIL);
3207 }
3208
3209 static void llenCommand(redisClient *c) {
3210 robj *o;
3211 list *l;
3212
3213 o = lookupKeyRead(c->db,c->argv[1]);
3214 if (o == NULL) {
3215 addReply(c,shared.czero);
3216 return;
3217 } else {
3218 if (o->type != REDIS_LIST) {
3219 addReply(c,shared.wrongtypeerr);
3220 } else {
3221 l = o->ptr;
3222 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",listLength(l)));
3223 }
3224 }
3225 }
3226
3227 static void lindexCommand(redisClient *c) {
3228 robj *o;
3229 int index = atoi(c->argv[2]->ptr);
3230
3231 o = lookupKeyRead(c->db,c->argv[1]);
3232 if (o == NULL) {
3233 addReply(c,shared.nullbulk);
3234 } else {
3235 if (o->type != REDIS_LIST) {
3236 addReply(c,shared.wrongtypeerr);
3237 } else {
3238 list *list = o->ptr;
3239 listNode *ln;
3240
3241 ln = listIndex(list, index);
3242 if (ln == NULL) {
3243 addReply(c,shared.nullbulk);
3244 } else {
3245 robj *ele = listNodeValue(ln);
3246 addReplyBulkLen(c,ele);
3247 addReply(c,ele);
3248 addReply(c,shared.crlf);
3249 }
3250 }
3251 }
3252 }
3253
3254 static void lsetCommand(redisClient *c) {
3255 robj *o;
3256 int index = atoi(c->argv[2]->ptr);
3257
3258 o = lookupKeyWrite(c->db,c->argv[1]);
3259 if (o == NULL) {
3260 addReply(c,shared.nokeyerr);
3261 } else {
3262 if (o->type != REDIS_LIST) {
3263 addReply(c,shared.wrongtypeerr);
3264 } else {
3265 list *list = o->ptr;
3266 listNode *ln;
3267
3268 ln = listIndex(list, index);
3269 if (ln == NULL) {
3270 addReply(c,shared.outofrangeerr);
3271 } else {
3272 robj *ele = listNodeValue(ln);
3273
3274 decrRefCount(ele);
3275 listNodeValue(ln) = c->argv[3];
3276 incrRefCount(c->argv[3]);
3277 addReply(c,shared.ok);
3278 server.dirty++;
3279 }
3280 }
3281 }
3282 }
3283
3284 static void popGenericCommand(redisClient *c, int where) {
3285 robj *o;
3286
3287 o = lookupKeyWrite(c->db,c->argv[1]);
3288 if (o == NULL) {
3289 addReply(c,shared.nullbulk);
3290 } else {
3291 if (o->type != REDIS_LIST) {
3292 addReply(c,shared.wrongtypeerr);
3293 } else {
3294 list *list = o->ptr;
3295 listNode *ln;
3296
3297 if (where == REDIS_HEAD)
3298 ln = listFirst(list);
3299 else
3300 ln = listLast(list);
3301
3302 if (ln == NULL) {
3303 addReply(c,shared.nullbulk);
3304 } else {
3305 robj *ele = listNodeValue(ln);
3306 addReplyBulkLen(c,ele);
3307 addReply(c,ele);
3308 addReply(c,shared.crlf);
3309 listDelNode(list,ln);
3310 server.dirty++;
3311 }
3312 }
3313 }
3314 }
3315
3316 static void lpopCommand(redisClient *c) {
3317 popGenericCommand(c,REDIS_HEAD);
3318 }
3319
3320 static void rpopCommand(redisClient *c) {
3321 popGenericCommand(c,REDIS_TAIL);
3322 }
3323
3324 static void lrangeCommand(redisClient *c) {
3325 robj *o;
3326 int start = atoi(c->argv[2]->ptr);
3327 int end = atoi(c->argv[3]->ptr);
3328
3329 o = lookupKeyRead(c->db,c->argv[1]);
3330 if (o == NULL) {
3331 addReply(c,shared.nullmultibulk);
3332 } else {
3333 if (o->type != REDIS_LIST) {
3334 addReply(c,shared.wrongtypeerr);
3335 } else {
3336 list *list = o->ptr;
3337 listNode *ln;
3338 int llen = listLength(list);
3339 int rangelen, j;
3340 robj *ele;
3341
3342 /* convert negative indexes */
3343 if (start < 0) start = llen+start;
3344 if (end < 0) end = llen+end;
3345 if (start < 0) start = 0;
3346 if (end < 0) end = 0;
3347
3348 /* indexes sanity checks */
3349 if (start > end || start >= llen) {
3350 /* Out of range start or start > end result in empty list */
3351 addReply(c,shared.emptymultibulk);
3352 return;
3353 }
3354 if (end >= llen) end = llen-1;
3355 rangelen = (end-start)+1;
3356
3357 /* Return the result in form of a multi-bulk reply */
3358 ln = listIndex(list, start);
3359 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
3360 for (j = 0; j < rangelen; j++) {
3361 ele = listNodeValue(ln);
3362 addReplyBulkLen(c,ele);
3363 addReply(c,ele);
3364 addReply(c,shared.crlf);
3365 ln = ln->next;
3366 }
3367 }
3368 }
3369 }
3370
3371 static void ltrimCommand(redisClient *c) {
3372 robj *o;
3373 int start = atoi(c->argv[2]->ptr);
3374 int end = atoi(c->argv[3]->ptr);
3375
3376 o = lookupKeyWrite(c->db,c->argv[1]);
3377 if (o == NULL) {
3378 addReply(c,shared.nokeyerr);
3379 } else {
3380 if (o->type != REDIS_LIST) {
3381 addReply(c,shared.wrongtypeerr);
3382 } else {
3383 list *list = o->ptr;
3384 listNode *ln;
3385 int llen = listLength(list);
3386 int j, ltrim, rtrim;
3387
3388 /* convert negative indexes */
3389 if (start < 0) start = llen+start;
3390 if (end < 0) end = llen+end;
3391 if (start < 0) start = 0;
3392 if (end < 0) end = 0;
3393
3394 /* indexes sanity checks */
3395 if (start > end || start >= llen) {
3396 /* Out of range start or start > end result in empty list */
3397 ltrim = llen;
3398 rtrim = 0;
3399 } else {
3400 if (end >= llen) end = llen-1;
3401 ltrim = start;
3402 rtrim = llen-end-1;
3403 }
3404
3405 /* Remove list elements to perform the trim */
3406 for (j = 0; j < ltrim; j++) {
3407 ln = listFirst(list);
3408 listDelNode(list,ln);
3409 }
3410 for (j = 0; j < rtrim; j++) {
3411 ln = listLast(list);
3412 listDelNode(list,ln);
3413 }
3414 server.dirty++;
3415 addReply(c,shared.ok);
3416 }
3417 }
3418 }
3419
3420 static void lremCommand(redisClient *c) {
3421 robj *o;
3422
3423 o = lookupKeyWrite(c->db,c->argv[1]);
3424 if (o == NULL) {
3425 addReply(c,shared.czero);
3426 } else {
3427 if (o->type != REDIS_LIST) {
3428 addReply(c,shared.wrongtypeerr);
3429 } else {
3430 list *list = o->ptr;
3431 listNode *ln, *next;
3432 int toremove = atoi(c->argv[2]->ptr);
3433 int removed = 0;
3434 int fromtail = 0;
3435
3436 if (toremove < 0) {
3437 toremove = -toremove;
3438 fromtail = 1;
3439 }
3440 ln = fromtail ? list->tail : list->head;
3441 while (ln) {
3442 robj *ele = listNodeValue(ln);
3443
3444 next = fromtail ? ln->prev : ln->next;
3445 if (compareStringObjects(ele,c->argv[3]) == 0) {
3446 listDelNode(list,ln);
3447 server.dirty++;
3448 removed++;
3449 if (toremove && removed == toremove) break;
3450 }
3451 ln = next;
3452 }
3453 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",removed));
3454 }
3455 }
3456 }
3457
3458 /* ==================================== Sets ================================ */
3459
3460 static void saddCommand(redisClient *c) {
3461 robj *set;
3462
3463 set = lookupKeyWrite(c->db,c->argv[1]);
3464 if (set == NULL) {
3465 set = createSetObject();
3466 dictAdd(c->db->dict,c->argv[1],set);
3467 incrRefCount(c->argv[1]);
3468 } else {
3469 if (set->type != REDIS_SET) {
3470 addReply(c,shared.wrongtypeerr);
3471 return;
3472 }
3473 }
3474 if (dictAdd(set->ptr,c->argv[2],NULL) == DICT_OK) {
3475 incrRefCount(c->argv[2]);
3476 server.dirty++;
3477 addReply(c,shared.cone);
3478 } else {
3479 addReply(c,shared.czero);
3480 }
3481 }
3482
3483 static void sremCommand(redisClient *c) {
3484 robj *set;
3485
3486 set = lookupKeyWrite(c->db,c->argv[1]);
3487 if (set == NULL) {
3488 addReply(c,shared.czero);
3489 } else {
3490 if (set->type != REDIS_SET) {
3491 addReply(c,shared.wrongtypeerr);
3492 return;
3493 }
3494 if (dictDelete(set->ptr,c->argv[2]) == DICT_OK) {
3495 server.dirty++;
3496 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3497 addReply(c,shared.cone);
3498 } else {
3499 addReply(c,shared.czero);
3500 }
3501 }
3502 }
3503
3504 static void smoveCommand(redisClient *c) {
3505 robj *srcset, *dstset;
3506
3507 srcset = lookupKeyWrite(c->db,c->argv[1]);
3508 dstset = lookupKeyWrite(c->db,c->argv[2]);
3509
3510 /* If the source key does not exist return 0, if it's of the wrong type
3511 * raise an error */
3512 if (srcset == NULL || srcset->type != REDIS_SET) {
3513 addReply(c, srcset ? shared.wrongtypeerr : shared.czero);
3514 return;
3515 }
3516 /* Error if the destination key is not a set as well */
3517 if (dstset && dstset->type != REDIS_SET) {
3518 addReply(c,shared.wrongtypeerr);
3519 return;
3520 }
3521 /* Remove the element from the source set */
3522 if (dictDelete(srcset->ptr,c->argv[3]) == DICT_ERR) {
3523 /* Key not found in the src set! return zero */
3524 addReply(c,shared.czero);
3525 return;
3526 }
3527 server.dirty++;
3528 /* Add the element to the destination set */
3529 if (!dstset) {
3530 dstset = createSetObject();
3531 dictAdd(c->db->dict,c->argv[2],dstset);
3532 incrRefCount(c->argv[2]);
3533 }
3534 if (dictAdd(dstset->ptr,c->argv[3],NULL) == DICT_OK)
3535 incrRefCount(c->argv[3]);
3536 addReply(c,shared.cone);
3537 }
3538
3539 static void sismemberCommand(redisClient *c) {
3540 robj *set;
3541
3542 set = lookupKeyRead(c->db,c->argv[1]);
3543 if (set == NULL) {
3544 addReply(c,shared.czero);
3545 } else {
3546 if (set->type != REDIS_SET) {
3547 addReply(c,shared.wrongtypeerr);
3548 return;
3549 }
3550 if (dictFind(set->ptr,c->argv[2]))
3551 addReply(c,shared.cone);
3552 else
3553 addReply(c,shared.czero);
3554 }
3555 }
3556
3557 static void scardCommand(redisClient *c) {
3558 robj *o;
3559 dict *s;
3560
3561 o = lookupKeyRead(c->db,c->argv[1]);
3562 if (o == NULL) {
3563 addReply(c,shared.czero);
3564 return;
3565 } else {
3566 if (o->type != REDIS_SET) {
3567 addReply(c,shared.wrongtypeerr);
3568 } else {
3569 s = o->ptr;
3570 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3571 dictSize(s)));
3572 }
3573 }
3574 }
3575
3576 static void spopCommand(redisClient *c) {
3577 robj *set;
3578 dictEntry *de;
3579
3580 set = lookupKeyWrite(c->db,c->argv[1]);
3581 if (set == NULL) {
3582 addReply(c,shared.nullbulk);
3583 } else {
3584 if (set->type != REDIS_SET) {
3585 addReply(c,shared.wrongtypeerr);
3586 return;
3587 }
3588 de = dictGetRandomKey(set->ptr);
3589 if (de == NULL) {
3590 addReply(c,shared.nullbulk);
3591 } else {
3592 robj *ele = dictGetEntryKey(de);
3593
3594 addReplyBulkLen(c,ele);
3595 addReply(c,ele);
3596 addReply(c,shared.crlf);
3597 dictDelete(set->ptr,ele);
3598 if (htNeedsResize(set->ptr)) dictResize(set->ptr);
3599 server.dirty++;
3600 }
3601 }
3602 }
3603
3604 static void srandmemberCommand(redisClient *c) {
3605 robj *set;
3606 dictEntry *de;
3607
3608 set = lookupKeyRead(c->db,c->argv[1]);
3609 if (set == NULL) {
3610 addReply(c,shared.nullbulk);
3611 } else {
3612 if (set->type != REDIS_SET) {
3613 addReply(c,shared.wrongtypeerr);
3614 return;
3615 }
3616 de = dictGetRandomKey(set->ptr);
3617 if (de == NULL) {
3618 addReply(c,shared.nullbulk);
3619 } else {
3620 robj *ele = dictGetEntryKey(de);
3621
3622 addReplyBulkLen(c,ele);
3623 addReply(c,ele);
3624 addReply(c,shared.crlf);
3625 }
3626 }
3627 }
3628
3629 static int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
3630 dict **d1 = (void*) s1, **d2 = (void*) s2;
3631
3632 return dictSize(*d1)-dictSize(*d2);
3633 }
3634
3635 static void sinterGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey) {
3636 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3637 dictIterator *di;
3638 dictEntry *de;
3639 robj *lenobj = NULL, *dstset = NULL;
3640 int j, cardinality = 0;
3641
3642 for (j = 0; j < setsnum; j++) {
3643 robj *setobj;
3644
3645 setobj = dstkey ?
3646 lookupKeyWrite(c->db,setskeys[j]) :
3647 lookupKeyRead(c->db,setskeys[j]);
3648 if (!setobj) {
3649 zfree(dv);
3650 if (dstkey) {
3651 deleteKey(c->db,dstkey);
3652 addReply(c,shared.ok);
3653 } else {
3654 addReply(c,shared.nullmultibulk);
3655 }
3656 return;
3657 }
3658 if (setobj->type != REDIS_SET) {
3659 zfree(dv);
3660 addReply(c,shared.wrongtypeerr);
3661 return;
3662 }
3663 dv[j] = setobj->ptr;
3664 }
3665 /* Sort sets from the smallest to largest, this will improve our
3666 * algorithm's performace */
3667 qsort(dv,setsnum,sizeof(dict*),qsortCompareSetsByCardinality);
3668
3669 /* The first thing we should output is the total number of elements...
3670 * since this is a multi-bulk write, but at this stage we don't know
3671 * the intersection set size, so we use a trick, append an empty object
3672 * to the output list and save the pointer to later modify it with the
3673 * right length */
3674 if (!dstkey) {
3675 lenobj = createObject(REDIS_STRING,NULL);
3676 addReply(c,lenobj);
3677 decrRefCount(lenobj);
3678 } else {
3679 /* If we have a target key where to store the resulting set
3680 * create this key with an empty set inside */
3681 dstset = createSetObject();
3682 }
3683
3684 /* Iterate all the elements of the first (smallest) set, and test
3685 * the element against all the other sets, if at least one set does
3686 * not include the element it is discarded */
3687 di = dictGetIterator(dv[0]);
3688
3689 while((de = dictNext(di)) != NULL) {
3690 robj *ele;
3691
3692 for (j = 1; j < setsnum; j++)
3693 if (dictFind(dv[j],dictGetEntryKey(de)) == NULL) break;
3694 if (j != setsnum)
3695 continue; /* at least one set does not contain the member */
3696 ele = dictGetEntryKey(de);
3697 if (!dstkey) {
3698 addReplyBulkLen(c,ele);
3699 addReply(c,ele);
3700 addReply(c,shared.crlf);
3701 cardinality++;
3702 } else {
3703 dictAdd(dstset->ptr,ele,NULL);
3704 incrRefCount(ele);
3705 }
3706 }
3707 dictReleaseIterator(di);
3708
3709 if (dstkey) {
3710 /* Store the resulting set into the target */
3711 deleteKey(c->db,dstkey);
3712 dictAdd(c->db->dict,dstkey,dstset);
3713 incrRefCount(dstkey);
3714 }
3715
3716 if (!dstkey) {
3717 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",cardinality);
3718 } else {
3719 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3720 dictSize((dict*)dstset->ptr)));
3721 server.dirty++;
3722 }
3723 zfree(dv);
3724 }
3725
3726 static void sinterCommand(redisClient *c) {
3727 sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
3728 }
3729
3730 static void sinterstoreCommand(redisClient *c) {
3731 sinterGenericCommand(c,c->argv+2,c->argc-2,c->argv[1]);
3732 }
3733
3734 #define REDIS_OP_UNION 0
3735 #define REDIS_OP_DIFF 1
3736
3737 static void sunionDiffGenericCommand(redisClient *c, robj **setskeys, int setsnum, robj *dstkey, int op) {
3738 dict **dv = zmalloc(sizeof(dict*)*setsnum);
3739 dictIterator *di;
3740 dictEntry *de;
3741 robj *dstset = NULL;
3742 int j, cardinality = 0;
3743
3744 for (j = 0; j < setsnum; j++) {
3745 robj *setobj;
3746
3747 setobj = dstkey ?
3748 lookupKeyWrite(c->db,setskeys[j]) :
3749 lookupKeyRead(c->db,setskeys[j]);
3750 if (!setobj) {
3751 dv[j] = NULL;
3752 continue;
3753 }
3754 if (setobj->type != REDIS_SET) {
3755 zfree(dv);
3756 addReply(c,shared.wrongtypeerr);
3757 return;
3758 }
3759 dv[j] = setobj->ptr;
3760 }
3761
3762 /* We need a temp set object to store our union. If the dstkey
3763 * is not NULL (that is, we are inside an SUNIONSTORE operation) then
3764 * this set object will be the resulting object to set into the target key*/
3765 dstset = createSetObject();
3766
3767 /* Iterate all the elements of all the sets, add every element a single
3768 * time to the result set */
3769 for (j = 0; j < setsnum; j++) {
3770 if (op == REDIS_OP_DIFF && j == 0 && !dv[j]) break; /* result set is empty */
3771 if (!dv[j]) continue; /* non existing keys are like empty sets */
3772
3773 di = dictGetIterator(dv[j]);
3774
3775 while((de = dictNext(di)) != NULL) {
3776 robj *ele;
3777
3778 /* dictAdd will not add the same element multiple times */
3779 ele = dictGetEntryKey(de);
3780 if (op == REDIS_OP_UNION || j == 0) {
3781 if (dictAdd(dstset->ptr,ele,NULL) == DICT_OK) {
3782 incrRefCount(ele);
3783 cardinality++;
3784 }
3785 } else if (op == REDIS_OP_DIFF) {
3786 if (dictDelete(dstset->ptr,ele) == DICT_OK) {
3787 cardinality--;
3788 }
3789 }
3790 }
3791 dictReleaseIterator(di);
3792
3793 if (op == REDIS_OP_DIFF && cardinality == 0) break; /* result set is empty */
3794 }
3795
3796 /* Output the content of the resulting set, if not in STORE mode */
3797 if (!dstkey) {
3798 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",cardinality));
3799 di = dictGetIterator(dstset->ptr);
3800 while((de = dictNext(di)) != NULL) {
3801 robj *ele;
3802
3803 ele = dictGetEntryKey(de);
3804 addReplyBulkLen(c,ele);
3805 addReply(c,ele);
3806 addReply(c,shared.crlf);
3807 }
3808 dictReleaseIterator(di);
3809 } else {
3810 /* If we have a target key where to store the resulting set
3811 * create this key with the result set inside */
3812 deleteKey(c->db,dstkey);
3813 dictAdd(c->db->dict,dstkey,dstset);
3814 incrRefCount(dstkey);
3815 }
3816
3817 /* Cleanup */
3818 if (!dstkey) {
3819 decrRefCount(dstset);
3820 } else {
3821 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",
3822 dictSize((dict*)dstset->ptr)));
3823 server.dirty++;
3824 }
3825 zfree(dv);
3826 }
3827
3828 static void sunionCommand(redisClient *c) {
3829 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_UNION);
3830 }
3831
3832 static void sunionstoreCommand(redisClient *c) {
3833 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_UNION);
3834 }
3835
3836 static void sdiffCommand(redisClient *c) {
3837 sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,REDIS_OP_DIFF);
3838 }
3839
3840 static void sdiffstoreCommand(redisClient *c) {
3841 sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],REDIS_OP_DIFF);
3842 }
3843
3844 /* ==================================== ZSets =============================== */
3845
3846 /* ZSETs are ordered sets using two data structures to hold the same elements
3847 * in order to get O(log(N)) INSERT and REMOVE operations into a sorted
3848 * data structure.
3849 *
3850 * The elements are added to an hash table mapping Redis objects to scores.
3851 * At the same time the elements are added to a skip list mapping scores
3852 * to Redis objects (so objects are sorted by scores in this "view"). */
3853
3854 /* This skiplist implementation is almost a C translation of the original
3855 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
3856 * Alternative to Balanced Trees", modified in three ways:
3857 * a) this implementation allows for repeated values.
3858 * b) the comparison is not just by key (our 'score') but by satellite data.
3859 * c) there is a back pointer, so it's a doubly linked list with the back
3860 * pointers being only at "level 1". This allows to traverse the list
3861 * from tail to head, useful for ZREVRANGE. */
3862
3863 static zskiplistNode *zslCreateNode(int level, double score, robj *obj) {
3864 zskiplistNode *zn = zmalloc(sizeof(*zn));
3865
3866 zn->forward = zmalloc(sizeof(zskiplistNode*) * level);
3867 zn->score = score;
3868 zn->obj = obj;
3869 return zn;
3870 }
3871
3872 static zskiplist *zslCreate(void) {
3873 int j;
3874 zskiplist *zsl;
3875
3876 zsl = zmalloc(sizeof(*zsl));
3877 zsl->level = 1;
3878 zsl->length = 0;
3879 zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
3880 for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++)
3881 zsl->header->forward[j] = NULL;
3882 zsl->header->backward = NULL;
3883 zsl->tail = NULL;
3884 return zsl;
3885 }
3886
3887 static void zslFreeNode(zskiplistNode *node) {
3888 decrRefCount(node->obj);
3889 zfree(node->forward);
3890 zfree(node);
3891 }
3892
3893 static void zslFree(zskiplist *zsl) {
3894 zskiplistNode *node = zsl->header->forward[0], *next;
3895
3896 zfree(zsl->header->forward);
3897 zfree(zsl->header);
3898 while(node) {
3899 next = node->forward[0];
3900 zslFreeNode(node);
3901 node = next;
3902 }
3903 zfree(zsl);
3904 }
3905
3906 static int zslRandomLevel(void) {
3907 int level = 1;
3908 while ((random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF))
3909 level += 1;
3910 return level;
3911 }
3912
3913 static void zslInsert(zskiplist *zsl, double score, robj *obj) {
3914 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
3915 int i, level;
3916
3917 x = zsl->header;
3918 for (i = zsl->level-1; i >= 0; i--) {
3919 while (x->forward[i] &&
3920 (x->forward[i]->score < score ||
3921 (x->forward[i]->score == score &&
3922 compareStringObjects(x->forward[i]->obj,obj) < 0)))
3923 x = x->forward[i];
3924 update[i] = x;
3925 }
3926 /* we assume the key is not already inside, since we allow duplicated
3927 * scores, and the re-insertion of score and redis object should never
3928 * happpen since the caller of zslInsert() should test in the hash table
3929 * if the element is already inside or not. */
3930 level = zslRandomLevel();
3931 if (level > zsl->level) {
3932 for (i = zsl->level; i < level; i++)
3933 update[i] = zsl->header;
3934 zsl->level = level;
3935 }
3936 x = zslCreateNode(level,score,obj);
3937 for (i = 0; i < level; i++) {
3938 x->forward[i] = update[i]->forward[i];
3939 update[i]->forward[i] = x;
3940 }
3941 x->backward = (update[0] == zsl->header) ? NULL : update[0];
3942 if (x->forward[0])
3943 x->forward[0]->backward = x;
3944 else
3945 zsl->tail = x;
3946 zsl->length++;
3947 }
3948
3949 /* Delete an element with matching score/object from the skiplist. */
3950 static int zslDelete(zskiplist *zsl, double score, robj *obj) {
3951 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
3952 int i;
3953
3954 x = zsl->header;
3955 for (i = zsl->level-1; i >= 0; i--) {
3956 while (x->forward[i] &&
3957 (x->forward[i]->score < score ||
3958 (x->forward[i]->score == score &&
3959 compareStringObjects(x->forward[i]->obj,obj) < 0)))
3960 x = x->forward[i];
3961 update[i] = x;
3962 }
3963 /* We may have multiple elements with the same score, what we need
3964 * is to find the element with both the right score and object. */
3965 x = x->forward[0];
3966 if (x && score == x->score && compareStringObjects(x->obj,obj) == 0) {
3967 for (i = 0; i < zsl->level; i++) {
3968 if (update[i]->forward[i] != x) break;
3969 update[i]->forward[i] = x->forward[i];
3970 }
3971 if (x->forward[0]) {
3972 x->forward[0]->backward = (x->backward == zsl->header) ?
3973 NULL : x->backward;
3974 } else {
3975 zsl->tail = x->backward;
3976 }
3977 zslFreeNode(x);
3978 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
3979 zsl->level--;
3980 zsl->length--;
3981 return 1;
3982 } else {
3983 return 0; /* not found */
3984 }
3985 return 0; /* not found */
3986 }
3987
3988 /* Delete all the elements with score between min and max from the skiplist.
3989 * Min and mx are inclusive, so a score >= min || score <= max is deleted.
3990 * Note that this function takes the reference to the hash table view of the
3991 * sorted set, in order to remove the elements from the hash table too. */
3992 static unsigned long zslDeleteRange(zskiplist *zsl, double min, double max, dict *dict) {
3993 zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x;
3994 unsigned long removed = 0;
3995 int i;
3996
3997 x = zsl->header;
3998 for (i = zsl->level-1; i >= 0; i--) {
3999 while (x->forward[i] && x->forward[i]->score < min)
4000 x = x->forward[i];
4001 update[i] = x;
4002 }
4003 /* We may have multiple elements with the same score, what we need
4004 * is to find the element with both the right score and object. */
4005 x = x->forward[0];
4006 while (x && x->score <= max) {
4007 zskiplistNode *next;
4008
4009 for (i = 0; i < zsl->level; i++) {
4010 if (update[i]->forward[i] != x) break;
4011 update[i]->forward[i] = x->forward[i];
4012 }
4013 if (x->forward[0]) {
4014 x->forward[0]->backward = (x->backward == zsl->header) ?
4015 NULL : x->backward;
4016 } else {
4017 zsl->tail = x->backward;
4018 }
4019 next = x->forward[0];
4020 dictDelete(dict,x->obj);
4021 zslFreeNode(x);
4022 while(zsl->level > 1 && zsl->header->forward[zsl->level-1] == NULL)
4023 zsl->level--;
4024 zsl->length--;
4025 removed++;
4026 x = next;
4027 }
4028 return removed; /* not found */
4029 }
4030
4031 /* Find the first node having a score equal or greater than the specified one.
4032 * Returns NULL if there is no match. */
4033 static zskiplistNode *zslFirstWithScore(zskiplist *zsl, double score) {
4034 zskiplistNode *x;
4035 int i;
4036
4037 x = zsl->header;
4038 for (i = zsl->level-1; i >= 0; i--) {
4039 while (x->forward[i] && x->forward[i]->score < score)
4040 x = x->forward[i];
4041 }
4042 /* We may have multiple elements with the same score, what we need
4043 * is to find the element with both the right score and object. */
4044 return x->forward[0];
4045 }
4046
4047 /* The actual Z-commands implementations */
4048
4049 static void zaddCommand(redisClient *c) {
4050 robj *zsetobj;
4051 zset *zs;
4052 double *score;
4053
4054 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4055 if (zsetobj == NULL) {
4056 zsetobj = createZsetObject();
4057 dictAdd(c->db->dict,c->argv[1],zsetobj);
4058 incrRefCount(c->argv[1]);
4059 } else {
4060 if (zsetobj->type != REDIS_ZSET) {
4061 addReply(c,shared.wrongtypeerr);
4062 return;
4063 }
4064 }
4065 score = zmalloc(sizeof(double));
4066 *score = strtod(c->argv[2]->ptr,NULL);
4067 zs = zsetobj->ptr;
4068 if (dictAdd(zs->dict,c->argv[3],score) == DICT_OK) {
4069 /* case 1: New element */
4070 incrRefCount(c->argv[3]); /* added to hash */
4071 zslInsert(zs->zsl,*score,c->argv[3]);
4072 incrRefCount(c->argv[3]); /* added to skiplist */
4073 server.dirty++;
4074 addReply(c,shared.cone);
4075 } else {
4076 dictEntry *de;
4077 double *oldscore;
4078
4079 /* case 2: Score update operation */
4080 de = dictFind(zs->dict,c->argv[3]);
4081 assert(de != NULL);
4082 oldscore = dictGetEntryVal(de);
4083 if (*score != *oldscore) {
4084 int deleted;
4085
4086 deleted = zslDelete(zs->zsl,*oldscore,c->argv[3]);
4087 assert(deleted != 0);
4088 zslInsert(zs->zsl,*score,c->argv[3]);
4089 incrRefCount(c->argv[3]);
4090 dictReplace(zs->dict,c->argv[3],score);
4091 server.dirty++;
4092 } else {
4093 zfree(score);
4094 }
4095 addReply(c,shared.czero);
4096 }
4097 }
4098
4099 static void zremCommand(redisClient *c) {
4100 robj *zsetobj;
4101 zset *zs;
4102
4103 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4104 if (zsetobj == NULL) {
4105 addReply(c,shared.czero);
4106 } else {
4107 dictEntry *de;
4108 double *oldscore;
4109 int deleted;
4110
4111 if (zsetobj->type != REDIS_ZSET) {
4112 addReply(c,shared.wrongtypeerr);
4113 return;
4114 }
4115 zs = zsetobj->ptr;
4116 de = dictFind(zs->dict,c->argv[2]);
4117 if (de == NULL) {
4118 addReply(c,shared.czero);
4119 return;
4120 }
4121 /* Delete from the skiplist */
4122 oldscore = dictGetEntryVal(de);
4123 deleted = zslDelete(zs->zsl,*oldscore,c->argv[2]);
4124 assert(deleted != 0);
4125
4126 /* Delete from the hash table */
4127 dictDelete(zs->dict,c->argv[2]);
4128 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4129 server.dirty++;
4130 addReply(c,shared.cone);
4131 }
4132 }
4133
4134 static void zremrangebyscoreCommand(redisClient *c) {
4135 double min = strtod(c->argv[2]->ptr,NULL);
4136 double max = strtod(c->argv[3]->ptr,NULL);
4137 robj *zsetobj;
4138 zset *zs;
4139
4140 zsetobj = lookupKeyWrite(c->db,c->argv[1]);
4141 if (zsetobj == NULL) {
4142 addReply(c,shared.czero);
4143 } else {
4144 long deleted;
4145
4146 if (zsetobj->type != REDIS_ZSET) {
4147 addReply(c,shared.wrongtypeerr);
4148 return;
4149 }
4150 zs = zsetobj->ptr;
4151 deleted = zslDeleteRange(zs->zsl,min,max,zs->dict);
4152 if (htNeedsResize(zs->dict)) dictResize(zs->dict);
4153 server.dirty += deleted;
4154 addReplySds(c,sdscatprintf(sdsempty(),":%lu\r\n",deleted));
4155 }
4156 }
4157
4158 static void zrangeGenericCommand(redisClient *c, int reverse) {
4159 robj *o;
4160 int start = atoi(c->argv[2]->ptr);
4161 int end = atoi(c->argv[3]->ptr);
4162
4163 o = lookupKeyRead(c->db,c->argv[1]);
4164 if (o == NULL) {
4165 addReply(c,shared.nullmultibulk);
4166 } else {
4167 if (o->type != REDIS_ZSET) {
4168 addReply(c,shared.wrongtypeerr);
4169 } else {
4170 zset *zsetobj = o->ptr;
4171 zskiplist *zsl = zsetobj->zsl;
4172 zskiplistNode *ln;
4173
4174 int llen = zsl->length;
4175 int rangelen, j;
4176 robj *ele;
4177
4178 /* convert negative indexes */
4179 if (start < 0) start = llen+start;
4180 if (end < 0) end = llen+end;
4181 if (start < 0) start = 0;
4182 if (end < 0) end = 0;
4183
4184 /* indexes sanity checks */
4185 if (start > end || start >= llen) {
4186 /* Out of range start or start > end result in empty list */
4187 addReply(c,shared.emptymultibulk);
4188 return;
4189 }
4190 if (end >= llen) end = llen-1;
4191 rangelen = (end-start)+1;
4192
4193 /* Return the result in form of a multi-bulk reply */
4194 if (reverse) {
4195 ln = zsl->tail;
4196 while (start--)
4197 ln = ln->backward;
4198 } else {
4199 ln = zsl->header->forward[0];
4200 while (start--)
4201 ln = ln->forward[0];
4202 }
4203
4204 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",rangelen));
4205 for (j = 0; j < rangelen; j++) {
4206 ele = ln->obj;
4207 addReplyBulkLen(c,ele);
4208 addReply(c,ele);
4209 addReply(c,shared.crlf);
4210 ln = reverse ? ln->backward : ln->forward[0];
4211 }
4212 }
4213 }
4214 }
4215
4216 static void zrangeCommand(redisClient *c) {
4217 zrangeGenericCommand(c,0);
4218 }
4219
4220 static void zrevrangeCommand(redisClient *c) {
4221 zrangeGenericCommand(c,1);
4222 }
4223
4224 static void zrangebyscoreCommand(redisClient *c) {
4225 robj *o;
4226 double min = strtod(c->argv[2]->ptr,NULL);
4227 double max = strtod(c->argv[3]->ptr,NULL);
4228
4229 o = lookupKeyRead(c->db,c->argv[1]);
4230 if (o == NULL) {
4231 addReply(c,shared.nullmultibulk);
4232 } else {
4233 if (o->type != REDIS_ZSET) {
4234 addReply(c,shared.wrongtypeerr);
4235 } else {
4236 zset *zsetobj = o->ptr;
4237 zskiplist *zsl = zsetobj->zsl;
4238 zskiplistNode *ln;
4239 robj *ele, *lenobj;
4240 unsigned int rangelen = 0;
4241
4242 /* Get the first node with the score >= min */
4243 ln = zslFirstWithScore(zsl,min);
4244 if (ln == NULL) {
4245 /* No element matching the speciifed interval */
4246 addReply(c,shared.emptymultibulk);
4247 return;
4248 }
4249
4250 /* We don't know in advance how many matching elements there
4251 * are in the list, so we push this object that will represent
4252 * the multi-bulk length in the output buffer, and will "fix"
4253 * it later */
4254 lenobj = createObject(REDIS_STRING,NULL);
4255 addReply(c,lenobj);
4256
4257 while(ln && ln->score <= max) {
4258 ele = ln->obj;
4259 addReplyBulkLen(c,ele);
4260 addReply(c,ele);
4261 addReply(c,shared.crlf);
4262 ln = ln->forward[0];
4263 rangelen++;
4264 }
4265 lenobj->ptr = sdscatprintf(sdsempty(),"*%d\r\n",rangelen);
4266 }
4267 }
4268 }
4269
4270 static void zcardCommand(redisClient *c) {
4271 robj *o;
4272 zset *zs;
4273
4274 o = lookupKeyRead(c->db,c->argv[1]);
4275 if (o == NULL) {
4276 addReply(c,shared.czero);
4277 return;
4278 } else {
4279 if (o->type != REDIS_ZSET) {
4280 addReply(c,shared.wrongtypeerr);
4281 } else {
4282 zs = o->ptr;
4283 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",zs->zsl->length));
4284 }
4285 }
4286 }
4287
4288 static void zscoreCommand(redisClient *c) {
4289 robj *o;
4290 zset *zs;
4291
4292 o = lookupKeyRead(c->db,c->argv[1]);
4293 if (o == NULL) {
4294 addReply(c,shared.czero);
4295 return;
4296 } else {
4297 if (o->type != REDIS_ZSET) {
4298 addReply(c,shared.wrongtypeerr);
4299 } else {
4300 dictEntry *de;
4301
4302 zs = o->ptr;
4303 de = dictFind(zs->dict,c->argv[2]);
4304 if (!de) {
4305 addReply(c,shared.nullbulk);
4306 } else {
4307 char buf[128];
4308 double *score = dictGetEntryVal(de);
4309
4310 snprintf(buf,sizeof(buf),"%.16g",*score);
4311 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n%s\r\n",
4312 strlen(buf),buf));
4313 }
4314 }
4315 }
4316 }
4317
4318 /* ========================= Non type-specific commands ==================== */
4319
4320 static void flushdbCommand(redisClient *c) {
4321 server.dirty += dictSize(c->db->dict);
4322 dictEmpty(c->db->dict);
4323 dictEmpty(c->db->expires);
4324 addReply(c,shared.ok);
4325 }
4326
4327 static void flushallCommand(redisClient *c) {
4328 server.dirty += emptyDb();
4329 addReply(c,shared.ok);
4330 rdbSave(server.dbfilename);
4331 server.dirty++;
4332 }
4333
4334 static redisSortOperation *createSortOperation(int type, robj *pattern) {
4335 redisSortOperation *so = zmalloc(sizeof(*so));
4336 so->type = type;
4337 so->pattern = pattern;
4338 return so;
4339 }
4340
4341 /* Return the value associated to the key with a name obtained
4342 * substituting the first occurence of '*' in 'pattern' with 'subst' */
4343 static robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
4344 char *p;
4345 sds spat, ssub;
4346 robj keyobj;
4347 int prefixlen, sublen, postfixlen;
4348 /* Expoit the internal sds representation to create a sds string allocated on the stack in order to make this function faster */
4349 struct {
4350 long len;
4351 long free;
4352 char buf[REDIS_SORTKEY_MAX+1];
4353 } keyname;
4354
4355 if (subst->encoding == REDIS_ENCODING_RAW)
4356 incrRefCount(subst);
4357 else {
4358 subst = getDecodedObject(subst);
4359 }
4360
4361 spat = pattern->ptr;
4362 ssub = subst->ptr;
4363 if (sdslen(spat)+sdslen(ssub)-1 > REDIS_SORTKEY_MAX) return NULL;
4364 p = strchr(spat,'*');
4365 if (!p) return NULL;
4366
4367 prefixlen = p-spat;
4368 sublen = sdslen(ssub);
4369 postfixlen = sdslen(spat)-(prefixlen+1);
4370 memcpy(keyname.buf,spat,prefixlen);
4371 memcpy(keyname.buf+prefixlen,ssub,sublen);
4372 memcpy(keyname.buf+prefixlen+sublen,p+1,postfixlen);
4373 keyname.buf[prefixlen+sublen+postfixlen] = '\0';
4374 keyname.len = prefixlen+sublen+postfixlen;
4375
4376 keyobj.refcount = 1;
4377 keyobj.type = REDIS_STRING;
4378 keyobj.ptr = ((char*)&keyname)+(sizeof(long)*2);
4379
4380 decrRefCount(subst);
4381
4382 /* printf("lookup '%s' => %p\n", keyname.buf,de); */
4383 return lookupKeyRead(db,&keyobj);
4384 }
4385
4386 /* sortCompare() is used by qsort in sortCommand(). Given that qsort_r with
4387 * the additional parameter is not standard but a BSD-specific we have to
4388 * pass sorting parameters via the global 'server' structure */
4389 static int sortCompare(const void *s1, const void *s2) {
4390 const redisSortObject *so1 = s1, *so2 = s2;
4391 int cmp;
4392
4393 if (!server.sort_alpha) {
4394 /* Numeric sorting. Here it's trivial as we precomputed scores */
4395 if (so1->u.score > so2->u.score) {
4396 cmp = 1;
4397 } else if (so1->u.score < so2->u.score) {
4398 cmp = -1;
4399 } else {
4400 cmp = 0;
4401 }
4402 } else {
4403 /* Alphanumeric sorting */
4404 if (server.sort_bypattern) {
4405 if (!so1->u.cmpobj || !so2->u.cmpobj) {
4406 /* At least one compare object is NULL */
4407 if (so1->u.cmpobj == so2->u.cmpobj)
4408 cmp = 0;
4409 else if (so1->u.cmpobj == NULL)
4410 cmp = -1;
4411 else
4412 cmp = 1;
4413 } else {
4414 /* We have both the objects, use strcoll */
4415 cmp = strcoll(so1->u.cmpobj->ptr,so2->u.cmpobj->ptr);
4416 }
4417 } else {
4418 /* Compare elements directly */
4419 if (so1->obj->encoding == REDIS_ENCODING_RAW &&
4420 so2->obj->encoding == REDIS_ENCODING_RAW) {
4421 cmp = strcoll(so1->obj->ptr,so2->obj->ptr);
4422 } else {
4423 robj *dec1, *dec2;
4424
4425 dec1 = so1->obj->encoding == REDIS_ENCODING_RAW ?
4426 so1->obj : getDecodedObject(so1->obj);
4427 dec2 = so2->obj->encoding == REDIS_ENCODING_RAW ?
4428 so2->obj : getDecodedObject(so2->obj);
4429 cmp = strcoll(dec1->ptr,dec2->ptr);
4430 if (dec1 != so1->obj) decrRefCount(dec1);
4431 if (dec2 != so2->obj) decrRefCount(dec2);
4432 }
4433 }
4434 }
4435 return server.sort_desc ? -cmp : cmp;
4436 }
4437
4438 /* The SORT command is the most complex command in Redis. Warning: this code
4439 * is optimized for speed and a bit less for readability */
4440 static void sortCommand(redisClient *c) {
4441 list *operations;
4442 int outputlen = 0;
4443 int desc = 0, alpha = 0;
4444 int limit_start = 0, limit_count = -1, start, end;
4445 int j, dontsort = 0, vectorlen;
4446 int getop = 0; /* GET operation counter */
4447 robj *sortval, *sortby = NULL, *storekey = NULL;
4448 redisSortObject *vector; /* Resulting vector to sort */
4449
4450 /* Lookup the key to sort. It must be of the right types */
4451 sortval = lookupKeyRead(c->db,c->argv[1]);
4452 if (sortval == NULL) {
4453 addReply(c,shared.nokeyerr);
4454 return;
4455 }
4456 if (sortval->type != REDIS_SET && sortval->type != REDIS_LIST) {
4457 addReply(c,shared.wrongtypeerr);
4458 return;
4459 }
4460
4461 /* Create a list of operations to perform for every sorted element.
4462 * Operations can be GET/DEL/INCR/DECR */
4463 operations = listCreate();
4464 listSetFreeMethod(operations,zfree);
4465 j = 2;
4466
4467 /* Now we need to protect sortval incrementing its count, in the future
4468 * SORT may have options able to overwrite/delete keys during the sorting
4469 * and the sorted key itself may get destroied */
4470 incrRefCount(sortval);
4471
4472 /* The SORT command has an SQL-alike syntax, parse it */
4473 while(j < c->argc) {
4474 int leftargs = c->argc-j-1;
4475 if (!strcasecmp(c->argv[j]->ptr,"asc")) {
4476 desc = 0;
4477 } else if (!strcasecmp(c->argv[j]->ptr,"desc")) {
4478 desc = 1;
4479 } else if (!strcasecmp(c->argv[j]->ptr,"alpha")) {
4480 alpha = 1;
4481 } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
4482 limit_start = atoi(c->argv[j+1]->ptr);
4483 limit_count = atoi(c->argv[j+2]->ptr);
4484 j+=2;
4485 } else if (!strcasecmp(c->argv[j]->ptr,"store") && leftargs >= 1) {
4486 storekey = c->argv[j+1];
4487 j++;
4488 } else if (!strcasecmp(c->argv[j]->ptr,"by") && leftargs >= 1) {
4489 sortby = c->argv[j+1];
4490 /* If the BY pattern does not contain '*', i.e. it is constant,
4491 * we don't need to sort nor to lookup the weight keys. */
4492 if (strchr(c->argv[j+1]->ptr,'*') == NULL) dontsort = 1;
4493 j++;
4494 } else if (!strcasecmp(c->argv[j]->ptr,"get") && leftargs >= 1) {
4495 listAddNodeTail(operations,createSortOperation(
4496 REDIS_SORT_GET,c->argv[j+1]));
4497 getop++;
4498 j++;
4499 } else {
4500 decrRefCount(sortval);
4501 listRelease(operations);
4502 addReply(c,shared.syntaxerr);
4503 return;
4504 }
4505 j++;
4506 }
4507
4508 /* Load the sorting vector with all the objects to sort */
4509 vectorlen = (sortval->type == REDIS_LIST) ?
4510 listLength((list*)sortval->ptr) :
4511 dictSize((dict*)sortval->ptr);
4512 vector = zmalloc(sizeof(redisSortObject)*vectorlen);
4513 j = 0;
4514 if (sortval->type == REDIS_LIST) {
4515 list *list = sortval->ptr;
4516 listNode *ln;
4517
4518 listRewind(list);
4519 while((ln = listYield(list))) {
4520 robj *ele = ln->value;
4521 vector[j].obj = ele;
4522 vector[j].u.score = 0;
4523 vector[j].u.cmpobj = NULL;
4524 j++;
4525 }
4526 } else {
4527 dict *set = sortval->ptr;
4528 dictIterator *di;
4529 dictEntry *setele;
4530
4531 di = dictGetIterator(set);
4532 while((setele = dictNext(di)) != NULL) {
4533 vector[j].obj = dictGetEntryKey(setele);
4534 vector[j].u.score = 0;
4535 vector[j].u.cmpobj = NULL;
4536 j++;
4537 }
4538 dictReleaseIterator(di);
4539 }
4540 assert(j == vectorlen);
4541
4542 /* Now it's time to load the right scores in the sorting vector */
4543 if (dontsort == 0) {
4544 for (j = 0; j < vectorlen; j++) {
4545 if (sortby) {
4546 robj *byval;
4547
4548 byval = lookupKeyByPattern(c->db,sortby,vector[j].obj);
4549 if (!byval || byval->type != REDIS_STRING) continue;
4550 if (alpha) {
4551 if (byval->encoding == REDIS_ENCODING_RAW) {
4552 vector[j].u.cmpobj = byval;
4553 incrRefCount(byval);
4554 } else {
4555 vector[j].u.cmpobj = getDecodedObject(byval);
4556 }
4557 } else {
4558 if (byval->encoding == REDIS_ENCODING_RAW) {
4559 vector[j].u.score = strtod(byval->ptr,NULL);
4560 } else {
4561 if (byval->encoding == REDIS_ENCODING_INT) {
4562 vector[j].u.score = (long)byval->ptr;
4563 } else
4564 assert(1 != 1);
4565 }
4566 }
4567 } else {
4568 if (!alpha) {
4569 if (vector[j].obj->encoding == REDIS_ENCODING_RAW)
4570 vector[j].u.score = strtod(vector[j].obj->ptr,NULL);
4571 else {
4572 if (vector[j].obj->encoding == REDIS_ENCODING_INT)
4573 vector[j].u.score = (long) vector[j].obj->ptr;
4574 else
4575 assert(1 != 1);
4576 }
4577 }
4578 }
4579 }
4580 }
4581
4582 /* We are ready to sort the vector... perform a bit of sanity check
4583 * on the LIMIT option too. We'll use a partial version of quicksort. */
4584 start = (limit_start < 0) ? 0 : limit_start;
4585 end = (limit_count < 0) ? vectorlen-1 : start+limit_count-1;
4586 if (start >= vectorlen) {
4587 start = vectorlen-1;
4588 end = vectorlen-2;
4589 }
4590 if (end >= vectorlen) end = vectorlen-1;
4591
4592 if (dontsort == 0) {
4593 server.sort_desc = desc;
4594 server.sort_alpha = alpha;
4595 server.sort_bypattern = sortby ? 1 : 0;
4596 if (sortby && (start != 0 || end != vectorlen-1))
4597 pqsort(vector,vectorlen,sizeof(redisSortObject),sortCompare, start,end);
4598 else
4599 qsort(vector,vectorlen,sizeof(redisSortObject),sortCompare);
4600 }
4601
4602 /* Send command output to the output buffer, performing the specified
4603 * GET/DEL/INCR/DECR operations if any. */
4604 outputlen = getop ? getop*(end-start+1) : end-start+1;
4605 if (storekey == NULL) {
4606 /* STORE option not specified, sent the sorting result to client */
4607 addReplySds(c,sdscatprintf(sdsempty(),"*%d\r\n",outputlen));
4608 for (j = start; j <= end; j++) {
4609 listNode *ln;
4610 if (!getop) {
4611 addReplyBulkLen(c,vector[j].obj);
4612 addReply(c,vector[j].obj);
4613 addReply(c,shared.crlf);
4614 }
4615 listRewind(operations);
4616 while((ln = listYield(operations))) {
4617 redisSortOperation *sop = ln->value;
4618 robj *val = lookupKeyByPattern(c->db,sop->pattern,
4619 vector[j].obj);
4620
4621 if (sop->type == REDIS_SORT_GET) {
4622 if (!val || val->type != REDIS_STRING) {
4623 addReply(c,shared.nullbulk);
4624 } else {
4625 addReplyBulkLen(c,val);
4626 addReply(c,val);
4627 addReply(c,shared.crlf);
4628 }
4629 } else {
4630 assert(sop->type == REDIS_SORT_GET); /* always fails */
4631 }
4632 }
4633 }
4634 } else {
4635 robj *listObject = createListObject();
4636 list *listPtr = (list*) listObject->ptr;
4637
4638 /* STORE option specified, set the sorting result as a List object */
4639 for (j = start; j <= end; j++) {
4640 listNode *ln;
4641 if (!getop) {
4642 listAddNodeTail(listPtr,vector[j].obj);
4643 incrRefCount(vector[j].obj);
4644 }
4645 listRewind(operations);
4646 while((ln = listYield(operations))) {
4647 redisSortOperation *sop = ln->value;
4648 robj *val = lookupKeyByPattern(c->db,sop->pattern,
4649 vector[j].obj);
4650
4651 if (sop->type == REDIS_SORT_GET) {
4652 if (!val || val->type != REDIS_STRING) {
4653 listAddNodeTail(listPtr,createStringObject("",0));
4654 } else {
4655 listAddNodeTail(listPtr,val);
4656 incrRefCount(val);
4657 }
4658 } else {
4659 assert(sop->type == REDIS_SORT_GET); /* always fails */
4660 }
4661 }
4662 }
4663 dictReplace(c->db->dict,storekey,listObject);
4664 /* Note: we add 1 because the DB is dirty anyway since even if the
4665 * SORT result is empty a new key is set and maybe the old content
4666 * replaced. */
4667 server.dirty += 1+outputlen;
4668 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",outputlen));
4669 }
4670
4671 /* Cleanup */
4672 decrRefCount(sortval);
4673 listRelease(operations);
4674 for (j = 0; j < vectorlen; j++) {
4675 if (sortby && alpha && vector[j].u.cmpobj)
4676 decrRefCount(vector[j].u.cmpobj);
4677 }
4678 zfree(vector);
4679 }
4680
4681 static void infoCommand(redisClient *c) {
4682 sds info;
4683 time_t uptime = time(NULL)-server.stat_starttime;
4684 int j;
4685
4686 info = sdscatprintf(sdsempty(),
4687 "redis_version:%s\r\n"
4688 "arch_bits:%s\r\n"
4689 "uptime_in_seconds:%d\r\n"
4690 "uptime_in_days:%d\r\n"
4691 "connected_clients:%d\r\n"
4692 "connected_slaves:%d\r\n"
4693 "used_memory:%zu\r\n"
4694 "changes_since_last_save:%lld\r\n"
4695 "bgsave_in_progress:%d\r\n"
4696 "last_save_time:%d\r\n"
4697 "total_connections_received:%lld\r\n"
4698 "total_commands_processed:%lld\r\n"
4699 "role:%s\r\n"
4700 ,REDIS_VERSION,
4701 (sizeof(long) == 8) ? "64" : "32",
4702 uptime,
4703 uptime/(3600*24),
4704 listLength(server.clients)-listLength(server.slaves),
4705 listLength(server.slaves),
4706 server.usedmemory,
4707 server.dirty,
4708 server.bgsaveinprogress,
4709 server.lastsave,
4710 server.stat_numconnections,
4711 server.stat_numcommands,
4712 server.masterhost == NULL ? "master" : "slave"
4713 );
4714 if (server.masterhost) {
4715 info = sdscatprintf(info,
4716 "master_host:%s\r\n"
4717 "master_port:%d\r\n"
4718 "master_link_status:%s\r\n"
4719 "master_last_io_seconds_ago:%d\r\n"
4720 ,server.masterhost,
4721 server.masterport,
4722 (server.replstate == REDIS_REPL_CONNECTED) ?
4723 "up" : "down",
4724 server.master ? ((int)(time(NULL)-server.master->lastinteraction)) : -1
4725 );
4726 }
4727 for (j = 0; j < server.dbnum; j++) {
4728 long long keys, vkeys;
4729
4730 keys = dictSize(server.db[j].dict);
4731 vkeys = dictSize(server.db[j].expires);
4732 if (keys || vkeys) {
4733 info = sdscatprintf(info, "db%d: keys=%lld,expires=%lld\r\n",
4734 j, keys, vkeys);
4735 }
4736 }
4737 addReplySds(c,sdscatprintf(sdsempty(),"$%d\r\n",sdslen(info)));
4738 addReplySds(c,info);
4739 addReply(c,shared.crlf);
4740 }
4741
4742 static void monitorCommand(redisClient *c) {
4743 /* ignore MONITOR if aleady slave or in monitor mode */
4744 if (c->flags & REDIS_SLAVE) return;
4745
4746 c->flags |= (REDIS_SLAVE|REDIS_MONITOR);
4747 c->slaveseldb = 0;
4748 listAddNodeTail(server.monitors,c);
4749 addReply(c,shared.ok);
4750 }
4751
4752 /* ================================= Expire ================================= */
4753 static int removeExpire(redisDb *db, robj *key) {
4754 if (dictDelete(db->expires,key) == DICT_OK) {
4755 return 1;
4756 } else {
4757 return 0;
4758 }
4759 }
4760
4761 static int setExpire(redisDb *db, robj *key, time_t when) {
4762 if (dictAdd(db->expires,key,(void*)when) == DICT_ERR) {
4763 return 0;
4764 } else {
4765 incrRefCount(key);
4766 return 1;
4767 }
4768 }
4769
4770 /* Return the expire time of the specified key, or -1 if no expire
4771 * is associated with this key (i.e. the key is non volatile) */
4772 static time_t getExpire(redisDb *db, robj *key) {
4773 dictEntry *de;
4774
4775 /* No expire? return ASAP */
4776 if (dictSize(db->expires) == 0 ||
4777 (de = dictFind(db->expires,key)) == NULL) return -1;
4778
4779 return (time_t) dictGetEntryVal(de);
4780 }
4781
4782 static int expireIfNeeded(redisDb *db, robj *key) {
4783 time_t when;
4784 dictEntry *de;
4785
4786 /* No expire? return ASAP */
4787 if (dictSize(db->expires) == 0 ||
4788 (de = dictFind(db->expires,key)) == NULL) return 0;
4789
4790 /* Lookup the expire */
4791 when = (time_t) dictGetEntryVal(de);
4792 if (time(NULL) <= when) return 0;
4793
4794 /* Delete the key */
4795 dictDelete(db->expires,key);
4796 return dictDelete(db->dict,key) == DICT_OK;
4797 }
4798
4799 static int deleteIfVolatile(redisDb *db, robj *key) {
4800 dictEntry *de;
4801
4802 /* No expire? return ASAP */
4803 if (dictSize(db->expires) == 0 ||
4804 (de = dictFind(db->expires,key)) == NULL) return 0;
4805
4806 /* Delete the key */
4807 server.dirty++;
4808 dictDelete(db->expires,key);
4809 return dictDelete(db->dict,key) == DICT_OK;
4810 }
4811
4812 static void expireGenericCommand(redisClient *c, robj *key, time_t seconds) {
4813 dictEntry *de;
4814
4815 de = dictFind(c->db->dict,key);
4816 if (de == NULL) {
4817 addReply(c,shared.czero);
4818 return;
4819 }
4820 if (seconds < 0) {
4821 if (deleteKey(c->db,key)) server.dirty++;
4822 addReply(c, shared.cone);
4823 return;
4824 } else {
4825 time_t when = time(NULL)+seconds;
4826 if (setExpire(c->db,key,when)) {
4827 addReply(c,shared.cone);
4828 server.dirty++;
4829 } else {
4830 addReply(c,shared.czero);
4831 }
4832 return;
4833 }
4834 }
4835
4836 static void expireCommand(redisClient *c) {
4837 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10));
4838 }
4839
4840 static void expireatCommand(redisClient *c) {
4841 expireGenericCommand(c,c->argv[1],strtol(c->argv[2]->ptr,NULL,10)-time(NULL));
4842 }
4843
4844 static void ttlCommand(redisClient *c) {
4845 time_t expire;
4846 int ttl = -1;
4847
4848 expire = getExpire(c->db,c->argv[1]);
4849 if (expire != -1) {
4850 ttl = (int) (expire-time(NULL));
4851 if (ttl < 0) ttl = -1;
4852 }
4853 addReplySds(c,sdscatprintf(sdsempty(),":%d\r\n",ttl));
4854 }
4855
4856 static void msetGenericCommand(redisClient *c, int nx) {
4857 int j;
4858
4859 if ((c->argc % 2) == 0) {
4860 addReplySds(c,sdsnew("-ERR wrong number of arguments\r\n"));
4861 return;
4862 }
4863 /* Handle the NX flag. The MSETNX semantic is to return zero and don't
4864 * set nothing at all if at least one already key exists. */
4865 if (nx) {
4866 for (j = 1; j < c->argc; j += 2) {
4867 if (dictFind(c->db->dict,c->argv[j]) != NULL) {
4868 addReply(c, shared.czero);
4869 return;
4870 }
4871 }
4872 }
4873
4874 for (j = 1; j < c->argc; j += 2) {
4875 int retval;
4876
4877 retval = dictAdd(c->db->dict,c->argv[j],c->argv[j+1]);
4878 if (retval == DICT_ERR) {
4879 dictReplace(c->db->dict,c->argv[j],c->argv[j+1]);
4880 incrRefCount(c->argv[j+1]);
4881 } else {
4882 incrRefCount(c->argv[j]);
4883 incrRefCount(c->argv[j+1]);
4884 }
4885 removeExpire(c->db,c->argv[j]);
4886 }
4887 server.dirty += (c->argc-1)/2;
4888 addReply(c, nx ? shared.cone : shared.ok);
4889 }
4890
4891 static void msetCommand(redisClient *c) {
4892 msetGenericCommand(c,0);
4893 }
4894
4895 static void msetnxCommand(redisClient *c) {
4896 msetGenericCommand(c,1);
4897 }
4898
4899 /* =============================== Replication ============================= */
4900
4901 static int syncWrite(int fd, char *ptr, ssize_t size, int timeout) {
4902 ssize_t nwritten, ret = size;
4903 time_t start = time(NULL);
4904
4905 timeout++;
4906 while(size) {
4907 if (aeWait(fd,AE_WRITABLE,1000) & AE_WRITABLE) {
4908 nwritten = write(fd,ptr,size);
4909 if (nwritten == -1) return -1;
4910 ptr += nwritten;
4911 size -= nwritten;
4912 }
4913 if ((time(NULL)-start) > timeout) {
4914 errno = ETIMEDOUT;
4915 return -1;
4916 }
4917 }
4918 return ret;
4919 }
4920
4921 static int syncRead(int fd, char *ptr, ssize_t size, int timeout) {
4922 ssize_t nread, totread = 0;
4923 time_t start = time(NULL);
4924
4925 timeout++;
4926 while(size) {
4927 if (aeWait(fd,AE_READABLE,1000) & AE_READABLE) {
4928 nread = read(fd,ptr,size);
4929 if (nread == -1) return -1;
4930 ptr += nread;
4931 size -= nread;
4932 totread += nread;
4933 }
4934 if ((time(NULL)-start) > timeout) {
4935 errno = ETIMEDOUT;
4936 return -1;
4937 }
4938 }
4939 return totread;
4940 }
4941
/* Read a line from 'fd' one byte at a time, storing at most size-1 bytes
 * plus the nul terminator in the buffer pointed to by 'ptr'.
 *
 * The trailing '\n' is not stored and a '\r' right before it is replaced
 * with a nul byte. Reading stops at the newline or when the buffer is full,
 * whichever happens first. 'timeout' is applied to every single byte read.
 *
 * Returns the number of bytes stored before the newline (a stripped '\r'
 * is still counted), or -1 if syncRead() fails. */
static int syncReadLine(int fd, char *ptr, ssize_t size, int timeout) {
    ssize_t nread = 0;

    /* No room at all: nothing can be stored, not even the terminator */
    if (size <= 0) return 0;
    *ptr = '\0';

    size--; /* reserve one byte for the nul terminator */
    while(size) {
        char c;

        if (syncRead(fd,&c,1,timeout) == -1) return -1;
        if (c == '\n') {
            *ptr = '\0';
            if (nread && *(ptr-1) == '\r') *(ptr-1) = '\0';
            return nread;
        } else {
            *ptr++ = c;
            *ptr = '\0';
            nread++;
            /* Account for the byte just stored, otherwise a line longer
             * than the buffer would be written past its end. */
            size--;
        }
    }
    return nread;
}
4962
4963 static void syncCommand(redisClient *c) {
4964 /* ignore SYNC if aleady slave or in monitor mode */
4965 if (c->flags & REDIS_SLAVE) return;
4966
4967 /* SYNC can't be issued when the server has pending data to send to
4968 * the client about already issued commands. We need a fresh reply
4969 * buffer registering the differences between the BGSAVE and the current
4970 * dataset, so that we can copy to other slaves if needed. */
4971 if (listLength(c->reply) != 0) {
4972 addReplySds(c,sdsnew("-ERR SYNC is invalid with pending input\r\n"));
4973 return;
4974 }
4975
4976 redisLog(REDIS_NOTICE,"Slave ask for synchronization");
4977 /* Here we need to check if there is a background saving operation
4978 * in progress, or if it is required to start one */
4979 if (server.bgsaveinprogress) {
4980 /* Ok a background save is in progress. Let's check if it is a good
4981 * one for replication, i.e. if there is another slave that is
4982 * registering differences since the server forked to save */
4983 redisClient *slave;
4984 listNode *ln;
4985
4986 listRewind(server.slaves);
4987 while((ln = listYield(server.slaves))) {
4988 slave = ln->value;
4989 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) break;
4990 }
4991 if (ln) {
4992 /* Perfect, the server is already registering differences for
4993 * another slave. Set the right state, and copy the buffer. */
4994 listRelease(c->reply);
4995 c->reply = listDup(slave->reply);
4996 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
4997 redisLog(REDIS_NOTICE,"Waiting for end of BGSAVE for SYNC");
4998 } else {
4999 /* No way, we need to wait for the next BGSAVE in order to
5000 * register differences */
5001 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5002 redisLog(REDIS_NOTICE,"Waiting for next BGSAVE for SYNC");
5003 }
5004 } else {
5005 /* Ok we don't have a BGSAVE in progress, let's start one */
5006 redisLog(REDIS_NOTICE,"Starting BGSAVE for SYNC");
5007 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5008 redisLog(REDIS_NOTICE,"Replication failed, can't BGSAVE");
5009 addReplySds(c,sdsnew("-ERR Unalbe to perform background save\r\n"));
5010 return;
5011 }
5012 c->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5013 }
5014 c->repldbfd = -1;
5015 c->flags |= REDIS_SLAVE;
5016 c->slaveseldb = 0;
5017 listAddNodeTail(server.slaves,c);
5018 return;
5019 }
5020
5021 static void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
5022 redisClient *slave = privdata;
5023 REDIS_NOTUSED(el);
5024 REDIS_NOTUSED(mask);
5025 char buf[REDIS_IOBUF_LEN];
5026 ssize_t nwritten, buflen;
5027
5028 if (slave->repldboff == 0) {
5029 /* Write the bulk write count before to transfer the DB. In theory here
5030 * we don't know how much room there is in the output buffer of the
5031 * socket, but in pratice SO_SNDLOWAT (the minimum count for output
5032 * operations) will never be smaller than the few bytes we need. */
5033 sds bulkcount;
5034
5035 bulkcount = sdscatprintf(sdsempty(),"$%lld\r\n",(unsigned long long)
5036 slave->repldbsize);
5037 if (write(fd,bulkcount,sdslen(bulkcount)) != (signed)sdslen(bulkcount))
5038 {
5039 sdsfree(bulkcount);
5040 freeClient(slave);
5041 return;
5042 }
5043 sdsfree(bulkcount);
5044 }
5045 lseek(slave->repldbfd,slave->repldboff,SEEK_SET);
5046 buflen = read(slave->repldbfd,buf,REDIS_IOBUF_LEN);
5047 if (buflen <= 0) {
5048 redisLog(REDIS_WARNING,"Read error sending DB to slave: %s",
5049 (buflen == 0) ? "premature EOF" : strerror(errno));
5050 freeClient(slave);
5051 return;
5052 }
5053 if ((nwritten = write(fd,buf,buflen)) == -1) {
5054 redisLog(REDIS_DEBUG,"Write error sending DB to slave: %s",
5055 strerror(errno));
5056 freeClient(slave);
5057 return;
5058 }
5059 slave->repldboff += nwritten;
5060 if (slave->repldboff == slave->repldbsize) {
5061 close(slave->repldbfd);
5062 slave->repldbfd = -1;
5063 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5064 slave->replstate = REDIS_REPL_ONLINE;
5065 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
5066 sendReplyToClient, slave, NULL) == AE_ERR) {
5067 freeClient(slave);
5068 return;
5069 }
5070 addReplySds(slave,sdsempty());
5071 redisLog(REDIS_NOTICE,"Synchronization with slave succeeded");
5072 }
5073 }
5074
5075 /* This function is called at the end of every backgrond saving.
5076 * The argument bgsaveerr is REDIS_OK if the background saving succeeded
5077 * otherwise REDIS_ERR is passed to the function.
5078 *
5079 * The goal of this function is to handle slaves waiting for a successful
5080 * background saving in order to perform non-blocking synchronization. */
5081 static void updateSlavesWaitingBgsave(int bgsaveerr) {
5082 listNode *ln;
5083 int startbgsave = 0;
5084
5085 listRewind(server.slaves);
5086 while((ln = listYield(server.slaves))) {
5087 redisClient *slave = ln->value;
5088
5089 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START) {
5090 startbgsave = 1;
5091 slave->replstate = REDIS_REPL_WAIT_BGSAVE_END;
5092 } else if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_END) {
5093 struct redis_stat buf;
5094
5095 if (bgsaveerr != REDIS_OK) {
5096 freeClient(slave);
5097 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE child returned an error");
5098 continue;
5099 }
5100 if ((slave->repldbfd = open(server.dbfilename,O_RDONLY)) == -1 ||
5101 redis_fstat(slave->repldbfd,&buf) == -1) {
5102 freeClient(slave);
5103 redisLog(REDIS_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
5104 continue;
5105 }
5106 slave->repldboff = 0;
5107 slave->repldbsize = buf.st_size;
5108 slave->replstate = REDIS_REPL_SEND_BULK;
5109 aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
5110 if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave, NULL) == AE_ERR) {
5111 freeClient(slave);
5112 continue;
5113 }
5114 }
5115 }
5116 if (startbgsave) {
5117 if (rdbSaveBackground(server.dbfilename) != REDIS_OK) {
5118 listRewind(server.slaves);
5119 redisLog(REDIS_WARNING,"SYNC failed. BGSAVE failed");
5120 while((ln = listYield(server.slaves))) {
5121 redisClient *slave = ln->value;
5122
5123 if (slave->replstate == REDIS_REPL_WAIT_BGSAVE_START)
5124 freeClient(slave);
5125 }
5126 }
5127 }
5128 }
5129
5130 static int syncWithMaster(void) {
5131 char buf[1024], tmpfile[256];
5132 int dumpsize;
5133 int fd = anetTcpConnect(NULL,server.masterhost,server.masterport);
5134 int dfd;
5135
5136 if (fd == -1) {
5137 redisLog(REDIS_WARNING,"Unable to connect to MASTER: %s",
5138 strerror(errno));
5139 return REDIS_ERR;
5140 }
5141 /* Issue the SYNC command */
5142 if (syncWrite(fd,"SYNC \r\n",7,5) == -1) {
5143 close(fd);
5144 redisLog(REDIS_WARNING,"I/O error writing to MASTER: %s",
5145 strerror(errno));
5146 return REDIS_ERR;
5147 }
5148 /* Read the bulk write count */
5149 if (syncReadLine(fd,buf,1024,3600) == -1) {
5150 close(fd);
5151 redisLog(REDIS_WARNING,"I/O error reading bulk count from MASTER: %s",
5152 strerror(errno));
5153 return REDIS_ERR;
5154 }
5155 if (buf[0] != '$') {
5156 close(fd);
5157 redisLog(REDIS_WARNING,"Bad protocol from MASTER, the first byte is not '$', are you sure the host and port are right?");
5158 return REDIS_ERR;
5159 }
5160 dumpsize = atoi(buf+1);
5161 redisLog(REDIS_NOTICE,"Receiving %d bytes data dump from MASTER",dumpsize);
5162 /* Read the bulk write data on a temp file */
5163 snprintf(tmpfile,256,"temp-%d.%ld.rdb",(int)time(NULL),(long int)random());
5164 dfd = open(tmpfile,O_CREAT|O_WRONLY,0644);
5165 if (dfd == -1) {
5166 close(fd);
5167 redisLog(REDIS_WARNING,"Opening the temp file needed for MASTER <-> SLAVE synchronization: %s",strerror(errno));
5168 return REDIS_ERR;
5169 }
5170 while(dumpsize) {
5171 int nread, nwritten;
5172
5173 nread = read(fd,buf,(dumpsize < 1024)?dumpsize:1024);
5174 if (nread == -1) {
5175 redisLog(REDIS_WARNING,"I/O error trying to sync with MASTER: %s",
5176 strerror(errno));
5177 close(fd);
5178 close(dfd);
5179 return REDIS_ERR;
5180 }
5181 nwritten = write(dfd,buf,nread);
5182 if (nwritten == -1) {
5183 redisLog(REDIS_WARNING,"Write error writing to the DB dump file needed for MASTER <-> SLAVE synchrnonization: %s", strerror(errno));
5184 close(fd);
5185 close(dfd);
5186 return REDIS_ERR;
5187 }
5188 dumpsize -= nread;
5189 }
5190 close(dfd);
5191 if (rename(tmpfile,server.dbfilename) == -1) {
5192 redisLog(REDIS_WARNING,"Failed trying to rename the temp DB into dump.rdb in MASTER <-> SLAVE synchronization: %s", strerror(errno));
5193 unlink(tmpfile);
5194 close(fd);
5195 return REDIS_ERR;
5196 }
5197 emptyDb();
5198 if (rdbLoad(server.dbfilename) != REDIS_OK) {
5199 redisLog(REDIS_WARNING,"Failed trying to load the MASTER synchronization DB from disk");
5200 close(fd);
5201 return REDIS_ERR;
5202 }
5203 server.master = createClient(fd);
5204 server.master->flags |= REDIS_MASTER;
5205 server.replstate = REDIS_REPL_CONNECTED;
5206 return REDIS_OK;
5207 }
5208
5209 static void slaveofCommand(redisClient *c) {
5210 if (!strcasecmp(c->argv[1]->ptr,"no") &&
5211 !strcasecmp(c->argv[2]->ptr,"one")) {
5212 if (server.masterhost) {
5213 sdsfree(server.masterhost);
5214 server.masterhost = NULL;
5215 if (server.master) freeClient(server.master);
5216 server.replstate = REDIS_REPL_NONE;
5217 redisLog(REDIS_NOTICE,"MASTER MODE enabled (user request)");
5218 }
5219 } else {
5220 sdsfree(server.masterhost);
5221 server.masterhost = sdsdup(c->argv[1]->ptr);
5222 server.masterport = atoi(c->argv[2]->ptr);
5223 if (server.master) freeClient(server.master);
5224 server.replstate = REDIS_REPL_CONNECT;
5225 redisLog(REDIS_NOTICE,"SLAVE OF %s:%d enabled (user request)",
5226 server.masterhost, server.masterport);
5227 }
5228 addReply(c,shared.ok);
5229 }
5230
5231 /* ============================ Maxmemory directive ======================== */
5232
5233 /* This function gets called when 'maxmemory' is set on the config file to limit
5234 * the max memory used by the server, and we are out of memory.
5235 * This function will try to, in order:
5236 *
5237 * - Free objects from the free list
5238 * - Try to remove keys with an EXPIRE set
5239 *
5240 * It is not possible to free enough memory to reach used-memory < maxmemory
5241 * the server will start refusing commands that will enlarge even more the
5242 * memory usage.
5243 */
5244 static void freeMemoryIfNeeded(void) {
5245 while (server.maxmemory && zmalloc_used_memory() > server.maxmemory) {
5246 if (listLength(server.objfreelist)) {
5247 robj *o;
5248
5249 listNode *head = listFirst(server.objfreelist);
5250 o = listNodeValue(head);
5251 listDelNode(server.objfreelist,head);
5252 zfree(o);
5253 } else {
5254 int j, k, freed = 0;
5255
5256 for (j = 0; j < server.dbnum; j++) {
5257 int minttl = -1;
5258 robj *minkey = NULL;
5259 struct dictEntry *de;
5260
5261 if (dictSize(server.db[j].expires)) {
5262 freed = 1;
5263 /* From a sample of three keys drop the one nearest to
5264 * the natural expire */
5265 for (k = 0; k < 3; k++) {
5266 time_t t;
5267
5268 de = dictGetRandomKey(server.db[j].expires);
5269 t = (time_t) dictGetEntryVal(de);
5270 if (minttl == -1 || t < minttl) {
5271 minkey = dictGetEntryKey(de);
5272 minttl = t;
5273 }
5274 }
5275 deleteKey(server.db+j,minkey);
5276 }
5277 }
5278 if (!freed) return; /* nothing to free... */
5279 }
5280 }
5281 }
5282
5283 /* ============================== Append Only file ========================== */
5284
5285 static void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
5286 sds buf = sdsempty();
5287 int j;
5288 ssize_t nwritten;
5289 time_t now;
5290 robj *tmpargv[3];
5291
5292 /* The DB this command was targetting is not the same as the last command
5293 * we appendend. To issue a SELECT command is needed. */
5294 if (dictid != server.appendseldb) {
5295 char seldb[64];
5296
5297 snprintf(seldb,sizeof(seldb),"%d",dictid);
5298 buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
5299 strlen(seldb),seldb);
5300 server.appendseldb = dictid;
5301 }
5302
5303 /* "Fix" the argv vector if the command is EXPIRE. We want to translate
5304 * EXPIREs into EXPIREATs calls */
5305 if (cmd->proc == expireCommand) {
5306 long when;
5307
5308 tmpargv[0] = createStringObject("EXPIREAT",8);
5309 tmpargv[1] = argv[1];
5310 incrRefCount(argv[1]);
5311 when = time(NULL)+strtol(argv[2]->ptr,NULL,10);
5312 tmpargv[2] = createObject(REDIS_STRING,
5313 sdscatprintf(sdsempty(),"%ld",when));
5314 argv = tmpargv;
5315 }
5316
5317 /* Append the actual command */
5318 buf = sdscatprintf(buf,"*%d\r\n",argc);
5319 for (j = 0; j < argc; j++) {
5320 robj *o = argv[j];
5321
5322 if (o->encoding != REDIS_ENCODING_RAW)
5323 o = getDecodedObject(o);
5324 buf = sdscatprintf(buf,"$%d\r\n",sdslen(o->ptr));
5325 buf = sdscatlen(buf,o->ptr,sdslen(o->ptr));
5326 buf = sdscatlen(buf,"\r\n",2);
5327 if (o != argv[j])
5328 decrRefCount(o);
5329 }
5330
5331 /* Free the objects from the modified argv for EXPIREAT */
5332 if (cmd->proc == expireCommand) {
5333 for (j = 0; j < 3; j++)
5334 decrRefCount(argv[j]);
5335 }
5336
5337 /* We want to perform a single write. This should be guaranteed atomic
5338 * at least if the filesystem we are writing is a real physical one.
5339 * While this will save us against the server being killed I don't think
5340 * there is much to do about the whole server stopping for power problems
5341 * or alike */
5342 nwritten = write(server.appendfd,buf,sdslen(buf));
5343 if (nwritten != (signed)sdslen(buf)) {
5344 /* Ooops, we are in troubles. The best thing to do for now is
5345 * to simply exit instead to give the illusion that everything is
5346 * working as expected. */
5347 if (nwritten == -1) {
5348 redisLog(REDIS_WARNING,"Exiting on error writing to the append-only file: %s",strerror(errno));
5349 } else {
5350 redisLog(REDIS_WARNING,"Exiting on short write while writing to the append-only file: %s",strerror(errno));
5351 }
5352 exit(1);
5353 }
5354 now = time(NULL);
5355 if (server.appendfsync == APPENDFSYNC_ALWAYS ||
5356 (server.appendfsync == APPENDFSYNC_EVERYSEC &&
5357 now-server.lastfsync > 1))
5358 {
5359 fsync(server.appendfd); /* Let's try to get this data on the disk */
5360 server.lastfsync = now;
5361 }
5362 }
5363
5364 /* In Redis commands are always executed in the context of a client, so in
5365 * order to load the append only file we need to create a fake client. */
5366 static struct redisClient *createFakeClient(void) {
5367 struct redisClient *c = zmalloc(sizeof(*c));
5368
5369 selectDb(c,0);
5370 c->fd = -1;
5371 c->querybuf = sdsempty();
5372 c->argc = 0;
5373 c->argv = NULL;
5374 c->flags = 0;
5375 /* We set the fake client as a slave waiting for the synchronization
5376 * so that Redis will not try to send replies to this client. */
5377 c->replstate = REDIS_REPL_WAIT_BGSAVE_START;
5378 c->reply = listCreate();
5379 listSetFreeMethod(c->reply,decrRefCount);
5380 listSetDupMethod(c->reply,dupClientReplyValue);
5381 return c;
5382 }
5383
5384 static void freeFakeClient(struct redisClient *c) {
5385 sdsfree(c->querybuf);
5386 listRelease(c->reply);
5387 zfree(c);
5388 }
5389
5390 /* Replay the append log file. On error REDIS_OK is returned. On non fatal
5391 * error (the append only file is zero-length) REDIS_ERR is returned. On
5392 * fatal error an error message is logged and the program exists. */
5393 int loadAppendOnlyFile(char *filename) {
5394 struct redisClient *fakeClient;
5395 FILE *fp = fopen(filename,"r");
5396 struct redis_stat sb;
5397
5398 if (redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0)
5399 return REDIS_ERR;
5400
5401 if (fp == NULL) {
5402 redisLog(REDIS_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
5403 exit(1);
5404 }
5405
5406 fakeClient = createFakeClient();
5407 while(1) {
5408 int argc, j;
5409 unsigned long len;
5410 robj **argv;
5411 char buf[128];
5412 sds argsds;
5413 struct redisCommand *cmd;
5414
5415 if (fgets(buf,sizeof(buf),fp) == NULL) {
5416 if (feof(fp))
5417 break;
5418 else
5419 goto readerr;
5420 }
5421 if (buf[0] != '*') goto fmterr;
5422 argc = atoi(buf+1);
5423 argv = zmalloc(sizeof(robj*)*argc);
5424 for (j = 0; j < argc; j++) {
5425 if (fgets(buf,sizeof(buf),fp) == NULL) goto readerr;
5426 if (buf[0] != '$') goto fmterr;
5427 len = strtol(buf+1,NULL,10);
5428 argsds = sdsnewlen(NULL,len);
5429 if (fread(argsds,len,1,fp) == 0) goto fmterr;
5430 argv[j] = createObject(REDIS_STRING,argsds);
5431 if (fread(buf,2,1,fp) == 0) goto fmterr; /* discard CRLF */
5432 }
5433
5434 /* Command lookup */
5435 cmd = lookupCommand(argv[0]->ptr);
5436 if (!cmd) {
5437 redisLog(REDIS_WARNING,"Unknown command '%s' reading the append only file", argv[0]->ptr);
5438 exit(1);
5439 }
5440 /* Try object sharing and encoding */
5441 if (server.shareobjects) {
5442 int j;
5443 for(j = 1; j < argc; j++)
5444 argv[j] = tryObjectSharing(argv[j]);
5445 }
5446 if (cmd->flags & REDIS_CMD_BULK)
5447 tryObjectEncoding(argv[argc-1]);
5448 /* Run the command in the context of a fake client */
5449 fakeClient->argc = argc;
5450 fakeClient->argv = argv;
5451 cmd->proc(fakeClient);
5452 /* Discard the reply objects list from the fake client */
5453 while(listLength(fakeClient->reply))
5454 listDelNode(fakeClient->reply,listFirst(fakeClient->reply));
5455 /* Clean up, ready for the next command */
5456 for (j = 0; j < argc; j++) decrRefCount(argv[j]);
5457 zfree(argv);
5458 }
5459 fclose(fp);
5460 freeFakeClient(fakeClient);
5461 return REDIS_OK;
5462
5463 readerr:
5464 if (feof(fp)) {
5465 redisLog(REDIS_WARNING,"Unexpected end of file reading the append only file");
5466 } else {
5467 redisLog(REDIS_WARNING,"Unrecoverable error reading the append only file: %s", strerror(errno));
5468 }
5469 exit(1);
5470 fmterr:
5471 redisLog(REDIS_WARNING,"Bad file format reading the append only file");
5472 exit(1);
5473 }
5474
5475 /* ================================= Debugging ============================== */
5476
5477 static void debugCommand(redisClient *c) {
5478 if (!strcasecmp(c->argv[1]->ptr,"segfault")) {
5479 *((char*)-1) = 'x';
5480 } else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
5481 dictEntry *de = dictFind(c->db->dict,c->argv[2]);
5482 robj *key, *val;
5483
5484 if (!de) {
5485 addReply(c,shared.nokeyerr);
5486 return;
5487 }
5488 key = dictGetEntryKey(de);
5489 val = dictGetEntryVal(de);
5490 addReplySds(c,sdscatprintf(sdsempty(),
5491 "+Key at:%p refcount:%d, value at:%p refcount:%d encoding:%d\r\n",
5492 key, key->refcount, val, val->refcount, val->encoding));
5493 } else {
5494 addReplySds(c,sdsnew(
5495 "-ERR Syntax error, try DEBUG [SEGFAULT|OBJECT <key>]\r\n"));
5496 }
5497 }
5498
5499 #ifdef HAVE_BACKTRACE
/* Table pairing the name of a function with its address, terminated by a
 * {NULL,0} entry. It is scanned by findFuncName() to translate an address
 * into the name of the function it belongs to. A function that is not
 * listed here cannot be resolved through this table. */
static struct redisFunctionSym symsTable[] = {
{"compareStringObjects", (unsigned long)compareStringObjects},
{"isStringRepresentableAsLong", (unsigned long)isStringRepresentableAsLong},
{"dictEncObjKeyCompare", (unsigned long)dictEncObjKeyCompare},
{"dictEncObjHash", (unsigned long)dictEncObjHash},
{"incrDecrCommand", (unsigned long)incrDecrCommand},
{"freeStringObject", (unsigned long)freeStringObject},
{"freeListObject", (unsigned long)freeListObject},
{"freeSetObject", (unsigned long)freeSetObject},
{"decrRefCount", (unsigned long)decrRefCount},
{"createObject", (unsigned long)createObject},
{"freeClient", (unsigned long)freeClient},
{"rdbLoad", (unsigned long)rdbLoad},
{"rdbSaveStringObject", (unsigned long)rdbSaveStringObject},
{"rdbSaveStringObjectRaw", (unsigned long)rdbSaveStringObjectRaw},
{"addReply", (unsigned long)addReply},
{"addReplySds", (unsigned long)addReplySds},
{"incrRefCount", (unsigned long)incrRefCount},
{"rdbSaveBackground", (unsigned long)rdbSaveBackground},
{"createStringObject", (unsigned long)createStringObject},
{"replicationFeedSlaves", (unsigned long)replicationFeedSlaves},
{"syncWithMaster", (unsigned long)syncWithMaster},
{"tryObjectSharing", (unsigned long)tryObjectSharing},
{"tryObjectEncoding", (unsigned long)tryObjectEncoding},
{"getDecodedObject", (unsigned long)getDecodedObject},
{"removeExpire", (unsigned long)removeExpire},
{"expireIfNeeded", (unsigned long)expireIfNeeded},
{"deleteIfVolatile", (unsigned long)deleteIfVolatile},
{"deleteKey", (unsigned long)deleteKey},
{"getExpire", (unsigned long)getExpire},
{"setExpire", (unsigned long)setExpire},
{"updateSlavesWaitingBgsave", (unsigned long)updateSlavesWaitingBgsave},
{"freeMemoryIfNeeded", (unsigned long)freeMemoryIfNeeded},
{"authCommand", (unsigned long)authCommand},
{"pingCommand", (unsigned long)pingCommand},
{"echoCommand", (unsigned long)echoCommand},
{"setCommand", (unsigned long)setCommand},
{"setnxCommand", (unsigned long)setnxCommand},
{"getCommand", (unsigned long)getCommand},
{"delCommand", (unsigned long)delCommand},
{"existsCommand", (unsigned long)existsCommand},
{"incrCommand", (unsigned long)incrCommand},
{"decrCommand", (unsigned long)decrCommand},
{"incrbyCommand", (unsigned long)incrbyCommand},
{"decrbyCommand", (unsigned long)decrbyCommand},
{"selectCommand", (unsigned long)selectCommand},
{"randomkeyCommand", (unsigned long)randomkeyCommand},
{"keysCommand", (unsigned long)keysCommand},
{"dbsizeCommand", (unsigned long)dbsizeCommand},
{"lastsaveCommand", (unsigned long)lastsaveCommand},
{"saveCommand", (unsigned long)saveCommand},
{"bgsaveCommand", (unsigned long)bgsaveCommand},
{"shutdownCommand", (unsigned long)shutdownCommand},
{"moveCommand", (unsigned long)moveCommand},
{"renameCommand", (unsigned long)renameCommand},
{"renamenxCommand", (unsigned long)renamenxCommand},
{"lpushCommand", (unsigned long)lpushCommand},
{"rpushCommand", (unsigned long)rpushCommand},
{"lpopCommand", (unsigned long)lpopCommand},
{"rpopCommand", (unsigned long)rpopCommand},
{"llenCommand", (unsigned long)llenCommand},
{"lindexCommand", (unsigned long)lindexCommand},
{"lrangeCommand", (unsigned long)lrangeCommand},
{"ltrimCommand", (unsigned long)ltrimCommand},
{"typeCommand", (unsigned long)typeCommand},
{"lsetCommand", (unsigned long)lsetCommand},
{"saddCommand", (unsigned long)saddCommand},
{"sremCommand", (unsigned long)sremCommand},
{"smoveCommand", (unsigned long)smoveCommand},
{"sismemberCommand", (unsigned long)sismemberCommand},
{"scardCommand", (unsigned long)scardCommand},
{"spopCommand", (unsigned long)spopCommand},
{"srandmemberCommand", (unsigned long)srandmemberCommand},
{"sinterCommand", (unsigned long)sinterCommand},
{"sinterstoreCommand", (unsigned long)sinterstoreCommand},
{"sunionCommand", (unsigned long)sunionCommand},
{"sunionstoreCommand", (unsigned long)sunionstoreCommand},
{"sdiffCommand", (unsigned long)sdiffCommand},
{"sdiffstoreCommand", (unsigned long)sdiffstoreCommand},
{"syncCommand", (unsigned long)syncCommand},
{"flushdbCommand", (unsigned long)flushdbCommand},
{"flushallCommand", (unsigned long)flushallCommand},
{"sortCommand", (unsigned long)sortCommand},
{"lremCommand", (unsigned long)lremCommand},
{"infoCommand", (unsigned long)infoCommand},
{"mgetCommand", (unsigned long)mgetCommand},
{"monitorCommand", (unsigned long)monitorCommand},
{"expireCommand", (unsigned long)expireCommand},
{"expireatCommand", (unsigned long)expireatCommand},
{"getsetCommand", (unsigned long)getsetCommand},
{"ttlCommand", (unsigned long)ttlCommand},
{"slaveofCommand", (unsigned long)slaveofCommand},
{"debugCommand", (unsigned long)debugCommand},
{"processCommand", (unsigned long)processCommand},
{"setupSigSegvAction", (unsigned long)setupSigSegvAction},
{"readQueryFromClient", (unsigned long)readQueryFromClient},
{"rdbRemoveTempFile", (unsigned long)rdbRemoveTempFile},
{"msetGenericCommand", (unsigned long)msetGenericCommand},
{"msetCommand", (unsigned long)msetCommand},
{"msetnxCommand", (unsigned long)msetnxCommand},
{"zslCreateNode", (unsigned long)zslCreateNode},
{"zslCreate", (unsigned long)zslCreate},
{"zslFreeNode",(unsigned long)zslFreeNode},
{"zslFree",(unsigned long)zslFree},
{"zslRandomLevel",(unsigned long)zslRandomLevel},
{"zslInsert",(unsigned long)zslInsert},
{"zslDelete",(unsigned long)zslDelete},
{"createZsetObject",(unsigned long)createZsetObject},
{"zaddCommand",(unsigned long)zaddCommand},
{"zrangeGenericCommand",(unsigned long)zrangeGenericCommand},
{"zrangeCommand",(unsigned long)zrangeCommand},
{"zrevrangeCommand",(unsigned long)zrevrangeCommand},
{"zremCommand",(unsigned long)zremCommand},
{"rdbSaveDoubleValue",(unsigned long)rdbSaveDoubleValue},
{"rdbLoadDoubleValue",(unsigned long)rdbLoadDoubleValue},
{"feedAppendOnlyFile",(unsigned long)feedAppendOnlyFile},
{NULL,0}
};
5618
5619 /* This function try to convert a pointer into a function name. It's used in
5620 * oreder to provide a backtrace under segmentation fault that's able to
5621 * display functions declared as static (otherwise the backtrace is useless). */
5622 static char *findFuncName(void *pointer, unsigned long *offset){
5623 int i, ret = -1;
5624 unsigned long off, minoff = 0;
5625
5626 /* Try to match against the Symbol with the smallest offset */
5627 for (i=0; symsTable[i].pointer; i++) {
5628 unsigned long lp = (unsigned long) pointer;
5629
5630 if (lp != (unsigned long)-1 && lp >= symsTable[i].pointer) {
5631 off=lp-symsTable[i].pointer;
5632 if (ret < 0 || off < minoff) {
5633 minoff=off;
5634 ret=i;
5635 }
5636 }
5637 }
5638 if (ret == -1) return NULL;
5639 *offset = minoff;
5640 return symsTable[ret].name;
5641 }
5642
/* Return the instruction pointer stored in the signal context 'uc', or NULL
 * when none of the platform cases below applies.
 *
 * The original x86 case tested __X86_64__, a spelling the compilers do not
 * define (GCC and Clang define __x86_64__), and it reads REG_EIP, so x86-64
 * builds reaching that point always ended up in the NULL fallback. A
 * dedicated x86-64 case reading REG_RIP is used instead; it is enabled only
 * when the C library headers actually expose REG_RIP, so builds that do not
 * see it keep the previous behavior. */
static void *getMcontextEip(ucontext_t *uc) {
#if defined(__FreeBSD__)
    return (void*) uc->uc_mcontext.mc_eip;
#elif defined(__dietlibc__)
    return (void*) uc->uc_mcontext.eip;
#elif defined(__APPLE__) && !defined(MAC_OS_X_VERSION_10_6)
    return (void*) uc->uc_mcontext->__ss.__eip;
#elif defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_6)
  #if defined(_STRUCT_X86_THREAD_STATE64) && !defined(__i386__)
    return (void*) uc->uc_mcontext->__ss.__rip;
  #else
    return (void*) uc->uc_mcontext->__ss.__eip;
  #endif
#elif defined(__x86_64__) && defined(REG_RIP) /* x86-64 with gregs[] */
    return (void*) uc->uc_mcontext.gregs[REG_RIP];
#elif defined(__i386__) || defined(__X86_64__) /* Linux x86 */
    return (void*) uc->uc_mcontext.gregs[REG_EIP];
#elif defined(__ia64__) /* Linux IA64 */
    return (void*) uc->uc_mcontext.sc_ip;
#else
    return NULL;
#endif
}
5664
5665 static void segvHandler(int sig, siginfo_t *info, void *secret) {
5666 void *trace[100];
5667 char **messages = NULL;
5668 int i, trace_size = 0;
5669 unsigned long offset=0;
5670 time_t uptime = time(NULL)-server.stat_starttime;
5671 ucontext_t *uc = (ucontext_t*) secret;
5672 REDIS_NOTUSED(info);
5673
5674 redisLog(REDIS_WARNING,
5675 "======= Ooops! Redis %s got signal: -%d- =======", REDIS_VERSION, sig);
5676 redisLog(REDIS_WARNING, "%s", sdscatprintf(sdsempty(),
5677 "redis_version:%s; "
5678 "uptime_in_seconds:%d; "
5679 "connected_clients:%d; "
5680 "connected_slaves:%d; "
5681 "used_memory:%zu; "
5682 "changes_since_last_save:%lld; "
5683 "bgsave_in_progress:%d; "
5684 "last_save_time:%d; "
5685 "total_connections_received:%lld; "
5686 "total_commands_processed:%lld; "
5687 "role:%s;"
5688 ,REDIS_VERSION,
5689 uptime,
5690 listLength(server.clients)-listLength(server.slaves),
5691 listLength(server.slaves),
5692 server.usedmemory,
5693 server.dirty,
5694 server.bgsaveinprogress,
5695 server.lastsave,
5696 server.stat_numconnections,
5697 server.stat_numcommands,
5698 server.masterhost == NULL ? "master" : "slave"
5699 ));
5700
5701 trace_size = backtrace(trace, 100);
5702 /* overwrite sigaction with caller's address */
5703 if (getMcontextEip(uc) != NULL) {
5704 trace[1] = getMcontextEip(uc);
5705 }
5706 messages = backtrace_symbols(trace, trace_size);
5707
5708 for (i=1; i<trace_size; ++i) {
5709 char *fn = findFuncName(trace[i], &offset), *p;
5710
5711 p = strchr(messages[i],'+');
5712 if (!fn || (p && ((unsigned long)strtol(p+1,NULL,10)) < offset)) {
5713 redisLog(REDIS_WARNING,"%s", messages[i]);
5714 } else {
5715 redisLog(REDIS_WARNING,"%d redis-server %p %s + %d", i, trace[i], fn, (unsigned int)offset);
5716 }
5717 }
5718 free(messages);
5719 exit(0);
5720 }
5721
5722 static void setupSigSegvAction(void) {
5723 struct sigaction act;
5724
5725 sigemptyset (&act.sa_mask);
5726 /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction
5727 * is used. Otherwise, sa_handler is used */
5728 act.sa_flags = SA_NODEFER | SA_ONSTACK | SA_RESETHAND | SA_SIGINFO;
5729 act.sa_sigaction = segvHandler;
5730 sigaction (SIGSEGV, &act, NULL);
5731 sigaction (SIGBUS, &act, NULL);
5732 sigaction (SIGFPE, &act, NULL);
5733 sigaction (SIGILL, &act, NULL);
5734 sigaction (SIGBUS, &act, NULL);
5735 return;
5736 }
5737 #else /* HAVE_BACKTRACE */
/* Variant used when HAVE_BACKTRACE is not defined: no handler is installed. */
static void setupSigSegvAction(void) {
    /* Intentionally empty */
}
5740 #endif /* HAVE_BACKTRACE */
5741
5742 /* =================================== Main! ================================ */
5743
5744 #ifdef __linux__
/* Return the content of /proc/sys/vm/overcommit_memory converted with
 * atoi(), or -1 if the file can't be opened or read. */
int linuxOvercommitMemoryValue(void) {
    char line[64];
    int value = -1;
    FILE *procfile = fopen("/proc/sys/vm/overcommit_memory","r");

    if (procfile == NULL) return -1;
    if (fgets(line,sizeof(line),procfile) != NULL)
        value = atoi(line);
    fclose(procfile);
    return value;
}
5758
5759 void linuxOvercommitMemoryWarning(void) {
5760 if (linuxOvercommitMemoryValue() == 0) {
5761 redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low condition memory. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
5762 }
5763 }
5764 #endif /* __linux__ */
5765
5766 static void daemonize(void) {
5767 int fd;
5768 FILE *fp;
5769
5770 if (fork() != 0) exit(0); /* parent exits */
5771 setsid(); /* create a new session */
5772
5773 /* Every output goes to /dev/null. If Redis is daemonized but
5774 * the 'logfile' is set to 'stdout' in the configuration file
5775 * it will not log at all. */
5776 if ((fd = open("/dev/null", O_RDWR, 0)) != -1) {
5777 dup2(fd, STDIN_FILENO);
5778 dup2(fd, STDOUT_FILENO);
5779 dup2(fd, STDERR_FILENO);
5780 if (fd > STDERR_FILENO) close(fd);
5781 }
5782 /* Try to write the pid file */
5783 fp = fopen(server.pidfile,"w");
5784 if (fp) {
5785 fprintf(fp,"%d\n",getpid());
5786 fclose(fp);
5787 }
5788 }
5789
5790 int main(int argc, char **argv) {
5791 initServerConfig();
5792 if (argc == 2) {
5793 ResetServerSaveParams();
5794 loadServerConfig(argv[1]);
5795 } else if (argc > 2) {
5796 fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf]\n");
5797 exit(1);
5798 } else {
5799 redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'");
5800 }
5801 initServer();
5802 if (server.daemonize) daemonize();
5803 redisLog(REDIS_NOTICE,"Server started, Redis version " REDIS_VERSION);
5804 #ifdef __linux__
5805 linuxOvercommitMemoryWarning();
5806 #endif
5807 if (server.appendonly) {
5808 if (loadAppendOnlyFile(server.appendfilename) == REDIS_OK)
5809 redisLog(REDIS_NOTICE,"DB loaded from append only file");
5810 } else {
5811 if (rdbLoad(server.dbfilename) == REDIS_OK)
5812 redisLog(REDIS_NOTICE,"DB loaded from disk");
5813 }
5814 if (aeCreateFileEvent(server.el, server.fd, AE_READABLE,
5815 acceptHandler, NULL, NULL) == AE_ERR) oom("creating file event");
5816 redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port);
5817 aeMain(server.el);
5818 aeDeleteEventLoop(server.el);
5819 return 0;
5820 }